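"""IOPaint model wrapper around AnyText, a diffusion-based inpainting
pipeline that renders user-specified text into the masked region of an image."""
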
import torch
from huggingface_hub import hf_hub_download

from iopaint.const import ANYTEXT_NAME
from iopaint.model.anytext.anytext_pipeline import AnyTextPipeline
from iopaint.model.base import DiffusionInpaintModel
from iopaint.model.utils import get_torch_dtype, is_local_files_only
from iopaint.schema import InpaintRequest


class AnyText(DiffusionInpaintModel):
    name = ANYTEXT_NAME
    # Inputs are padded so height and width are multiples of 64 before inference.
    pad_mod = 64
    # AnyText writes text into the masked region; it is not an object-removal model.
    is_erase_model = False

    @staticmethod
    def download(local_files_only=False):
        hf_hub_download(
            repo_id=ANYTEXT_NAME,
            filename="model_index.json",
            local_files_only=local_files_only,
        )
        ckpt_path = hf_hub_download(
            repo_id=ANYTEXT_NAME,
            filename="pytorch_model.fp16.safetensors",
            local_files_only=local_files_only,
        )
        font_path = hf_hub_download(
            repo_id=ANYTEXT_NAME,
            filename="SourceHanSansSC-Medium.otf",
            local_files_only=local_files_only,
        )
        return ckpt_path, font_path

    def init_model(self, device, **kwargs):
        local_files_only = is_local_files_only(**kwargs)
        ckpt_path, font_path = self.download(local_files_only)
        # fp16 is selected on GPU unless disabled via the ``no_half`` kwarg.
        use_gpu, torch_dtype = get_torch_dtype(device, kwargs.get("no_half", False))
        self.model = AnyTextPipeline(
            ckpt_path=ckpt_path,
            font_path=font_path,
            device=device,
            use_fp16=torch_dtype == torch.float16,
        )
        self.callback = kwargs.pop("callback", None)

    def forward(self, image, mask, config: InpaintRequest):
        """Input image and output image have same size
        image: [H, W, C] RGB
        mask: [H, W, 1] 255 means area to inpainting
        return: BGR IMAGE
        """
        height, width = image.shape[:2]
        # Normalize the mask to [0, 1] and black out the region to repaint.
        mask = mask.astype("float32") / 255.0
        masked_image = image * (1 - mask)

        # The pipeline returns a list of RGB ndarrays plus a status code and
        # a warning message (the latter two are unused here).
        results, rtn_code, rtn_warning = self.model(
            image=image,
            masked_image=masked_image,
            prompt=config.prompt,
            negative_prompt=config.negative_prompt,
            num_inference_steps=config.sd_steps,
            strength=config.sd_strength,
            guidance_scale=config.sd_guidance_scale,
            height=height,
            width=width,
            seed=config.sd_seed,
            sort_priority="y",
            callback=self.callback,
        )
        # The pipeline outputs RGB; reverse the channel order to return BGR,
        # matching the documented return format.
        inpainted_bgr_image = results[0][..., ::-1]
        return inpainted_bgr_image
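

# Hedged usage sketch: one way this class might be driven. This is an
# illustration, not part of the upstream module; the base-class call
# signatures, the InpaintRequest fields, and the file paths are assumptions.
if __name__ == "__main__":
    import cv2
    import numpy as np

    # Pre-fetch the checkpoint and bundled font (cached by huggingface_hub).
    AnyText.download()

    # Assumption: the DiffusionInpaintModel constructor takes the device and
    # forwards extra kwargs to init_model.
    model = AnyText(device="cuda")

    image = cv2.cvtColor(cv2.imread("input.png"), cv2.COLOR_BGR2RGB)
    mask = np.zeros((*image.shape[:2], 1), dtype=np.uint8)
    mask[100:200, 100:320] = 255  # 255 marks where text should be rendered

    # Assumption: InpaintRequest is a schema with defaults for the sd_* fields.
    config = InpaintRequest(prompt='photo of a sign that says "hello"')

    # Calling the model (rather than forward directly) lets the base class
    # pad the inputs to a multiple of pad_mod -- an assumption about its API.
    bgr_result = model(image, mask, config)
    cv2.imwrite("output.png", bgr_result)  # result is BGR, ready for imwrite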