File size: 2,372 Bytes
82612ce
ad11739
82612ce
 
002a488
35143eb
ad11739
 
82612ce
 
ad11739
 
82612ce
ad11739
82612ce
ad11739
82612ce
 
 
 
 
 
 
7c88e57
82612ce
 
 
 
002a488
cf32f12
 
 
 
 
 
 
 
 
 
 
 
 
ad11739
35143eb
 
 
 
 
 
 
 
ad11739
 
 
 
 
 
 
 
 
82612ce
 
cf32f12
82612ce
ad11739
35143eb
 
 
82612ce
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from typing import Dict, List, Any
import torch
from huggingface_hub import hf_hub_download
from diffusers import DiffusionPipeline
from safetensors.torch import load_file
from transformers import pipeline


# Pick the compute device once at import time: CUDA when torch reports it as
# available, otherwise the CPU. The handler below reads this module-level name.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("device ~>", device)


class EndpointHandler:
    """Callable handler: SDXL + emoji LoRA generation followed by RMBG-1.4.

    On construction it loads ``stabilityai/stable-diffusion-xl-base-1.0``,
    fuses the ``SvenN/sdxl-emoji`` LoRA into it, registers the ``<s0>`` /
    ``<s1>`` textual-inversion tokens on both SDXL text encoders, and builds
    the ``briaai/RMBG-1.4`` image-segmentation pipeline. Calling the instance
    generates an image for a prompt and passes the first result through the
    segmentation pipeline.
    """

    def __init__(self, path=""):
        """Load every model the handler needs.

        Args:
            path: Only printed for diagnostics; all weights are fetched by
                their Hub repo ids, not from this path.
        """
        print("path ~>", path)

        # Half precision is only requested on CUDA; on CPU the pipeline's
        # default dtype is used (torch_dtype=None).
        self.pipe = DiffusionPipeline.from_pretrained(
            "stabilityai/stable-diffusion-xl-base-1.0",
            torch_dtype=torch.float16 if device.type == "cuda" else None,
            variant="fp16",
        ).to(device)

        # Merge the emoji LoRA into the base weights at 0.6 strength.
        self.pipe.load_lora_weights("SvenN/sdxl-emoji", weight_name="lora.safetensors")
        self.pipe.fuse_lora(lora_scale=0.6)

        # The same repo ships the learned token embeddings, one entry per
        # SDXL text encoder ("text_encoders_0" / "text_encoders_1").
        embedding_path = hf_hub_download(
            repo_id="SvenN/sdxl-emoji", filename="embeddings.pti", repo_type="model"
        )
        state_dict = load_file(embedding_path)

        self.pipe.load_textual_inversion(
            state_dict["text_encoders_0"],
            token=["<s0>", "<s1>"],
            text_encoder=self.pipe.text_encoder,
            tokenizer=self.pipe.tokenizer,
        )
        self.pipe.load_textual_inversion(
            state_dict["text_encoders_1"],
            token=["<s0>", "<s1>"],
            text_encoder=self.pipe.text_encoder_2,
            tokenizer=self.pipe.tokenizer_2,
        )

        # Revision is pinned because trust_remote_code=True executes code
        # from the model repository.
        self.remove_bg = pipeline(
            "image-segmentation",
            model="briaai/RMBG-1.4",
            device=device,
            revision="22532afbdabdc36b2d30a334076720ac72a06f83",
            trust_remote_code=True,
        )

    def __call__(self, data: Dict[str, Any]) -> Any:
        """Generate an image for the request and run it through RMBG-1.4.

        Args:
            data: Request payload. ``data["inputs"]`` is the prompt; if the
                key is absent the whole payload is forwarded as the prompt
                (unchanged legacy fallback). ``data["parameters"]`` is an
                optional dict of keyword arguments forwarded to the diffusion
                pipeline; a missing or ``None`` value means "no extra
                arguments". The payload itself is not modified.

        Returns:
            Whatever ``self.remove_bg`` returns for the first generated
            image. NOTE(review): presumably a PIL image with the background
            removed -- confirm against the briaai/RMBG-1.4 model card. The
            result is not base64-encoded here.
        """
        # Read without mutating the caller's dict.
        inputs = data.get("inputs", data)
        # "parameters" is optional: indexing it directly raised KeyError for
        # prompt-only requests.
        parameters = data.get("parameters") or {}

        # NOTE(review): the prompt is forwarded unchanged. The "<s0>"/"<s1>"
        # trigger tokens are NOT prepended automatically, so callers must put
        # them in the prompt themselves if they want them.
        images = self.pipe(inputs, **parameters).images
        image = images[0]

        return self.remove_bg(image)


# Manual smoke test: build the handler (downloads/loads all models) and run a
# single prompt through it.
if __name__ == "__main__":
    handler = EndpointHandler()
    print(handler)
    # "parameters" is supplied explicitly (empty) so the request has the same
    # shape the handler reads: a prompt plus a dict of pipeline kwargs.
    output = handler(
        {"inputs": "emoji of a tiger face, white background", "parameters": {}}
    )
    print(output)