Spaces:
Runtime error
Runtime error
alfredplpl
commited on
Commit
•
3e6f170
1
Parent(s):
119dfed
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, EulerAncestralDiscreteScheduler
|
2 |
+
from transformers import CLIPFeatureExtractor
|
3 |
+
import gradio as gr
|
4 |
+
import torch
|
5 |
+
from PIL import Image
|
6 |
+
import random
|
7 |
+
import os
|
8 |
+
from huggingface_hub import hf_hub_download
|
9 |
+
|
10 |
+
|
11 |
+
# Model repository on the Hugging Face Hub and the token used to access it.
model_id = 'aipicasso/picasso-diffusion-1-1'
auth_token = os.environ.get("ACCESS_TOKEN")

# Choose device and precision together: half precision only when CUDA is
# available, full precision on CPU. Previously the pipelines were moved to
# "cuda" unconditionally, which crashed at startup on CPU-only hosts even
# though the dtype selection already handled that case.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    model_id, subfolder="scheduler", use_auth_token=auth_token)
feature_extractor = CLIPFeatureExtractor.from_pretrained(
    model_id, use_auth_token=auth_token)

# Text-to-image pipeline.
pipe_merged = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    scheduler=scheduler, use_auth_token=auth_token)

# Image-to-image pipeline; loaded separately and with the safety checker
# disabled.
pipe_i2i_merged = StableDiffusionImg2ImgPipeline.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    scheduler=scheduler,
    requires_safety_checker=False,
    safety_checker=None,
    feature_extractor=feature_extractor, use_auth_token=auth_token
)

pipe = pipe_merged.to(device)
pipe_i2i = pipe_i2i_merged.to(device)

# xformers memory-efficient attention is GPU-only, so enable it only on CUDA.
if device == "cuda":
    pipe.enable_xformers_memory_efficient_attention()
    pipe_i2i.enable_xformers_memory_efficient_attention()
|
35 |
+
|
36 |
+
# Load the negative textual-inversion embedding and expose it to prompts
# through the module-level string ``nfixer``.
embeddings_path = hf_hub_download(repo_id=model_id, filename="embeddings/negative/nfixer.pt", use_auth_token=auth_token)
# NOTE(review): torch.load unpickles the file; acceptable only because the
# file comes from the model's own repository.
embeddings_dict = torch.load(embeddings_path)
print(embeddings_dict)
if "string_to_param" in embeddings_dict:
    # Multi-vector layout: one placeholder token per embedding row.
    embeddings = next(iter(embeddings_dict['string_to_param'].values()))
    nfixer_tokens = [f"sksd{chr(i+65)}" for i in range(len(embeddings))]
    nfixer = "".join(nfixer_tokens)
    # The two pipelines were loaded separately, so each has its own tokenizer
    # and text encoder; register the tokens on both, otherwise img2img would
    # not recognise the placeholder tokens in the negative prompt.
    for target in (pipe, pipe_i2i):
        target.tokenizer.add_tokens(nfixer_tokens)
        # Resize once after all tokens are added instead of once per token.
        target.text_encoder.resize_token_embeddings(len(target.tokenizer))
        weights = target.text_encoder.get_input_embeddings().weight.data
        for token, emb in zip(nfixer_tokens, embeddings):
            weights[target.tokenizer.convert_tokens_to_ids(token)] = emb
else:
    # Single-vector layout: the dict's first key is the placeholder token and
    # its value is the embedding. (The original code referenced an undefined
    # ``placeholder_token`` here and raised NameError.)
    nfixer = list(embeddings_dict.keys())[0]
    embeddings = embeddings_dict[nfixer]
    for target in (pipe, pipe_i2i):
        target.tokenizer.add_tokens(nfixer)
        target.text_encoder.resize_token_embeddings(len(target.tokenizer))
        weights = target.text_encoder.get_input_embeddings().weight.data
        token_id = target.tokenizer.convert_tokens_to_ids(nfixer)
        weights[token_id] = embeddings.to(weights.dtype)
|
56 |
+
|
57 |
+
# Load the positive textual-inversion embedding and expose it to prompts
# through the module-level string ``embellish1``.
embeddings_path = hf_hub_download(repo_id=model_id, filename="embeddings/positive/embellish1.pt", use_auth_token=auth_token)
# NOTE(review): torch.load unpickles the file; acceptable only because the
# file comes from the model's own repository.
embeddings_dict = torch.load(embeddings_path)
print(embeddings_dict)
if "string_to_param" in embeddings_dict:
    # Multi-vector layout: one placeholder token per embedding row. Two
    # trailing letters are used so more than 26 rows still get distinct names.
    embeddings = next(iter(embeddings_dict['string_to_param'].values()))
    embellish_tokens = [f"kskd{chr(i%26+65)}{chr(i//26+65)}" for i in range(len(embeddings))]
    embellish1 = "".join(embellish_tokens)
    # The two pipelines were loaded separately, so each has its own tokenizer
    # and text encoder; register the tokens on both so img2img recognises them.
    for target in (pipe, pipe_i2i):
        target.tokenizer.add_tokens(embellish_tokens)
        # Resize once after all tokens are added instead of once per token.
        target.text_encoder.resize_token_embeddings(len(target.tokenizer))
        weights = target.text_encoder.get_input_embeddings().weight.data
        for token, emb in zip(embellish_tokens, embeddings):
            weights[target.tokenizer.convert_tokens_to_ids(token)] = emb
else:
    # Single-vector layout: the dict's first key is the placeholder token and
    # its value is the embedding. (The original code referenced the undefined
    # names ``embellish2`` and ``placeholder_token`` here and raised NameError.)
    embellish1 = list(embeddings_dict.keys())[0]
    embeddings = embeddings_dict[embellish1]
    for target in (pipe, pipe_i2i):
        target.tokenizer.add_tokens(embellish1)
        target.text_encoder.resize_token_embeddings(len(target.tokenizer))
        weights = target.text_encoder.get_input_embeddings().weight.data
        token_id = target.tokenizer.convert_tokens_to_ids(embellish1)
        weights[token_id] = embeddings.to(weights.dtype)
|
77 |
+
|
78 |
+
def error_str(error, title="Error"):
    """Format *error* as a Markdown snippet with a level-4 heading.

    Returns an empty string when *error* is falsy, so the output widget
    stays blank when there is nothing to report.
    """
    if not error:
        return ""
    return f"#### {title}\n{error}"
|
81 |
+
|
82 |
+
def inference(prompt, guidance, steps, image_size="Square", seed=0, img=None, strength=0.5, neg_prompt="", disable_auto_prompt_correction=False, image_style="Animetic", original_model=False):
    """Generate one image from the UI inputs.

    Args:
        prompt: Positive prompt text.
        guidance: Guidance scale forwarded to the pipeline.
        steps: Number of inference steps.
        image_size: "Portrait", "Landscape", "Highreso." or anything else
            (treated as square 768x768).
        seed: Random seed; 0 means "do not seed" (no generator is passed).
        img: Optional input image; when given, image-to-image is used.
        strength: Transformation strength for image-to-image.
        neg_prompt: Negative prompt text.
        disable_auto_prompt_correction: Skip the automatic prompt expansion.
        image_style: "Animetic" or another style name.
        original_model: Accepted for interface compatibility; not used.

    Returns:
        ``(image, None)`` on success, or ``(None, markdown_error)`` when the
        pipeline call raises.
    """
    # The seed may arrive as a float (e.g. from a slider widget) while
    # manual_seed expects an int.
    seed = int(seed)
    if seed != 0:
        # Create the generator on whatever device is actually available
        # instead of hard-coding CUDA.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        generator = torch.Generator(device).manual_seed(seed)
    else:
        generator = None

    prompt, neg_prompt = auto_prompt_correction(prompt, neg_prompt, disable_auto_prompt_correction, image_style)

    # (width, height) per size preset; unknown presets fall back to square.
    sizes = {
        "Portrait": (768, 1024),
        "Landscape": (1024, 768),
        "Highreso.": (1024, 1024),
    }
    width, height = sizes.get(image_size, (768, 768))

    print(prompt, neg_prompt)

    # UI boundary: report any pipeline failure as Markdown instead of raising.
    try:
        if img is not None:
            return img_to_img(prompt, neg_prompt, img, strength, guidance, steps, width, height, generator), None
        return txt_to_img(prompt, neg_prompt, guidance, steps, width, height, generator), None
    except Exception as e:
        return None, error_str(e)
|
111 |
+
def auto_prompt_correction(prompt_ui,neg_prompt_ui,disable_auto_prompt_correction,image_style):
    """Expand the user's prompts with style and quality keywords.

    Both prompts are lower-cased. Depending on which keyword family the
    prompt mentions (people, animals, or scenery, checked in that order),
    a matching positive/negative template is applied. A prompt that matches
    nothing is returned lower-cased but otherwise unchanged.

    Args:
        prompt_ui: Positive prompt as entered in the UI.
        neg_prompt_ui: Negative prompt as entered in the UI.
        disable_auto_prompt_correction: When true, only the style prefix is
            prepended to the prompt.
        image_style: "Animetic" uses the plain "anime" style; any other value
            also appends the module-level ``embellish1`` tokens.

    Returns:
        Tuple ``(prompt, neg_prompt)``.
    """
    prompt = str(prompt_ui).lower()
    neg_prompt = str(neg_prompt_ui).lower()

    style = "anime" if image_style == "Animetic" else f"anime,{embellish1}"

    if disable_auto_prompt_correction:
        return f"{style}, {prompt}", neg_prompt

    # Shared ending of the three character-oriented negative prompts.
    quality_tail = "bad pupil, disfigured, poorly drawn face, mutation, mutated, (extra limb), (ugly), (poorly drawn hands), bad hands, fused fingers, messy drawing, broken legs censor, low quality, (mutated hands and fingers:1.5), (long body :1.3), (mutation, poorly drawn :1.2), ((bad eyes)), ui, error, missing fingers, fused fingers, one hand with more than 5 fingers, one hand with less than 5 fingers, one hand with more than 5 digit, one hand with less than 5 digit, extra digit, fewer digits, fused digit, missing digit, bad digit, liquid digit, long body, uncoordinated body, unnatural body, lowres, jpeg artifacts"

    # Nothing entered at all: fall back to the built-in sample prompt.
    if prompt == "" and neg_prompt == "":
        prompt = f"{style}, masterpiece, portrait, a girl with flowers, good pupil, detailed"
        neg_prompt = f"{nfixer},(((deformed))), blurry, ((((bad anatomy)))),3d, cg, text , {quality_tail}"
        return prompt, neg_prompt

    # Commas and underscores count as separators; split on any whitespace.
    words = prompt.replace(",", " ").replace("_", " ").split()

    def mentions(keyword):
        # Match the keyword as a run of consecutive words so that multi-word
        # keywords such as "mount fuji" can match. Previously they were
        # compared against single words and never could.
        parts = keyword.split()
        span = len(parts)
        return any(words[i:i + span] == parts for i in range(len(words) - span + 1))

    human_words = ["1girl","girl","maid","maids","female","1woman","woman","girls","2girls","3girls","4girls","5girls","a couple of girls","women","1boy","boy","boys","a couple of boys","2boys","male","1man","1handsome","1bishounen","man","men","guy","guys"]
    if any(mentions(word) for word in human_words):
        prompt = f"{style}, masterpiece, {prompt}, good pupil, detailed"
        neg_prompt = f"{nfixer},(((deformed))), blurry, ((((bad anatomy)))), {neg_prompt}, 3d, cg, text, {quality_tail}"
        return prompt, neg_prompt

    animal_words = ["cat","dog","bird","pigeon","rabbit","bunny","horse"]
    if any(mentions(word) for word in animal_words):
        prompt = f"{style}, a {prompt}, 4k, detailed"
        neg_prompt = f"{nfixer}, girl, (((deformed))), blurry, ((((bad anatomy)))), {neg_prompt}, 3d, cg, text, {quality_tail}"
        return prompt, neg_prompt

    background_words = ["mount fuji","mt. fuji","building", "buildings", "tokyo", "kyoto", "nara", "shibuya", "shinjuku"]
    # The first matching keyword (in list order) replaces the whole prompt.
    word = next((w for w in background_words if mentions(w)), None)
    if word is not None:
        prompt = f"{style}, shinkai makoto, {word}, highly detailed"
        neg_prompt = f"girl, (((deformed))), {neg_prompt}, girl, boy, photo, people, low quality, ui, error, lowres, jpeg artifacts, 2d, 3d, cg, text"
        return prompt, neg_prompt

    return prompt, neg_prompt
|
156 |
+
|
157 |
+
def txt_to_img(prompt, neg_prompt, guidance, steps, width, height, generator):
    """Run the text-to-image pipeline once and return its first image."""
    call_options = {
        "negative_prompt": neg_prompt,
        "num_inference_steps": int(steps),
        "guidance_scale": guidance,
        "width": width,
        "height": height,
        "generator": generator,
    }
    output = pipe(prompt, **call_options)
    return output.images[0]
|
168 |
+
|
169 |
+
def img_to_img(prompt, neg_prompt, img, strength, guidance, steps, width, height, generator):
    """Run the image-to-image pipeline once and return its first image.

    The input image is first rescaled, keeping its aspect ratio, by the
    largest factor that still fits it inside ``width`` x ``height``.
    """
    scale = min(height / img.height, width / img.width)
    fitted_size = (int(img.width * scale), int(img.height * scale))
    fitted = img.resize(fitted_size, Image.LANCZOS)

    output = pipe_i2i(
        prompt,
        negative_prompt=neg_prompt,
        image=fitted,
        num_inference_steps=int(steps),
        strength=strength,
        guidance_scale=guidance,
        generator=generator,
    )
    return output.images[0]
|
182 |
+
|
183 |
+
# Stylesheet passed to the Blocks app.
css = """.main-div div{display:inline-flex;align-items:center;gap:.8rem;font-size:1.75rem}.main-div div h1{font-weight:900;margin-bottom:7px}.main-div p{margin-bottom:10px;font-size:94%}a{text-decoration:underline}.tabs{margin-top:0;margin-bottom:0}#gallery{min-height:20rem}
"""

# Banner fragment describing the hardware the demo is running on.
if torch.cuda.is_available():
    device_note = "<b>GPU 🔥</b>"
else:
    device_note = "<b>CPU 🥶</b>. For faster inference it is recommended to <b>upgrade to GPU in <a href='https://huggingface.co/spaces/akhaliq/cool-japan-diffusion-2-1-0/settings'>Settings</a></b>"

# Header shown at the top of the page.
header_html = f"""
<div class="main-div">
<div>
<h1>Picasso Diffusion 1.1 Demo</h1>
</div>
<p>
Demo for <a href="https://huggingface.co/alfredplpl/picasso-diffusion-1-1">Picasso Diffusion 1.1</a><br>
</p>
<p>
サンプル: そのままGenerateボタンを押してください。<br>
sample : Click "Generate" button without any prompts.
</p>
<p>
sample prompt1 : girl, kimono
</p>
<p>
sample prompt2 : boy, armor
</p>
Running on {device_note} <br>
<a style="display:inline-block" href="https://huggingface.co/spaces/aipicasso/picasso-diffusion-latest-demo?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> to say goodbye from waiting for the generating.
</div>
"""

# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation had been stripped - confirm the layout against the running app.
with gr.Blocks(css=css) as demo:
    gr.HTML(header_html)

    with gr.Row():

        # Left column: style selector, prompt box, result image and errors.
        with gr.Column(scale=55):
            with gr.Group():
                with gr.Row():
                    image_style = gr.Radio(["Realistic", "Animetic"])
                    image_style.show_label = False
                    image_style.value = "Animetic"

                with gr.Row():
                    prompt = gr.Textbox(label="Prompt", show_label=False, max_lines=2, placeholder="[your prompt]").style(container=False)
                    generate = gr.Button(value="Generate").style(rounded=(False, True, True, False))

                image_out = gr.Image(height=768, width=768)
            error_output = gr.Markdown()

        # Right column: generation options and the image-to-image input.
        with gr.Column(scale=45):
            with gr.Tab("Options"):
                with gr.Group():
                    neg_prompt = gr.Textbox(label="Negative prompt", placeholder="What to exclude from the image")
                    disable_auto_prompt_correction = gr.Checkbox(label="Disable auto prompt corretion.")
                    with gr.Row():
                        image_size = gr.Radio(["Portrait", "Landscape", "Square", "Highreso."])
                        image_size.show_label = False
                        image_size.value = "Square"

                    with gr.Row():
                        guidance = gr.Slider(label="Guidance scale", value=7.5, maximum=15)
                        steps = gr.Slider(label="Steps", value=20, minimum=2, maximum=75, step=1)

                    seed = gr.Slider(0, 2147483647, label='Seed (0 = random)', value=0, step=1)

            with gr.Tab("Image to image"):
                with gr.Group():
                    image = gr.Image(label="Image", height=256, tool="editor", type="pil")
                    strength = gr.Slider(label="Transformation strength", minimum=0, maximum=1, step=0.01, value=0.5)

    # Order matches inference()'s positional parameters; no widget feeds
    # original_model, so it keeps its default.
    inputs = [
        prompt,
        guidance,
        steps,
        image_size,
        seed,
        image,
        strength,
        neg_prompt,
        disable_auto_prompt_correction,
        image_style,
    ]
    outputs = [image_out, error_output]

    # Pressing Enter in the prompt box and clicking Generate do the same thing.
    prompt.submit(inference, inputs=inputs, outputs=outputs)
    generate.click(inference, inputs=inputs, outputs=outputs)

demo.queue(concurrency_count=1)
demo.launch()
|