fantos committed
Commit 78d6af0
1 Parent(s): 1b21c76

Update app.py

Files changed (1)
  1. app.py +85 -74
app.py CHANGED
@@ -5,20 +5,20 @@ import cv2
  import gradio as gr
  import numpy as np
  from huggingface_hub import snapshot_download
- from transformers import CLIPVisionModelWithProjection,CLIPImageProcessor
  from diffusers.utils import load_image
  from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import StableDiffusionXLControlNetImg2ImgPipeline
  from kolors.models.modeling_chatglm import ChatGLMModel
  from kolors.models.tokenization_chatglm import ChatGLMTokenizer
  from kolors.models.controlnet import ControlNetModel
- from diffusers import AutoencoderKL
  from kolors.models.unet_2d_condition import UNet2DConditionModel
  from diffusers import EulerDiscreteScheduler
  from PIL import Image
  from annotator.midas import MidasDetector
  from annotator.dwpose import DWposeDetector
  from annotator.util import resize_image, HWC3
-

  device = "cuda"
  ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
@@ -37,7 +37,7 @@ controlnet_pose = ControlNetModel.from_pretrained(f"{ckpt_dir_pose}", revision=N

  pipe_depth = StableDiffusionXLControlNetImg2ImgPipeline(
  vae=vae,
- controlnet = controlnet_depth,
  text_encoder=text_encoder,
  tokenizer=tokenizer,
  unet=unet,
@@ -47,7 +47,7 @@ pipe_depth = StableDiffusionXLControlNetImg2ImgPipeline(

  pipe_canny = StableDiffusionXLControlNetImg2ImgPipeline(
  vae=vae,
- controlnet = controlnet_canny,
  text_encoder=text_encoder,
  tokenizer=tokenizer,
  unet=unet,
@@ -57,7 +57,7 @@ pipe_canny = StableDiffusionXLControlNetImg2ImgPipeline(

  pipe_pose = StableDiffusionXLControlNetImg2ImgPipeline(
  vae=vae,
- controlnet = controlnet_pose,
  text_encoder=text_encoder,
  tokenizer=tokenizer,
  unet=unet,
@@ -65,6 +65,16 @@ pipe_pose = StableDiffusionXLControlNetImg2ImgPipeline(
  force_zeros_for_empty_prompt=False
  )

  @spaces.GPU
  def process_canny_condition(image, canny_threods=[100,200]):
  np_image = image.copy()
@@ -90,7 +100,7 @@ def process_dwpose_condition(image, res=1024):
  img = resize_image(HWC3(image), res)
  out_res, out_img = model_dwpose(image)
  result = HWC3(out_img)
- result = cv2.resize( result, (w,h) )
  return Image.fromarray(result)

  MAX_SEED = np.iinfo(np.int32).max
@@ -99,7 +109,7 @@ MAX_IMAGE_SIZE = 1024
  @spaces.GPU
  def infer_depth(prompt,
  image = None,
- negative_prompt = "nsfw，脸部阴影，低分辨率，jpeg伪影、模糊、糟糕，黑脸，霓虹灯",
  seed = 397886929,
  randomize_seed = False,
  guidance_scale = 6.0,
@@ -108,31 +118,32 @@ def infer_depth(prompt,
  control_guidance_end = 0.9,
  strength = 1.0
  ):
  if randomize_seed:
  seed = random.randint(0, MAX_SEED)
  generator = torch.Generator().manual_seed(seed)
- init_image = resize_image(image, MAX_IMAGE_SIZE)
  pipe = pipe_depth.to("cuda")
- condi_img = process_depth_condition_midas( np.array(init_image), MAX_IMAGE_SIZE)
  image = pipe(
- prompt= prompt ,
- image = init_image,
- controlnet_conditioning_scale = controlnet_conditioning_scale,
- control_guidance_end = control_guidance_end,
- strength= strength ,
- control_image = condi_img,
- negative_prompt= negative_prompt ,
- num_inference_steps= num_inference_steps,
- guidance_scale= guidance_scale,
  num_images_per_prompt=1,
  generator=generator,
  ).images[0]
- return [condi_img, image], seed

  @spaces.GPU
  def infer_canny(prompt,
  image = None,
- negative_prompt = "nsfw，脸部阴影，低分辨率，jpeg伪影、模糊、糟糕，黑脸，霓虹灯",
  seed = 397886929,
  randomize_seed = False,
  guidance_scale = 6.0,
@@ -141,31 +152,32 @@ def infer_canny(prompt,
  control_guidance_end = 0.9,
  strength = 1.0
  ):
  if randomize_seed:
  seed = random.randint(0, MAX_SEED)
  generator = torch.Generator().manual_seed(seed)
- init_image = resize_image(image, MAX_IMAGE_SIZE)
  pipe = pipe_canny.to("cuda")
  condi_img = process_canny_condition(np.array(init_image))
  image = pipe(
- prompt= prompt ,
- image = init_image,
- controlnet_conditioning_scale = controlnet_conditioning_scale,
- control_guidance_end = control_guidance_end,
- strength= strength ,
- control_image = condi_img,
- negative_prompt= negative_prompt ,
- num_inference_steps= num_inference_steps,
- guidance_scale= guidance_scale,
  num_images_per_prompt=1,
  generator=generator,
  ).images[0]
- return [condi_img, image], seed

  @spaces.GPU
  def infer_pose(prompt,
  image = None,
- negative_prompt = "nsfw，脸部阴影，低分辨率，jpeg伪影、模糊、糟糕，黑脸，霓虹灯",
  seed = 66,
  randomize_seed = False,
  guidance_scale = 6.0,
@@ -174,27 +186,27 @@ def infer_pose(prompt,
  control_guidance_end = 0.9,
  strength = 1.0
  ):
  if randomize_seed:
  seed = random.randint(0, MAX_SEED)
  generator = torch.Generator().manual_seed(seed)
- init_image = resize_image(image, MAX_IMAGE_SIZE)
  pipe = pipe_pose.to("cuda")
  condi_img = process_dwpose_condition(np.array(init_image), MAX_IMAGE_SIZE)
  image = pipe(
- prompt= prompt ,
- image = init_image,
- controlnet_conditioning_scale = controlnet_conditioning_scale,
- control_guidance_end = control_guidance_end,
- strength= strength ,
- control_image = condi_img,
- negative_prompt= negative_prompt ,
- num_inference_steps= num_inference_steps,
- guidance_scale= guidance_scale,
  num_images_per_prompt=1,
  generator=generator,
  ).images[0]
- return [condi_img, image], seed
-

  canny_examples = [
  ["아름다운 소녀, 고품질, 매우 선명, 생생한 색상, 초고해상도, 최상의 품질, 8k, 고화질, 4K",
@@ -223,7 +235,6 @@ footer {
  }
  """

-
  def load_description(fp):
  with open(fp, 'r', encoding='utf-8') as f:
  content = f.read()
@@ -235,7 +246,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as Kolors:
  with gr.Row():
  prompt = gr.Textbox(
  label="프롬프트",
- placeholder="프롬프트를 입력하세요",
  lines=2
  )
  with gr.Row():
@@ -301,51 +312,51 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as Kolors:
  with gr.Column(elem_id="col-right"):
  result = gr.Gallery(label="결과", show_label=False, columns=2)
  seed_used = gr.Number(label="사용된 시드")
-
-

  with gr.Row():
  gr.Examples(
- fn = infer_canny,
- examples = canny_examples,
- inputs = [prompt, image],
- outputs = [result, seed_used],
- label = "Canny"
  )
  with gr.Row():
  gr.Examples(
- fn = infer_depth,
- examples = depth_examples,
- inputs = [prompt, image],
- outputs = [result, seed_used],
- label = "Depth"
  )

  with gr.Row():
  gr.Examples(
- fn = infer_pose,
- examples = pose_examples,
- inputs = [prompt, image],
- outputs = [result, seed_used],
- label = "Pose"
  )

  canny_button.click(
- fn = infer_canny,
- inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
- outputs = [result, seed_used]
  )

  depth_button.click(
- fn = infer_depth,
- inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
- outputs = [result, seed_used]
  )

  pose_button.click(
- fn = infer_pose,
- inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
- outputs = [result, seed_used]
  )

- Kolors.queue().launch(debug=True)
 
  import gradio as gr
  import numpy as np
  from huggingface_hub import snapshot_download
+ from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
  from diffusers.utils import load_image
  from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import StableDiffusionXLControlNetImg2ImgPipeline
  from kolors.models.modeling_chatglm import ChatGLMModel
  from kolors.models.tokenization_chatglm import ChatGLMTokenizer
  from kolors.models.controlnet import ControlNetModel
+ from diffusers import AutoencoderKL
  from kolors.models.unet_2d_condition import UNet2DConditionModel
  from diffusers import EulerDiscreteScheduler
  from PIL import Image
  from annotator.midas import MidasDetector
  from annotator.dwpose import DWposeDetector
  from annotator.util import resize_image, HWC3
+ from transformers import pipeline

  device = "cuda"
  ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
 

  pipe_depth = StableDiffusionXLControlNetImg2ImgPipeline(
  vae=vae,
+ controlnet=controlnet_depth,
  text_encoder=text_encoder,
  tokenizer=tokenizer,
  unet=unet,
 

  pipe_canny = StableDiffusionXLControlNetImg2ImgPipeline(
  vae=vae,
+ controlnet=controlnet_canny,
  text_encoder=text_encoder,
  tokenizer=tokenizer,
  unet=unet,
 

  pipe_pose = StableDiffusionXLControlNetImg2ImgPipeline(
  vae=vae,
+ controlnet=controlnet_pose,
  text_encoder=text_encoder,
  tokenizer=tokenizer,
  unet=unet,
 
  force_zeros_for_empty_prompt=False
  )

+ # Initialize the translation model
+ translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
+
+ # Add a function to process the prompt
+ def process_prompt(prompt):
+ if any('\u3131' <= char <= '\u3163' or '\uac00' <= char <= '\ud7a3' for char in prompt):
+ translated = translator(prompt)[0]['translation_text']
+ return prompt, translated
+ return prompt, prompt
+
  @spaces.GPU
  def process_canny_condition(image, canny_threods=[100,200]):
  np_image = image.copy()
 
  img = resize_image(HWC3(image), res)
  out_res, out_img = model_dwpose(image)
  result = HWC3(out_img)
+ result = cv2.resize(result, (w,h))
  return Image.fromarray(result)

  MAX_SEED = np.iinfo(np.int32).max
 
  @spaces.GPU
  def infer_depth(prompt,
  image = None,
+ negative_prompt = "NSFW, facial shadow, low resolution, JPEG artifacts, blurry, poor quality, blackface, neon lights.",
  seed = 397886929,
  randomize_seed = False,
  guidance_scale = 6.0,
 
  control_guidance_end = 0.9,
  strength = 1.0
  ):
+ original_prompt, english_prompt = process_prompt(prompt)
  if randomize_seed:
  seed = random.randint(0, MAX_SEED)
  generator = torch.Generator().manual_seed(seed)
+ init_image = resize_image(image, MAX_IMAGE_SIZE)
  pipe = pipe_depth.to("cuda")
+ condi_img = process_depth_condition_midas(np.array(init_image), MAX_IMAGE_SIZE)
  image = pipe(
+ prompt=english_prompt,
+ image=init_image,
+ controlnet_conditioning_scale=controlnet_conditioning_scale,
+ control_guidance_end=control_guidance_end,
+ strength=strength,
+ control_image=condi_img,
+ negative_prompt=negative_prompt,
+ num_inference_steps=num_inference_steps,
+ guidance_scale=guidance_scale,
  num_images_per_prompt=1,
  generator=generator,
  ).images[0]
+ return [condi_img, image], seed, original_prompt, english_prompt

  @spaces.GPU
  def infer_canny(prompt,
  image = None,
+ negative_prompt = "NSFW, facial shadow, low resolution, JPEG artifacts, blurry, poor quality, blackface, neon lights.",
  seed = 397886929,
  randomize_seed = False,
  guidance_scale = 6.0,
 
  control_guidance_end = 0.9,
  strength = 1.0
  ):
+ original_prompt, english_prompt = process_prompt(prompt)
  if randomize_seed:
  seed = random.randint(0, MAX_SEED)
  generator = torch.Generator().manual_seed(seed)
+ init_image = resize_image(image, MAX_IMAGE_SIZE)
  pipe = pipe_canny.to("cuda")
  condi_img = process_canny_condition(np.array(init_image))
  image = pipe(
+ prompt=english_prompt,
+ image=init_image,
+ controlnet_conditioning_scale=controlnet_conditioning_scale,
+ control_guidance_end=control_guidance_end,
+ strength=strength,
+ control_image=condi_img,
+ negative_prompt=negative_prompt,
+ num_inference_steps=num_inference_steps,
+ guidance_scale=guidance_scale,
  num_images_per_prompt=1,
  generator=generator,
  ).images[0]
+ return [condi_img, image], seed, original_prompt, english_prompt

  @spaces.GPU
  def infer_pose(prompt,
  image = None,
+ negative_prompt = "NSFW, facial shadow, low resolution, JPEG artifacts, blurry, poor quality, blackface, neon lights.",
  seed = 66,
  randomize_seed = False,
  guidance_scale = 6.0,
 
  control_guidance_end = 0.9,
  strength = 1.0
  ):
+ original_prompt, english_prompt = process_prompt(prompt)
  if randomize_seed:
  seed = random.randint(0, MAX_SEED)
  generator = torch.Generator().manual_seed(seed)
+ init_image = resize_image(image, MAX_IMAGE_SIZE)
  pipe = pipe_pose.to("cuda")
  condi_img = process_dwpose_condition(np.array(init_image), MAX_IMAGE_SIZE)
  image = pipe(
+ prompt=english_prompt,
+ image=init_image,
+ controlnet_conditioning_scale=controlnet_conditioning_scale,
+ control_guidance_end=control_guidance_end,
+ strength=strength,
+ control_image=condi_img,
+ negative_prompt=negative_prompt,
+ num_inference_steps=num_inference_steps,
+ guidance_scale=guidance_scale,
  num_images_per_prompt=1,
  generator=generator,
  ).images[0]
+ return [condi_img, image], seed, original_prompt, english_prompt

  canny_examples = [
  ["아름다운 소녀, 고품질, 매우 선명, 생생한 색상, 초고해상도, 최상의 품질, 8k, 고화질, 4K",
 
  }
  """

  def load_description(fp):
  with open(fp, 'r', encoding='utf-8') as f:
  content = f.read()
 
  with gr.Row():
  prompt = gr.Textbox(
  label="프롬프트",
+ placeholder="프롬프트를 입력하세요 (한글 또는 영어)",
  lines=2
  )
  with gr.Row():
 
  with gr.Column(elem_id="col-right"):
  result = gr.Gallery(label="결과", show_label=False, columns=2)
  seed_used = gr.Number(label="사용된 시드")
+ original_prompt_display = gr.Textbox(label="원본 프롬프트")
+ english_prompt_display = gr.Textbox(label="영어 프롬프트")

  with gr.Row():
  gr.Examples(
+ fn=infer_canny,
+ examples=canny_examples,
+ inputs=[prompt, image],
+ outputs=[result, seed_used, original_prompt_display, english_prompt_display],
+ label="Canny"
  )
  with gr.Row():
  gr.Examples(
+ fn=infer_depth,
+ examples=depth_examples,
+ inputs=[prompt, image],
+ outputs=[result, seed_used, original_prompt_display, english_prompt_display],
+ label="Depth"
  )

  with gr.Row():
  gr.Examples(
+ fn=infer_pose,
+ examples=pose_examples,
+ inputs=[prompt, image],
+ outputs=[result, seed_used, original_prompt_display, english_prompt_display],
+ label="Pose"
  )

  canny_button.click(
+ fn=infer_canny,
+ inputs=[prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
+ outputs=[result, seed_used, original_prompt_display, english_prompt_display]
  )

  depth_button.click(
+ fn=infer_depth,
+ inputs=[prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
+ outputs=[result, seed_used, original_prompt_display, english_prompt_display]
  )

  pose_button.click(
+ fn=infer_pose,
+ inputs=[prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
+ outputs=[result, seed_used, original_prompt_display, english_prompt_display]
  )

+ Kolors.queue().launch(debug=True)
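
For reference, the Korean-detection and translation step added in this commit can be tried on its own. The sketch below mirrors the process_prompt logic and the Helsinki-NLP/opus-mt-ko-en model name taken from the diff; the sample prompts and the standalone-script framing are illustrative, not part of the committed app.

# Standalone sketch of the prompt-translation helper introduced in this commit.
# Model name comes from the diff; the example prompts are made up for illustration.
from transformers import pipeline

translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")

def process_prompt(prompt):
    # Treat the prompt as Korean if it contains Hangul jamo (U+3131-U+3163)
    # or precomposed syllables (U+AC00-U+D7A3); otherwise pass it through unchanged.
    if any('\u3131' <= char <= '\u3163' or '\uac00' <= char <= '\ud7a3' for char in prompt):
        translated = translator(prompt)[0]['translation_text']
        return prompt, translated
    return prompt, prompt

print(process_prompt("아름다운 소녀, 고품질"))   # Korean input -> (original, English translation)
print(process_prompt("a beautiful girl, 8k"))    # English input -> returned unchanged in both slots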