fantos committed
Commit 78d6af0
1 Parent(s): 1b21c76

Update app.py

Files changed (1)
  1. app.py +85 -74
app.py CHANGED
@@ -5,20 +5,20 @@ import cv2
  import gradio as gr
  import numpy as np
  from huggingface_hub import snapshot_download
- from transformers import CLIPVisionModelWithProjection,CLIPImageProcessor
  from diffusers.utils import load_image
  from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import StableDiffusionXLControlNetImg2ImgPipeline
  from kolors.models.modeling_chatglm import ChatGLMModel
  from kolors.models.tokenization_chatglm import ChatGLMTokenizer
  from kolors.models.controlnet import ControlNetModel
- from diffusers import AutoencoderKL
  from kolors.models.unet_2d_condition import UNet2DConditionModel
  from diffusers import EulerDiscreteScheduler
  from PIL import Image
  from annotator.midas import MidasDetector
  from annotator.dwpose import DWposeDetector
  from annotator.util import resize_image, HWC3
-

  device = "cuda"
  ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
@@ -37,7 +37,7 @@ controlnet_pose = ControlNetModel.from_pretrained(f"{ckpt_dir_pose}", revision=N

  pipe_depth = StableDiffusionXLControlNetImg2ImgPipeline(
  vae=vae,
- controlnet = controlnet_depth,
  text_encoder=text_encoder,
  tokenizer=tokenizer,
  unet=unet,
@@ -47,7 +47,7 @@ pipe_depth = StableDiffusionXLControlNetImg2ImgPipeline(

  pipe_canny = StableDiffusionXLControlNetImg2ImgPipeline(
  vae=vae,
- controlnet = controlnet_canny,
  text_encoder=text_encoder,
  tokenizer=tokenizer,
  unet=unet,
@@ -57,7 +57,7 @@ pipe_canny = StableDiffusionXLControlNetImg2ImgPipeline(

  pipe_pose = StableDiffusionXLControlNetImg2ImgPipeline(
  vae=vae,
- controlnet = controlnet_pose,
  text_encoder=text_encoder,
  tokenizer=tokenizer,
  unet=unet,
@@ -65,6 +65,16 @@ pipe_pose = StableDiffusionXLControlNetImg2ImgPipeline(
  force_zeros_for_empty_prompt=False
  )

  @spaces.GPU
  def process_canny_condition(image, canny_threods=[100,200]):
  np_image = image.copy()
@@ -90,7 +100,7 @@ def process_dwpose_condition(image, res=1024):
  img = resize_image(HWC3(image), res)
  out_res, out_img = model_dwpose(image)
  result = HWC3(out_img)
- result = cv2.resize( result, (w,h) )
  return Image.fromarray(result)

  MAX_SEED = np.iinfo(np.int32).max
@@ -99,7 +109,7 @@ MAX_IMAGE_SIZE = 1024
  @spaces.GPU
  def infer_depth(prompt,
  image = None,
- negative_prompt = "nsfw，脸部阴影，低分辨率，jpeg伪影、模糊、糟糕，黑脸，霓虹灯",
  seed = 397886929,
  randomize_seed = False,
  guidance_scale = 6.0,
@@ -108,31 +118,32 @@ def infer_depth(prompt,
  control_guidance_end = 0.9,
  strength = 1.0
  ):
  if randomize_seed:
  seed = random.randint(0, MAX_SEED)
  generator = torch.Generator().manual_seed(seed)
- init_image = resize_image(image, MAX_IMAGE_SIZE)
  pipe = pipe_depth.to("cuda")
- condi_img = process_depth_condition_midas( np.array(init_image), MAX_IMAGE_SIZE)
  image = pipe(
- prompt= prompt ,
- image = init_image,
- controlnet_conditioning_scale = controlnet_conditioning_scale,
- control_guidance_end = control_guidance_end,
- strength= strength ,
- control_image = condi_img,
- negative_prompt= negative_prompt ,
- num_inference_steps= num_inference_steps,
- guidance_scale= guidance_scale,
  num_images_per_prompt=1,
  generator=generator,
  ).images[0]
- return [condi_img, image], seed

  @spaces.GPU
  def infer_canny(prompt,
  image = None,
- negative_prompt = "nsfw，脸部阴影，低分辨率，jpeg伪影、模糊、糟糕，黑脸，霓虹灯",
  seed = 397886929,
  randomize_seed = False,
  guidance_scale = 6.0,
@@ -141,31 +152,32 @@ def infer_canny(prompt,
  control_guidance_end = 0.9,
  strength = 1.0
  ):
  if randomize_seed:
  seed = random.randint(0, MAX_SEED)
  generator = torch.Generator().manual_seed(seed)
- init_image = resize_image(image, MAX_IMAGE_SIZE)
  pipe = pipe_canny.to("cuda")
  condi_img = process_canny_condition(np.array(init_image))
  image = pipe(
- prompt= prompt ,
- image = init_image,
- controlnet_conditioning_scale = controlnet_conditioning_scale,
- control_guidance_end = control_guidance_end,
- strength= strength ,
- control_image = condi_img,
- negative_prompt= negative_prompt ,
- num_inference_steps= num_inference_steps,
- guidance_scale= guidance_scale,
  num_images_per_prompt=1,
  generator=generator,
  ).images[0]
- return [condi_img, image], seed

  @spaces.GPU
  def infer_pose(prompt,
  image = None,
- negative_prompt = "nsfw，脸部阴影，低分辨率，jpeg伪影、模糊、糟糕，黑脸，霓虹灯",
  seed = 66,
  randomize_seed = False,
  guidance_scale = 6.0,
@@ -174,27 +186,27 @@ def infer_pose(prompt,
  control_guidance_end = 0.9,
  strength = 1.0
  ):
  if randomize_seed:
  seed = random.randint(0, MAX_SEED)
  generator = torch.Generator().manual_seed(seed)
- init_image = resize_image(image, MAX_IMAGE_SIZE)
  pipe = pipe_pose.to("cuda")
  condi_img = process_dwpose_condition(np.array(init_image), MAX_IMAGE_SIZE)
  image = pipe(
- prompt= prompt ,
- image = init_image,
- controlnet_conditioning_scale = controlnet_conditioning_scale,
- control_guidance_end = control_guidance_end,
- strength= strength ,
- control_image = condi_img,
- negative_prompt= negative_prompt ,
- num_inference_steps= num_inference_steps,
- guidance_scale= guidance_scale,
  num_images_per_prompt=1,
  generator=generator,
  ).images[0]
- return [condi_img, image], seed
-

  canny_examples = [
  ["아름다운 소녀, 고품질, 매우 선명, 생생한 색상, 초고해상도, 최상의 품질, 8k, 고화질, 4K",
@@ -223,7 +235,6 @@ footer {
  }
  """

-
  def load_description(fp):
  with open(fp, 'r', encoding='utf-8') as f:
  content = f.read()
@@ -235,7 +246,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as Kolors:
  with gr.Row():
  prompt = gr.Textbox(
  label="프롬프트",
- placeholder="프롬프트를 입력하세요",
  lines=2
  )
  with gr.Row():
@@ -301,51 +312,51 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as Kolors:
  with gr.Column(elem_id="col-right"):
  result = gr.Gallery(label="결과", show_label=False, columns=2)
  seed_used = gr.Number(label="사용된 시드")
-
-

  with gr.Row():
  gr.Examples(
- fn = infer_canny,
- examples = canny_examples,
- inputs = [prompt, image],
- outputs = [result, seed_used],
- label = "Canny"
  )
  with gr.Row():
  gr.Examples(
- fn = infer_depth,
- examples = depth_examples,
- inputs = [prompt, image],
- outputs = [result, seed_used],
- label = "Depth"
  )

  with gr.Row():
  gr.Examples(
- fn = infer_pose,
- examples = pose_examples,
- inputs = [prompt, image],
- outputs = [result, seed_used],
- label = "Pose"
  )

  canny_button.click(
- fn = infer_canny,
- inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
- outputs = [result, seed_used]
  )

  depth_button.click(
- fn = infer_depth,
- inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
- outputs = [result, seed_used]
  )

  pose_button.click(
- fn = infer_pose,
- inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
- outputs = [result, seed_used]
  )

- Kolors.queue().launch(debug=True)
 
  import gradio as gr
  import numpy as np
  from huggingface_hub import snapshot_download
+ from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
  from diffusers.utils import load_image
  from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import StableDiffusionXLControlNetImg2ImgPipeline
  from kolors.models.modeling_chatglm import ChatGLMModel
  from kolors.models.tokenization_chatglm import ChatGLMTokenizer
  from kolors.models.controlnet import ControlNetModel
+ from diffusers import AutoencoderKL
  from kolors.models.unet_2d_condition import UNet2DConditionModel
  from diffusers import EulerDiscreteScheduler
  from PIL import Image
  from annotator.midas import MidasDetector
  from annotator.dwpose import DWposeDetector
  from annotator.util import resize_image, HWC3
+ from transformers import pipeline

  device = "cuda"
  ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
 

  pipe_depth = StableDiffusionXLControlNetImg2ImgPipeline(
  vae=vae,
+ controlnet=controlnet_depth,
  text_encoder=text_encoder,
  tokenizer=tokenizer,
  unet=unet,
 

  pipe_canny = StableDiffusionXLControlNetImg2ImgPipeline(
  vae=vae,
+ controlnet=controlnet_canny,
  text_encoder=text_encoder,
  tokenizer=tokenizer,
  unet=unet,
 

  pipe_pose = StableDiffusionXLControlNetImg2ImgPipeline(
  vae=vae,
+ controlnet=controlnet_pose,
  text_encoder=text_encoder,
  tokenizer=tokenizer,
  unet=unet,
 
  force_zeros_for_empty_prompt=False
  )

+ # Initialize the translation model
+ translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
+
+ # Add a function to process the prompt
+ def process_prompt(prompt):
+ if any('\u3131' <= char <= '\u3163' or '\uac00' <= char <= '\ud7a3' for char in prompt):
+ translated = translator(prompt)[0]['translation_text']
+ return prompt, translated
+ return prompt, prompt
+
  @spaces.GPU
  def process_canny_condition(image, canny_threods=[100,200]):
  np_image = image.copy()
 
  img = resize_image(HWC3(image), res)
  out_res, out_img = model_dwpose(image)
  result = HWC3(out_img)
+ result = cv2.resize(result, (w,h))
  return Image.fromarray(result)

  MAX_SEED = np.iinfo(np.int32).max
 
  @spaces.GPU
  def infer_depth(prompt,
  image = None,
+ negative_prompt = "NSFW, facial shadow, low resolution, JPEG artifacts, blurry, poor quality, blackface, neon lights.",
  seed = 397886929,
  randomize_seed = False,
  guidance_scale = 6.0,
 
  control_guidance_end = 0.9,
  strength = 1.0
  ):
+ original_prompt, english_prompt = process_prompt(prompt)
  if randomize_seed:
  seed = random.randint(0, MAX_SEED)
  generator = torch.Generator().manual_seed(seed)
+ init_image = resize_image(image, MAX_IMAGE_SIZE)
  pipe = pipe_depth.to("cuda")
+ condi_img = process_depth_condition_midas(np.array(init_image), MAX_IMAGE_SIZE)
  image = pipe(
+ prompt=english_prompt,
+ image=init_image,
+ controlnet_conditioning_scale=controlnet_conditioning_scale,
+ control_guidance_end=control_guidance_end,
+ strength=strength,
+ control_image=condi_img,
+ negative_prompt=negative_prompt,
+ num_inference_steps=num_inference_steps,
+ guidance_scale=guidance_scale,
  num_images_per_prompt=1,
  generator=generator,
  ).images[0]
+ return [condi_img, image], seed, original_prompt, english_prompt

  @spaces.GPU
  def infer_canny(prompt,
  image = None,
+ negative_prompt = "NSFW, facial shadow, low resolution, JPEG artifacts, blurry, poor quality, blackface, neon lights.",
  seed = 397886929,
  randomize_seed = False,
  guidance_scale = 6.0,
 
  control_guidance_end = 0.9,
  strength = 1.0
  ):
+ original_prompt, english_prompt = process_prompt(prompt)
  if randomize_seed:
  seed = random.randint(0, MAX_SEED)
  generator = torch.Generator().manual_seed(seed)
+ init_image = resize_image(image, MAX_IMAGE_SIZE)
  pipe = pipe_canny.to("cuda")
  condi_img = process_canny_condition(np.array(init_image))
  image = pipe(
+ prompt=english_prompt,
+ image=init_image,
+ controlnet_conditioning_scale=controlnet_conditioning_scale,
+ control_guidance_end=control_guidance_end,
+ strength=strength,
+ control_image=condi_img,
+ negative_prompt=negative_prompt,
+ num_inference_steps=num_inference_steps,
+ guidance_scale=guidance_scale,
  num_images_per_prompt=1,
  generator=generator,
  ).images[0]
+ return [condi_img, image], seed, original_prompt, english_prompt

  @spaces.GPU
  def infer_pose(prompt,
  image = None,
+ negative_prompt = "NSFW, facial shadow, low resolution, JPEG artifacts, blurry, poor quality, blackface, neon lights.",
  seed = 66,
  randomize_seed = False,
  guidance_scale = 6.0,
 
  control_guidance_end = 0.9,
  strength = 1.0
  ):
+ original_prompt, english_prompt = process_prompt(prompt)
  if randomize_seed:
  seed = random.randint(0, MAX_SEED)
  generator = torch.Generator().manual_seed(seed)
+ init_image = resize_image(image, MAX_IMAGE_SIZE)
  pipe = pipe_pose.to("cuda")
  condi_img = process_dwpose_condition(np.array(init_image), MAX_IMAGE_SIZE)
  image = pipe(
+ prompt=english_prompt,
+ image=init_image,
+ controlnet_conditioning_scale=controlnet_conditioning_scale,
+ control_guidance_end=control_guidance_end,
+ strength=strength,
+ control_image=condi_img,
+ negative_prompt=negative_prompt,
+ num_inference_steps=num_inference_steps,
+ guidance_scale=guidance_scale,
  num_images_per_prompt=1,
  generator=generator,
  ).images[0]
+ return [condi_img, image], seed, original_prompt, english_prompt

  canny_examples = [
  ["아름다운 소녀, 고품질, 매우 선명, 생생한 색상, 초고해상도, 최상의 품질, 8k, 고화질, 4K",
 
  }
  """

  def load_description(fp):
  with open(fp, 'r', encoding='utf-8') as f:
  content = f.read()
 
  with gr.Row():
  prompt = gr.Textbox(
  label="프롬프트",
+ placeholder="프롬프트를 입력하세요 (한글 또는 영어)",
  lines=2
  )
  with gr.Row():
 
  with gr.Column(elem_id="col-right"):
  result = gr.Gallery(label="결과", show_label=False, columns=2)
  seed_used = gr.Number(label="사용된 시드")
+ original_prompt_display = gr.Textbox(label="원본 프롬프트")
+ english_prompt_display = gr.Textbox(label="영어 프롬프트")

  with gr.Row():
  gr.Examples(
+ fn=infer_canny,
+ examples=canny_examples,
+ inputs=[prompt, image],
+ outputs=[result, seed_used, original_prompt_display, english_prompt_display],
+ label="Canny"
  )
  with gr.Row():
  gr.Examples(
+ fn=infer_depth,
+ examples=depth_examples,
+ inputs=[prompt, image],
+ outputs=[result, seed_used, original_prompt_display, english_prompt_display],
+ label="Depth"
  )

  with gr.Row():
  gr.Examples(
+ fn=infer_pose,
+ examples=pose_examples,
+ inputs=[prompt, image],
+ outputs=[result, seed_used, original_prompt_display, english_prompt_display],
+ label="Pose"
  )

  canny_button.click(
+ fn=infer_canny,
+ inputs=[prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
+ outputs=[result, seed_used, original_prompt_display, english_prompt_display]
  )

  depth_button.click(
+ fn=infer_depth,
+ inputs=[prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
+ outputs=[result, seed_used, original_prompt_display, english_prompt_display]
  )

  pose_button.click(
+ fn=infer_pose,
+ inputs=[prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
+ outputs=[result, seed_used, original_prompt_display, english_prompt_display]
  )

+ Kolors.queue().launch(debug=True)
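
For reference, the Korean-detection and translation step added in this commit can be tried on its own. The sketch below mirrors the process_prompt logic and the Helsinki-NLP/opus-mt-ko-en model name taken from the diff; the sample prompts and the standalone-script framing are illustrative, not part of the committed app.

# Standalone sketch of the prompt-translation helper introduced in this commit.
# Model name comes from the diff; the example prompts are made up for illustration.
from transformers import pipeline

translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")

def process_prompt(prompt):
    # Treat the prompt as Korean if it contains Hangul jamo (U+3131-U+3163)
    # or precomposed syllables (U+AC00-U+D7A3); otherwise pass it through unchanged.
    if any('\u3131' <= char <= '\u3163' or '\uac00' <= char <= '\ud7a3' for char in prompt):
        translated = translator(prompt)[0]['translation_text']
        return prompt, translated
    return prompt, prompt

print(process_prompt("아름다운 소녀, 고품질"))   # Korean input -> (original, English translation)
print(process_prompt("a beautiful girl, 8k"))    # English input -> returned unchanged in both slots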