Bugrahan Donmez
committed on
Commit
·
97e39c4
1
Parent(s):
8b79fdf
Add canny detector
Browse files- annotator/canny/__init__.py +6 -0
- app.py +32 -5
annotator/canny/__init__.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
|
3 |
+
|
4 |
+
class CannyDetector:
    """Thin annotator wrapper around OpenCV's Canny edge detector.

    Produces an edge map from an input image, matching the callable
    interface of the other annotators in this package.
    """

    def __call__(self, img, low_threshold=100, high_threshold=200, safe=False, threshold=200):
        """Return the Canny edge map of *img*.

        img: image array accepted by ``cv2.Canny`` (typically uint8).
        low_threshold / high_threshold: hysteresis thresholds forwarded
            to ``cv2.Canny``.

        NOTE(review): ``safe`` and ``threshold`` are accepted but never
        used — presumably kept for signature parity with the other
        detectors; confirm before relying on them.
        """
        edge_map = cv2.Canny(img, low_threshold, high_threshold)
        return edge_map
|
app.py
CHANGED
@@ -8,6 +8,8 @@ import cv2
|
|
8 |
from annotator.util import resize_image
|
9 |
from annotator.hed import SOFT_HEDdetector
|
10 |
from annotator.lineart import LineartDetector
|
|
|
|
|
11 |
from diffusers import UNet2DConditionModel, ControlNetModel
|
12 |
from transformers import CLIPVisionModelWithProjection
|
13 |
from huggingface_hub import snapshot_download
|
@@ -18,6 +20,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
18 |
|
19 |
contour_detector = SOFT_HEDdetector()
|
20 |
lineart_detector = LineartDetector()
|
|
|
21 |
|
22 |
base_model_path = "runwayml/stable-diffusion-v1-5"
|
23 |
transformer_block_path = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
|
@@ -48,6 +51,16 @@ contour_content_fusion_encoder = ControlNetModel.from_unet(contour_unet)
|
|
48 |
contour_pipe = StyleContentStableDiffusionControlNetPipeline.from_pretrained(base_model_path, controlnet=contour_content_fusion_encoder)
|
49 |
contour_styleshot = StyleShot(device, contour_pipe, contour_ip_ckpt, contour_style_aware_encoder_path, contour_transformer_block_path)
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
lineart_ip_ckpt = os.path.join(styleshot_lineart_model_path, "pretrained_weight/ip.bin")
|
52 |
lineart_style_aware_encoder_path = os.path.join(styleshot_lineart_model_path, "pretrained_weight/style_aware_encoder.bin")
|
53 |
lineart_transformer_block_path = transformer_block_path
|
@@ -66,11 +79,14 @@ def process(style_image, content_image, prompt, num_samples, image_resolution, c
|
|
66 |
btns = []
|
67 |
contour_content_images = []
|
68 |
contour_results = []
|
|
|
|
|
69 |
lineart_content_images = []
|
70 |
lineart_results = []
|
71 |
|
72 |
type1 = 'Contour'
|
73 |
type2 = 'Lineart'
|
|
|
74 |
|
75 |
if btn1 == type1 or content_image is None:
|
76 |
style_shots = [contour_styleshot]
|
@@ -78,9 +94,12 @@ def process(style_image, content_image, prompt, num_samples, image_resolution, c
|
|
78 |
elif btn1 == type2:
|
79 |
style_shots = [lineart_styleshot]
|
80 |
btns = [type2]
|
|
|
|
|
|
|
81 |
elif btn1 == "Both":
|
82 |
-
style_shots = [contour_styleshot, lineart_styleshot]
|
83 |
-
btns = [type1, type2]
|
84 |
|
85 |
ori_style_image = style_image.copy()
|
86 |
|
@@ -103,6 +122,9 @@ def process(style_image, content_image, prompt, num_samples, image_resolution, c
|
|
103 |
elif btn == type2:
|
104 |
content_image = resize_image(ori_content_image, image_resolution)
|
105 |
content_image = lineart_detector(content_image, coarse=False)
|
|
|
|
|
|
|
106 |
|
107 |
content_image = Image.fromarray(content_image)
|
108 |
else:
|
@@ -127,12 +149,17 @@ def process(style_image, content_image, prompt, num_samples, image_resolution, c
|
|
127 |
elif btn == type2:
|
128 |
lineart_content_images = [content_image]
|
129 |
lineart_results = g_images[0]
|
|
|
|
|
|
|
130 |
if ori_content_image is None:
|
131 |
contour_content_images = []
|
132 |
lineart_results = []
|
133 |
lineart_content_images = []
|
|
|
|
|
134 |
|
135 |
-
return [contour_results, contour_content_images, lineart_results, lineart_content_images]
|
136 |
|
137 |
|
138 |
block = gr.Blocks().queue()
|
@@ -147,10 +174,10 @@ with block:
|
|
147 |
with gr.Column():
|
148 |
content_image = gr.Image(sources=['upload'], type="numpy", label='Content Image (optional)')
|
149 |
btn1 = gr.Radio(
|
150 |
-
choices=["Contour", "Lineart", "
|
151 |
interactive=True,
|
152 |
label="Preprocessor",
|
153 |
-
value="
|
154 |
)
|
155 |
gr.Markdown("We recommend using 'Contour' for sparse control and 'Lineart' for detailed control. If you choose 'Both', we will provide results for two types of control. If you choose 'Contour', you can adjust the 'Contour Threshold' under the 'Advanced options' for the level of detail in control. ")
|
156 |
with gr.Row():
|
|
|
8 |
from annotator.util import resize_image
|
9 |
from annotator.hed import SOFT_HEDdetector
|
10 |
from annotator.lineart import LineartDetector
|
11 |
+
from annotator.lineart import LineartDetector
|
12 |
+
from annotator.canny import CannyDetector
|
13 |
from diffusers import UNet2DConditionModel, ControlNetModel
|
14 |
from transformers import CLIPVisionModelWithProjection
|
15 |
from huggingface_hub import snapshot_download
|
|
|
20 |
|
21 |
contour_detector = SOFT_HEDdetector()
|
22 |
lineart_detector = LineartDetector()
|
23 |
+
canny_detector = CannyDetector()
|
24 |
|
25 |
base_model_path = "runwayml/stable-diffusion-v1-5"
|
26 |
transformer_block_path = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
|
|
|
51 |
contour_pipe = StyleContentStableDiffusionControlNetPipeline.from_pretrained(base_model_path, controlnet=contour_content_fusion_encoder)
|
52 |
contour_styleshot = StyleShot(device, contour_pipe, contour_ip_ckpt, contour_style_aware_encoder_path, contour_transformer_block_path)
|
53 |
|
54 |
+
# weights for ip-adapter and our content-fusion encoder
|
55 |
+
canny_ip_ckpt = os.path.join(styleshot_model_path, "pretrained_weight/ip.bin")
|
56 |
+
canny_style_aware_encoder_path = os.path.join(styleshot_model_path, "pretrained_weight/style_aware_encoder.bin")
|
57 |
+
canny_transformer_block_path = transformer_block_path
|
58 |
+
canny_unet = UNet2DConditionModel.from_pretrained(base_model_path, subfolder="unet")
|
59 |
+
canny_content_fusion_encoder = ControlNetModel.from_unet(canny_unet)
|
60 |
+
|
61 |
+
canny_pipe = StyleContentStableDiffusionControlNetPipeline.from_pretrained(base_model_path, controlnet=canny_content_fusion_encoder)
|
62 |
+
canny_styleshot = StyleShot(device, canny_pipe, canny_ip_ckpt, canny_style_aware_encoder_path, canny_transformer_block_path)
|
63 |
+
|
64 |
lineart_ip_ckpt = os.path.join(styleshot_lineart_model_path, "pretrained_weight/ip.bin")
|
65 |
lineart_style_aware_encoder_path = os.path.join(styleshot_lineart_model_path, "pretrained_weight/style_aware_encoder.bin")
|
66 |
lineart_transformer_block_path = transformer_block_path
|
|
|
79 |
btns = []
|
80 |
contour_content_images = []
|
81 |
contour_results = []
|
82 |
+
canny_content_images = []
|
83 |
+
canny_results = []
|
84 |
lineart_content_images = []
|
85 |
lineart_results = []
|
86 |
|
87 |
type1 = 'Contour'
|
88 |
type2 = 'Lineart'
|
89 |
+
type3 = 'Canny'
|
90 |
|
91 |
if btn1 == type1 or content_image is None:
|
92 |
style_shots = [contour_styleshot]
|
|
|
94 |
elif btn1 == type2:
|
95 |
style_shots = [lineart_styleshot]
|
96 |
btns = [type2]
|
97 |
+
elif btn1 == type3:
|
98 |
+
style_shots = [canny_styleshot]
|
99 |
+
btns = [type3]
|
100 |
elif btn1 == "Both":
|
101 |
+
style_shots = [contour_styleshot, lineart_styleshot, canny_styleshot]
|
102 |
+
btns = [type1, type2, type3]
|
103 |
|
104 |
ori_style_image = style_image.copy()
|
105 |
|
|
|
122 |
elif btn == type2:
|
123 |
content_image = resize_image(ori_content_image, image_resolution)
|
124 |
content_image = lineart_detector(content_image, coarse=False)
|
125 |
+
elif btn == type3:
|
126 |
+
content_image = resize_image(ori_content_image, image_resolution)
|
127 |
+
content_image = canny_detector(content_image)
|
128 |
|
129 |
content_image = Image.fromarray(content_image)
|
130 |
else:
|
|
|
149 |
elif btn == type2:
|
150 |
lineart_content_images = [content_image]
|
151 |
lineart_results = g_images[0]
|
152 |
+
elif btn == type3:
|
153 |
+
canny_content_images = [content_image]
|
154 |
+
canny_results = g_images[0]
|
155 |
if ori_content_image is None:
|
156 |
contour_content_images = []
|
157 |
lineart_results = []
|
158 |
lineart_content_images = []
|
159 |
+
canny_results = []
|
160 |
+
canny_content_images = []
|
161 |
|
162 |
+
return [contour_results, contour_content_images, lineart_results, lineart_content_images, canny_results, canny_content_images]
|
163 |
|
164 |
|
165 |
block = gr.Blocks().queue()
|
|
|
174 |
with gr.Column():
|
175 |
content_image = gr.Image(sources=['upload'], type="numpy", label='Content Image (optional)')
|
176 |
btn1 = gr.Radio(
|
177 |
+
choices=["Contour", "Lineart", "Canny", "All"],
|
178 |
interactive=True,
|
179 |
label="Preprocessor",
|
180 |
+
value="All",
|
181 |
)
|
182 |
gr.Markdown("We recommend using 'Contour' for sparse control and 'Lineart' for detailed control. If you choose 'Both', we will provide results for two types of control. If you choose 'Contour', you can adjust the 'Contour Threshold' under the 'Advanced options' for the level of detail in control. ")
|
183 |
with gr.Row():
|