Spaces:

CrucibleAI
/

ControlNetMediaPipeFaceSD21

Runtime error

Joseph Catrambone committed on Apr 4, 2023

Commit

d491fdb

•

1 Parent(s): a1a7f32

Automatically scale input images to 512x512 with center crop if they're non-square.

Files changed (1) hide show

app.py CHANGED Viewed

@@ -24,9 +24,21 @@ ddim_sampler = DDIMSampler(model)  # ControlNet _only_ works with DDIM.
 def process(input_image: Image.Image, prompt, a_prompt, n_prompt, max_faces: int, min_confidence: float, num_samples, ddim_steps, guess_mode, strength, scale, seed: int, eta):
     with torch.no_grad():
         empty = generate_annotation(input_image, max_faces, min_confidence)
         visualization = Image.fromarray(empty)  # Save to help debug.
         empty = numpy.moveaxis(empty, 2, 0)  # h, w, c -> c, h, w
         control = torch.from_numpy(empty.copy()).float().to(device) / 255.0
         control = torch.stack([control for _ in range(num_samples)], dim=0)
@@ -81,7 +93,7 @@ with block:
         gr.Markdown("## Control Stable Diffusion with a Facial Pose")
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(source='upload', type="numpy")
             prompt = gr.Textbox(label="Prompt")
             run_button = gr.Button(label="Run")
             with gr.Accordion("Advanced options", open=False):

 def process(input_image: Image.Image, prompt, a_prompt, n_prompt, max_faces: int, min_confidence: float, num_samples, ddim_steps, guess_mode, strength, scale, seed: int, eta):
     with torch.no_grad():
+        # Scale to 512x512.
+        img_size = input_image.size
+        scale_factor = 512/min(img_size)
+        input_image = input_image.resize((1+int(img_size[0]*scale_factor), 1+int(img_size[1]*scale_factor)))
+        img_size = input_image.size
+        left_padding = (img_size[0] - 512)//2
+        top_padding = (img_size[1] - 512)//2
+        input_image = input_image.crop((left_padding, top_padding, left_padding+512, top_padding+512))
+        # Generate annotation
+        input_image = numpy.asarray(input_image)
         empty = generate_annotation(input_image, max_faces, min_confidence)
         visualization = Image.fromarray(empty)  # Save to help debug.
+        # Prep for network:
         empty = numpy.moveaxis(empty, 2, 0)  # h, w, c -> c, h, w
         control = torch.from_numpy(empty.copy()).float().to(device) / 255.0
         control = torch.stack([control for _ in range(num_samples)], dim=0)
         gr.Markdown("## Control Stable Diffusion with a Facial Pose")
     with gr.Row():
         with gr.Column():
+            input_image = gr.Image(source='upload', type="pil")
             prompt = gr.Textbox(label="Prompt")
             run_button = gr.Button(label="Run")
             with gr.Accordion("Advanced options", open=False):