rbanfield committed on
Commit cc579f2
1 Parent(s): 727f0f1

Upload folder using huggingface_hub

Files changed (2)
  1. README.md +2 -8
  2. app.py +215 -0
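
The commit message above indicates the files were pushed with huggingface_hub's upload_folder API. As a minimal sketch (not taken from this commit), an upload like this is typically issued as follows; folder_path and repo_id are placeholder values:

    from huggingface_hub import HfApi

    api = HfApi()
    api.upload_folder(
        folder_path=".",                  # placeholder: local folder holding README.md and app.py
        repo_id="<user>/<space-name>",    # placeholder Space id
        repo_type="space",
        commit_message="Upload folder using huggingface_hub",
    )
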
README.md CHANGED
@@ -1,12 +1,6 @@
  ---
  title: ControlNetV1.1
- emoji: 😻
- colorFrom: gray
- colorTo: indigo
- sdk: gradio
- sdk_version: 3.44.4
  app_file: app.py
- pinned: false
+ sdk: gradio
+ sdk_version: 3.42.0
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED
@@ -0,0 +1,215 @@
#!/usr/bin/env python

import cv2
import numpy as np
import torch
import random
import base64
import json
import threading
import uuid
import math

import io
from PIL import Image

from diffusers import AutoencoderKL, StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler, StableDiffusionControlNetImg2ImgPipeline, StableDiffusionXLControlNetPipeline, DiffusionPipeline
from diffusers.utils import load_image
from transformers import pipeline

import gradio as gr

# VAE override (loaded here but not passed into the SD 1.5 pipelines below).
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)


# SD 1.5 + ControlNet (Canny) text-to-image pipeline.
canny_controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16)
canny_pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V3.0_VAE", controlnet=canny_controlnet, torch_dtype=torch.float16, use_safetensors=True
)

# SD 1.5 + ControlNet (tile) img2img pipeline, used by the optional tile pass.
canny_controlnet_tile = ControlNetModel.from_pretrained("lllyasviel/control_v11f1e_sd15_tile", torch_dtype=torch.float16)
canny_pipe_img2img = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V3.0_VAE", controlnet=canny_controlnet_tile, torch_dtype=torch.float16, use_safetensors=True
)
canny_pipe_img2img.enable_model_cpu_offload()
canny_pipe_img2img.enable_xformers_memory_efficient_attention()

canny_pipe.scheduler = UniPCMultistepScheduler.from_config(canny_pipe.scheduler.config)
canny_pipe.enable_model_cpu_offload()
canny_pipe.enable_xformers_memory_efficient_attention()

# SDXL + ControlNet (Canny) pipeline plus the SDXL refiner.
controlnet_xl = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=torch.float16
)
vae_xl = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
pipe_xl = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet_xl,
    vae=vae_xl,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
pipe_xl.scheduler = UniPCMultistepScheduler.from_config(pipe_xl.scheduler.config)
pipe_xl.enable_xformers_memory_efficient_attention()
pipe_xl.enable_model_cpu_offload()

refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=pipe_xl.text_encoder_2,
    vae=pipe_xl.vae,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
refiner.enable_xformers_memory_efficient_attention()
refiner.enable_model_cpu_offload()


def resize_image_output(im, width, height):
    # Resize a PIL image to the requested output size with bicubic interpolation.
    im = np.array(im)
    newSize = (width, height)
    img = cv2.resize(im, newSize, interpolation=cv2.INTER_CUBIC)
    img = Image.fromarray(img)
    return img


def resize_image(im, max_size=590000):
    # Scale the image so its pixel count stays between min_size and max_size,
    # then snap both sides to multiples of 8 as required by the diffusion UNet.
    [x, y, z] = im.shape
    new_size = [0, 0]

    min_size = 262144
    if x * y > max_size:
        scale_ratio = math.sqrt((x * y) / max_size)
        new_size[0] = int(x / scale_ratio)
        new_size[1] = int(y / scale_ratio)
    elif x * y <= min_size:
        scale_ratio = math.sqrt((x * y) / min_size)
        new_size[0] = int(x / scale_ratio)
        new_size[1] = int(y / scale_ratio)
    else:
        new_size[0] = int(x)
        new_size[1] = int(y)

    height = (new_size[0] // 8) * 8
    width = (new_size[1] // 8) * 8

    newSize = (width, height)  # cv2.resize expects (width, height)
    img = cv2.resize(im, newSize, interpolation=cv2.INTER_CUBIC)
    return img


# The two functions below (tile img2img pass and SDXL + refiner pass) are alternative
# pipelines that are not wired into the Gradio UI further down.
def process_canny_tile(input_image, control_image, x, y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength_conditioning, scale, seed, eta, low_threshold, high_threshold):

    image = input_image

    return canny_pipe_img2img(
        prompt='',
        image=image,
        control_image=image,
        num_inference_steps=20,
        guidance_scale=4,
        strength=0.3,
        guess_mode=True,
        negative_prompt=n_prompt,
        num_images_per_prompt=1,
        eta=eta,
        generator=torch.Generator(device="cpu").manual_seed(seed)
    )


def process_canny(input_image, x, y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, low_threshold, high_threshold):

    image = input_image

    print(strength)

    return canny_pipe(
        prompt=','.join([prompt, a_prompt]),
        image=image,
        height=x,
        width=y,
        num_inference_steps=ddim_steps,
        guidance_scale=scale,
        negative_prompt=n_prompt,
        num_images_per_prompt=num_samples,
        eta=eta,
        controlnet_conditioning_scale=strength,
        generator=torch.Generator(device="cpu").manual_seed(seed)
    )


def process_canny_sdxl(input_image, x, y, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, guess_mode, strength, scale, seed, eta, low_threshold, high_threshold):

    image = input_image

    # Run the SDXL base pipeline in latent space, then hand the latents to the refiner.
    image = pipe_xl(
        prompt=','.join([prompt, a_prompt]),
        image=image,
        height=x,
        width=y,
        num_inference_steps=ddim_steps,
        guidance_scale=scale,
        negative_prompt=n_prompt,
        num_images_per_prompt=num_samples,
        eta=eta,
        controlnet_conditioning_scale=strength,
        generator=torch.Generator(device="cpu").manual_seed(seed),
        output_type="latent"
    ).images

    return refiner(
        prompt=prompt,
        num_inference_steps=ddim_steps,
        num_images_per_prompt=num_samples,
        denoising_start=0.8,
        image=image,
    )


def process(image, prompt, a_prompt, n_prompt, ddim_steps, strength, scale, seed, eta, low_threshold, high_threshold):
    # Resize the input, extract a Canny edge map, and run the SD 1.5 ControlNet pipeline.
    image = load_image(image)
    image = np.array(image)
    [x_orig, y_orig, z_orig] = image.shape
    image = resize_image(image)
    [x, y, z] = image.shape

    image = cv2.Canny(image, low_threshold, high_threshold)
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    image = Image.fromarray(image)

    # Return the first generated image so the Gradio Image output receives a PIL image.
    return process_canny(image, x, y, prompt, a_prompt, n_prompt, 1, None, ddim_steps, False, float(strength), scale, seed, eta, low_threshold, high_threshold).images[0]


demo = gr.Blocks().queue()

with demo:
    with gr.Row():
        gr.Markdown("## Control Stable Diffusion with Canny Edge Maps")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Input Image")
            input_prompt = gr.Textbox()
            run_button = gr.Button(value="Run")

            with gr.Accordion("Advanced Options"):
                strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
                low_threshold = gr.Slider(label="Canny low threshold", minimum=1, maximum=255, value=100, step=1)
                high_threshold = gr.Slider(label="Canny high threshold", minimum=1, maximum=255, value=200, step=1)
                ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
                scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=7.5, step=0.1)  # default value was 9.0
                seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
                eta = gr.Number(label="eta (DDIM)", value=0.0)
                a_prompt = gr.Textbox(label="Added Prompt", value='best quality, extremely detailed')
                n_prompt = gr.Textbox(label="Negative Prompt",
                                      value='longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality')

        with gr.Column():
            result = gr.Image(label='Output', type="pil")

    ips = [input_image, input_prompt, a_prompt, n_prompt, ddim_steps, strength, scale, seed, eta, low_threshold, high_threshold]
    run_button.click(fn=process, inputs=ips, outputs=[result])


demo.launch()
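
A note on local use (not part of this commit): with the libraries imported above installed, the demo can be started from a shell, after which demo.launch() serves the Gradio UI on the default local port:

    python app.py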