Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -19,17 +19,22 @@ css = """
 }
 """
 
-# Device and dtype setup
+# Device and dtype setup with lower precision
 if torch.cuda.is_available():
     power_device = "GPU"
     device = "cuda"
-    dtype = torch.bfloat16
+    dtype = torch.float16  # Changed to float16 for less memory usage
 else:
     power_device = "CPU"
     device = "cpu"
     dtype = torch.float32
 
-huggingface_token = os.getenv("HF_TOKEN")
+# Reduce CUDA memory usage
+torch.cuda.empty_cache()
+if torch.cuda.is_available():
+    torch.cuda.set_per_process_memory_fraction(0.7)  # Use only 70% of GPU memory
+
+huggingface_token = os.getenv("HF_TOKEN")
 
 model_path = snapshot_download(
     repo_id="black-forest-labs/FLUX.1-dev",
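The 0.7 passed to torch.cuda.set_per_process_memory_fraction caps how much of the card this process may allocate; allocations beyond the cap fail with an out-of-memory error instead of exhausting the whole device. A minimal, standalone sketch of the same cap plus a headroom check (the helper name gpu_headroom_gib is illustrative, not part of app.py):

import torch

def gpu_headroom_gib() -> float:
    # mem_get_info() returns (free_bytes, total_bytes) for the current device
    free_bytes, _ = torch.cuda.mem_get_info()
    return free_bytes / 1024**3

if torch.cuda.is_available():
    torch.cuda.set_per_process_memory_fraction(0.7)  # same 70% cap as the commit
    print(f"free GPU memory: {gpu_headroom_gib():.2f} GiB")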
@@ -39,35 +44,44 @@ model_path = snapshot_download(
     token=huggingface_token,
 )
 
-# Load pipeline with memory optimizations
+# Load pipeline with more memory optimizations
 controlnet = FluxControlNetModel.from_pretrained(
     "jasperai/Flux.1-dev-Controlnet-Upscaler",
-    torch_dtype=dtype
+    torch_dtype=dtype,
+    low_cpu_mem_usage=True,
+    use_safetensors=True
 ).to(device)
 
 pipe = FluxControlNetPipeline.from_pretrained(
     model_path,
     controlnet=controlnet,
-    torch_dtype=dtype
+    torch_dtype=dtype,
+    low_cpu_mem_usage=True,
+    use_safetensors=True
 )
-pipe.to(device)
 
-# Enable memory optimizations
+# Enable all possible memory optimizations
 pipe.enable_model_cpu_offload()
-pipe.enable_attention_slicing()
+pipe.enable_attention_slicing(1)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_vae_slicing()
 
+# Further reduce memory usage
 MAX_SEED = 1000000
-MAX_PIXEL_BUDGET = 512 * 512
+MAX_PIXEL_BUDGET = 256 * 256  # Further reduced from 512 * 512
 
 def check_resources():
     if torch.cuda.is_available():
         gpu_memory = torch.cuda.get_device_properties(0).total_memory
         memory_allocated = torch.cuda.memory_allocated(0)
-        if memory_allocated/gpu_memory > 0.…:
+        if memory_allocated/gpu_memory > 0.8:  # 80% threshold
            return False
     return True
 
 def process_input(input_image, upscale_factor, **kwargs):
+    # Convert image to RGB mode to ensure compatibility
+    input_image = input_image.convert('RGB')
+
     w, h = input_image.size
     w_original, h_original = w, h
     aspect_ratio = w / h
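For context, enable_model_cpu_offload() and enable_sequential_cpu_offload() are alternative offloading strategies in diffusers (sequential offload is slower but more aggressive), and an offloaded pipeline is normally not moved to the device by hand. Below is a hedged sketch of the loading path that uses only the calls shown in this commit, with the FLUX.1-dev repo id passed directly instead of going through snapshot_download (a simplification, not what app.py does):

import torch
from diffusers import FluxControlNetModel, FluxControlNetPipeline

dtype = torch.float16 if torch.cuda.is_available() else torch.float32

controlnet = FluxControlNetModel.from_pretrained(
    "jasperai/Flux.1-dev-Controlnet-Upscaler",
    torch_dtype=dtype,
    use_safetensors=True,
)
pipe = FluxControlNetPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",   # the commit resolves this path via snapshot_download
    controlnet=controlnet,
    torch_dtype=dtype,
    use_safetensors=True,
)
pipe.enable_model_cpu_offload()   # streams submodules to the GPU only when needed
pipe.enable_attention_slicing(1)  # compute attention in slices to lower peak memory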
@@ -76,16 +90,17 @@ def process_input(input_image, upscale_factor, **kwargs):
 
     if w * h * upscale_factor**2 > MAX_PIXEL_BUDGET:
         warnings.warn(
-            f"Requested output image is too large …"
+            f"Requested output image is too large. Resizing..."
         )
         gr.Info(
-            f"…"
+            f"Resizing input image to fit memory constraints..."
         )
         input_image = input_image.resize(
             (
                 int(aspect_ratio * MAX_PIXEL_BUDGET**0.5 // upscale_factor),
                 int(MAX_PIXEL_BUDGET**0.5 // aspect_ratio // upscale_factor),
-            )
+            ),
+            Image.LANCZOS
         )
         was_resized = True
 
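With MAX_PIXEL_BUDGET = 256 * 256, the guard above allows at most 65,536 output pixels. For example, a 512x512 input at upscale factor 2 would produce a 1024x1024 output, so the input is shrunk first. The arithmetic below mirrors the resize expression in process_input (the helper name budgeted_input_size is illustrative, not part of app.py):

MAX_PIXEL_BUDGET = 256 * 256  # value set by this commit

def budgeted_input_size(w: int, h: int, upscale_factor: int) -> tuple[int, int]:
    # Same expression as process_input: shrink so the upscaled result
    # stays within the pixel budget while roughly preserving aspect ratio.
    if w * h * upscale_factor**2 <= MAX_PIXEL_BUDGET:
        return w, h
    aspect_ratio = w / h
    return (
        int(aspect_ratio * MAX_PIXEL_BUDGET**0.5 // upscale_factor),
        int(MAX_PIXEL_BUDGET**0.5 // aspect_ratio // upscale_factor),
    )

print(budgeted_input_size(512, 512, 2))  # -> (128, 128); 128 * 2 = 256 per side

Note that the two floor divisions are applied independently, so for non-square inputs the resized area can land slightly under the budget.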
@@ -111,6 +126,7 @@ def infer(
         gr.Warning("System resources are running low. Try reducing parameters.")
         return None
 
+    # Clear CUDA cache before processing
     if device == "cuda":
         torch.cuda.empty_cache()
 
@@ -129,25 +145,25 @@ def infer(
         generator = torch.Generator().manual_seed(seed)
 
         gr.Info("Upscaling image...")
-        image = pipe(
-            prompt="",
-            control_image=control_image,
-            controlnet_conditioning_scale=controlnet_conditioning_scale,
-            num_inference_steps=num_inference_steps,
-            guidance_scale=3.5,
-            height=control_image.size[1],
-            width=control_image.size[0],
-            generator=generator,
-        ).images[0]
+        with torch.inference_mode():  # Use inference mode to save memory
+            image = pipe(
+                prompt="",
+                control_image=control_image,
+                controlnet_conditioning_scale=controlnet_conditioning_scale,
+                num_inference_steps=num_inference_steps,
+                guidance_scale=3.5,
+                height=control_image.size[1],
+                width=control_image.size[0],
+                generator=generator,
+            ).images[0]
 
         if was_resized:
             gr.Info(
-                f"Resizing output image to …"
+                f"Resizing output image to final size..."
             )
 
         # resize to target desired size
         image = image.resize((w_original * upscale_factor, h_original * upscale_factor))
-        image.save("output.jpg")
         return [true_input_image, image, seed]
 
     except RuntimeError as e:
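torch.inference_mode() disables autograd tracking for everything computed inside the block, so no computation graph or gradient buffers are kept alive during the pipe(...) call. A small standalone illustration (the Linear module is a stand-in, not the FLUX pipeline):

import torch

model = torch.nn.Linear(16, 16)

with torch.inference_mode():
    out = model(torch.randn(1, 16))

print(out.requires_grad)  # False: nothing was recorded for backprop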
@@ -170,23 +186,23 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
             num_inference_steps = gr.Slider(
                 label="Number of Inference Steps",
                 minimum=8,
-                maximum=50,
+                maximum=30,  # Reduced from 50
                 step=1,
-                value=28,
+                value=20,  # Reduced from 28
             )
             upscale_factor = gr.Slider(
                 label="Upscale Factor",
                 minimum=1,
-                maximum=2,
+                maximum=2,
                 step=1,
-                value=…,
+                value=1,  # Reduced default
             )
             controlnet_conditioning_scale = gr.Slider(
                 label="Controlnet Conditioning Scale",
                 minimum=0.1,
-                maximum=1.5,
+                maximum=1.0,  # Reduced from 1.5
                 step=0.1,
-                value=0.6,
+                value=0.5,  # Reduced from 0.6
             )
             seed = gr.Slider(
                 label="Seed",
@@ -205,8 +221,8 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
 
     examples = gr.Examples(
         examples=[
-            [42, False, os.path.join(current_dir, "z1.webp"), …],
-            [42, False, os.path.join(current_dir, "z2.webp"), …],
+            [42, False, os.path.join(current_dir, "z1.webp"), 20, 1, 0.5],  # Reduced parameters
+            [42, False, os.path.join(current_dir, "z2.webp"), 20, 1, 0.5],  # Reduced parameters
         ],
         inputs=[
             seed,
@@ -236,4 +252,11 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
         show_api=False,
     )
 
-…
+# Launch with minimal memory usage
+demo.queue(max_size=1).launch(
+    share=False,
+    debug=True,
+    show_error=True,
+    max_threads=1,
+    enable_queue=True
+)
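The new launch block serializes work through a single-slot queue and one worker thread. Note that enable_queue was a Gradio 3.x launch() argument; Gradio 4.x drops it from launch() and configures queueing entirely through demo.queue(), so on a newer runtime the equivalent would look roughly like this (the Blocks body is a placeholder, not the Space's UI):

import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("placeholder UI")  # stand-in for the Space's interface

demo.queue(max_size=1).launch(
    share=False,
    debug=True,
    show_error=True,
    max_threads=1,
)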