Borcherding committed on
Commit
a436bfa
·
verified ·
1 Parent(s): bc71736

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -29
app.py CHANGED
@@ -3,6 +3,7 @@ import numpy as np
3
  import spaces
4
  import torch
5
  import random
 
6
  from peft import PeftModel
7
  from diffusers import FluxControlPipeline, FluxTransformer2DModel
8
  from image_gen_aux import DepthPreprocessor
@@ -10,38 +11,53 @@ from image_gen_aux import DepthPreprocessor
10
  MAX_SEED = np.iinfo(np.int32).max
11
  MAX_IMAGE_SIZE = 2048
12
 
13
- # Initialize models without moving to CUDA yet
14
- pipe = FluxControlPipeline.from_pretrained(
15
- "black-forest-labs/FLUX.1-Depth-dev",
16
- torch_dtype=torch.bfloat16
17
- )
 
 
 
 
 
 
 
18
  processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
19
 
20
  def cleanup_memory():
21
- """Clean up GPU memory"""
22
  if torch.cuda.is_available():
23
- torch.cuda.empty_cache()
24
- torch.cuda.ipc_collect()
 
 
 
 
 
 
 
 
 
25
 
26
  @spaces.GPU
27
  def load_lora(lora_path):
 
28
  if not lora_path.strip():
29
  return "Please provide a valid LoRA path"
30
  try:
31
  cleanup_memory()
32
 
33
- # Move to GPU within the wrapped function
34
- pipe.to("cuda")
35
- pipe.enable_model_cpu_offload()
36
 
37
- # Unload any existing LoRA weights first
38
- try:
39
- pipe.unload_lora_weights()
40
- except:
41
- pass
42
-
43
- # Load new LoRA weights
44
  pipe.load_lora_weights(lora_path)
 
 
45
  return f"Successfully loaded LoRA weights from {lora_path}"
46
  except Exception as e:
47
  cleanup_memory()
@@ -49,23 +65,24 @@ def load_lora(lora_path):
49
 
50
  @spaces.GPU
51
  def unload_lora():
 
52
  try:
53
  cleanup_memory()
54
- pipe.to("cuda")
 
55
  pipe.unload_lora_weights()
 
56
  return "Successfully unloaded LoRA weights"
57
  except Exception as e:
58
  cleanup_memory()
59
  return f"Error unloading LoRA weights: {str(e)}"
60
 
61
  def round_to_multiple(number, multiple):
62
- """Round a number to the nearest multiple"""
63
  return multiple * round(number / multiple)
64
 
65
  @spaces.GPU
66
  def infer(control_image, prompt, seed=42, randomize_seed=False, width=1024, height=1024,
67
  guidance_scale=3.5, num_inference_steps=28, progress=gr.Progress(track_tqdm=True)):
68
-
69
  try:
70
  cleanup_memory()
71
 
@@ -76,14 +93,11 @@ def infer(control_image, prompt, seed=42, randomize_seed=False, width=1024, heig
76
  width = round_to_multiple(width, 16)
77
  height = round_to_multiple(height, 16)
78
 
79
- # Move pipeline to GPU within the wrapped function
80
- pipe.to("cuda")
81
-
82
  # Process control image
83
  control_image = processor(control_image)[0].convert("RGB")
84
 
85
- # Generate image
86
- with torch.inference_mode():
87
  image = pipe(
88
  prompt=prompt,
89
  control_image=control_image,
@@ -108,7 +122,6 @@ css="""
108
  """
109
 
110
  with gr.Blocks(css=css) as demo:
111
-
112
  with gr.Column(elem_id="col-container"):
113
  gr.Markdown(f"""# FLUX.1 Depth [dev] with LoRA Support
114
  12B param rectified flow transformer structural conditioning tuned, guidance-distilled from [FLUX.1 [pro]](https://blackforestlabs.ai/)
@@ -156,7 +169,7 @@ with gr.Blocks(css=css) as demo:
156
  label="Width",
157
  minimum=256,
158
  maximum=MAX_IMAGE_SIZE,
159
- step=16, # Changed to 16 to ensure divisibility
160
  value=1024,
161
  )
162
 
@@ -164,7 +177,7 @@ with gr.Blocks(css=css) as demo:
164
  label="Height",
165
  minimum=256,
166
  maximum=MAX_IMAGE_SIZE,
167
- step=16, # Changed to 16 to ensure divisibility
168
  value=1024,
169
  )
170
 
 
3
  import spaces
4
  import torch
5
  import random
6
+ import gc
7
  from peft import PeftModel
8
  from diffusers import FluxControlPipeline, FluxTransformer2DModel
9
  from image_gen_aux import DepthPreprocessor
 
11
  MAX_SEED = np.iinfo(np.int32).max
12
  MAX_IMAGE_SIZE = 2048
13
 
14
+ def init_pipeline():
15
+ """Initialize pipeline with memory-efficient settings"""
16
+ pipe = FluxControlPipeline.from_pretrained(
17
+ "black-forest-labs/FLUX.1-Depth-dev",
18
+ torch_dtype=torch.bfloat16,
19
+ low_cpu_mem_usage=True,
20
+ use_safetensors=True
21
+ )
22
+ return pipe
23
+
24
+ # Initialize models without moving to CUDA
25
+ pipe = init_pipeline()
26
  processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
27
 
28
  def cleanup_memory():
29
+ """Aggressive memory cleanup"""
30
  if torch.cuda.is_available():
31
+ with torch.cuda.device('cuda'):
32
+ torch.cuda.empty_cache()
33
+ torch.cuda.ipc_collect()
34
+ gc.collect()
35
+
36
+ def reinit_pipeline():
37
+ """Reinitialize the pipeline if needed"""
38
+ global pipe
39
+ cleanup_memory()
40
+ pipe = init_pipeline()
41
+ cleanup_memory()
42
 
43
  @spaces.GPU
44
  def load_lora(lora_path):
45
+ global pipe
46
  if not lora_path.strip():
47
  return "Please provide a valid LoRA path"
48
  try:
49
  cleanup_memory()
50
 
51
+ # Reinitialize pipeline
52
+ reinit_pipeline()
 
53
 
54
+ # Enable sequential CPU offload
55
+ pipe.enable_sequential_cpu_offload()
56
+
57
+ # Load LoRA weights
 
 
 
58
  pipe.load_lora_weights(lora_path)
59
+
60
+ cleanup_memory()
61
  return f"Successfully loaded LoRA weights from {lora_path}"
62
  except Exception as e:
63
  cleanup_memory()
 
65
 
66
  @spaces.GPU
67
  def unload_lora():
68
+ global pipe
69
  try:
70
  cleanup_memory()
71
+ reinit_pipeline()
72
+ pipe.enable_sequential_cpu_offload()
73
  pipe.unload_lora_weights()
74
+ cleanup_memory()
75
  return "Successfully unloaded LoRA weights"
76
  except Exception as e:
77
  cleanup_memory()
78
  return f"Error unloading LoRA weights: {str(e)}"
79
 
80
  def round_to_multiple(number, multiple):
 
81
  return multiple * round(number / multiple)
82
 
83
  @spaces.GPU
84
  def infer(control_image, prompt, seed=42, randomize_seed=False, width=1024, height=1024,
85
  guidance_scale=3.5, num_inference_steps=28, progress=gr.Progress(track_tqdm=True)):
 
86
  try:
87
  cleanup_memory()
88
 
 
93
  width = round_to_multiple(width, 16)
94
  height = round_to_multiple(height, 16)
95
 
 
 
 
96
  # Process control image
97
  control_image = processor(control_image)[0].convert("RGB")
98
 
99
+ # Generate image with memory optimization
100
+ with torch.inference_mode(), torch.cuda.amp.autocast():
101
  image = pipe(
102
  prompt=prompt,
103
  control_image=control_image,
 
122
  """
123
 
124
  with gr.Blocks(css=css) as demo:
 
125
  with gr.Column(elem_id="col-container"):
126
  gr.Markdown(f"""# FLUX.1 Depth [dev] with LoRA Support
127
  12B param rectified flow transformer structural conditioning tuned, guidance-distilled from [FLUX.1 [pro]](https://blackforestlabs.ai/)
 
169
  label="Width",
170
  minimum=256,
171
  maximum=MAX_IMAGE_SIZE,
172
+ step=16,
173
  value=1024,
174
  )
175
 
 
177
  label="Height",
178
  minimum=256,
179
  maximum=MAX_IMAGE_SIZE,
180
+ step=16,
181
  value=1024,
182
  )
183