FLUX.1-Depth-dev-LoRA-BorchMods-public

Running

App Files Files Community

Borcherding committed on Jan 31

Commit

a436bfa

verified ·

1 Parent(s): bc71736

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -29

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import numpy as np
 import spaces
 import torch
 import random
 from peft import PeftModel
 from diffusers import FluxControlPipeline, FluxTransformer2DModel
 from image_gen_aux import DepthPreprocessor
@@ -10,38 +11,53 @@ from image_gen_aux import DepthPreprocessor
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
-# Initialize models without moving to CUDA yet
-pipe = FluxControlPipeline.from_pretrained(
-    "black-forest-labs/FLUX.1-Depth-dev",
-    torch_dtype=torch.bfloat16
-)
 processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
 def cleanup_memory():
-    """Clean up GPU memory"""
     if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-        torch.cuda.ipc_collect()
 @spaces.GPU
 def load_lora(lora_path):
     if not lora_path.strip():
         return "Please provide a valid LoRA path"
     try:
         cleanup_memory()
-        # Move to GPU within the wrapped function
-        pipe.to("cuda")
-        pipe.enable_model_cpu_offload()
-        # Unload any existing LoRA weights first
-        try:
-            pipe.unload_lora_weights()
-        except:
-            pass
-        # Load new LoRA weights
         pipe.load_lora_weights(lora_path)
         return f"Successfully loaded LoRA weights from {lora_path}"
     except Exception as e:
         cleanup_memory()
@@ -49,23 +65,24 @@ def load_lora(lora_path):
 @spaces.GPU
 def unload_lora():
     try:
         cleanup_memory()
-        pipe.to("cuda")
         pipe.unload_lora_weights()
         return "Successfully unloaded LoRA weights"
     except Exception as e:
         cleanup_memory()
         return f"Error unloading LoRA weights: {str(e)}"
 def round_to_multiple(number, multiple):
-    """Round a number to the nearest multiple"""
     return multiple * round(number / multiple)
 @spaces.GPU
 def infer(control_image, prompt, seed=42, randomize_seed=False, width=1024, height=1024,
           guidance_scale=3.5, num_inference_steps=28, progress=gr.Progress(track_tqdm=True)):
     try:
         cleanup_memory()
@@ -76,14 +93,11 @@ def infer(control_image, prompt, seed=42, randomize_seed=False, width=1024, heig
         width = round_to_multiple(width, 16)
         height = round_to_multiple(height, 16)
-        # Move pipeline to GPU within the wrapped function
-        pipe.to("cuda")
         # Process control image
         control_image = processor(control_image)[0].convert("RGB")
-        # Generate image
-        with torch.inference_mode():
             image = pipe(
                 prompt=prompt,
                 control_image=control_image,
@@ -108,7 +122,6 @@ css="""
 """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown(f"""# FLUX.1 Depth [dev] with LoRA Support
 12B param rectified flow transformer structural conditioning tuned, guidance-distilled from [FLUX.1 [pro]](https://blackforestlabs.ai/)
@@ -156,7 +169,7 @@ with gr.Blocks(css=css) as demo:
                     label="Width",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
-                    step=16,  # Changed to 16 to ensure divisibility
                     value=1024,
                 )
@@ -164,7 +177,7 @@ with gr.Blocks(css=css) as demo:
                     label="Height",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
-                    step=16,  # Changed to 16 to ensure divisibility
                     value=1024,
                 )

 import spaces
 import torch
 import random
+import gc
 from peft import PeftModel
 from diffusers import FluxControlPipeline, FluxTransformer2DModel
 from image_gen_aux import DepthPreprocessor
 # Largest value representable by a signed 32-bit integer.
 MAX_SEED = np.iinfo(np.int32).max
 # Passed as `maximum` to the Width and Height controls in the UI below.
 MAX_IMAGE_SIZE = 2048
+def init_pipeline():
+    """Initialize pipeline with memory-efficient settings"""
     # Builds a new FluxControlPipeline from the
     # "black-forest-labs/FLUX.1-Depth-dev" checkpoint with bfloat16 weights
     # and returns it. Nothing in this function moves the pipeline to a device.
     # NOTE(review): low_cpu_mem_usage / use_safetensors are forwarded as-is to
     # from_pretrained; confirm their exact effect against the diffusers docs.
+    pipe = FluxControlPipeline.from_pretrained(
+        "black-forest-labs/FLUX.1-Depth-dev",
+        torch_dtype=torch.bfloat16,
+        low_cpu_mem_usage=True,
+        use_safetensors=True
+    )
+    return pipe
+# Initialize models without moving to CUDA
+pipe = init_pipeline()
 # Depth preprocessor loaded once at import time; infer() calls it on the
 # incoming control image before handing the result to the pipeline.
 processor = DepthPreprocessor.from_pretrained("LiheYoung/depth-anything-large-hf")
 def cleanup_memory():
+    """Aggressive memory cleanup"""
     # Takes no arguments and returns None.
     # The CUDA calls below only run when torch reports CUDA as available.
     if torch.cuda.is_available():
+        with torch.cuda.device('cuda'):
+            torch.cuda.empty_cache()
+            torch.cuda.ipc_collect()
     # Sits outside the CUDA check, so Python garbage collection runs on
     # every call, including on machines without CUDA.
+    gc.collect()
+def reinit_pipeline():
+    """Reinitialize the pipeline if needed"""
     # Rebinds the module-level `pipe` to a new object from init_pipeline(),
     # calling cleanup_memory() once before and once after the rebuild.
     # NOTE(review): despite "if needed" in the docstring, there is no
     # condition here; every call rebuilds the pipeline from the checkpoint.
+    global pipe
+    cleanup_memory()
+    pipe = init_pipeline()
+    cleanup_memory()
 @spaces.GPU
 def load_lora(lora_path):
+    global pipe
     if not lora_path.strip():
         return "Please provide a valid LoRA path"
     try:
         cleanup_memory()
+        # Reinitialize pipeline
+        reinit_pipeline()
+        # Enable sequential CPU offload
+        pipe.enable_sequential_cpu_offload()
+        # Load LoRA weights
         pipe.load_lora_weights(lora_path)
+        cleanup_memory()
         return f"Successfully loaded LoRA weights from {lora_path}"
     except Exception as e:
         cleanup_memory()
 @spaces.GPU
 def unload_lora():
     # Returns a status string: the success message, or the "Error unloading"
     # message with the exception text if any Exception is raised in the try
     # body. cleanup_memory() is called on both paths.
+    global pipe
     try:
         cleanup_memory()
         # NOTE(review): reinit_pipeline() replaces `pipe` with a freshly built
         # pipeline, so unload_lora_weights() below runs on that new object
         # rather than the one that had LoRA weights loaded. Confirm that the
         # explicit unload is still required after the rebuild.
+        reinit_pipeline()
+        pipe.enable_sequential_cpu_offload()
         pipe.unload_lora_weights()
+        cleanup_memory()
         return "Successfully unloaded LoRA weights"
     except Exception as e:
         cleanup_memory()
         return f"Error unloading LoRA weights: {str(e)}"
 def round_to_multiple(number, multiple):
     # Returns `multiple` times the rounded quotient number / multiple.
     # Python's round() sends exact .5 ties to the even integer, so with
     # multiple=16: number=24 -> round(1.5)=2 -> 32, number=8 -> round(0.5)=0 -> 0.
     # A zero `multiple` raises ZeroDivisionError from the division.
     return multiple * round(number / multiple)
 @spaces.GPU
 def infer(control_image, prompt, seed=42, randomize_seed=False, width=1024, height=1024,
           guidance_scale=3.5, num_inference_steps=28, progress=gr.Progress(track_tqdm=True)):
     try:
         cleanup_memory()
         width = round_to_multiple(width, 16)
         height = round_to_multiple(height, 16)
         # Process control image
         control_image = processor(control_image)[0].convert("RGB")
+        # Generate image with memory optimization
+        with torch.inference_mode(), torch.cuda.amp.autocast():
             image = pipe(
                 prompt=prompt,
                 control_image=control_image,
 """
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown(f"""# FLUX.1 Depth [dev] with LoRA Support
 12B param rectified flow transformer structural conditioning tuned, guidance-distilled from [FLUX.1 [pro]](https://blackforestlabs.ai/)
                     label="Width",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
+                    step=16,
                     value=1024,
                 )
                     label="Height",
                     minimum=256,
                     maximum=MAX_IMAGE_SIZE,
+                    step=16,
                     value=1024,
                 )