Spaces:

KingNish
/

SDXL-Flash

Running on Zero

App Files Files Community

KingNish committed on Jun 3, 2024

Commit

085c378

verified ·

1 Parent(s): c996b7d

faster (testing)

Browse files

Made faster by:
1. Model Loading and Device Placement: The model (pipe) is now loaded only once outside the generate function. This saves significant time on each generation.
2. Resolution Binning: Enabled by default in the options, this technique speeds up generation and reduces VRAM usage, especially for larger images.
3. Torch Compile (Experimental): Added the option to use torch.compile, which might further improve performance on compatible hardware. The benefit is highly dependent on your setup.
4. CPU Offloading (Experimental): Allows offloading parts of the model to CPU RAM, potentially enabling larger image generation or batch sizes if your GPU VRAM is limited.
5. Batch Generation: Added the ability to generate multiple images per request. The number of images processed in a single pass is controlled by the BATCH_SIZE environment variable, which defaults to 1. This significantly reduces overhead when generating more than one image.

Files changed (1) hide show

app.py +59 -37

app.py CHANGED Viewed

@@ -10,27 +10,33 @@ import spaces
 import torch
 from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
-if not torch.cuda.is_available():
-    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo may not work on CPU.</p>"
-MAX_SEED = np.iinfo(np.int32).max
-CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES", "1") == "1"
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
 USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
 ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-if torch.cuda.is_available():
-    pipe = StableDiffusionXLPipeline.from_pretrained(
-        "sd-community/sdxl-flash",
-        torch_dtype=torch.float16,
-        use_safetensors=True,
-        add_watermarker=False
-    )
-    pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
-    pipe.to("cuda")
 def save_image(img):
     unique_name = str(uuid.uuid4()) + ".png"
@@ -53,51 +59,60 @@ def generate(
     guidance_scale: float = 3,
     num_inference_steps: int = 30,
     randomize_seed: bool = False,
-    use_resolution_binning: bool = True,
     progress=gr.Progress(track_tqdm=True),
 ):
-    pipe.to(device)
     seed = int(randomize_seed_fn(seed, randomize_seed))
-    generator = torch.Generator().manual_seed(seed)
     options = {
-        "prompt":prompt,
-        "negative_prompt":negative_prompt,
-        "width":width,
-        "height":height,
-        "guidance_scale":guidance_scale,
-        "num_inference_steps":num_inference_steps,
-        "generator":generator,
-        "use_resolution_binning":use_resolution_binning,
-        "output_type":"pil",
     }
-    images = pipe(**options).images
     image_paths = [save_image(img) for img in images]
     return image_paths, seed
 examples = [
     "a cat eating a piece of cheese",
     "a ROBOT riding a BLUE horse on Mars, photorealistic, 4k",
     "Ironman VS Hulk, ultrarealistic",
     "Astronaut in a jungle, cold color palette, oil pastel, detailed, 8k",
-    "An alien holding sign board contain word 'Flash', futuristic, neonpunk",
     "Kids going to school, Anime style"
 ]
 css = '''
-.gradio-container{max-width: 560px !important}
 h1{text-align:center}
 footer {
     visibility: hidden
 }
 '''
 with gr.Blocks(css=css) as demo:
-    gr.Markdown("""# SDXL Flash
-        ### First Image processing takes time then images generate faster.""")
     with gr.Group():
         with gr.Row():
             prompt = gr.Text(
@@ -108,8 +123,15 @@ with gr.Blocks(css=css) as demo:
                 container=False,
             )
             run_button = gr.Button("Run", scale=0)
-        result = gr.Gallery(label="Result", columns=1)
     with gr.Accordion("Advanced options", open=False):
         with gr.Row():
             use_negative_prompt = gr.Checkbox(label="Use negative prompt", value=True)
             negative_prompt = gr.Text(
@@ -164,7 +186,6 @@ with gr.Blocks(css=css) as demo:
         inputs=prompt,
         outputs=[result, seed],
         fn=generate,
-        cache_examples=CACHE_EXAMPLES,
     )
     use_negative_prompt.change(
@@ -191,6 +212,7 @@ with gr.Blocks(css=css) as demo:
             guidance_scale,
             num_inference_steps,
             randomize_seed,
         ],
         outputs=[result, seed],
         api_name="run",

 import torch
 from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
+# Use environment variables for flexibility
+MODEL_ID = os.getenv("MODEL_ID", "sd-community/sdxl-flash")
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
 USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
 ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
+BATCH_SIZE = int(os.getenv("BATCH_SIZE", "1"))  # Allow generating multiple images at once
+# Determine device and load model outside of function for efficiency
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+pipe = StableDiffusionXLPipeline.from_pretrained(
+    MODEL_ID,
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    use_safetensors=True,
+    add_watermarker=False,
+    variant="fp16" if torch.cuda.is_available() else None,
+).to(device)
+pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
+# Torch compile for potential speedup (experimental)
+if USE_TORCH_COMPILE:
+    pipe.compile()
+# CPU offloading for larger RAM capacity (experimental)
+if ENABLE_CPU_OFFLOAD:
+    pipe.enable_model_cpu_offload()
+MAX_SEED = np.iinfo(np.int32).max
 def save_image(img):
     unique_name = str(uuid.uuid4()) + ".png"
     guidance_scale: float = 3,
     num_inference_steps: int = 30,
     randomize_seed: bool = False,
+    use_resolution_binning: bool = True,
+    num_images: int = 1,  # Number of images to generate
     progress=gr.Progress(track_tqdm=True),
 ):
     seed = int(randomize_seed_fn(seed, randomize_seed))
+    generator = torch.Generator(device=device).manual_seed(seed)
+    # Improved options handling
     options = {
+        "prompt": [prompt] * num_images,
+        "negative_prompt": [negative_prompt] * num_images if use_negative_prompt else None,
+        "width": width,
+        "height": height,
+        "guidance_scale": guidance_scale,
+        "num_inference_steps": num_inference_steps,
+        "generator": generator,
+        "output_type": "pil",
     }
+    # Use resolution binning for faster generation with less VRAM usage
+    if use_resolution_binning:
+        options["use_resolution_binning"] = True
+    # Generate images potentially in batches
+    images = []
+    for i in range(0, num_images, BATCH_SIZE):
+        batch_options = options.copy()
+        batch_options["prompt"] = options["prompt"][i:i+BATCH_SIZE]
+        if "negative_prompt" in batch_options:
+            batch_options["negative_prompt"] = options["negative_prompt"][i:i+BATCH_SIZE]
+        images.extend(pipe(**batch_options).images)
     image_paths = [save_image(img) for img in images]
     return image_paths, seed
 examples = [
     "a cat eating a piece of cheese",
     "a ROBOT riding a BLUE horse on Mars, photorealistic, 4k",
     "Ironman VS Hulk, ultrarealistic",
     "Astronaut in a jungle, cold color palette, oil pastel, detailed, 8k",
+    "An alien holding a sign board containing the word 'Flash', futuristic, neonpunk",
     "Kids going to school, Anime style"
 ]
 css = '''
+.gradio-container{max-width: 700px !important}
 h1{text-align:center}
 footer {
     visibility: hidden
 }
 '''
 with gr.Blocks(css=css) as demo:
+    gr.Markdown("""# SDXL Flash""")
     with gr.Group():
         with gr.Row():
             prompt = gr.Text(
                 container=False,
             )
             run_button = gr.Button("Run", scale=0)
+        result = gr.Gallery(label="Result", columns=2, show_label=False)
     with gr.Accordion("Advanced options", open=False):
+        num_images = gr.Slider(
+            label="Number of Images",
+            minimum=1,
+            maximum=4,
+            step=1,
+            value=1,
+        )
         with gr.Row():
             use_negative_prompt = gr.Checkbox(label="Use negative prompt", value=True)
             negative_prompt = gr.Text(
         inputs=prompt,
         outputs=[result, seed],
         fn=generate,
     )
     use_negative_prompt.change(
             guidance_scale,
             num_inference_steps,
             randomize_seed,
+            num_images
         ],
         outputs=[result, seed],
         api_name="run",