Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -23,6 +23,7 @@ from transformers import (
 from transformers.image_utils import load_image
 from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 
+
 DESCRIPTION = """
 # QwQ Edge 💬
 """
@@ -41,6 +42,23 @@ h1 {
 }
 '''
 
+def progress_bar_html(label: str) -> str:
+    """Return an HTML snippet with a label and an animated, thin light-blue progress bar."""
+    return f"""
+    <div style="display: flex; align-items: center;">
+        <span style="margin-right: 8px;">{label}</span>
+        <div style="position: relative; width: 110px; height: 5px; background: #e0e0e0; border-radius: 5px; overflow: hidden;">
+            <div style="width: 100%; height: 100%; background-color: lightblue; animation: progress-bar-animation 1s linear infinite;"></div>
+        </div>
+    </div>
+    <style>
+    @keyframes progress-bar-animation {{
+        0% {{ transform: translateX(-100%); }}
+        100% {{ transform: translateX(100%); }}
+    }}
+    </style>
+    """
+
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
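Note on the new helper: unlike the `render_progress_bar` removed below, `progress_bar_html` takes no progress percentage. The bar is indeterminate, animated entirely by the CSS `@keyframes` rule, so the Python side never has to compute or push a progress value. A minimal sketch of how such a snippet is consumed in a streaming chat handler (assuming a recent Gradio version; `respond` is a placeholder, not code from this commit):

    import time
    import gradio as gr

    def respond(message, history):
        # First yield shows the animated bar; any later yield replaces it.
        yield progress_bar_html("Thinking...")
        time.sleep(1.0)  # stand-in for real inference
        yield "Done!"

    demo = gr.ChatInterface(respond)
    demo.launch()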
@@ -87,22 +105,6 @@ def clean_chat_history(chat_history):
         cleaned.append(msg)
     return cleaned
 
-# A helper function to render a progress bar using HTML.
-def render_progress_bar(label: str, progress: int, output_text: str = "") -> str:
-    """
-    Returns an HTML snippet containing a label, a progress bar (red background with a green inner bar),
-    and optionally some output text.
-    """
-    return f'''
-    <div style="margin-bottom: 10px;">
-        <div style="font-weight: bold; margin-bottom: 5px;">{label}</div>
-        <div style="width: 100%; background-color: red; border-radius: 5px; overflow: hidden; height: 10px;">
-            <div style="width: {progress}%; background-color: green; height: 100%; transition: width 0.3s;"></div>
-        </div>
-        <div style="margin-top: 10px;">{output_text}</div>
-    </div>
-    '''
-
 # Environment variables and parameters for Stable Diffusion XL
 MODEL_ID_SD = os.getenv("MODEL_VAL_PATH")  # SDXL Model repository path via env variable
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
@@ -183,6 +185,7 @@ def generate_image_fn(
         batch_options["prompt"] = options["prompt"][i:i+BATCH_SIZE]
         if "negative_prompt" in batch_options and batch_options["negative_prompt"] is not None:
             batch_options["negative_prompt"] = options["negative_prompt"][i:i+BATCH_SIZE]
+        # Wrap the pipeline call in autocast if using CUDA
         if device.type == "cuda":
             with torch.autocast("cuda", dtype=torch.float16):
                 outputs = sd_pipe(**batch_options)
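The added comment only documents existing behavior: on CUDA the SDXL call runs under `torch.autocast` so the denoising passes execute in float16, while CPU execution stays in full precision. The pattern in isolation (a sketch; `pipe` and `opts` are placeholders):

    import torch

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def run_pipeline(pipe, opts):
        if device.type == "cuda":
            # Mixed precision roughly halves activation memory during denoising.
            with torch.autocast("cuda", dtype=torch.float16):
                return pipe(**opts)
        return pipe(**opts)  # full precision on CPU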
@@ -207,51 +210,36 @@ def generate(
     Special commands:
     - "@tts1" or "@tts2": triggers text-to-speech.
     - "@image": triggers image generation using the SDXL pipeline.
-
-    Instead of yielding a simple "Thinking..." text, an animated progress bar is shown (via an HTML snippet)
-    that goes from red to green. When the inference is complete the progress bar is replaced by the final result.
     """
     text = input_dict["text"]
     files = input_dict.get("files", [])
 
-
+    tts_prefix = "@tts"
+    is_tts = any(text.strip().lower().startswith(f"{tts_prefix}{i}") for i in range(1, 3))
+    voice_index = next((i for i in range(1, 3) if text.strip().lower().startswith(f"{tts_prefix}{i}")), None)
+
     if text.strip().lower().startswith("@image"):
+        # Remove the "@image" tag and use the rest as prompt
         prompt = text[len("@image"):].strip()
-        # Run generate_image_fn in a background thread, storing the result.
-        result_container = []
-        def run_image():
-            result_container.append(generate_image_fn(
-                prompt=prompt,
-                negative_prompt="",
-                use_negative_prompt=False,
-                seed=1,
-                width=1024,
-                height=1024,
-                guidance_scale=3,
-                num_inference_steps=25,
-                randomize_seed=True,
-                use_resolution_binning=True,
-                num_images=1,
-            ))
-        thread = Thread(target=run_image)
-        thread.start()
-        start_time = time.time()
-        # Simulate progress bar updates while image generation is running.
-        while thread.is_alive():
-            progress = min(95, int((time.time() - start_time) / 5 * 95))
-            yield render_progress_bar("Generating Image", progress)
-            time.sleep(0.5)
-        thread.join()
-        # Final update before showing the result.
-        yield render_progress_bar("Generating Image", 100)
-        image_paths, used_seed = result_container[0]
+        # Yield progress bar for image generation
+        yield progress_bar_html("Generating Image")
+        image_paths, used_seed = generate_image_fn(
+            prompt=prompt,
+            negative_prompt="",
+            use_negative_prompt=False,
+            seed=1,
+            width=1024,
+            height=1024,
+            guidance_scale=3,
+            num_inference_steps=25,
+            randomize_seed=True,
+            use_resolution_binning=True,
+            num_images=1,
+        )
+        # Yield the generated image, replacing the progress bar
         yield gr.Image(image_paths[0])
         return  # Exit early
 
-    tts_prefix = "@tts"
-    is_tts = any(text.strip().lower().startswith(f"{tts_prefix}{i}") for i in range(1, 3))
-    voice_index = next((i for i in range(1, 3) if text.strip().lower().startswith(f"{tts_prefix}{i}")), None)
-
     if is_tts and voice_index:
         voice = TTS_VOICES[voice_index - 1]
         text = text.replace(f"{tts_prefix}{voice_index}", "").strip()
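Two behavioral changes in this hunk: the `@tts` prefix parsing is hoisted above the `@image` branch, and the image path no longer spawns a worker thread to animate a percentage. Instead it yields the indeterminate bar once, runs `generate_image_fn` synchronously, then yields the image, which replaces the bar in the chat. The hoisted prefix logic is easy to sanity-check on its own (illustrative only):

    tts_prefix = "@tts"
    for text in ("@tts1 hello", "@tts2 bonjour", "plain message"):
        is_tts = any(text.strip().lower().startswith(f"{tts_prefix}{i}") for i in range(1, 3))
        voice_index = next((i for i in range(1, 3) if text.strip().lower().startswith(f"{tts_prefix}{i}")), None)
        print(text, "->", is_tts, voice_index)
    # @tts1 hello -> True 1
    # @tts2 bonjour -> True 2
    # plain message -> False None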
@@ -264,7 +252,6 @@ def generate(
     conversation = clean_chat_history(chat_history)
     conversation.append({"role": "user", "content": text})
 
-    # Multimodal (image + text) branch
     if files:
         if len(files) > 1:
             images = [load_image(image) for image in files]
@@ -287,20 +274,17 @@ def generate(
         thread.start()
 
         buffer = ""
-
-
-        yield render_progress_bar("Thinking...", 0)
+        # Yield initial progress bar for multimodal generation
+        yield progress_bar_html("Thinking...")
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
-
-
-
-
-
-            yield buffer
+            time.sleep(0.01)
+            # Update with partial text and progress bar
+            yield f"<div>{buffer}</div><div>{progress_bar_html('Thinking...')}</div>"
+        # Final output: remove progress bar
+        yield f"<div>{buffer}</div>"
     else:
-        # Text-only generation branch.
         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
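The multimodal branch now streams partial text and the animated bar together in one HTML message, then drops the bar with a final yield. The underlying `TextIteratorStreamer` pattern, reduced to a standalone sketch (the `gpt2` checkpoint is an arbitrary stand-in, not the model this Space uses):

    from threading import Thread
    from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

    tok = AutoTokenizer.from_pretrained("gpt2")
    model = AutoModelForCausalLM.from_pretrained("gpt2")

    inputs = tok("Hello", return_tensors="pt")
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
    # generate() runs in a worker thread and pushes decoded chunks to the streamer.
    Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=20)).start()

    buffer = ""
    for new_text in streamer:  # blocks until the next chunk arrives
        buffer += new_text
    print(buffer)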
@@ -321,20 +305,18 @@ def generate(
         t = Thread(target=model.generate, kwargs=generation_kwargs)
         t.start()
 
+        # Yield initial progress bar for text generation
+        yield progress_bar_html("Thinking...")
         outputs = []
-        start_time = time.time()
-        # Initial progress bar update.
-        yield render_progress_bar("Thinking...", 0)
         for new_text in streamer:
             outputs.append(new_text)
             current_text = "".join(outputs)
-
-
+            time.sleep(0.01)
+            # Update message with partial text and progress bar
+            yield f"<div>{current_text}</div><div>{progress_bar_html('Thinking...')}</div>"
         final_response = "".join(outputs)
-        # Final
-        yield
-        # Finally, yield the final plain response so the progress bar disappears.
-        yield final_response
+        # Final output: only the final response text, progress bar removed.
+        yield f"<div>{final_response}</div>"
 
         # If TTS was requested, convert the final response to speech.
         if is_tts and voice:
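Same pattern in the text-only branch. The `time.sleep(0.01)` caps UI updates at roughly one hundred per second, but it also adds latency to every streamed token. An alternative (an assumption, not part of this commit) is to throttle by wall-clock time so generation speed is unaffected:

    import time

    def stream_with_throttle(streamer, progress_bar_html):
        # Drop-in alternative for the streaming loop above: throttle by
        # elapsed time instead of sleeping once per token.
        last_update = 0.0
        buffer = ""
        for new_text in streamer:
            buffer += new_text
            now = time.monotonic()
            if now - last_update > 0.05:  # at most one UI update every 50 ms
                last_update = now
                yield f"<div>{buffer}</div><div>{progress_bar_html('Thinking...')}</div>"
        yield f"<div>{buffer}</div>"  # final yield drops the progress bar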