Update app.py
app.py CHANGED
@@ -23,7 +23,6 @@ from transformers import (
 from transformers.image_utils import load_image
 from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 
-
 DESCRIPTION = """
 # QwQ Edge 💬
 """
@@ -48,6 +47,29 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
+# Define a helper function that returns HTML for a progress bar with a label.
+def progress_bar_html_with_label(label="Thinking..."):
+    return f"""
+    <div style="font-weight: bold; margin-bottom: 5px;">{label}</div>
+    <div id="progress-container" style="width: 100%; background-color: #eee; border-radius: 4px; overflow: hidden;">
+        <div id="progress-bar" style="width: 0%; height: 10px; background-color: limegreen; transition: width 0.1s;"></div>
+    </div>
+    <script>
+    (function() {{
+        let progressBar = document.getElementById("progress-bar");
+        let width = 0;
+        let interval = setInterval(function(){{
+            if(width < 100) {{
+                width += 1;
+                progressBar.style.width = width + "%";
+            }} else {{
+                clearInterval(interval);
+            }}
+        }}, 100);
+    }})();
+    </script>
+    """
+
 # Load text-only model and tokenizer
 model_id = "prithivMLmods/FastThink-0.5B-Tiny"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
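Note: the new helper returns a self-contained HTML snippet with inline JavaScript, so each `gr.HTML` yield restarts the animation from 0%. Below is a minimal sketch (not part of the commit) of how such a helper is consumed from a Gradio generator; `respond` and `fake_model_call` are hypothetical stand-ins, and `progress_bar_html_with_label` is assumed to be in scope.

import time
import gradio as gr

def fake_model_call():
    time.sleep(2)  # stand-in for a blocking model.generate() call
    return "Hello from the model!"

def respond(prompt):
    # First yield: show the animated progress bar while we "work".
    yield progress_bar_html_with_label("Thinking...")
    result = fake_model_call()
    # Final yield: replaces the progress bar with the answer.
    yield f"<div>{result}</div>"

with gr.Blocks() as demo:
    box = gr.Textbox(label="Prompt")
    out = gr.HTML()
    # Generator callbacks stream each yielded value into the HTML output.
    box.submit(respond, inputs=box, outputs=out)

demo.launch()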
@@ -129,28 +151,6 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
         seed = random.randint(0, MAX_SEED)
     return seed
 
-def progress_with_text(text):
-    """
-    Returns an HTML snippet that shows an animated progress bar along with the given text.
-    """
-    return f"""
-    <div style="display: flex; align-items: center;">
-        <span style="margin-right: 10px;">Thinking...</span>
-        <div style="width: 110px; height: 5px; background-color: #ddd; overflow: hidden; position: relative; margin-left: 10px;">
-            <div style="width: 50%; height: 100%; background-color: #1565c0; animation: loading 1.5s linear infinite;"></div>
-        </div>
-    </div>
-    <div style="margin-top: 10px;">
-        {text}
-    </div>
-    <style>
-    @keyframes loading {{
-        0% {{ transform: translateX(-50%); }}
-        100% {{ transform: translateX(100%); }}
-    }}
-    </style>
-    """
-
 @spaces.GPU(duration=60, enable_queue=True)
 def generate_image_fn(
     prompt: str,
@@ -190,7 +190,6 @@ def generate_image_fn(
         batch_options["prompt"] = options["prompt"][i:i+BATCH_SIZE]
         if "negative_prompt" in batch_options and batch_options["negative_prompt"] is not None:
             batch_options["negative_prompt"] = options["negative_prompt"][i:i+BATCH_SIZE]
-        # Wrap the pipeline call in autocast if using CUDA
         if device.type == "cuda":
            with torch.autocast("cuda", dtype=torch.float16):
                outputs = sd_pipe(**batch_options)
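For context on the autocast block retained above, here is a minimal sketch of calling a diffusers pipeline under float16 autocast only when CUDA is available; `pipe` is assumed to be an already-loaded `StableDiffusionXLPipeline` and the prompt is illustrative.

import torch

prompt = "a watercolor fox"
if torch.cuda.is_available():
    # On CUDA, run the UNet/VAE matmuls in float16 for speed and memory savings.
    with torch.autocast("cuda", dtype=torch.float16):
        images = pipe(prompt=prompt).images
else:
    # On CPU, fall back to full precision; this autocast path is CUDA-only.
    images = pipe(prompt=prompt).images
images[0].save("fox.png")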
@@ -219,10 +218,11 @@ def generate(
     text = input_dict["text"]
     files = input_dict.get("files", [])
 
+    # If the command is for image generation
     if text.strip().lower().startswith("@image"):
-        # Remove the "@image" tag and use the rest as prompt
         prompt = text[len("@image"):].strip()
-
+        # Show animated progress bar with "Generating Image" label
+        yield gr.HTML(progress_bar_html_with_label("Generating Image"))
         image_paths, used_seed = generate_image_fn(
             prompt=prompt,
             negative_prompt="",
@@ -236,7 +236,7 @@ def generate(
             use_resolution_binning=True,
             num_images=1,
         )
-        #
+        # After generation, yield only the image (progress bar no longer shown)
         yield gr.Image(image_paths[0])
         return  # Exit early
 
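Because `generate` is a generator, the `@image` branch above can stream two messages and stop: the first yield shows the progress bar, the second replaces it with the image, and the early `return` skips the TTS and LLM paths entirely. A condensed sketch of that dispatch shape, with a hypothetical `make_image` standing in for `generate_image_fn`:

def generate(text: str):
    # Command dispatch: "@image ..." short-circuits the chat pipeline.
    if text.strip().lower().startswith("@image"):
        prompt = text[len("@image"):].strip()
        yield "<em>Generating Image...</em>"  # placeholder while the GPU works
        path = make_image(prompt)             # hypothetical blocking call
        yield path                            # final message replaces the placeholder
        return                                # nothing below runs for @image
    yield "normal chat response"              # text/LLM branch would go here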
@@ -247,16 +247,14 @@ def generate(
     if is_tts and voice_index:
         voice = TTS_VOICES[voice_index - 1]
         text = text.replace(f"{tts_prefix}{voice_index}", "").strip()
-        # Clear previous chat history for a fresh TTS request.
         conversation = [{"role": "user", "content": text}]
     else:
         voice = None
-        # Remove any stray @tts tags and build the conversation history.
         text = text.replace(tts_prefix, "").strip()
         conversation = clean_chat_history(chat_history)
         conversation.append({"role": "user", "content": text})
 
-    #
+    # Multimodal generation (with file inputs)
     if files:
         if len(files) > 1:
             images = [load_image(image) for image in files]
@@ -279,15 +277,21 @@ def generate(
         thread.start()
 
         buffer = ""
-        #
-        yield gr.HTML(progress_with_text(""))
+        # Show initial progress bar with label "Thinking..."
+        yield gr.HTML(progress_bar_html_with_label("Thinking..."))
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
-
-
+            # Update the message to show both the progress bar and current text output.
+            html = f"""
+            {progress_bar_html_with_label("Thinking...")}
+            <div style="margin-top: 10px;">{buffer}</div>
+            """
+            yield gr.HTML(html)
+        # Final output: only the generated text without the progress bar.
+        yield buffer
     else:
-        #
+        # Text-only generation
         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
@@ -308,17 +312,21 @@ def generate(
         t = Thread(target=model.generate, kwargs=generation_kwargs)
         t.start()
 
-        buffer = ""
-        # Yield the initial animated progress bar with no text yet.
-        yield gr.HTML(progress_with_text(""))
         outputs = []
+        buffer = ""
+        # Show initial progress bar with label "Thinking..."
+        yield gr.HTML(progress_bar_html_with_label("Thinking..."))
         for new_text in streamer:
            outputs.append(new_text)
            buffer = "".join(outputs)
-
-
-
-
+            html = f"""
+            {progress_bar_html_with_label("Thinking...")}
+            <div style="margin-top: 10px;">{buffer}</div>
+            """
+            yield gr.HTML(html)
+        final_response = buffer
+        # Final output: just the final text.
+        yield final_response
 
     # If TTS was requested, convert the final response to speech.
     if is_tts and voice:
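Both branches above rely on the same pattern: `model.generate` blocks, so it runs on a background thread with a `TextIteratorStreamer`, while the main generator drains the streamer and yields each partial buffer to the UI. A self-contained sketch of that pattern, using gpt2 only as a small stand-in model:

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tok = AutoTokenizer.from_pretrained("gpt2")
lm = AutoModelForCausalLM.from_pretrained("gpt2")

def stream_reply(prompt):
    inputs = tok(prompt, return_tensors="pt")
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
    kwargs = dict(**inputs, streamer=streamer, max_new_tokens=64)
    # generate() blocks until done, so run it off the main thread.
    Thread(target=lm.generate, kwargs=kwargs).start()
    buffer = ""
    for piece in streamer:  # main thread receives tokens as they are produced
        buffer += piece
        yield buffer        # each yield refreshes the pending UI message

last = ""
for partial in stream_reply("Hello,"):
    last = partial
print(last)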