Update app.py
app.py CHANGED
@@ -7,17 +7,23 @@ from typing import Tuple, List
 
 deepsparse.cpu.print_hardware_capability()
 
-MODEL_PATH = "TinyStories-1M"
+MODEL_PATH = "hf:mgoin/TinyStories-1M-deepsparse"
 
 DESCRIPTION = f"""
-#
-
-The model stub for this example is: {MODEL_PATH}
+# {MODEL_PATH} running on DeepSparse
 """
 
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 512
 
+# Setup the engine
+pipe = deepsparse.Pipeline.create(
+    task="text-generation",
+    model_path=MODEL_PATH,
+    max_generated_tokens=DEFAULT_MAX_NEW_TOKENS,
+    sequence_length=MAX_MAX_NEW_TOKENS,
+)
+
 
 def clear_and_save_textbox(message: str) -> Tuple[str, str]:
     return "", message
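This hunk pins the model to an hf: stub and builds the engine once at import time. As a quick sanity check outside the Space, the same setup can be exercised directly; a minimal sketch reusing only the calls visible in this diff (the smaller token and sequence limits are arbitrary, and the non-streaming result is assumed to carry the same generations[0].text structure the streaming tokens do):

    import deepsparse

    # Print which CPU extensions (e.g. AVX2/AVX-512/VNNI) the engine can use.
    deepsparse.cpu.print_hardware_capability()

    # Same engine setup as the diff; the hf: stub resolves to a model
    # hosted on the Hugging Face Hub.
    pipe = deepsparse.Pipeline.create(
        task="text-generation",
        model_path="hf:mgoin/TinyStories-1M-deepsparse",
        max_generated_tokens=64,
        sequence_length=512,
    )

    # One blocking call; .generations mirrors the structure used in generate().
    result = pipe(sequences="Once upon a time")
    print(result.generations[0].text)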
@@ -38,15 +44,6 @@ def delete_prev_fn(history: List[Tuple[str, str]]) -> Tuple[List[Tuple[str, str]
     return history, message or ""
 
 
-# Setup the engine
-pipe = deepsparse.Pipeline.create(
-    task="text-generation",
-    model_path=MODEL_PATH,
-    max_generated_tokens=DEFAULT_MAX_NEW_TOKENS,
-    sequence_length=MAX_MAX_NEW_TOKENS,
-)
-
-
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
 
@@ -89,17 +86,13 @@ with gr.Blocks(css="style.css") as demo:
     )
 
     # Generation inference
-    def generate(message, history, max_new_tokens: int, temperature: float):
-
-        pipe
-
-
-        thread = Thread(target=pipe, kwargs=generation_kwargs)
-        thread.start()
-        for new_text in streamer:
-            history[-1][1] += new_text
+    def generate(message, history, max_new_tokens: int, temperature: float):
+        generation_config = {"max_new_tokens": max_new_tokens, "temperature": temperature}
+        inference = pipe(sequences=message, streaming=True, **generation_config)
+        for token in inference:
+            history[-1][1] += token.generations[0].text
             yield history
-
+
     print(pipe.timer_manager)
 
     # Hooking up all the buttons
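The rewritten generate() drops the Thread/streamer pair in favor of the pipeline's own streaming mode: passing streaming=True makes the call return an iterator of incremental results, so the Gradio callback can stay a plain generator. A standalone sketch of the same pattern under the same assumptions (pipe as created above; each partial result exposes generations[0].text exactly as in the diff):

    def stream_completion(prompt: str, max_new_tokens: int = 64, temperature: float = 1.0) -> str:
        """Accumulate a completion chunk by chunk, mirroring the Space's generate()."""
        generation_config = {"max_new_tokens": max_new_tokens, "temperature": temperature}
        text = ""
        # streaming=True yields partial results instead of one final object.
        for token in pipe(sequences=prompt, streaming=True, **generation_config):
            text += token.generations[0].text
        return text

    print(stream_completion("Once upon a time"))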
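Because generate() yields the full history after every chunk, it plugs straight into a gr.Chatbot as a streaming event handler; each yield re-renders the chat and produces the token-by-token typing effect. The diff only hints at this with the "# Hooking up all the buttons" comment, so the wiring below is an illustrative sketch, not the Space's actual layout (component names and slider ranges are assumptions; clear_and_save_textbox and generate are the functions from the diff):

    import gradio as gr

    with gr.Blocks(css="style.css") as demo:
        gr.Markdown(DESCRIPTION)
        chatbot = gr.Chatbot()
        textbox = gr.Textbox(placeholder="Type a message...")
        saved_input = gr.State()  # holds the message cleared from the textbox
        max_new_tokens = gr.Slider(1, MAX_MAX_NEW_TOKENS,
                                   value=DEFAULT_MAX_NEW_TOKENS,
                                   label="Max new tokens")
        temperature = gr.Slider(0.1, 2.0, value=1.0, label="Temperature")

        # Save-and-clear the textbox, append an empty bot turn, then stream into it.
        textbox.submit(
            clear_and_save_textbox, [textbox], [textbox, saved_input]
        ).then(
            lambda message, history: history + [[message, ""]],
            [saved_input, chatbot], [chatbot],
        ).then(
            generate, [saved_input, chatbot, max_new_tokens, temperature], [chatbot]
        )

    demo.queue().launch()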