mgoin committed
Commit 8479bd4
1 Parent(s): fe7f276

Update app.py

Files changed (1)
  1. app.py (+16 -23)
app.py CHANGED
@@ -7,17 +7,23 @@ from typing import Tuple, List
 
 deepsparse.cpu.print_hardware_capability()
 
-MODEL_PATH = "TinyStories-1M"
+MODEL_PATH = "hf:mgoin/TinyStories-1M-deepsparse"
 
 DESCRIPTION = f"""
-# TinyStories running on DeepSparse
-
-The model stub for this example is: {MODEL_PATH}
+# {MODEL_PATH} running on DeepSparse
 """
 
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 512
 
+# Setup the engine
+pipe = deepsparse.Pipeline.create(
+    task="text-generation",
+    model_path=MODEL_PATH,
+    max_generated_tokens=DEFAULT_MAX_NEW_TOKENS,
+    sequence_length=MAX_MAX_NEW_TOKENS,
+)
+
 
 def clear_and_save_textbox(message: str) -> Tuple[str, str]:
     return "", message
@@ -38,15 +44,6 @@ def delete_prev_fn(history: List[Tuple[str, str]]) -> Tuple[List[Tuple[str, str]
     return history, message or ""
 
 
-# Setup the engine
-pipe = deepsparse.Pipeline.create(
-    task="text-generation",
-    model_path=MODEL_PATH,
-    max_generated_tokens=DEFAULT_MAX_NEW_TOKENS,
-    sequence_length=MAX_MAX_NEW_TOKENS,
-)
-
-
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
 
@@ -89,17 +86,13 @@ with gr.Blocks(css="style.css") as demo:
     )
 
     # Generation inference
-    def generate(message, history, max_new_tokens: int, temperature: float):
-        streamer = TextIteratorStreamer(pipe.tokenizer)
-        pipe.max_generated_tokens = max_new_tokens
-        pipe.sampling_temperature = temperature
-        generation_kwargs = dict(sequences=message, streamer=streamer)
-        thread = Thread(target=pipe, kwargs=generation_kwargs)
-        thread.start()
-        for new_text in streamer:
-            history[-1][1] += new_text
+    def generate(message, history, max_new_tokens: int, temperature: float):
+        generation_config = {"max_new_tokens": max_new_tokens, "temperature": temperature}
+        inference = pipe(sequences=message, streaming=True, **generation_config)
+        for token in inference:
+            history[-1][1] += token.generations[0].text
             yield history
-        thread.join()
+
         print(pipe.timer_manager)
 
     # Hooking up all the buttons
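
For context, this commit swaps the Thread + TextIteratorStreamer workaround for the pipeline's native streaming generator. Below is a minimal standalone sketch of the new flow, assuming a deepsparse release whose text-generation pipeline accepts `streaming=True` plus generation kwargs and yields results exposing `generations[0].text`, exactly as the diff above uses them; the prompt and sampling values are illustrative only:

```python
import deepsparse

# Same engine setup as the commit: compile the model once up front.
pipe = deepsparse.Pipeline.create(
    task="text-generation",
    model_path="hf:mgoin/TinyStories-1M-deepsparse",
    max_generated_tokens=512,
    sequence_length=2048,
)

# With streaming=True the call returns a generator; each item carries the
# newly decoded chunk, which the Gradio handler appends to the chat history.
for token in pipe(
    sequences="Once upon a time",  # illustrative prompt
    streaming=True,
    max_new_tokens=64,             # illustrative generation settings
    temperature=0.7,
):
    print(token.generations[0].text, end="", flush=True)
print()
```

Because the generator yields on the calling thread, there is no background thread to start or join, which is what lets the new `generate` handler simply `yield history` inside the loop.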