ffreemt committed on
Commit
63d3c89
·
1 Parent(s): d652fb6
Files changed (2) hide show
  1. app-org.py +18 -4
  2. app.py +5 -2
app-org.py CHANGED
@@ -12,7 +12,10 @@ torch
12
  """
13
  # pylint: disable=line-too-long, missing-module-docstring, missing-function-docstring
14
  # import torch
 
 
15
  import gradio as gr
 
16
  from examples_list import examples_list
17
  from transformers import AutoModel, AutoTokenizer # AutoModelForCausalLM,
18
 
@@ -35,10 +38,21 @@ def chat(message, history):
35
  # inputs = tokenizer(prompt, return_tensors="pt").to(device=device)
36
  # output = model.generate(**inputs, do_sample=True, top_p=0.95, top_k=0, max_new_tokens=256)
37
  # return tokenizer.decode(output[0], skip_special_tokens=True)
38
- for response, _ in chat_model.stream_chat(
39
- tokenizer, message, history, max_length=2048, top_p=0.7, temperature=0.95
40
- ):
41
- yield response
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  chatbot = gr.Chatbot([], label="Bot", height=450)
44
  textbox = gr.Textbox('', scale=10, label='', lines=2, placeholder="Ask me anything")
 
12
  """
13
  # pylint: disable=line-too-long, missing-module-docstring, missing-function-docstring
14
  # import torch
15
+ from time import time
16
+
17
  import gradio as gr
18
+ from about_time import about_time
19
  from examples_list import examples_list
20
  from transformers import AutoModel, AutoTokenizer # AutoModelForCausalLM,
21
 
 
38
  # inputs = tokenizer(prompt, return_tensors="pt").to(device=device)
39
  # output = model.generate(**inputs, do_sample=True, top_p=0.95, top_k=0, max_new_tokens=256)
40
  # return tokenizer.decode(output[0], skip_special_tokens=True)
41
+ flag = 1
42
+ then = time()
43
+ prefix = ""
44
+ prelude = 0.0
45
+ with about_time() as dur:
46
+ for response, _ in chat_model.stream_chat(
47
+ tokenizer, message, history, max_length=2048, top_p=0.7, temperature=0.95
48
+ ):
49
+ if flag:
50
+ flag = 0
51
+ prelude = time() - then
52
+ prefix = f"{prelude:.2f}s"
53
+ yield f"{prefix} {response}"
54
+ suffix = f"\n(time elapsed: {dur.duration_human}, {(time() - prelude)/len(response):.2f}s/char)"
55
+ yield f"{response}{suffix}"
56
 
57
  chatbot = gr.Chatbot([], label="Bot", height=450)
58
  textbox = gr.Textbox('', scale=10, label='', lines=2, placeholder="Ask me anything")
app.py CHANGED
@@ -29,8 +29,10 @@ demo.launch()
29
  # import torch
30
  import random
31
  import time
 
32
  import gradio as gr
33
 
 
34
  def respond2(message, chat_history):
35
  if chat_history is None:
36
  chat_history = []
@@ -51,7 +53,8 @@ def respond2(message, chat_history):
51
  yield "", chat_history
52
 
53
  def stream_chat():
54
- """samples:
 
55
 
56
  Sure [('test me', 'Sure')]
57
  Sure, [('test me', 'Sure,')]
@@ -154,4 +157,4 @@ with gr.Blocks() as demo:
154
 
155
  msg.submit(respond2, [msg, chatbot], [msg, chatbot])
156
 
157
- # demo.queue(max_size=2).launch()
 
29
  # import torch
30
  import random
31
  import time
32
+
33
  import gradio as gr
34
 
35
+
36
  def respond2(message, chat_history):
37
  if chat_history is None:
38
  chat_history = []
 
53
  yield "", chat_history
54
 
55
  def stream_chat():
56
+ """
57
+ List samples.
58
 
59
  Sure [('test me', 'Sure')]
60
  Sure, [('test me', 'Sure,')]
 
157
 
158
  msg.submit(respond2, [msg, chatbot], [msg, chatbot])
159
 
160
+ # demo.queue(max_size=2).launch()