Tobias Bergmann committed
Commit 518754f · 1 Parent(s): 7d00bdf
Files changed (1)
  1. app.py +8 -8
app.py CHANGED
@@ -2,7 +2,7 @@ from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 import gradio as gr
 from typing import Tuple, List
-import time # Import the time module
+import time
 
 DESCRIPTION = f"""
 # Chat with Arco 500M as GGUF on CPU
@@ -36,26 +36,26 @@ def predict(message: str, history: List[List[str]], max_new_tokens: int = DEFAUL
 
     # Initialize reply for this round
     reply = ""
-
+
     # Initialize token count and start time
     token_count = 0
     start_time = time.time()
-
+
     # This will produce a generator of output chunks
     stream = pipe(
-        prompt,
+        prompt,
         max_tokens=max_new_tokens,
         stop=["</s>"],
         stream=True
     )
-
+
     # Send each token stream output to the user
     for output in stream:
         new_text = output['choices'][0]['text']
         reply += new_text
         token_count += len(new_text.split()) # Estimate tokens by counting spaces
         history[-1][1] = reply # Update the current reply in history
-
+
         # Calculate elapsed time and TPS
         elapsed_time = time.time() - start_time
         if elapsed_time > 0:
@@ -81,6 +81,6 @@ with gr.Blocks() as demo:
         label="Max New Tokens",
     )
     status_field = gr.Text(label="Status", interactive=False, visible=True) # Add Status field
-    textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot], progress=status_field)
+    textbox.submit(predict, [textbox, chatbot, max_new_tokens_slider], [textbox, chatbot], )
 
-demo.queue().launch()
+demo.queue().launch()
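
The dropped `progress=status_field` keyword does not appear in the signature of Gradio's `.submit()` event listener, which is presumably why this commit removes it. If the Status field should still show the tokens-per-second readout, one option is to route the status text through the event's outputs and yield it from `predict`. A minimal sketch of that pattern follows; the stub `pipe`, the slider range, and the three-value yield shape are assumptions for illustration, not code from this repo:

import time

import gradio as gr


# Hypothetical stand-in for the llama.cpp pipeline in app.py: it streams the
# prompt back word by word so the sketch runs without model weights.
def pipe(prompt, max_tokens=256, stop=None, stream=True):
    for word in prompt.split()[:max_tokens]:
        time.sleep(0.05)
        yield {"choices": [{"text": word + " "}]}


def predict(message, history, max_new_tokens):
    history = history + [[message, ""]]
    reply, token_count, start_time = "", 0, time.time()
    for output in pipe(message, max_tokens=max_new_tokens, stop=["</s>"], stream=True):
        new_text = output["choices"][0]["text"]
        reply += new_text
        token_count += len(new_text.split())  # same whitespace token estimate as app.py
        history[-1][1] = reply
        elapsed_time = time.time() - start_time
        tps = token_count / elapsed_time if elapsed_time > 0 else 0.0
        # Yield the status text as a third output value instead of passing a
        # `progress=` kwarg, which the event listener does not accept.
        yield "", history, f"{tps:.1f} tokens/s"


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    textbox = gr.Textbox(placeholder="Type a message...")
    max_new_tokens_slider = gr.Slider(1, 1024, value=256, label="Max New Tokens")
    status_field = gr.Text(label="Status", interactive=False)
    textbox.submit(
        predict,
        [textbox, chatbot, max_new_tokens_slider],
        [textbox, chatbot, status_field],  # status_field listed as an output
    )

demo.queue().launch()

Because `predict` is a generator, `demo.queue()` keeps each yielded (textbox, chatbot, status) triple streaming to the browser as tokens arrive.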