Cran-May committed on
Commit
5264058
·
verified ·
1 Parent(s): cef1b0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -9
app.py CHANGED
@@ -27,6 +27,11 @@ hf_hub_download(
27
  def get_messages_formatter_type(model_name):
28
  return MessagesFormatterType.LLAMA_3
29
 
 
 
 
 
 
30
  def respond(
31
  message,
32
  history: list[tuple[str, str]],
@@ -97,8 +102,9 @@ def respond(
97
  outputs = ""
98
  for output in stream:
99
  outputs += output
100
- token_count += len(output.split())
101
- yield outputs, history + [(message, outputs)] # Update chatbot history
 
102
 
103
  end_time = time.time()
104
  latency = end_time - start_time
@@ -126,11 +132,11 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet"
126
  code_background_fill_dark="#292733",
127
  )) as demo:
128
 
129
- chatbot = gr.Chatbot(scale=1, show_copy_button=True) # 移除 likeable=True
130
  message = gr.Textbox(label="Your message")
131
  model_dropdown = gr.Dropdown(
132
- ["llama-3.2-1b-instruct-q4_k_m.gguf"],
133
- value="llama-3.2-1b-instruct-q4_k_m.gguf",
134
  label="Model"
135
  )
136
  system_message = gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
@@ -150,10 +156,10 @@ Always strive for accuracy, clarity, and helpfulness in your responses. If you'r
150
 
151
  history = gr.State([])
152
 
153
- def chat_fn(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
154
- return respond(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty)
155
-
156
- message.submit(chat_fn, [message, history, model_dropdown, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty], [chatbot, history])
157
 
158
  gr.Markdown(description)
159
 
 
27
  def get_messages_formatter_type(model_name):
28
  return MessagesFormatterType.LLAMA_3
29
 
30
+ def chat_fn(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
31
+ history_list = history or []
32
+ generator = respond(message, history_list, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty)
33
+ return generator, history_list
34
+
35
  def respond(
36
  message,
37
  history: list[tuple[str, str]],
 
102
  outputs = ""
103
  for output in stream:
104
  outputs += output
105
+ token_count += len(output.split())
106
+ new_history = history + [(message, outputs)]
107
+ yield new_history # 只需要yield更新后的历史记录
108
 
109
  end_time = time.time()
110
  latency = end_time - start_time
 
132
  code_background_fill_dark="#292733",
133
  )) as demo:
134
 
135
+ chatbot = gr.Chatbot(scale=1, show_copy_button=True)
136
  message = gr.Textbox(label="Your message")
137
  model_dropdown = gr.Dropdown(
138
+ ["openbuddy-llama3.2-3b-v23.2-131k-q5_k_m-imat.gguf"], # 更新为实际的模型文件名
139
+ value="openbuddy-llama3.2-3b-v23.2-131k-q5_k_m-imat.gguf",
140
  label="Model"
141
  )
142
  system_message = gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
 
156
 
157
  history = gr.State([])
158
 
159
+ message.submit(
160
+ chat_fn,
161
+ [message, history, model_dropdown, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
162
+ [chatbot, history])
163
 
164
  gr.Markdown(description)
165