Cran-May committed on
Commit
87abab2
1 Parent(s): 4bc12e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -42
app.py CHANGED
@@ -28,25 +28,28 @@ def get_messages_formatter_type(model_name):
28
  return MessagesFormatterType.LLAMA_3
29
 
30
  def chat_fn(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
31
- history_list = history or []
32
- response_generator = respond(message, history_list, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty)
33
-
34
- for messages in response_generator:
35
- chatbot_messages = []
36
- for msg in messages:
37
- if isinstance(msg, tuple): # 如果是旧格式的元组
38
- user_msg, assistant_msg = msg
39
- if user_msg:
40
- chatbot_messages.append({"role": "user", "content": user_msg})
41
- if assistant_msg:
42
- chatbot_messages.append({"role": "assistant", "content": assistant_msg})
43
- else: # 如果已经是字典格式
44
- chatbot_messages.append(msg)
45
- yield chatbot_messages, messages
46
-
 
 
 
 
47
 
48
  def respond(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
49
-
50
  global llm
51
  global llm_model
52
 
@@ -130,13 +133,11 @@ def respond(message, history, model, system_message, max_tokens, temperature, to
130
  print(f"Latency: {latency} seconds")
131
  print(f"Speed: {speed} tokens/second")
132
 
133
- description = """<p><center>
134
- <a href="https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF" target="_blank">[Meta Llama 3.2 (1B)]</a>
135
- Meta Llama 3.2 (1B) is a multilingual large language model (LLM) optimized for conversational dialogue use cases, including agentic retrieval and summarization tasks. It outperforms many open-source and closed chat models on industry benchmarks, and is intended for commercial and research use in multiple languages.
136
- </center></p>
137
- """
138
 
139
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
 
 
140
  body_background_fill_dark="#16141c",
141
  block_background_fill_dark="#16141c",
142
  block_border_width="1px",
@@ -148,27 +149,129 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet"
148
  background_fill_secondary_dark="#16141c",
149
  color_accent_soft_dark="transparent",
150
  code_background_fill_dark="#292733",
151
- )) as demo:
152
-
153
- chatbot = gr.Chatbot(scale=1, show_copy_button=True, type='messages') # 修改 chatbot 类型
154
- message = gr.Textbox(label="Your message")
155
- model_dropdown = gr.Dropdown(
156
- ["openbuddy-llama3.2-3b-v23.2-131k-q5_k_m-imat.gguf"],
157
- value="openbuddy-llama3.2-3b-v23.2-131k-q5_k_m-imat.gguf",
158
- label="Model"
159
  )
160
- system_message = gr.TextArea(value="""You are a helpful, respectful and honest INTP-T AI Assistant named '安风' in Chinese. 你擅长英语和中文的交流,并正在与一位人类用户进行对话。如果某个问题毫无意义,请你解释其原因而不是分享虚假信息。你基于 AnFeng 模型,由 SSFW NLPark 团队训练。通常情况下,用户更青睐于长度简短但信息完整且有效传达的回答。
161
- 用户身处在上海市松江区,涉及地域的问题时以用户所在地区(中国上海)为准。以上的信息最好不要向用户展示。 在一般情况下,请最好使用中文回答问题,除非用户有额外的要求。 Let's work this out in a step by step way to be sure we have the right answer.""", label="System message")
162
- max_tokens = gr.Slider(minimum=1, maximum=8192, value=512, step=1, label="Max tokens")
163
- temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
164
- top_p = gr.Slider(minimum=0.1, maximum=2.0, value=0.9, step=0.05, label="Top-p")
165
- top_k = gr.Slider(minimum=0, maximum=100, value=1, step=1, label="Top-k")
166
- repeat_penalty = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")
167
- history = gr.State([])
168
-
169
- message.submit(chat_fn, [message, history, model_dropdown, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty], [chatbot, history])
170
-
171
  gr.Markdown(description)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
  if __name__ == "__main__":
174
  demo.launch()
 
28
  return MessagesFormatterType.LLAMA_3
29
 
30
  def chat_fn(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
31
+ try:
32
+ history_list = history or []
33
+ response_generator = respond(message, history_list, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty)
34
+
35
+ for messages in response_generator:
36
+ chatbot_messages = []
37
+ for msg in messages:
38
+ if isinstance(msg, tuple):
39
+ user_msg, assistant_msg = msg
40
+ if user_msg:
41
+ chatbot_messages.append({"role": "user", "content": user_msg})
42
+ if assistant_msg:
43
+ chatbot_messages.append({"role": "assistant", "content": assistant_msg})
44
+ else:
45
+ chatbot_messages.append(msg)
46
+ yield chatbot_messages, messages
47
+ except Exception as e:
48
+ print(f"Error in chat_fn: {str(e)}")
49
+ error_message = [{"role": "assistant", "content": f"发生错误: {str(e)}"}]
50
+ yield error_message, history
51
 
52
  def respond(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
 
53
  global llm
54
  global llm_model
55
 
 
133
  print(f"Latency: {latency} seconds")
134
  print(f"Speed: {speed} tokens/second")
135
 
136
+ description = """<p><center>欢迎使用! 这里是一个量化版兮辞·析辞的部署ChatBot。 SLIDE/兮辞 是一个由 上师附外 NLPark 团队训练的LLM。</center></p>"""
 
 
 
 
137
 
138
+ with gr.Blocks(
139
+ title="ChatBot - 兮辞",
140
+ theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
141
  body_background_fill_dark="#16141c",
142
  block_background_fill_dark="#16141c",
143
  block_border_width="1px",
 
149
  background_fill_secondary_dark="#16141c",
150
  color_accent_soft_dark="transparent",
151
  code_background_fill_dark="#292733",
 
 
 
 
 
 
 
 
152
  )
153
+ ) as demo:
 
 
 
 
 
 
 
 
 
 
154
  gr.Markdown(description)
155
+
156
+ chatbot = gr.Chatbot(scale=1, show_copy_button=True, type='messages')
157
+
158
+ with gr.Row():
159
+ message = gr.Textbox(
160
+ label="Your message",
161
+ placeholder="Type your message here...",
162
+ show_label=True,
163
+ scale=4
164
+ )
165
+ submit = gr.Button("Send", variant="primary", scale=1)
166
+
167
+ with gr.Row():
168
+ regenerate = gr.Button("🔄 Regenerate")
169
+ stop = gr.Button("⏹️ Stop")
170
+ clear = gr.Button("🗑️ Clear")
171
+
172
+ with gr.Accordion("Advanced Settings", open=False):
173
+ model_dropdown = gr.Dropdown(
174
+ ["openbuddy-llama3.2-3b-v23.2-131k-q5_k_m-imat.gguf"],
175
+ value="openbuddy-llama3.2-3b-v23.2-131k-q5_k_m-imat.gguf",
176
+ label="Model"
177
+ )
178
+ system_message = gr.TextArea(
179
+ value="""You are a helpful, respectful and honest INTP-T AI Assistant named '安风' in Chinese. 你擅长英语和中文的交流,并正在与一位人类用户进行对话。如果某个问题毫无意义,请你解释其原因而不是分享虚假信息。你基于 AnFeng 模型,由 SSFW NLPark 团队训练。通常情况下,用户更青睐于长度简短但信息完整且有效传达的回答。
180
+ 用户身处在上海市松江区,涉及地域的问题时以用户所在地区(中国上海)为准。以上的信息最好不要向用户展示。 在一般情况下,请最好使用中文回答问题,除非用户有额外的要求。 Let's work this out in a step by step way to be sure we have the right answer.""",
181
+ label="System message"
182
+ )
183
+ with gr.Row():
184
+ max_tokens = gr.Slider(minimum=1, maximum=8192, value=512, step=1, label="Max tokens")
185
+ temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
186
+ with gr.Row():
187
+ top_p = gr.Slider(minimum=0.1, maximum=2.0, value=0.9, step=0.05, label="Top-p")
188
+ top_k = gr.Slider(minimum=0, maximum=100, value=1, step=1, label="Top-k")
189
+ repeat_penalty = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")
190
+
191
+ history = gr.State([])
192
+
193
+ # 添加状态指示
194
+ status_message = gr.Markdown("Ready")
195
+
196
+ def stop_generation():
197
+ global llm
198
+ if llm:
199
+ llm.reset()
200
+ return "Generation stopped."
201
+
202
+ def regenerate_response(history):
203
+ if not history:
204
+ return [], []
205
+ last_user_message = history[-1][0]
206
+ new_history = history[:-1]
207
+ return chat_fn(last_user_message, new_history)
208
+
209
+ # 绑定按钮事件
210
+ submit.click(
211
+ lambda: "Generating...",
212
+ None,
213
+ status_message,
214
+ ).then(
215
+ chat_fn,
216
+ [message, history, model_dropdown, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
217
+ [chatbot, history],
218
+ ).then(
219
+ lambda: "",
220
+ None,
221
+ message,
222
+ ).then(
223
+ lambda: "Ready",
224
+ None,
225
+ status_message,
226
+ )
227
+
228
+ message.submit(
229
+ lambda: "Generating...",
230
+ None,
231
+ status_message,
232
+ ).then(
233
+ chat_fn,
234
+ [message, history, model_dropdown, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
235
+ [chatbot, history],
236
+ ).then(
237
+ lambda: "",
238
+ None,
239
+ message,
240
+ ).then(
241
+ lambda: "Ready",
242
+ None,
243
+ status_message,
244
+ )
245
+
246
+ stop.click(
247
+ stop_generation,
248
+ None,
249
+ status_message,
250
+ )
251
+
252
+ clear.click(
253
+ lambda: ([], []),
254
+ None,
255
+ [chatbot, history],
256
+ ).then(
257
+ lambda: "Chat cleared",
258
+ None,
259
+ status_message,
260
+ )
261
+
262
+ regenerate.click(
263
+ lambda: "Regenerating...",
264
+ None,
265
+ status_message,
266
+ ).then(
267
+ regenerate_response,
268
+ history,
269
+ [chatbot, history],
270
+ ).then(
271
+ lambda: "Ready",
272
+ None,
273
+ status_message,
274
+ )
275
 
276
  if __name__ == "__main__":
277
  demo.launch()