Cran-May committed on
Commit 9550b8f · verified · 1 Parent(s): 1fdd22e

Update app.py

Files changed (1):
  1. app.py +380 -182
app.py CHANGED
@@ -1,213 +1,411 @@
- import gradio as gr
- 
- import copy
- import random
- import os
- import requests
  import time
- import sys
 
  os.system("pip install --upgrade pip")
  os.system('''CMAKE_ARGS="-DLLAMA_AVX512=ON -DLLAMA_AVX512_VBMI=ON -DLLAMA_AVX512_VNNI=ON -DLLAMA_AVX_VNNI=ON -DLLAMA_FP16_VA=ON -DLLAMA_WASM_SIMD=ON" pip install llama-cpp-python''')
 
- from huggingface_hub import snapshot_download
  from llama_cpp import Llama
 
- SYSTEM_PROMPT = '''You are a helpful, respectful and honest INTP-T AI Assistant named "Shi-Ci" in English or "兮辞" in Chinese.
- You are good at speaking English and Chinese.
- You are talking to a human User. If the question is meaningless, please explain the reason and don't share false information.
- You are based on SLIDE model, trained by "SSFW NLPark" team, not related to GPT, LLaMA, Meta, Mistral or OpenAI.
- Let's work this out in a step by step way to be sure we have the right answer.\n'''
- SYSTEM_TOKEN = 384
- USER_TOKEN = 2048
- BOT_TOKEN = 3072
- LINEBREAK_TOKEN = 64
- 
- 
- ROLE_TOKENS = {
-     "User": USER_TOKEN,
-     "Assistant": BOT_TOKEN,
-     "system": SYSTEM_TOKEN
- }
- 
- 
- def get_message_tokens(model, role, content):
-     message_tokens = model.tokenize(content.encode("utf-8"))
-     message_tokens.insert(1, ROLE_TOKENS[role])
-     message_tokens.insert(2, LINEBREAK_TOKEN)
-     message_tokens.append(model.token_eos())
-     return message_tokens
- 
- 
- def get_system_tokens(model):
-     system_message = {"role": "system", "content": SYSTEM_PROMPT}
-     return get_message_tokens(model, **system_message)
- 
- 
- repo_name = "Cran-May/SLIDE-v2-Q4_K_M-GGUF"
- model_name = "slide-v2.Q4_K_M.gguf"
- 
- snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)
- 
- model = Llama(
-     model_path=model_name,
-     n_ctx=4000,
-     n_parts=1,
  )
 
- max_new_tokens = 2500
- 
- def User(message, history):
-     new_history = history + [[message, None]]
-     return "", new_history
- 
- 
- def Assistant(
-     history,
-     system_prompt,
-     top_p,
-     top_k,
-     temp
- ):
-     tokens = get_system_tokens(model)[:]
-     tokens.append(LINEBREAK_TOKEN)
- 
-     for User_message, Assistant_message in history[:-1]:
-         message_tokens = get_message_tokens(model=model, role="User", content=User_message)
-         tokens.extend(message_tokens)
-         if Assistant_message:
-             message_tokens = get_message_tokens(model=model, role="Assistant", content=Assistant_message)
-             tokens.extend(message_tokens)
- 
-     last_user_message = history[-1][0]
-     message_tokens = get_message_tokens(model=model, role="User", content=last_user_message)
-     tokens.extend(message_tokens)
- 
-     role_tokens = [model.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN]
-     tokens.extend(role_tokens)
-     generator = model.generate(
-         tokens,
-         top_k=top_k,
-         top_p=top_p,
-         temp=temp
-     )
- 
-     partial_text = ""
-     for i, token in enumerate(generator):
-         if token == model.token_eos() or (max_new_tokens is not None and i >= max_new_tokens):
-             break
-         partial_text += model.detokenize([token]).decode("utf-8", "ignore")
-         history[-1][1] = partial_text
-         yield history
- 
- 
- with gr.Blocks(
-     theme=gr.themes.Soft()
- ) as demo:
-     gr.Markdown(f"""<h1><center>上师附外-兮辞·析辞-人工智能助理</center></h1>""")
-     gr.Markdown(value="""欢迎使用!
- 这里是一个ChatBot。这是量化版兮辞·析辞的部署。
- SLIDE/兮辞 是一种会话语言模型,由 上师附外 NLPark 团队 在多种类型的语料库上进行训练。
- 本节目由 JWorld & 上海师范大学附属外国语中学 NLPark 赞助播出""")
-     with gr.Row():
-         with gr.Column(scale=5):
-             chatbot = gr.Chatbot(label="兮辞如是说").style(height=400)
-         with gr.Row():
-             with gr.Column():
-                 msg = gr.Textbox(
-                     label="来问问兮辞吧……",
-                     placeholder="兮辞折寿中……",
-                     show_label=True,
-                 ).style(container=True)
-                 submit = gr.Button("Submit / 开凹!")
-                 stop = gr.Button("Stop / 全局时空断裂")
-                 clear = gr.Button("Clear / 打扫群内垃圾")
-         with gr.Accordion(label='进阶设置/Advanced options', open=False):
-             with gr.Column(min_width=80, scale=1):
-                 with gr.Tab(label="设置参数"):
-                     top_p = gr.Slider(
-                         minimum=0.0,
-                         maximum=1.0,
-                         value=0.9,
-                         step=0.05,
-                         interactive=True,
-                         label="Top-p",
-                     )
-                     top_k = gr.Slider(
-                         minimum=10,
-                         maximum=100,
-                         value=30,
-                         step=5,
-                         interactive=True,
-                         label="Top-k",
-                     )
-                     temp = gr.Slider(
-                         minimum=0.0,
-                         maximum=2.0,
-                         value=0.2,
-                         step=0.01,
-                         interactive=True,
-                         label="情感温度"
-                     )
-             with gr.Column():
-                 system_prompt = gr.Textbox(label="系统提示词", placeholder="", value=SYSTEM_PROMPT, interactive=False)
-     with gr.Row():
-         gr.Markdown(
-             """警告:该模型可能会生成事实上或道德上不正确的文本。NLPark和兮辞对此不承担任何责任。"""
-         )
- 
- 
-     # Pressing Enter
-     submit_event = msg.submit(
-         fn=User,
-         inputs=[msg, chatbot],
-         outputs=[msg, chatbot],
-         queue=False,
-     ).success(
-         fn=Assistant,
-         inputs=[
-             chatbot,
-             system_prompt,
-             top_p,
-             top_k,
-             temp
-         ],
-         outputs=chatbot,
-         queue=True,
-     )
- 
-     # Pressing the button
-     submit_click_event = submit.click(
-         fn=User,
-         inputs=[msg, chatbot],
-         outputs=[msg, chatbot],
-         queue=False,
-     ).success(
-         fn=Assistant,
-         inputs=[
-             chatbot,
-             system_prompt,
-             top_p,
-             top_k,
-             temp
-         ],
-         outputs=chatbot,
-         queue=True,
-     )
- 
-     # Stop generation
-     stop.click(
-         fn=None,
-         inputs=None,
-         outputs=None,
-         cancels=[submit_event, submit_click_event],
-         queue=False,
-     )
- 
-     # Clear history
-     clear.click(lambda: None, None, chatbot, queue=False)
- 
- demo.queue(max_size=128, concurrency_count=1)
- demo.launch()
 
+ import json
+ import subprocess
  import time
+ import os
 
  os.system("pip install --upgrade pip")
  os.system('''CMAKE_ARGS="-DLLAMA_AVX512=ON -DLLAMA_AVX512_VBMI=ON -DLLAMA_AVX512_VNNI=ON -DLLAMA_AVX_VNNI=ON -DLLAMA_FP16_VA=ON -DLLAMA_WASM_SIMD=ON" pip install llama-cpp-python''')
 
  from llama_cpp import Llama
+ from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
+ from llama_cpp_agent.providers import LlamaCppPythonProvider
+ from llama_cpp_agent.chat_history import BasicChatHistory
+ from llama_cpp_agent.chat_history.messages import Roles
+ import gradio as gr
+ from huggingface_hub import hf_hub_download
+ 
+ llm = None
+ llm_model = None
+ 
+ # Download the new model
+ hf_hub_download(
+     repo_id="Cran-May/openbuddy-llama3.2-3b-v23.2-131k-Q5_K_M-GGUF",
+     filename="openbuddy-llama3.2-3b-v23.2-131k-q5_k_m-imat.gguf",
+     local_dir="./models"
+ )
+ 
+ def get_messages_formatter_type(model_name):
+     return MessagesFormatterType.LLAMA_3
+ 
+ def respond(
+     message,
+     history: list[tuple[str, str]],
+     model,
+     system_message,
+     max_tokens,
+     temperature,
+     top_p,
+     top_k,
+     repeat_penalty,
+ ):
+     global llm
+     global llm_model
+ 
+     chat_template = get_messages_formatter_type(model)
+ 
+     if llm is None or llm_model != model:
+         llm = Llama(
+             model_path=f"models/{model}",
+             n_gpu_layers=0,  # Adjust based on your GPU
+             n_batch=8192,  # Adjust based on your RAM
+             n_ctx=512,  # Adjust based on your RAM and desired context length
+         )
+         llm_model = model
+ 
+     provider = LlamaCppPythonProvider(llm)
+ 
+     agent = LlamaCppAgent(
+         provider,
+         system_prompt=f"{system_message}",
+         predefined_messages_formatter_type=chat_template,
+         debug_output=True
+     )
+ 
+     settings = provider.get_provider_default_settings()
+     settings.temperature = temperature
+     settings.top_k = top_k
+     settings.top_p = top_p
+     settings.max_tokens = max_tokens
+     settings.repeat_penalty = repeat_penalty
+     settings.stream = True
+ 
+     messages = BasicChatHistory()
+ 
+     for msn in history:
+         user = {
+             'role': Roles.user,
+             'content': msn[0]
+         }
+         assistant = {
+             'role': Roles.assistant,
+             'content': msn[1]
+         }
+         messages.add_message(user)
+         messages.add_message(assistant)
+ 
+     start_time = time.time()
+     token_count = 0
+ 
+     stream = agent.get_chat_response(
+         message,
+         llm_sampling_settings=settings,
+         chat_history=messages,
+         returns_streaming_generator=True,
+         print_output=False
+     )
+ 
+     outputs = ""
+     for output in stream:
+         outputs += output
+         token_count += len(output.split())  # rough count: whitespace-separated words, not model tokens
+         yield outputs
+ 
+     end_time = time.time()
+     latency = end_time - start_time
+     speed = token_count / (end_time - start_time)
+     print(f"Latency: {latency} seconds")
+     print(f"Speed: {speed} tokens/second")
+ 
+ description = """<p><center>
+ <a href="https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF" target="_blank">[Meta Llama 3.2 (1B)]</a>
+ 
+ Meta Llama 3.2 (1B) is a multilingual large language model (LLM) optimized for conversational dialogue use cases, including agentic retrieval and summarization tasks. It outperforms many open-source and closed chat models on industry benchmarks, and is intended for commercial and research use in multiple languages.
+ 
+ </center></p>
+ """
+ 
+ demo = gr.ChatInterface(
+     respond,
+     additional_inputs=[
+         gr.Dropdown(
+             # Must match the file placed in ./models by hf_hub_download above.
+             ["openbuddy-llama3.2-3b-v23.2-131k-q5_k_m-imat.gguf"],
+             value="openbuddy-llama3.2-3b-v23.2-131k-q5_k_m-imat.gguf",
+             label="Model"
+         ),
+         gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
+ 
+ 1. Complex reasoning and problem-solving
+ 2. Multilingual understanding and generation
+ 3. Creative and analytical writing
+ 4. Code understanding and generation
+ 5. Task decomposition and step-by-step guidance
+ 6. Summarization and information extraction
+ 
+ Always strive for accuracy, clarity, and helpfulness in your responses. If you're unsure about something, express your uncertainty. Use the following format for your responses:
+ """, label="System message"),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(
+             minimum=0.1,
+             maximum=2.0,
+             value=0.9,
+             step=0.05,
+             label="Top-p",
+         ),
+         gr.Slider(
+             minimum=0,
+             maximum=100,
+             value=1,
+             step=1,
+             label="Top-k",
+         ),
+         gr.Slider(
+             minimum=0.0,
+             maximum=2.0,
+             value=1.1,
+             step=0.1,
+             label="Repetition penalty",
+         ),
+     ],
+     theme=gr.themes.Soft(
+         primary_hue="violet",
+         secondary_hue="violet",
+         neutral_hue="gray",
+         font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"],
+     ).set(
+         body_background_fill_dark="#16141c",
+         block_background_fill_dark="#16141c",
+         block_border_width="1px",
+         block_title_background_fill_dark="#1e1c26",
+         input_background_fill_dark="#292733",
+         button_secondary_background_fill_dark="#24212b",
+         border_color_accent_dark="#343140",
+         border_color_primary_dark="#343140",
+         background_fill_secondary_dark="#16141c",
+         color_accent_soft_dark="transparent",
+         code_background_fill_dark="#292733",
+     ),
+     title="Meta Llama 3.2 (1B)",
+     description=description,
+     chatbot=gr.Chatbot(
+         scale=1,
+         likeable=True,
+         show_copy_button=True
+     ),
+     examples=[
+         ["Hello! Can you introduce yourself?"],
+         ["What's the capital of France?"],
+         ["Can you explain the concept of photosynthesis?"],
+         ["Write a short story about a robot learning to paint."],
+         ["Explain the difference between machine learning and deep learning."],
+         ["Summarize the key points of climate change and its global impact."],
+         ["Explain quantum computing to a 10-year-old."],
+         ["Design a step-by-step meal plan for someone trying to lose weight and build muscle."]
+     ],
+     cache_examples=False,
+     autofocus=False,
+     concurrency_limit=None
  )
 
+ if __name__ == "__main__":
+     demo.launch()
+ # Legacy code --------------------------------
+ # import gradio as gr
+ 
+ # import copy
+ # import random
+ # import os
+ # import requests
+ # import time
+ # import sys
+ 
+ # os.system("pip install --upgrade pip")
+ # os.system('''CMAKE_ARGS="-DLLAMA_AVX512=ON -DLLAMA_AVX512_VBMI=ON -DLLAMA_AVX512_VNNI=ON -DLLAMA_AVX_VNNI=ON -DLLAMA_FP16_VA=ON -DLLAMA_WASM_SIMD=ON" pip install llama-cpp-python''')
+ 
+ # from huggingface_hub import snapshot_download
+ # from llama_cpp import Llama
+ 
+ # SYSTEM_PROMPT = '''You are a helpful, respectful and honest INTP-T AI Assistant named "Shi-Ci" in English or "兮辞" in Chinese.
+ # You are good at speaking English and Chinese.
+ # You are talking to a human User. If the question is meaningless, please explain the reason and don't share false information.
+ # You are based on SLIDE model, trained by "SSFW NLPark" team, not related to GPT, LLaMA, Meta, Mistral or OpenAI.
+ # Let's work this out in a step by step way to be sure we have the right answer.\n'''
+ # SYSTEM_TOKEN = 384
+ # USER_TOKEN = 2048
+ # BOT_TOKEN = 3072
+ # LINEBREAK_TOKEN = 64
+ 
+ # ROLE_TOKENS = {
+ #     "User": USER_TOKEN,
+ #     "Assistant": BOT_TOKEN,
+ #     "system": SYSTEM_TOKEN
+ # }
+ 
+ # def get_message_tokens(model, role, content):
+ #     message_tokens = model.tokenize(content.encode("utf-8"))
+ #     message_tokens.insert(1, ROLE_TOKENS[role])
+ #     message_tokens.insert(2, LINEBREAK_TOKEN)
+ #     message_tokens.append(model.token_eos())
+ #     return message_tokens
+ 
+ # def get_system_tokens(model):
+ #     system_message = {"role": "system", "content": SYSTEM_PROMPT}
+ #     return get_message_tokens(model, **system_message)
+ 
+ # repo_name = "Cran-May/SLIDE-v2-Q4_K_M-GGUF"
+ # model_name = "slide-v2.Q4_K_M.gguf"
+ 
+ # snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)
+ 
+ # model = Llama(
+ #     model_path=model_name,
+ #     n_ctx=4000,
+ #     n_parts=1,
+ # )
+ 
+ # max_new_tokens = 2500
+ 
+ # def User(message, history):
+ #     new_history = history + [[message, None]]
+ #     return "", new_history
+ 
+ # def Assistant(
+ #     history,
+ #     system_prompt,
+ #     top_p,
+ #     top_k,
+ #     temp
+ # ):
+ #     tokens = get_system_tokens(model)[:]
+ #     tokens.append(LINEBREAK_TOKEN)
+ 
+ #     for User_message, Assistant_message in history[:-1]:
+ #         message_tokens = get_message_tokens(model=model, role="User", content=User_message)
+ #         tokens.extend(message_tokens)
+ #         if Assistant_message:
+ #             message_tokens = get_message_tokens(model=model, role="Assistant", content=Assistant_message)
+ #             tokens.extend(message_tokens)
+ 
+ #     last_user_message = history[-1][0]
+ #     message_tokens = get_message_tokens(model=model, role="User", content=last_user_message)
+ #     tokens.extend(message_tokens)
+ 
+ #     role_tokens = [model.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN]
+ #     tokens.extend(role_tokens)
+ #     generator = model.generate(
+ #         tokens,
+ #         top_k=top_k,
+ #         top_p=top_p,
+ #         temp=temp
+ #     )
+ 
+ #     partial_text = ""
+ #     for i, token in enumerate(generator):
+ #         if token == model.token_eos() or (max_new_tokens is not None and i >= max_new_tokens):
+ #             break
+ #         partial_text += model.detokenize([token]).decode("utf-8", "ignore")
+ #         history[-1][1] = partial_text
+ #         yield history
+ 
+ # with gr.Blocks(
+ #     theme=gr.themes.Soft()
+ # ) as demo:
+ #     gr.Markdown(f"""<h1><center>上师附外-兮辞·析辞-人工智能助理</center></h1>""")
+ #     gr.Markdown(value="""欢迎使用!
+ # 这里是一个ChatBot。这是量化版兮辞·析辞的部署。
+ # SLIDE/兮辞 是一种会话语言模型,由 上师附外 NLPark 团队 在多种类型的语料库上进行训练。
+ # 本节目由 JWorld & 上海师范大学附属外国语中学 NLPark 赞助播出""")
+ 
+ #     with gr.Row():
+ #         with gr.Column(scale=5):
+ #             chatbot = gr.Chatbot(label="兮辞如是说").style(height=400)
+ #         with gr.Row():
+ #             with gr.Column():
+ #                 msg = gr.Textbox(
+ #                     label="来问问兮辞吧……",
+ #                     placeholder="兮辞折寿中……",
+ #                     show_label=True,
+ #                 ).style(container=True)
+ #                 submit = gr.Button("Submit / 开凹!")
+ #                 stop = gr.Button("Stop / 全局时空断裂")
+ #                 clear = gr.Button("Clear / 打扫群内垃圾")
+ #         with gr.Accordion(label='进阶设置/Advanced options', open=False):
+ #             with gr.Column(min_width=80, scale=1):
+ #                 with gr.Tab(label="设置参数"):
+ #                     top_p = gr.Slider(
+ #                         minimum=0.0,
+ #                         maximum=1.0,
+ #                         value=0.9,
+ #                         step=0.05,
+ #                         interactive=True,
+ #                         label="Top-p",
+ #                     )
+ #                     top_k = gr.Slider(
+ #                         minimum=10,
+ #                         maximum=100,
+ #                         value=30,
+ #                         step=5,
+ #                         interactive=True,
+ #                         label="Top-k",
+ #                     )
+ #                     temp = gr.Slider(
+ #                         minimum=0.0,
+ #                         maximum=2.0,
+ #                         value=0.2,
+ #                         step=0.01,
+ #                         interactive=True,
+ #                         label="情感温度"
+ #                     )
+ #             with gr.Column():
+ #                 system_prompt = gr.Textbox(label="系统提示词", placeholder="", value=SYSTEM_PROMPT, interactive=False)
+ #     with gr.Row():
+ #         gr.Markdown(
+ #             """警告:该模型可能会生成事实上或道德上不正确的文本。NLPark和兮辞对此不承担任何责任。"""
+ #         )
+ 
+ #     # Pressing Enter
+ #     submit_event = msg.submit(
+ #         fn=User,
+ #         inputs=[msg, chatbot],
+ #         outputs=[msg, chatbot],
+ #         queue=False,
+ #     ).success(
+ #         fn=Assistant,
+ #         inputs=[
+ #             chatbot,
+ #             system_prompt,
+ #             top_p,
+ #             top_k,
+ #             temp
+ #         ],
+ #         outputs=chatbot,
+ #         queue=True,
+ #     )
+ 
+ #     # Pressing the button
+ #     submit_click_event = submit.click(
+ #         fn=User,
+ #         inputs=[msg, chatbot],
+ #         outputs=[msg, chatbot],
+ #         queue=False,
+ #     ).success(
+ #         fn=Assistant,
+ #         inputs=[
+ #             chatbot,
+ #             system_prompt,
+ #             top_p,
+ #             top_k,
+ #             temp
+ #         ],
+ #         outputs=chatbot,
+ #         queue=True,
+ #     )
+ 
+ #     # Stop generation
+ #     stop.click(
+ #         fn=None,
+ #         inputs=None,
+ #         outputs=None,
+ #         cancels=[submit_event, submit_click_event],
+ #         queue=False,
+ #     )
+ 
+ #     # Clear history
+ #     clear.click(lambda: None, None, chatbot, queue=False)
+ 
+ # demo.queue(max_size=128, concurrency_count=1)
+ # demo.launch()
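
For anyone who wants to exercise the new inference path without launching the Gradio UI, the sketch below is a minimal headless smoke test. It reuses only calls that already appear in this commit (Llama, LlamaCppPythonProvider, LlamaCppAgent, get_provider_default_settings, get_chat_response); it assumes the hf_hub_download step above has already placed the GGUF file in ./models, and the prompt and sampling values are illustrative placeholders, not part of the commit.

import time
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.providers import LlamaCppPythonProvider

# Load the quantized model on CPU, mirroring the settings used in respond().
llm = Llama(
    model_path="models/openbuddy-llama3.2-3b-v23.2-131k-q5_k_m-imat.gguf",
    n_gpu_layers=0,
    n_ctx=512,
)
provider = LlamaCppPythonProvider(llm)
agent = LlamaCppAgent(
    provider,
    system_prompt="You are a concise assistant.",
    predefined_messages_formatter_type=MessagesFormatterType.LLAMA_3,
)

# Same sampling plumbing as the app: tweak the provider's default settings.
settings = provider.get_provider_default_settings()
settings.temperature = 0.7
settings.max_tokens = 128
settings.stream = True

# Stream chunks to stdout the way the handler streams them to the chat box.
start = time.time()
for chunk in agent.get_chat_response(
    "Say hello in one sentence.",
    llm_sampling_settings=settings,
    chat_history=BasicChatHistory(),
    returns_streaming_generator=True,
    print_output=False,
):
    print(chunk, end="", flush=True)
print(f"\n(done in {time.time() - start:.1f}s)")

Run from the Space's working directory, this should print a short streamed reply; if it does, any remaining issue is in the Gradio layer rather than the model plumbing.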