Cran-May committed on
Commit
5264058
·
verified ·
1 Parent(s): cef1b0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -9
app.py CHANGED
@@ -27,6 +27,11 @@ hf_hub_download(
27
  def get_messages_formatter_type(model_name):
28
  return MessagesFormatterType.LLAMA_3
29
 
 
 
 
 
 
30
  def respond(
31
  message,
32
  history: list[tuple[str, str]],
@@ -97,8 +102,9 @@ def respond(
97
  outputs = ""
98
  for output in stream:
99
  outputs += output
100
- token_count += len(output.split())
101
- yield outputs, history + [(message, outputs)] # Update chatbot history
 
102
 
103
  end_time = time.time()
104
  latency = end_time - start_time
@@ -126,11 +132,11 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet"
126
  code_background_fill_dark="#292733",
127
  )) as demo:
128
 
129
- chatbot = gr.Chatbot(scale=1, show_copy_button=True) # 移除 likeable=True
130
  message = gr.Textbox(label="Your message")
131
  model_dropdown = gr.Dropdown(
132
- ["llama-3.2-1b-instruct-q4_k_m.gguf"],
133
- value="llama-3.2-1b-instruct-q4_k_m.gguf",
134
  label="Model"
135
  )
136
  system_message = gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
@@ -150,10 +156,10 @@ Always strive for accuracy, clarity, and helpfulness in your responses. If you'r
150
 
151
  history = gr.State([])
152
 
153
- def chat_fn(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
154
- return respond(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty)
155
-
156
- message.submit(chat_fn, [message, history, model_dropdown, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty], [chatbot, history])
157
 
158
  gr.Markdown(description)
159
 
 
27
  def get_messages_formatter_type(model_name):
28
  return MessagesFormatterType.LLAMA_3
29
 
30
+ def chat_fn(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
31
+ history_list = history or []
32
+ generator = respond(message, history_list, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty)
33
+ return generator, history_list
34
+
35
  def respond(
36
  message,
37
  history: list[tuple[str, str]],
 
102
  outputs = ""
103
  for output in stream:
104
  outputs += output
105
+ token_count += len(output.split())
106
+ new_history = history + [(message, outputs)]
107
+ yield new_history # 只需要yield更新后的历史记录
108
 
109
  end_time = time.time()
110
  latency = end_time - start_time
 
132
  code_background_fill_dark="#292733",
133
  )) as demo:
134
 
135
+ chatbot = gr.Chatbot(scale=1, show_copy_button=True)
136
  message = gr.Textbox(label="Your message")
137
  model_dropdown = gr.Dropdown(
138
+ ["openbuddy-llama3.2-3b-v23.2-131k-q5_k_m-imat.gguf"], # 更新为实际的模型文件名
139
+ value="openbuddy-llama3.2-3b-v23.2-131k-q5_k_m-imat.gguf",
140
  label="Model"
141
  )
142
  system_message = gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
 
156
 
157
  history = gr.State([])
158
 
159
+ message.submit(
160
+ chat_fn,
161
+ [message, history, model_dropdown, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty],
162
+ [chatbot, history])
163
 
164
  gr.Markdown(description)
165