Lyte committed on
Commit
3f93878
1 Parent(s): ee69746

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -18
app.py CHANGED
@@ -25,45 +25,57 @@ LICENSE = """
25
  --- Apache 2.0 License ---
26
  """
27
 
28
- def generate_text(message, history, max_tokens=512, temperature=0.9, top_p=0.95):
 
 
 
29
  """Generate a response using the Llama model."""
30
- temp = ""
 
 
31
  response = model.create_chat_completion(
32
- messages=[{"role": "user", "content": message}],
33
  temperature=temperature,
34
  max_tokens=max_tokens,
35
  top_p=top_p,
36
  stream=True,
37
  )
 
 
38
  for streamed in response:
39
  delta = streamed["choices"][0].get("delta", {})
40
  text_chunk = delta.get("content", "")
41
- temp += text_chunk
42
- yield temp
43
 
44
  with gr.Blocks() as demo:
45
  gr.Markdown(DESCRIPTION)
46
 
47
- chatbot = gr.ChatInterface(
48
- generate_text,
49
- title="FreedomIntelligence/HuatuoGPT-o1-7B | GGUF Demo",
50
- description="Edit settings below if needed.",
 
 
 
 
 
 
 
 
 
 
 
51
  examples=[
52
  ["How many r's are in the word strawberry?"],
53
  ['How to stop a cough?'],
54
  ['How do I relieve feet pain?'],
55
  ],
56
- cache_examples=False,
57
- fill_height=True,
58
- fill_width=True
59
  )
60
 
61
- #with gr.Accordion("Adjust Parameters", open=False):
62
- # gr.Slider(minimum=512, maximum=4096, value=1024, step=1, label="Max Tokens")
63
- # gr.Slider(minimum=0.1, maximum=1.5, value=0.9, step=0.1, label="Temperature")
64
- # gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
65
-
66
- #gr.Markdown(LICENSE)
67
 
68
  if __name__ == "__main__":
69
  demo.launch()
 
25
  --- Apache 2.0 License ---
26
  """
27
 
28
+ def user(message, history):
29
+ return "", history + [{"role": "user", "content": message}]
30
+
31
+ def generate_text(history, max_tokens=512, temperature=0.9, top_p=0.95):
32
  """Generate a response using the Llama model."""
33
+ messages = [{"role": item["role"], "content": item["content"]} for item in history[:-1]]
34
+ message = history[-1]['content']
35
+
36
  response = model.create_chat_completion(
37
+ messages=messages + [{"role": "user", "content": message}],
38
  temperature=temperature,
39
  max_tokens=max_tokens,
40
  top_p=top_p,
41
  stream=True,
42
  )
43
+ history.append({"role": "assistant", "content": ""})
44
+
45
  for streamed in response:
46
  delta = streamed["choices"][0].get("delta", {})
47
  text_chunk = delta.get("content", "")
48
+ history[-1]['content'] += text_chunk
49
+ yield history
50
 
51
  with gr.Blocks() as demo:
52
  gr.Markdown(DESCRIPTION)
53
 
54
+ chatbot = gr.Chatbot(type="messages")
55
+ msg = gr.Textbox()
56
+ clear = gr.Button("Clear")
57
+
58
+ with gr.Accordion("Adjust Parameters", open=False):
59
+ max_tokens = gr.Slider(minimum=512, maximum=4096, value=1024, step=1, label="Max Tokens")
60
+ temperature = gr.Slider(minimum=0.1, maximum=1.5, value=0.9, step=0.1, label="Temperature")
61
+ top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
62
+
63
+ msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
64
+ generate_text, [chatbot, max_tokens, temperature, top_p], chatbot
65
+ )
66
+ clear.click(lambda: None, None, chatbot, queue=False)
67
+
68
+ gr.Examples(
69
  examples=[
70
  ["How many r's are in the word strawberry?"],
71
  ['How to stop a cough?'],
72
  ['How do I relieve feet pain?'],
73
  ],
74
+ inputs=msg,
75
+ label="Examples",
 
76
  )
77
 
78
+ gr.Markdown(LICENSE)
 
 
 
 
 
79
 
80
  if __name__ == "__main__":
81
  demo.launch()