Cran-May committed on
Commit
cef1b0c
1 Parent(s): dd432ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -70
app.py CHANGED
@@ -98,7 +98,7 @@ def respond(
98
  for output in stream:
99
  outputs += output
100
  token_count += len(output.split())
101
- yield outputs
102
 
103
  end_time = time.time()
104
  latency = end_time - start_time
@@ -108,57 +108,11 @@ def respond(
108
 
109
  description = """<p><center>
110
  <a href="https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF" target="_blank">[Meta Llama 3.2 (1B)]</a>
111
-
112
  Meta Llama 3.2 (1B) is a multilingual large language model (LLM) optimized for conversational dialogue use cases, including agentic retrieval and summarization tasks. It outperforms many open-source and closed chat models on industry benchmarks, and is intended for commercial and research use in multiple languages.
113
-
114
  </center></p>
115
  """
116
 
117
- demo = gr.ChatInterface(
118
- respond,
119
- additional_inputs=[
120
- gr.Dropdown([
121
- "llama-3.2-1b-instruct-q4_k_m.gguf"
122
- ],
123
- value="llama-3.2-1b-instruct-q4_k_m.gguf",
124
- label="Model"
125
- ),
126
- gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
127
-
128
- 1. Complex reasoning and problem-solving
129
- 2. Multilingual understanding and generation
130
- 3. Creative and analytical writing
131
- 4. Code understanding and generation
132
- 5. Task decomposition and step-by-step guidance
133
- 6. Summarization and information extraction
134
-
135
- Always strive for accuracy, clarity, and helpfulness in your responses. If you're unsure about something, express your uncertainty. Use the following format for your responses:
136
- """, label="System message"),
137
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens"),
138
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
139
- gr.Slider(
140
- minimum=0.1,
141
- maximum=2.0,
142
- value=0.9,
143
- step=0.05,
144
- label="Top-p",
145
- ),
146
- gr.Slider(
147
- minimum=0,
148
- maximum=100,
149
- value=1,
150
- step=1,
151
- label="Top-k",
152
- ),
153
- gr.Slider(
154
- minimum=0.0,
155
- maximum=2.0,
156
- value=1.1,
157
- step=0.1,
158
- label="Repetition penalty",
159
- ),
160
- ],
161
- theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
162
  body_background_fill_dark="#16141c",
163
  block_background_fill_dark="#16141c",
164
  block_border_width="1px",
@@ -170,31 +124,42 @@ Always strive for accuracy, clarity, and helpfulness in your responses. If you'r
170
  background_fill_secondary_dark="#16141c",
171
  color_accent_soft_dark="transparent",
172
  code_background_fill_dark="#292733",
173
- ),
174
- title="Meta Llama 3.2 (1B)",
175
- description=description,
176
- chatbot=gr.Chatbot(
177
- scale=1,
178
- likeable=True,
179
- show_copy_button=True
180
- ),
181
- examples=[
182
- ["Hello! Can you introduce yourself?"],
183
- ["What's the capital of France?"],
184
- ["Can you explain the concept of photosynthesis?"],
185
- ["Write a short story about a robot learning to paint."],
186
- ["Explain the difference between machine learning and deep learning."],
187
- ["Summarize the key points of climate change and its global impact."],
188
- ["Explain quantum computing to a 10-year-old."],
189
- ["Design a step-by-step meal plan for someone trying to lose weight and build muscle."]
190
- ],
191
- cache_examples=False,
192
- autofocus=False,
193
- concurrency_limit=None
194
- )
 
 
 
 
 
 
 
 
 
 
195
 
196
  if __name__ == "__main__":
197
  demo.launch()
 
198
  # 旧版代码--------------------------------
199
  # import gradio as gr
200
 
 
98
  for output in stream:
99
  outputs += output
100
  token_count += len(output.split())
101
+ yield outputs, history + [(message, outputs)] # Update chatbot history
102
 
103
  end_time = time.time()
104
  latency = end_time - start_time
 
108
 
109
  description = """<p><center>
110
  <a href="https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF" target="_blank">[Meta Llama 3.2 (1B)]</a>
 
111
  Meta Llama 3.2 (1B) is a multilingual large language model (LLM) optimized for conversational dialogue use cases, including agentic retrieval and summarization tasks. It outperforms many open-source and closed chat models on industry benchmarks, and is intended for commercial and research use in multiple languages.
 
112
  </center></p>
113
  """
114
 
115
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  body_background_fill_dark="#16141c",
117
  block_background_fill_dark="#16141c",
118
  block_border_width="1px",
 
124
  background_fill_secondary_dark="#16141c",
125
  color_accent_soft_dark="transparent",
126
  code_background_fill_dark="#292733",
127
+ )) as demo:
128
+
129
+ chatbot = gr.Chatbot(scale=1, show_copy_button=True) # 移除 likeable=True
130
+ message = gr.Textbox(label="Your message")
131
+ model_dropdown = gr.Dropdown(
132
+ ["llama-3.2-1b-instruct-q4_k_m.gguf"],
133
+ value="llama-3.2-1b-instruct-q4_k_m.gguf",
134
+ label="Model"
135
+ )
136
+ system_message = gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
137
+ 1. Complex reasoning and problem-solving
138
+ 2. Multilingual understanding and generation
139
+ 3. Creative and analytical writing
140
+ 4. Code understanding and generation
141
+ 5. Task decomposition and step-by-step guidance
142
+ 6. Summarization and information extraction
143
+ Always strive for accuracy, clarity, and helpfulness in your responses. If you're unsure about something, express your uncertainty. Use the following format for your responses:
144
+ """, label="System message")
145
+ max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max tokens")
146
+ temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
147
+ top_p = gr.Slider(minimum=0.1, maximum=2.0, value=0.9, step=0.05, label="Top-p")
148
+ top_k = gr.Slider(minimum=0, maximum=100, value=1, step=1, label="Top-k")
149
+ repeat_penalty = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")
150
+
151
+ history = gr.State([])
152
+
153
+ def chat_fn(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
154
+ return respond(message, history, model, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty)
155
+
156
+ message.submit(chat_fn, [message, history, model_dropdown, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty], [chatbot, history])
157
+
158
+ gr.Markdown(description)
159
 
160
  if __name__ == "__main__":
161
  demo.launch()
162
+
163
  # 旧版代码--------------------------------
164
  # import gradio as gr
165