import spaces
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
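# CSS that forces KaTeX display-math blocks inside the #qwen-md component to render inline.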
CSS = """
#qwen-md .katex-display { display: inline; }
#qwen-md .katex-display>.katex { display: inline; }
#qwen-md .katex-display>.katex>.katex-html { display: inline; }
"""
hf_hub_download(
repo_id="bartowski/Qwen2.5-Math-7B-Instruct-GGUF",
filename="Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
local_dir="./models",
)
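# Load the model with llama-cpp-python: flash attention, an 8192-token context window,
# and the ChatML chat template that Qwen models expect.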
llm = Llama(
model_path="models/Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
flash_attn=True,
n_ctx=8192,
n_batch=1024,
chat_format="chatml",
)
# Gradio components (created with render=False so they can be placed manually in the Blocks layout below)
output_md = gr.Markdown(
label="Answer",
value="Answer will be presented here",
latex_delimiters=[
{"left": "\\(", "right": "\\)", "display": True},
{"left": "\\begin\{equation\}", "right": "\\end\{equation\}", "display": True},
{"left": "\\begin\{align\}", "right": "\\end\{align\}", "display": True},
{"left": "\\begin\{alignat\}", "right": "\\end\{alignat\}", "display": True},
{"left": "\\begin\{gather\}", "right": "\\end\{gather\}", "display": True},
{"left": "\\begin\{CD\}", "right": "\\end\{CD\}", "display": True},
{"left": "\\[", "right": "\\]", "display": True},
],
elem_id="qwen-md",
show_copy_button=True,
container=True,
render=False,
)
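# Generation controls: output language, token budget, and sampling parameters.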
target_lang = gr.Dropdown(
choices=["Chinese", "English"],
value="Chinese",
label="Output Language",
interactive=True,
render=False,
)
new_tokens = gr.Slider(
minimum=1, maximum=8192, value=2048, step=1, label="Max new tokens", render=False
)
temperature = gr.Slider(
minimum=0, maximum=2.0, value=0.5, step=0.1, label="Temperature", render=False
)
top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top P", render=False)
input_text = gr.Textbox(label="Ask math questions here", render=False)
submit_btn = gr.Button(value="Ask", render=False)
banner = gr.Markdown(value="""
# 📖 Qwen2.5-Math GGUF
This WebUI is based on Qwen2.5-Math-7B-Instruct-GGUF for mathematical reasoning. You can input texts of mathematical or arithmetic problems.
"""
)
# Gradio callback: stream the model's answer to the Markdown output
def respond(
input_text,
lang="Chinese",
max_tokens=2048,
temperature=0.5,
top_p=0.95,
):
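    """Stream the model's answer to a single math question.

    Yields the accumulated Markdown after every streamed token so Gradio
    updates the answer incrementally.
    """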
if lang == "Chinese":
sys_msg = "你是一个乐于助人的数学助手. 你使用中文回答问题"
else:
sys_msg = "You are a helpful math assistant. You should always provide your answer in English."
messages = [
{
"role": "system",
"content": sys_msg,
},
{"role": "user", "content": input_text},
]
response = ""
response = llm.create_chat_completion(
messages=messages,
stream=True,
max_tokens=max_tokens,
temperature=temperature,
top_p=top_p,
)
message_repl = ""
for chunk in response:
if len(chunk['choices'][0]["delta"]) != 0 and "content" in chunk['choices'][0]["delta"]:
message_repl = message_repl + \
chunk['choices'][0]["delta"]["content"]
yield message_repl
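# Assemble the UI: banner on top, question and generation controls on the left,
# streamed answer on the right.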
with gr.Blocks(css=CSS, theme="NoCrypt/miku") as demo:
submit_btn.click(
fn=respond,
inputs=[input_text, target_lang, new_tokens, temperature, top_p],
outputs=output_md,
)
with gr.Column():
banner.render()
with gr.Row():
with gr.Column():
input_text.render()
target_lang.render()
new_tokens.render()
temperature.render()
top_p.render()
submit_btn.render()
with gr.Column():
output_md.render()
if __name__ == "__main__":
demo.launch()