|
import json |
|
import spaces |
|
import subprocess |
|
import gradio as gr |
|
from llama_cpp import Llama |
|
from huggingface_hub import hf_hub_download |
|
|
|
# Stylesheet passed to gr.Blocks(css=...). Every rule is scoped to the element
# with id "qwen-md" and sets KaTeX's display-mode wrappers to display: inline.
CSS = """
#qwen-md .katex-display { display: inline; }
#qwen-md .katex-display>.katex { display: inline; }
#qwen-md .katex-display>.katex>.katex-html { display: inline; }
"""
|
|
|
# Download the quantized GGUF weights into ./models and keep the path that
# hf_hub_download returns, so the loader below always opens the file that was
# actually fetched instead of a second, hand-maintained copy of its name.
model_path = hf_hub_download(
    repo_id="bartowski/Qwen2.5-Math-7B-Instruct-GGUF",
    filename="Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
    local_dir="./models",
)

# Module-level model handle used by respond().
llm = Llama(
    model_path=model_path,
    flash_attn=True,
    n_ctx=8192,
    n_batch=1024,
    chat_format="chatml",
)
|
|
|
|
|
# Answer pane. Built with render=False so it can be placed in the layout
# later through output_md.render().
output_md = gr.Markdown(
    label="Answer",
    value="Answer will be presented here",
    # Raw strings keep every LaTeX backslash literal. The environment
    # delimiters use plain braces: the previous "\{" / "\}" spelling was an
    # invalid Python escape sequence and produced delimiters containing
    # backslash-brace pairs rather than "\begin{equation}" and friends.
    latex_delimiters=[
        {"left": r"\(", "right": r"\)", "display": True},
        {"left": r"\begin{equation}", "right": r"\end{equation}", "display": True},
        {"left": r"\begin{align}", "right": r"\end{align}", "display": True},
        {"left": r"\begin{alignat}", "right": r"\end{alignat}", "display": True},
        {"left": r"\begin{gather}", "right": r"\end{gather}", "display": True},
        {"left": r"\begin{CD}", "right": r"\end{CD}", "display": True},
        {"left": r"\[", "right": r"\]", "display": True},
    ],
    elem_id="qwen-md",
    show_copy_button=True,
    container=True,
    render=False,
)
|
# Input widgets and page banner. All of them are created with render=False
# and placed explicitly with .render() when the page layout is assembled.

# Language the answer should be written in; respond() receives the choice.
target_lang = gr.Dropdown(
    choices=["Chinese", "English"],
    value="Chinese",
    label="Output Language",
    interactive=True,
    render=False,
)

# Generation settings forwarded to respond().
new_tokens = gr.Slider(
    minimum=1,
    maximum=8192,
    value=2048,
    step=1,
    label="Max new tokens",
    render=False,
)
temperature = gr.Slider(
    minimum=0,
    maximum=2.0,
    value=0.5,
    step=0.1,
    label="Temperature",
    render=False,
)
top_p = gr.Slider(
    minimum=0.0,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top P",
    render=False,
)

input_text = gr.Textbox(label="Ask math questions here", render=False)
submit_btn = gr.Button(value="Ask", render=False)

# render=False matches the other components: the banner is positioned with
# banner.render() rather than at construction time.
banner = gr.Markdown(
    value="""
# 📖 Qwen2.5-Math GGUF
This WebUI is based on Qwen2.5-Math-7B-Instruct-GGUF for mathematical reasoning. You can input texts of mathematical or arithmetic problems.
""",
    render=False,
)
|
|
|
|
|
|
|
def respond(
    input_text: str,
    lang: str = "Chinese",
    max_tokens: int = 2048,
    temperature: float = 0.5,
    top_p: float = 0.95,
):
    """Stream the model's answer to a question.

    Builds a two-message chat (system prompt plus the user's text), sends it
    to the module-level ``llm`` with streaming enabled, and yields the answer
    accumulated so far after each streamed chunk.

    Args:
        input_text: Text sent unchanged as the user message.
        lang: ``"Chinese"`` selects the Chinese system prompt; any other
            value selects the English one.
        max_tokens: Forwarded to ``llm.create_chat_completion`` as
            ``max_tokens``.
        temperature: Forwarded to ``llm.create_chat_completion``.
        top_p: Forwarded to ``llm.create_chat_completion``.

    Yields:
        The concatenation of every ``content`` delta received so far.
    """
    if lang == "Chinese":
        sys_msg = "你是一个乐于助人的数学助手. 你使用中文回答问题"
    else:
        sys_msg = "You are a helpful math assistant. You should always provide your answer in English."

    messages = [
        {"role": "system", "content": sys_msg},
        {"role": "user", "content": input_text},
    ]

    stream = llm.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    answer = ""
    for chunk in stream:
        delta = chunk["choices"][0]["delta"]
        # A delta that is empty, has no "content" key, or carries None adds
        # no text; `or ""` keeps the concatenation safe in all three cases.
        answer += delta.get("content") or ""
        # NOTE(review): the source's indentation was lost; yielding once per
        # chunk (not only when text arrived) is assumed -- confirm.
        yield answer
|
|
|
|
|
# Assemble the page from the components constructed earlier in this file.
# NOTE(review): the source's indentation was lost, so the nesting below
# (banner and the two-column row inside one outer column) is a
# reconstruction -- confirm against the intended layout.
with gr.Blocks(css=CSS, theme="NoCrypt/miku") as demo:
    # Clicking the button calls respond() with the current widget values and
    # sends what it yields to the answer pane.
    submit_btn.click(
        fn=respond,
        inputs=[input_text, target_lang, new_tokens, temperature, top_p],
        outputs=output_md,
    )

    with gr.Column():
        banner.render()
        with gr.Row():
            # First column: question box, settings, and the submit button,
            # rendered in this order.
            with gr.Column():
                for _widget in (
                    input_text,
                    target_lang,
                    new_tokens,
                    temperature,
                    top_p,
                    submit_btn,
                ):
                    _widget.render()
            # Second column: the answer.
            with gr.Column():
                output_md.render()


# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()