import json
import spaces
import subprocess
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Force KaTeX "display" math to render inline inside the #qwen-md Markdown
# component, so equations flow with the surrounding answer text instead of
# being centered on their own line.
CSS = """
#qwen-md .katex-display { display: inline; }
#qwen-md .katex-display>.katex { display: inline; }
#qwen-md .katex-display>.katex>.katex-html { display: inline; }
"""
# Download the quantized GGUF weights from the Hugging Face Hub into
# ./models (a no-op if the file is already present locally).
hf_hub_download(
    repo_id="bartowski/Qwen2.5-Math-7B-Instruct-GGUF",
    filename="Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
    local_dir="./models",
)
# Load the model once at import time; this single instance serves all
# requests handled by respond() below.
llm = Llama(
    model_path="models/Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
    flash_attn=True,  # enable flash-attention kernels
    n_ctx=8192,  # context window size in tokens
    n_batch=1024,  # prompt-processing batch size
    chat_format="chatml",  # Qwen instruct models use the ChatML template
)
# Gradio components
# Markdown output pane for the model's answer. latex_delimiters tells
# Gradio/KaTeX which delimiter pairs to typeset as math.
output_md = gr.Markdown(
    label="Answer",
    value="Answer will be presented here",
    latex_delimiters=[
        # BUG FIX: the original strings were written as "\\begin\{equation\}"
        # etc. — "\{" is an invalid escape that leaves a literal backslash
        # before the brace, so the delimiters could never match the
        # "\begin{equation}" text the model actually emits.
        {"left": "\\(", "right": "\\)", "display": True},
        {"left": "\\begin{equation}", "right": "\\end{equation}", "display": True},
        {"left": "\\begin{align}", "right": "\\end{align}", "display": True},
        {"left": "\\begin{alignat}", "right": "\\end{alignat}", "display": True},
        {"left": "\\begin{gather}", "right": "\\end{gather}", "display": True},
        {"left": "\\begin{CD}", "right": "\\end{CD}", "display": True},
        {"left": "\\[", "right": "\\]", "display": True},
    ],
    elem_id="qwen-md",
    show_copy_button=True,
    container=True,
    render=False,  # placed into the Blocks layout later via .render()
)
# --- Input controls. All are created with render=False and placed into the
# --- Blocks layout below with explicit .render() calls.
target_lang = gr.Dropdown(
    label="Output Language",
    choices=["Chinese", "English"],
    value="Chinese",
    interactive=True,
    render=False,
)
new_tokens = gr.Slider(
    label="Max new tokens",
    minimum=1,
    maximum=8192,
    value=2048,
    step=1,
    render=False,
)
temperature = gr.Slider(
    label="Temperature",
    minimum=0,
    maximum=2.0,
    value=0.5,
    step=0.1,
    render=False,
)
top_p = gr.Slider(
    label="Top P",
    minimum=0.0,
    maximum=1.0,
    value=0.95,
    step=0.05,
    render=False,
)
input_text = gr.Textbox(label="Ask math questions here", render=False)
submit_btn = gr.Button(value="Ask", render=False)
# Static banner shown at the top of the page.
banner = gr.Markdown(
    value="""
# 📖 Qwen2.5-Math GGUF
This WebUI is based on Qwen2.5-Math-7B-Instruct-GGUF for mathematical reasoning. You can input texts of mathematical or arithmetic problems.
"""
)
# Gradio callback
def respond(
    input_text,
    lang="Chinese",
    max_tokens=2048,
    temperature=0.5,
    top_p=0.95,
):
    """Stream a chat completion for a math question.

    Builds a two-message ChatML conversation (system prompt chosen by
    *lang*, then the user's question) and streams it through the
    module-level ``llm`` instance, yielding the cumulative answer text
    after each chunk so the Gradio Markdown output updates live.

    Args:
        input_text: The user's math/arithmetic question.
        lang: ``"Chinese"`` selects the Chinese system prompt; any other
            value selects the English one.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        str: The answer accumulated so far.
    """
    if lang == "Chinese":
        sys_msg = "你是一个乐于助人的数学助手. 你使用中文回答问题"
    else:
        sys_msg = "You are a helpful math assistant. You should always provide your answer in English."
    messages = [
        {"role": "system", "content": sys_msg},
        {"role": "user", "content": input_text},
    ]
    stream = llm.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    message_repl = ""
    for chunk in stream:
        delta = chunk["choices"][0]["delta"]
        # Streamed deltas may carry only a role or be empty; only
        # "content" pieces extend the answer.
        if "content" in delta:
            message_repl += delta["content"]
            yield message_repl
# Assemble the page: banner on top, then inputs (left) and answer (right).
with gr.Blocks(css=CSS, theme="NoCrypt/miku") as demo:
    # Wire the button to the streaming callback. The components were built
    # above with render=False; they are placed into the layout by the
    # .render() calls below.
    submit_btn.click(
        fn=respond,
        inputs=[input_text, target_lang, new_tokens, temperature, top_p],
        outputs=output_md,
    )
    with gr.Column():
        banner.render()
        with gr.Row():
            with gr.Column():
                input_text.render()
                target_lang.render()
                new_tokens.render()
                temperature.render()
                top_p.render()
                submit_btn.render()
            with gr.Column():
                output_md.render()
if __name__ == "__main__":
    # Launch the Gradio server when run as a script.
    demo.launch()