File size: 3,793 Bytes
6a06ad0
 
 
471c5b5
6a06ad0
 
471c5b5
84b8bdd
 
 
 
 
6a06ad0
 
 
7b5a6e4
aa0a870
6a06ad0
 
 
7b5a6e4
6a06ad0
 
b18c263
aa0a870
6a06ad0
471c5b5
109cb5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa23349
88a5713
 
 
109cb5e
 
 
aa0a870
471c5b5
aa0a870
f22d1ca
 
 
 
471c5b5
7b5a6e4
 
 
 
aa0a870
 
 
7b5a6e4
aa0a870
 
 
471c5b5
 
b18c263
aa0a870
 
 
 
 
 
2c0d5fe
 
 
 
 
 
471c5b5
 
109cb5e
f22d1ca
 
 
 
 
109cb5e
3e484e4
109cb5e
 
 
 
 
 
 
 
 
 
471c5b5
 
109cb5e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import json
import spaces
import subprocess
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# CSS hack: force KaTeX display-mode math inside the #qwen-md answer panel to
# flow inline, so block equations don't break the streaming Markdown layout.
CSS = """
#qwen-md .katex-display { display: inline; }
#qwen-md .katex-display>.katex { display: inline; }
#qwen-md .katex-display>.katex>.katex-html { display: inline; }
"""

# Download the quantized GGUF weights into ./models at import time.
# hf_hub_download is a no-op if the file is already cached locally.
hf_hub_download(
    repo_id="bartowski/Qwen2.5-Math-7B-Instruct-GGUF",
    filename="Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
    local_dir="./models",
)

# Single module-level llama.cpp instance shared by all requests.
# NOTE(review): llama-cpp-python is not thread-safe per instance — assumes
# Gradio serializes calls to this model (default queue behavior); confirm.
llm = Llama(
    model_path="models/Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
    flash_attn=True,          # enable flash attention in llama.cpp
    n_ctx=8192,               # context window (tokens)
    n_batch=1024,             # prompt-processing batch size
    chat_format="chatml",     # Qwen models use the ChatML prompt template
)

# Gradio components.
# All components are created with render=False so they can be laid out
# explicitly (via .render()) inside the gr.Blocks context below.
#
# BUG FIX: the LaTeX delimiters previously used invalid escape sequences
# (\{ and \}) inside the string literals, e.g. "\\begin\{equation\}", which
# produces the literal text `\begin\{equation\}`. That never matches the
# `\begin{equation}` the model actually emits, so those environments were
# never rendered (and Python 3.12+ emits a SyntaxWarning for the escapes).
# Braces need no escaping in Python strings or in KaTeX delimiter specs.
output_md = gr.Markdown(
    label="Answer",
    value="Answer will be presented here",
    latex_delimiters=[
        {"left": "\\(", "right": "\\)", "display": True},
        {"left": "\\begin{equation}", "right": "\\end{equation}", "display": True},
        {"left": "\\begin{align}", "right": "\\end{align}", "display": True},
        {"left": "\\begin{alignat}", "right": "\\end{alignat}", "display": True},
        {"left": "\\begin{gather}", "right": "\\end{gather}", "display": True},
        {"left": "\\begin{CD}", "right": "\\end{CD}", "display": True},
        {"left": "\\[", "right": "\\]", "display": True},
    ],
    elem_id="qwen-md",  # targeted by the CSS inline-math hack above
    show_copy_button=True,
    container=True,
    render=False,
)
target_lang = gr.Dropdown(
    choices=["Chinese", "English"],
    value="Chinese",
    label="Output Language",
    interactive=True,
    render=False,
)
new_tokens = gr.Slider(
    minimum=1, maximum=8192, value=2048, step=1, label="Max new tokens", render=False
)
temperature = gr.Slider(
    minimum=0, maximum=2.0, value=0.5, step=0.1, label="Temperature", render=False
)
top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top P", render=False)
input_text = gr.Textbox(label="Ask math questions here", render=False)
submit_btn = gr.Button(value="Ask", render=False)
banner = gr.Markdown(value="""
# 📖 Qwen2.5-Math GGUF
This WebUI is based on Qwen2.5-Math-7B-Instruct-GGUF for mathematical reasoning. You can input texts of mathematical or arithmetic problems.
"""
)


# Gradio callback.
def respond(
    input_text,
    lang="Chinese",
    max_tokens=2048,
    temperature=0.5,
    top_p=0.95,
):
    """Stream a chat completion for a math question.

    Generator used as the Gradio submit handler: yields the accumulated
    answer text after each streamed chunk so the Markdown output updates
    incrementally.

    Args:
        input_text: The user's question.
        lang: "Chinese" selects a Chinese system prompt; anything else
            falls back to the English one.
        max_tokens: Generation cap passed to llama.cpp.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Yields:
        str: The answer accumulated so far.
    """
    if lang == "Chinese":
        sys_msg = "你是一个乐于助人的数学助手. 你使用中文回答问题"
    else:
        sys_msg = "You are a helpful math assistant. You should always provide your answer in English."
    messages = [
        {
            "role": "system",
            "content": sys_msg,
        },
        {"role": "user", "content": input_text},
    ]

    # stream=True makes create_chat_completion return an iterator of chunks.
    # (The previous dead assignment `response = ""` before this call is gone.)
    stream = llm.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    accumulated = ""
    for chunk in stream:
        # Deltas without content (e.g. the initial role-only delta) are skipped.
        content = chunk["choices"][0]["delta"].get("content")
        if content:
            accumulated += content
        yield accumulated


# Assemble the UI. Components were created with render=False, so wiring the
# click event before .render() is valid — gradio binds events by component
# identity, not layout position.
with gr.Blocks(css=CSS, theme="NoCrypt/miku") as demo:
    # Streamed generator output: each yield from respond() updates output_md.
    submit_btn.click(
        fn=respond,
        inputs=[input_text, target_lang, new_tokens, temperature, top_p],
        outputs=output_md,
    )
    with gr.Column():
        banner.render()
        with gr.Row():
            # Left column: inputs and generation controls.
            with gr.Column():
                input_text.render()
                target_lang.render()
                new_tokens.render()
                temperature.render()
                top_p.render()
                submit_btn.render()
            # Right column: rendered Markdown/LaTeX answer.
            with gr.Column():
                output_md.render()

if __name__ == "__main__":
    demo.launch()