import spaces
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
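# CSS that forces KaTeX display-math blocks inside the #qwen-md component to render inline.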
CSS = """
#qwen-md .katex-display { display: inline; }
#qwen-md .katex-display>.katex { display: inline; }
#qwen-md .katex-display>.katex>.katex-html { display: inline; }
"""
hf_hub_download(
repo_id="bartowski/Qwen2.5-Math-7B-Instruct-GGUF",
filename="Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
local_dir="./models",
)
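# Load the model with llama-cpp-python: flash attention, an 8192-token context window,
# and the ChatML chat template that Qwen models expect.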
llm = Llama(
model_path="models/Qwen2.5-Math-7B-Instruct-Q6_K_L.gguf",
flash_attn=True,
n_ctx=8192,
n_batch=1024,
chat_format="chatml",
)
# Gradio components (created with render=False so they can be placed manually in the Blocks layout below)
output_md = gr.Markdown(
label="Answer",
value="Answer will be presented here",
latex_delimiters=[
{"left": "\\(", "right": "\\)", "display": True},
{"left": "\\begin\{equation\}", "right": "\\end\{equation\}", "display": True},
{"left": "\\begin\{align\}", "right": "\\end\{align\}", "display": True},
{"left": "\\begin\{alignat\}", "right": "\\end\{alignat\}", "display": True},
{"left": "\\begin\{gather\}", "right": "\\end\{gather\}", "display": True},
{"left": "\\begin\{CD\}", "right": "\\end\{CD\}", "display": True},
{"left": "\\[", "right": "\\]", "display": True},
],
elem_id="qwen-md",
show_copy_button=True,
container=True,
render=False,
)
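# Generation controls: output language, token budget, and sampling parameters.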
target_lang = gr.Dropdown(
choices=["Chinese", "English"],
value="Chinese",
label="Output Language",
interactive=True,
render=False,
)
new_tokens = gr.Slider(
minimum=1, maximum=8192, value=2048, step=1, label="Max new tokens", render=False
)
temperature = gr.Slider(
minimum=0, maximum=2.0, value=0.5, step=0.1, label="Temperature", render=False
)
top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top P", render=False)
input_text = gr.Textbox(label="Ask math questions here", render=False)
submit_btn = gr.Button(value="Ask", render=False)
banner = gr.Markdown(value="""
# 📖 Qwen2.5-Math GGUF
This WebUI is based on Qwen2.5-Math-7B-Instruct-GGUF for mathematical reasoning. You can input texts of mathematical or arithmetic problems.
"""
)
# Gradio callback: stream the model's answer to the Markdown output
def respond(
input_text,
lang="Chinese",
max_tokens=2048,
temperature=0.5,
top_p=0.95,
):
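    """Stream the model's answer to a single math question.

    Yields the accumulated Markdown after every streamed token so Gradio
    updates the answer incrementally.
    """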
if lang == "Chinese":
sys_msg = "你是一个乐于助人的数学助手. 你使用中文回答问题"
else:
sys_msg = "You are a helpful math assistant. You should always provide your answer in English."
messages = [
{
"role": "system",
"content": sys_msg,
},
{"role": "user", "content": input_text},
]
response = ""
response = llm.create_chat_completion(
messages=messages,
stream=True,
max_tokens=max_tokens,
temperature=temperature,
top_p=top_p,
)
message_repl = ""
for chunk in response:
if len(chunk['choices'][0]["delta"]) != 0 and "content" in chunk['choices'][0]["delta"]:
message_repl = message_repl + \
chunk['choices'][0]["delta"]["content"]
yield message_repl
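# Assemble the UI: banner on top, question and generation controls on the left,
# streamed answer on the right.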
with gr.Blocks(css=CSS, theme="NoCrypt/miku") as demo:
submit_btn.click(
fn=respond,
inputs=[input_text, target_lang, new_tokens, temperature, top_p],
outputs=output_md,
)
with gr.Column():
banner.render()
with gr.Row():
with gr.Column():
input_text.render()
target_lang.render()
new_tokens.render()
temperature.render()
top_p.render()
submit_btn.render()
with gr.Column():
output_md.render()
if __name__ == "__main__":
demo.launch()