Spaces:

Ayush0804
/

mathMentor

Runtime error

App Files Files Community

mathMentor / app.py

Ayush0804

Update app.py

781befc verified 4 months ago

raw

history blame

7.45 kB

	import gradio as gr
	from transformers import AutoModelForCausalLM,AutoProcessor,pipeline
	from PIL import Image
	import os
	import tempfile
	import torch
	from pathlib import Path
	import secrets

	# Vision-language model used to transcribe the math content of an image.
	model_id = "microsoft/Phi-3.5-vision-instruct"

	# Weights are loaded in half precision; the checkpoint ships custom
	# modelling code, hence trust_remote_code.
	model = AutoModelForCausalLM.from_pretrained(
	    model_id,
	    trust_remote_code=True,
	    torch_dtype=torch.float16,
	)

	# Matching processor (tokenizer + image preprocessing) for the same checkpoint.
	processor = AutoProcessor.from_pretrained(
	    model_id,
	    trust_remote_code=True,
	    num_crops=16,
	)

	# Conversation history shared by process_image and get_math_response.
	math_messages = []
	# Function for processing the image
	def process_image(image, should_convert=False):
	    """Describe the math-related content of an image with the vision model.

	    Calling this function also clears the module-level ``math_messages``
	    history, because a new image starts a new conversation.

	    The image is handed to the processor directly, so no temporary file is
	    written. The earlier implementation saved a JPEG only to build a
	    ``file://`` reference that this model's processor does not use.

	    Args:
	        image: PIL image coming from the upload widget or the sketchpad.
	        should_convert: Set for sketches. The image is composited onto a
	            white background using its alpha channel before being described.

	    Returns:
	        The decoded text generated by the model for this image.
	    """
	    global math_messages
	    math_messages = []

	    if should_convert:
	        # Sketches are drawn on a transparent canvas; flatten them on white.
	        # Converting to RGBA first guarantees an alpha band for the mask.
	        sketch = image.convert("RGBA")
	        flattened = Image.new("RGB", size=(sketch.width, sketch.height), color=(255, 255, 255))
	        flattened.paste(sketch, (0, 0), mask=sketch)
	        image = flattened
	    elif image.mode != "RGB":
	        image = image.convert("RGB")

	    # The chat template of this model expects plain-string contents, with the
	    # image referenced through a numbered placeholder.
	    messages = [
	        {'role': 'system', 'content': 'You are a helpful assistant.'},
	        {
	            'role': 'user',
	            'content': '<|image_1|>\n'
	                       'Please describe the math-related content in this image, ensuring that any LaTeX formulas are correctly transcribed. Non-mathematical details do not need to be described.',
	        },
	    ]
	    prompt = processor.tokenizer.apply_chat_template(
	        messages, tokenize=False, add_generation_prompt=True
	    )

	    # Tokenise the prompt, preprocess the image, and move everything to the
	    # device the model lives on.
	    inputs = processor(prompt, [image], return_tensors="pt").to(model.device)

	    generation_args = {
	        "max_new_tokens": 1000,
	        "temperature": 0.2,
	        "do_sample": True,
	    }
	    # Both mappings must be unpacked: generate() takes tensors and options
	    # as keyword arguments.
	    generate_ids = model.generate(
	        **inputs,
	        eos_token_id=processor.tokenizer.eos_token_id,
	        **generation_args,
	    )

	    # Drop the prompt tokens so only the newly generated text is decoded.
	    generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
	    response = processor.batch_decode(
	        generate_ids,
	        skip_special_tokens=True,
	        clean_up_tokenization_spaces=False,
	    )[0]
	    return response

	# Function to get math-response from the processed image
	# Lazily created text-generation pipeline, reused by every request.
	_math_pipe = None


	def _get_math_pipeline():
	    """Return the shared text-generation pipeline, building it on first use."""
	    global _math_pipe
	    if _math_pipe is None:
	        _math_pipe = pipeline(
	            "text-generation",
	            model="deepseek-ai/DeepSeek-V2.5-1210",
	            trust_remote_code=True,
	        )
	    return _math_pipe


	def _extract_answer(response):
	    """Return the assistant text from a text-generation pipeline result, or None."""
	    for item in response or []:
	        generated = item.get("generated_text") if isinstance(item, dict) else None
	        if isinstance(generated, str):
	            return generated
	        # For chat input the pipeline returns the whole conversation; the
	        # reply is the final message.
	        if isinstance(generated, list) and generated:
	            last = generated[-1]
	            if isinstance(last, dict) and last.get("role") == "assistant":
	                return last.get("content")
	    return None


	def get_math_response(image_description, user_question):
	    """Answer a math question, optionally grounded in an image description.

	    The module-level ``math_messages`` history is trimmed to its system
	    prompt, extended with the new user query, and sent to the text-generation
	    pipeline. On success the assistant reply is appended to the history; if
	    no reply could be extracted, the user query is removed again.

	    Args:
	        image_description: Text describing the image, or None when the user
	            supplied no image.
	        user_question: The question typed by the user.

	    Yields:
	        The answer with every backslash doubled. Nothing is yielded when the
	        pipeline result contains no answer.
	    """
	    global math_messages
	    if not math_messages:
	        math_messages.append({'role': 'system', 'content': 'You are a helpful math assistant.'})
	    # Keep only the system prompt: each submission is answered on its own.
	    math_messages = math_messages[:1]

	    if image_description is not None:
	        content = f'Image description: {image_description}\n\n'
	    else:
	        content = ''
	    query = f"{content}User question: {user_question}"
	    math_messages.append({'role': 'user', 'content': query})

	    response = _get_math_pipeline()(math_messages)
	    print(response)
	    answer = _extract_answer(response)
	    print(f'query: {query}\nanswer: {answer}')

	    if answer is None:
	        math_messages.pop()
	        return
	    # Record the reply before yielding so the history is updated even if the
	    # consumer stops iterating after the first value.
	    math_messages.append({'role': 'assistant', 'content': answer})
	    yield answer.replace("\\", "\\\\")
	# creating the chatbot
	def math_chat_bot(image, sketchpad, question, state):
	    """Gradio handler: describe the active tab's image, then answer the question.

	    Tab index 0 reads the uploaded image and tab index 1 reads the sketchpad
	    composite. When the active tab holds no image, the question is answered
	    without an image description.
	    """
	    active_tab = state["tab_index"]
	    description = None
	    if active_tab == 0:
	        # Upload tab
	        if image is not None:
	            description = process_image(image)
	    elif active_tab == 1:
	        # Sketch tab
	        print(sketchpad)
	        if sketchpad and sketchpad["composite"]:
	            description = process_image(sketchpad["composite"], True)
	    yield from get_math_response(description, question)

	# Stylesheet passed to gr.Blocks: forces KaTeX display-math elements inside the
	# element with id "qwen-md" to render inline.
	css = """
	#qwen-md .katex-display { display: inline; }
	#qwen-md .katex-display>.katex { display: inline; }
	#qwen-md .katex-display>.katex>.katex-html { display: inline; }
	"""

	def tabs_select(e: gr.SelectData, _state):
	    """Store the index carried by the select event under ``tab_index`` in the state dict."""
	    _state.update(tab_index=e.index)


	# Create the Gradio interface.

	# LaTeX environments rendered as display math in the answer panel.
	_LATEX_ENVIRONMENTS = ("equation", "align", "alignat", "gather", "CD")

	# The delimiters are matched literally, so the braces must not be
	# backslash-escaped.
	_LATEX_DELIMITERS = (
	    [{"left": "\\(", "right": "\\)", "display": True}]
	    + [
	        {"left": f"\\begin{{{env}}}", "right": f"\\end{{{env}}}", "display": True}
	        for env in _LATEX_ENVIRONMENTS
	    ]
	    + [{"left": "\\[", "right": "\\]", "display": True}]
	)

	with gr.Blocks(css=css) as demo:
	    gr.HTML(
	        "<center><font size=3>This WebUI is based on Phi-3.5-vision for OCR and "
	        "DeepSeek-V2.5 for mathematical reasoning. You can input either images or "
	        "texts of mathematical or arithmetic problems.</font></center>"
	    )
	    # Per-session state remembering which input tab is active.
	    state = gr.State({"tab_index": 0})
	    with gr.Row():
	        with gr.Column():
	            with gr.Tabs() as input_tabs:
	                with gr.Tab("Upload"):
	                    input_image = gr.Image(type="pil", label="Upload")
	                with gr.Tab("Sketch"):
	                    input_sketchpad = gr.Sketchpad(type="pil", label="Sketch", layers=False)
	            input_tabs.select(fn=tabs_select, inputs=[state])
	            input_text = gr.Textbox(label="input your question")
	            with gr.Row():
	                with gr.Column():
	                    clear_btn = gr.ClearButton(
	                        [input_image, input_sketchpad, input_text])
	                with gr.Column():
	                    submit_btn = gr.Button("Submit", variant="primary")
	        with gr.Column():
	            # elem_id ties this panel to the inline-KaTeX rules in `css`.
	            output_md = gr.Markdown(
	                label="answer",
	                latex_delimiters=_LATEX_DELIMITERS,
	                elem_id="qwen-md",
	            )
	        submit_btn.click(
	            fn=math_chat_bot,
	            inputs=[input_image, input_sketchpad, input_text, state],
	            outputs=output_md)
	demo.launch()