Ayush0804 committed
Commit 1f27f4d · verified
1 Parent(s): 781befc

Update app.py

Files changed (1)
  1. app.py +135 -150

app.py CHANGED
@@ -1,57 +1,41 @@
- import gradio as gr
- from transformers import AutoModelForCausalLM,AutoProcessor,pipeline
  from PIL import Image
- import os
- import tempfile
  import torch
- from pathlib import Path
- import secrets

- # Initialise Hugging Face LLM
- model_id="microsoft/Phi-3.5-vision-instruct"
- model=AutoModelForCausalLM.from_pretrained(
      model_id,
      trust_remote_code=True,
-     torch_dtype=torch.float16,)
-
  processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True, num_crops=16)
- math_messages=[]
- # Function for processing the image
- def process_image(image,should_convert=False):
-     '''
-     Saves the uploaded image or sketch and then extracts math-related descriptions using the model
-     '''
-     global math_messages
-     math_messages=[]
-     # create a temporary directory for saving images
-     uploaded_file_dir=os.environ.get("GRADIO_TEMP_DIR") or str(Path(tempfile.gettempdir())/"gradio")
-     os.makedirs(uploaded_file_dir,exist_ok=True)
-     # saves the uploaded image as a temporary file
-     name = f"tmp{secrets.token_hex(20)}.jpg"
-     filename = os.path.join(uploaded_file_dir, name)
-     # If the input was a sketch then convert into RGB format
-     if should_convert:
-         new_img = Image.new('RGB', size=(image.width, image.height), color=(255, 255, 255))
-         new_img.paste(image, (0, 0), mask=image)
-         image = new_img
-     # Saves the image in the temporary file
-     image.save(filename)
-     # Calling the model to process images
-     messages = [{
-         'role': 'system',
-         'content': [{'text': 'You are a helpful assistant.'}]
-     }, {
-         'role': 'user',
-         'content': [
-             {'image': f'file://{filename}'},
-             {'text': 'Please describe the math-related content in this image, ensuring that any LaTeX formulas are correctly transcribed. Non-mathematical details do not need to be described.'}
-         ]
-     }]
      prompt = processor.tokenizer.apply_chat_template(
          messages, tokenize=False, add_generation_prompt=True
      )
      # Process the input
-     inputs = processor(prompt, image, return_tensors="pt")

      # Generate the response
      generation_args = {
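
(Note, not part of the commit: the removed process_image above builds its chat messages in the Qwen-VL / DashScope style, as a list of typed content parts per message, whereas Phi-3.5-vision's chat template expects a plain string containing numbered image placeholders such as <|image_1|>, which is the shape the new solve_math_problem below adopts. A minimal sketch of the two shapes, with hypothetical file paths:)

# Illustrative only: the two chat-message shapes involved in this commit.
# Removed code, Qwen-VL / DashScope style: content is a list of typed parts.
qwen_style_message = {
    "role": "user",
    "content": [
        {"image": "file:///tmp/problem.jpg"},   # hypothetical path, image passed by URI
        {"text": "Describe the math content."},
    ],
}
# New code, Phi-3.5-vision style: content is a plain string with an image
# placeholder; the PIL image itself is handed to the processor separately.
phi_style_message = {
    "role": "user",
    "content": "<|image_1|>\nDescribe the math content.",
}
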
@@ -64,115 +48,116 @@ def process_image(image,should_convert=False):
      # Decode the response
      generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
      response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
      return response

- # Function to get math-response from the processed image
- def get_math_response(image_description,user_question):
-     global math_messages
-     if not math_messages:
-         math_messages.append({'role': 'system', 'content': 'You are a helpful math assistant.'})
-     math_messages = math_messages[:1]
-     if image_description is not None:
-         content = f'Image description: {image_description}\n\n'
-     else:
-         content = ''
-     query = f"{content}User question: {user_question}"
-     math_messages.append({'role': 'user', 'content': query})
-     pipe = pipeline("text-generation", model="deepseek-ai/DeepSeek-V2.5-1210", trust_remote_code=True)
-     response=pipe(math_messages)
-     print(response)
-     answer = None
-     for resp in response:
-         if resp.output is None:
-             continue
-         answer = resp.output.choices[0].message.content
-         yield answer.replace("\\", "\\\\")
-     print(f'query: {query}\nanswer: {answer}')
-     if answer is None:
-         math_messages.pop()
-     else:
-         math_messages.append({'role': 'assistant', 'content': answer})
- # creating the chatbot
- def math_chat_bot(image, sketchpad, question, state):
-     current_tab_index = state["tab_index"]
-     image_description = None
-     # Upload
-     if current_tab_index == 0:
-         if image is not None:
-             image_description = process_image(image)
-     # Sketch
-     elif current_tab_index == 1:
-         print(sketchpad)
-         if sketchpad and sketchpad["composite"]:
-             image_description = process_image(sketchpad["composite"], True)
-     yield from get_math_response(image_description, question)
-
- css = """
- #qwen-md .katex-display { display: inline; }
- #qwen-md .katex-display>.katex { display: inline; }
- #qwen-md .katex-display>.katex>.katex-html { display: inline; }
  """

- def tabs_select(e: gr.SelectData, _state):
-     _state["tab_index"] = e.index
-
-
- # Create the Gradio interface
- with gr.Blocks(css=css) as demo:
-     gr.HTML(
-         """\
- <center><font size=3>This WebUI is based on Qwen2-VL for OCR and Qwen2.5-Math for mathematical reasoning. You can input either images or texts of mathematical or arithmetic problems.</center>"""
-     )
-     state = gr.State({"tab_index": 0})
-     with gr.Row():
          with gr.Column():
-             with gr.Tabs() as input_tabs:
-                 with gr.Tab("Upload"):
-                     input_image = gr.Image(type="pil", label="Upload"),
-                 with gr.Tab("Sketch"):
-                     input_sketchpad = gr.Sketchpad(type="pil", label="Sketch", layers=False)
-             input_tabs.select(fn=tabs_select, inputs=[state])
-             input_text = gr.Textbox(label="input your question")
-             with gr.Row():
-                 with gr.Column():
-                     clear_btn = gr.ClearButton(
-                         [*input_image, input_sketchpad, input_text])
-                 with gr.Column():
-                     submit_btn = gr.Button("Submit", variant="primary")
          with gr.Column():
-             output_md = gr.Markdown(label="answer",
-                 latex_delimiters=[{
-                     "left": "\\(",
-                     "right": "\\)",
-                     "display": True
-                 }, {
-                     "left": "\\begin\{equation\}",
-                     "right": "\\end\{equation\}",
-                     "display": True
-                 }, {
-                     "left": "\\begin\{align\}",
-                     "right": "\\end\{align\}",
-                     "display": True
-                 }, {
-                     "left": "\\begin\{alignat\}",
-                     "right": "\\end\{alignat\}",
-                     "display": True
-                 }, {
-                     "left": "\\begin\{gather\}",
-                     "right": "\\end\{gather\}",
-                     "display": True
-                 }, {
-                     "left": "\\begin\{CD\}",
-                     "right": "\\end\{CD\}",
-                     "display": True
-                 }, {
-                     "left": "\\[",
-                     "right": "\\]",
-                     "display": True
-                 }],
-                 elem_id="qwen-md")
-     submit_btn.click(
-         fn=math_chat_bot,
-         inputs=[*input_image, input_sketchpad, input_text, state],
-         outputs=output_md)
- demo.launch()
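
(Side note, not part of the commit: the removed get_math_response builds a transformers text-generation pipeline but then reads the result via resp.output.choices[0].message.content, which is the DashScope/OpenAI-style response shape rather than what transformers.pipeline returns, so that loop would fail at runtime. On recent transformers releases a chat-style pipeline call is read back roughly as in the sketch below; the model name is kept from the removed code purely for illustration and is far too large to load casually, so treat this as a shape reference only.)

# Illustrative sketch: how a chat passed to transformers' text-generation
# pipeline comes back on recent transformers versions.
from transformers import pipeline

messages = [
    {"role": "system", "content": "You are a helpful math assistant."},
    {"role": "user", "content": "User question: what is 2 + 2?"},
]

pipe = pipeline("text-generation", model="deepseek-ai/DeepSeek-V2.5-1210", trust_remote_code=True)
outputs = pipe(messages, max_new_tokens=256)

# Each result carries the full conversation under "generated_text";
# the assistant's reply is the last message in that list.
answer = outputs[0]["generated_text"][-1]["content"]
print(answer)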
 
+ import gradio as gr
+ import spaces
  from PIL import Image
+ import os
  import torch
+ from transformers import AutoModelForCausalLM, AutoProcessor
+ import subprocess
+ from io import BytesIO

+ # Install flash-attn
+ subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+
+
+ # Load the model and processor
+ model_id = "microsoft/Phi-3.5-vision-instruct"
+ model = AutoModelForCausalLM.from_pretrained(
      model_id,
      trust_remote_code=True,
+     torch_dtype=torch.float16,
+     use_flash_attention_2=False, # Explicitly disable Flash Attention 2
+ )
  processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True, num_crops=16)
+
+ @spaces.GPU(duration=120)
+ def solve_math_problem(image):
+     # Move model to GPU for this function call
+     model.to('cuda')
+
+     # Prepare the input
+     messages = [
+         {"role": "user", "content": "<|image_1|>\nSolve this math problem step by step. Explain your reasoning clearly."},
+     ]
      prompt = processor.tokenizer.apply_chat_template(
          messages, tokenize=False, add_generation_prompt=True
      )
+
      # Process the input
+     inputs = processor(prompt, image, return_tensors="pt").to("cuda")

      # Generate the response
      generation_args = {

      # Decode the response
      generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]
      response = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+
+     # Move model back to CPU to free up GPU memory
+     model.to('cpu')
      return response

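(The generation block between generation_args = { and # Decode the response is unchanged by this commit, old lines 58-63 / new lines 42-47, so the diff elides it. For orientation only, a typical generation step for Phi-3.5-vision-instruct, following the pattern in the model card, looks roughly like the sketch below; the exact values in app.py may differ, and model, processor and inputs are the objects defined above.)

# Assumed sketch of the elided, unchanged generation block; parameter values
# are illustrative, not the file's literal contents.
generation_args = {
    "max_new_tokens": 1000,
    "temperature": 0.0,
    "do_sample": False,
}
generate_ids = model.generate(
    **inputs,
    eos_token_id=processor.tokenizer.eos_token_id,
    **generation_args,
)
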
+ # Custom CSS
+ custom_css = """
+ <style>
+ body {
+     font-family: 'Arial', sans-serif;
+     background-color: #f0f3f7;
+     margin: 0;
+     padding: 0;
+ }
+ .container {
+     max-width: 1200px;
+     margin: 0 auto;
+     padding: 20px;
+ }
+ .header {
+     background-color: #2c3e50;
+     color: white;
+     padding: 20px 0;
+     text-align: center;
+ }
+ .header h1 {
+     margin: 0;
+     font-size: 2.5em;
+ }
+ .main-content {
+     display: flex;
+     justify-content: space-between;
+     margin-top: 30px;
+ }
+ .input-section, .output-section {
+     width: 48%;
+     background-color: white;
+     border-radius: 8px;
+     padding: 20px;
+     box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+ }
+ .gr-button {
+     background-color: #27ae60;
+     color: white;
+     border: none;
+     padding: 10px 20px;
+     border-radius: 5px;
+     cursor: pointer;
+     transition: background-color 0.3s;
+ }
+ .gr-button:hover {
+     background-color: #2ecc71;
+ }
+ .examples-section {
+     margin-top: 30px;
+     background-color: white;
+     border-radius: 8px;
+     padding: 20px;
+     box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+ }
+ .examples-section h3 {
+     margin-top: 0;
+     color: #2c3e50;
+ }
+ .footer {
+     text-align: center;
+     margin-top: 30px;
+     color: #7f8c8d;
+ }
+ </style>
  """

+ # Create the Gradio interface
+ with gr.Blocks(css=custom_css) as iface:
+     gr.HTML("""
+     <div class="header">
+         <h1>AI Math Equation Solver</h1>
+         <p>Upload an image of a math problem, and our AI will solve it step by step!</p>
+     </div>
+     """)
+
+     with gr.Row(equal_height=True):
          with gr.Column():
+             gr.HTML("<h2>Upload Your Math Problem</h2>")
+             input_image = gr.Image(type="pil", label="Upload Math Problem Image")
+             submit_btn = gr.Button("Solve Problem", elem_classes=["gr-button"])
+
          with gr.Column():
+             gr.HTML("<h2>Solution</h2>")
+             output_text = gr.Textbox(label="Step-by-step Solution", lines=10)
+
+     gr.HTML("<h3>Try These Examples</h3>")
+     examples = gr.Examples(
+         examples=[
+             os.path.join(os.path.dirname(__file__), "eqn1.png"),
+             os.path.join(os.path.dirname(__file__), "eqn2.png")
+         ],
+         inputs=input_image,
+         outputs=output_text,
+         fn=solve_math_problem,
+         cache_examples=True,
+     )
+
+     gr.HTML("""
+     <div class="footer">
+         <p>Powered by Gradio and AI - Created for educational purposes</p>
+     </div>
+     """)
+
+     submit_btn.click(fn=solve_math_problem, inputs=input_image, outputs=output_text)
+
+ # Launch the app
+ iface.launch()
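
(A minimal way to exercise the new inference path outside the Gradio UI, illustrative only: it assumes this snippet is appended to app.py, a CUDA GPU is available, and an example image such as eqn1.png sits next to the script; outside a ZeroGPU Space the @spaces.GPU decorator should have no effect.)

# Hypothetical smoke test: run the solver on one of the bundled example images.
if __name__ == "__main__":
    test_image = Image.open("eqn1.png").convert("RGB")
    print(solve_math_problem(test_image))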