import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Load the model and tokenizer
# (Earlier loader for the full NuminaMath-72B-CoT model, kept commented out for reference.)
# def load_model():
#     # Load the NuminaMath-72B-CoT model
#     pipe = pipeline(
#         "text-generation",
#         model="AI-MO/NuminaMath-72B-CoT",
#         torch_dtype="auto",
#         device_map="auto",  # Automatically map to available GPU/CPU
#     )
#     return pipe
def load_model():
    # Use the 7B variant; the original 72B checkpoint is too large for this hardware.
    tokenizer = AutoTokenizer.from_pretrained("AI-MO/NuminaMath-7B-CoT")
    model = AutoModelForCausalLM.from_pretrained(
        "AI-MO/NuminaMath-7B-CoT",
        device_map="auto",  # Automatically map layers to the available GPU/CPU
        # offload_folder="offload",  # Optionally offload unused parts to disk
        load_in_8bit=True,  # Load weights in 8-bit precision (requires bitsandbytes)
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer)
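# For context: a 7B-parameter model needs roughly 7 GB of GPU memory in 8-bit
# precision versus ~14 GB in fp16, which is why 8-bit loading is used above.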
# Initialize the pipeline
model_pipeline = load_model()
# Define the function to process inputs
def solve_math_question(prompt):
    # Generate deterministically (greedy decoding), capped at 300 new tokens
    outputs = model_pipeline(prompt, max_new_tokens=300, do_sample=False)
    return outputs[0]["generated_text"]
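# Hedged alternative (not part of the original app): NuminaMath-CoT is
# instruction-tuned, so wrapping the question in the tokenizer's chat template
# may yield better step-by-step answers. A minimal sketch, assuming the
# tokenizer ships with a chat template:
# def solve_math_question_chat(prompt):
#     messages = [{"role": "user", "content": prompt}]
#     text = model_pipeline.tokenizer.apply_chat_template(
#         messages, tokenize=False, add_generation_prompt=True
#     )
#     outputs = model_pipeline(text, max_new_tokens=300, do_sample=False)
#     return outputs[0]["generated_text"]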
# Define the Gradio interface
with gr.Blocks() as app:
    gr.Markdown("# NuminaMath-7B-CoT Math Question Solver")
    gr.Markdown(
        "Ask a math-related question, and the model will attempt to solve it with reasoning!"
    )
    with gr.Row():
        question = gr.Textbox(
            label="Your Math Question",
            placeholder="What is 2+2?",
        )
        output = gr.Textbox(label="Model Output")
    submit_button = gr.Button("Solve")
    submit_button.click(solve_math_question, inputs=question, outputs=output)
# Launch the app
app.launch()
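# Note: when running outside Hugging Face Spaces, app.launch(share=True) would
# additionally create a temporary public link for the demo.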