# Gradio demo: solve math questions with the NuminaMath-7B-CoT model.
# Requires: gradio, transformers, accelerate (for device_map), bitsandbytes (for 8-bit loading).

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline


def load_model():
    # The original 72B checkpoint (AI-MO/NuminaMath-72B-CoT) is too large for most
    # single-GPU setups, so the 7B CoT variant is loaded instead.
    model_id = "AI-MO/NuminaMath-7B-CoT"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",  # Automatically map layers to the available GPU/CPU
        # offload_folder="offload",  # Optionally offload unused weights to disk if VRAM is tight
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),  # 8-bit precision (needs bitsandbytes)
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer)


# Initialize the pipeline once at startup
model_pipeline = load_model()


# Process a user question and return the model's answer
def solve_math_question(prompt):
    # Greedy decoding (do_sample=False) keeps the chain-of-thought output reproducible
    outputs = model_pipeline(prompt, max_new_tokens=300, do_sample=False)
    return outputs[0]["generated_text"]


# Define the Gradio interface
with gr.Blocks() as app:
    gr.Markdown("# NuminaMath-7B-CoT Math Question Solver")
    gr.Markdown(
        "Ask a math-related question, and the model will attempt to solve it with reasoning!"
    )
    with gr.Row():
        question = gr.Textbox(
            label="Your Math Question",
            placeholder="What is 2+2?",
        )
        output = gr.Textbox(label="Model Output")
    submit_button = gr.Button("Solve")
    submit_button.click(solve_math_question, inputs=question, outputs=output)

# Launch the app
app.launch()
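
# --- Optional: chat-template prompting (a sketch, not part of the original app) ---
# NuminaMath CoT checkpoints are instruction-tuned, so wrapping the question in the
# tokenizer's chat template usually yields cleaner step-by-step answers than raw
# text completion. This assumes the tokenizer ships a chat template (check the
# model card before relying on it); if so, swap this in for solve_math_question above:
#
# def solve_math_question(prompt):
#     messages = [{"role": "user", "content": prompt}]
#     text = model_pipeline.tokenizer.apply_chat_template(
#         messages, tokenize=False, add_generation_prompt=True
#     )
#     # return_full_text=False strips the echoed prompt from the pipeline output
#     outputs = model_pipeline(
#         text, max_new_tokens=300, do_sample=False, return_full_text=False
#     )
#     return outputs[0]["generated_text"]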