Pmal committed on
Commit
df48fb9
·
verified ·
1 Parent(s): f0f445c

update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -22,7 +22,8 @@ def load_model():
22
  # "AI-MO/NuminaMath-72B-CoT",
23
  "AI-MO/NuminaMath-7B-CoT",
24
  device_map="auto", # Automatically map to available GPU
25
- offload_folder="offload" # Offload unused parts to disk
 
26
  )
27
  return pipeline("text-generation", model=model, tokenizer=tokenizer)
28
 
@@ -32,7 +33,7 @@ model_pipeline = load_model()
32
  # Define the function to process inputs
33
  def solve_math_question(prompt):
34
  # Generate output using the model
35
- outputs = model_pipeline(prompt, max_new_tokens=1024, do_sample=False)
36
  return outputs[0]["generated_text"]
37
 
38
  # Define the Gradio interface
@@ -45,7 +46,7 @@ with gr.Blocks() as app:
45
  with gr.Row():
46
  question = gr.Textbox(
47
  label="Your Math Question",
48
- placeholder="E.g., For how many values of the constant k will the polynomial x^2 + kx + 36 have two distinct integer roots?",
49
  )
50
  output = gr.Textbox(label="Model Output")
51
 
 
22
  # "AI-MO/NuminaMath-72B-CoT",
23
  "AI-MO/NuminaMath-7B-CoT",
24
  device_map="auto", # Automatically map to available GPU
25
+ # offload_folder="offload" # Offload unused parts to disk
26
+ load_in_8bit=True # Load model in 8-bit precision
27
  )
28
  return pipeline("text-generation", model=model, tokenizer=tokenizer)
29
 
 
33
  # Define the function to process inputs
34
  def solve_math_question(prompt):
35
  # Generate output using the model
36
+ outputs = model_pipeline(prompt, max_new_tokens=300, do_sample=False)
37
  return outputs[0]["generated_text"]
38
 
39
  # Define the Gradio interface
 
46
  with gr.Row():
47
  question = gr.Textbox(
48
  label="Your Math Question",
49
+ placeholder="what is 2+2?",
50
  )
51
  output = gr.Textbox(label="Model Output")
52