Spaces:
Runtime error
Runtime error
update app.py
Browse files
app.py
CHANGED
@@ -22,7 +22,8 @@ def load_model():
|
|
22 |
# "AI-MO/NuminaMath-72B-CoT",
|
23 |
"AI-MO/NuminaMath-7B-CoT",
|
24 |
device_map="auto", # Automatically map to available GPU
|
25 |
-
offload_folder="offload" # Offload unused parts to disk
|
|
|
26 |
)
|
27 |
return pipeline("text-generation", model=model, tokenizer=tokenizer)
|
28 |
|
@@ -32,7 +33,7 @@ model_pipeline = load_model()
|
|
32 |
# Define the function to process inputs
|
33 |
def solve_math_question(prompt):
|
34 |
# Generate output using the model
|
35 |
-
outputs = model_pipeline(prompt, max_new_tokens=…)  [removed line truncated in this capture; full argument list not shown]
|
36 |
return outputs[0]["generated_text"]
|
37 |
|
38 |
# Define the Gradio interface
|
@@ -45,7 +46,7 @@ with gr.Blocks() as app:
|
|
45 |
with gr.Row():
|
46 |
question = gr.Textbox(
|
47 |
label="Your Math Question",
|
48 |
-
placeholder="…"  [removed line truncated in this capture; original placeholder text not shown]
|
49 |
)
|
50 |
output = gr.Textbox(label="Model Output")
|
51 |
|
|
|
22 |
# "AI-MO/NuminaMath-72B-CoT",
|
23 |
"AI-MO/NuminaMath-7B-CoT",
|
24 |
device_map="auto", # Automatically map to available GPU
|
25 |
+
# offload_folder="offload" # Offload unused parts to disk
|
26 |
+
load_in_8bit=True # Load model in 8-bit precision
|
27 |
)
|
28 |
return pipeline("text-generation", model=model, tokenizer=tokenizer)
|
29 |
|
|
|
33 |
# Define the function to process inputs
|
34 |
def solve_math_question(prompt):
|
35 |
# Generate output using the model
|
36 |
+
outputs = model_pipeline(prompt, max_new_tokens=300, do_sample=False)
|
37 |
return outputs[0]["generated_text"]
|
38 |
|
39 |
# Define the Gradio interface
|
|
|
46 |
with gr.Row():
|
47 |
question = gr.Textbox(
|
48 |
label="Your Math Question",
|
49 |
+
placeholder="what is 2+2?",
|
50 |
)
|
51 |
output = gr.Textbox(label="Model Output")
|
52 |
|