terryyz committed on
Commit
8624024
·
verified ·
1 Parent(s): 45b26c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -44
app.py CHANGED
@@ -79,49 +79,52 @@ def read_logs():
79
  with open(log_file, "r") as f:
80
  return f.read()
81
 
82
- with gr.Blocks() as demo:
83
- gr.Markdown("# BigCodeBench Evaluation App")
84
-
85
- with gr.Row():
86
- jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
87
- split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
88
- subset = gr.Dropdown(choices=["full", "hard"], label="Subset", value="full")
89
-
90
- with gr.Row():
91
- save_pass_rate = gr.Checkbox(label="Save Pass Rate")
92
- parallel = gr.Number(label="Parallel (optional)", precision=0)
93
- min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
94
- max_as_limit = gr.Number(label="Max AS Limit", value=128*1024, precision=0)
95
-
96
- with gr.Row():
97
- max_data_limit = gr.Number(label="Max Data Limit", value=4*1024, precision=0)
98
- max_stack_limit = gr.Number(label="Max Stack Limit", value=5, precision=0)
99
- check_gt_only = gr.Checkbox(label="Check GT Only")
100
- no_gt = gr.Checkbox(label="No GT")
101
-
102
- command_output = gr.Textbox(label="Command", lines=2)
103
- submit_btn = gr.Button("Run Evaluation")
104
- log_output = gr.Textbox(label="Execution Logs", lines=10)
105
-
106
- def update_command(*args):
107
- return generate_command(*args)
108
-
109
- input_components = [
110
- jsonl_file, split, subset, save_pass_rate, parallel,
111
- min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
112
- check_gt_only, no_gt
113
- ]
114
-
115
- for component in input_components:
116
- component.change(update_command, inputs=input_components, outputs=command_output)
117
-
118
- def on_submit(command):
119
- threading.Thread(target=run_bigcodebench, args=(command,), daemon=True).start()
120
- return "Evaluation started. Please wait for the logs to update..."
121
-
122
- submit_btn.click(on_submit, inputs=[command_output], outputs=[log_output])
123
-
124
- demo.load(read_logs, None, log_output, every=1)
 
125
 
 
 
126
  if __name__ == "__main__":
127
- demo.queue().launch()
 
79
  with open(log_file, "r") as f:
80
  return f.read()
81
 
82
+ def run():
83
+ with gr.Blocks() as demo:
84
+ gr.Markdown("# BigCodeBench Evaluation App")
85
+
86
+ with gr.Row():
87
+ jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
88
+ split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
89
+ subset = gr.Dropdown(choices=["full", "hard"], label="Subset", value="full")
90
+
91
+ with gr.Row():
92
+ save_pass_rate = gr.Checkbox(label="Save Pass Rate")
93
+ parallel = gr.Number(label="Parallel (optional)", precision=0)
94
+ min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
95
+ max_as_limit = gr.Number(label="Max AS Limit", value=128*1024, precision=0)
96
+
97
+ with gr.Row():
98
+ max_data_limit = gr.Number(label="Max Data Limit", value=4*1024, precision=0)
99
+ max_stack_limit = gr.Number(label="Max Stack Limit", value=5, precision=0)
100
+ check_gt_only = gr.Checkbox(label="Check GT Only")
101
+ no_gt = gr.Checkbox(label="No GT")
102
+
103
+ command_output = gr.Textbox(label="Command", lines=2)
104
+ submit_btn = gr.Button("Run Evaluation")
105
+ log_output = gr.Textbox(label="Execution Logs", lines=10)
106
+
107
+ def update_command(*args):
108
+ return generate_command(*args)
109
+
110
+ input_components = [
111
+ jsonl_file, split, subset, save_pass_rate, parallel,
112
+ min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
113
+ check_gt_only, no_gt
114
+ ]
115
+
116
+ for component in input_components:
117
+ component.change(update_command, inputs=input_components, outputs=command_output)
118
+
119
+ def on_submit(command):
120
+ threading.Thread(target=run_bigcodebench, args=(command,), daemon=True).start()
121
+ return "Evaluation started. Please wait for the logs to update..."
122
+
123
+ submit_btn.click(on_submit, inputs=[command_output], outputs=[log_output])
124
+
125
+ demo.load(read_logs, None, log_output, every=1)
126
 
127
+ demo.queue().launch()
128
+
129
  if __name__ == "__main__":
130
+ run()