terryyz committed on
Commit
af3bf93
1 Parent(s): a2c88c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -31
app.py CHANGED
@@ -50,12 +50,12 @@ def generate_command(
50
  return " ".join(command)
51
 
52
 
53
- def cleanup_previous_files(jsonl_file=None):
54
- for file in glob.glob("*.json") + glob.glob("*.log") + glob.glob("*.jsonl"):
 
55
  try:
56
- if jsonl_file is not None and file == jsonl_file:
57
- continue
58
- os.remove(file)
59
  except Exception as e:
60
  print(f"Error during cleanup of {file}: {e}")
61
 
@@ -91,15 +91,12 @@ def run_bigcodebench(command):
91
 
92
  def stream_logs(command, jsonl_file=None):
93
  global is_running
94
- local_filename = None
95
- if jsonl_file is not None:
96
- local_filename = os.path.basename(jsonl_file.name)
97
 
98
  if is_running:
99
  yield "A command is already running. Please wait for it to finish.\n"
100
  return
101
 
102
- cleanup_previous_files(local_filename)
103
  yield "Cleaned up previous files.\n"
104
 
105
  log_content = []
@@ -107,7 +104,6 @@ def stream_logs(command, jsonl_file=None):
107
  log_content.append(log_line)
108
  yield "".join(log_content)
109
 
110
-
111
  with gr.Blocks() as demo:
112
  gr.Markdown("# BigCodeBench Evaluator")
113
 
@@ -131,13 +127,9 @@ with gr.Blocks() as demo:
131
  command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
132
  with gr.Row():
133
  submit_btn = gr.Button("Run Evaluation")
134
- download_btn = gr.DownloadButton(label="Download Result", visible=False)
135
  log_output = gr.Textbox(label="Execution Logs", lines=20)
136
 
137
-
138
- def update_command(*args):
139
- return generate_command(*args)
140
-
141
  input_components = [
142
  jsonl_file, split, subset, save_pass_rate, parallel,
143
  min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
@@ -145,29 +137,31 @@ with gr.Blocks() as demo:
145
  ]
146
 
147
  for component in input_components:
148
- component.change(update_command, inputs=input_components, outputs=command_output)
 
149
 
150
- def start_evaluation(command, jsonl_file):
 
 
 
 
 
151
  for log in stream_logs(command, jsonl_file):
152
- yield log, gr.update(), gr.update()
153
 
154
  result_file = find_result_file()
155
  if result_file:
156
- print(f"Result file: {result_file}")
157
- return (gr.update(label="Evaluation completed. Result file found."),
158
- gr.Button(visible=False),
159
- gr.DownloadButton(label="Download Result", value=result_file, visible=True))
160
  else:
161
- return (gr.update(label="Evaluation completed. No result file found."),
162
- gr.Button("Run Evaluation", visible=True),
163
- gr.DownloadButton(visible=False))
164
-
165
  submit_btn.click(start_evaluation,
166
- inputs=[command_output, jsonl_file],
167
- outputs=[log_output, submit_btn, download_btn])
168
 
169
  if __name__ == "__main__":
170
- demo.queue(max_size=300).launch(server_name="0.0.0.0", server_port=7860)
171
  scheduler = BackgroundScheduler()
172
- scheduler.add_job(restart_space, "interval", hours=3) # restarted every 3h as backup in case automatic updates are not working
173
- scheduler.start()
 
50
  return " ".join(command)
51
 
52
 
53
+ def cleanup_previous_files(jsonl_file):
54
+ file_list = ['Dockerfile', 'app.py', 'README.md', os.path.basename(jsonl_file.name), "__pycache__"]
55
+ for file in glob.glob("*"):
56
  try:
57
+ if file not in file_list:
58
+ os.remove(file)
 
59
  except Exception as e:
60
  print(f"Error during cleanup of {file}: {e}")
61
 
 
91
 
92
  def stream_logs(command, jsonl_file=None):
93
  global is_running
 
 
 
94
 
95
  if is_running:
96
  yield "A command is already running. Please wait for it to finish.\n"
97
  return
98
 
99
+ cleanup_previous_files(jsonl_file)
100
  yield "Cleaned up previous files.\n"
101
 
102
  log_content = []
 
104
  log_content.append(log_line)
105
  yield "".join(log_content)
106
 
 
107
  with gr.Blocks() as demo:
108
  gr.Markdown("# BigCodeBench Evaluator")
109
 
 
127
  command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
128
  with gr.Row():
129
  submit_btn = gr.Button("Run Evaluation")
130
+ download_btn = gr.DownloadButton(label="Download Result")
131
  log_output = gr.Textbox(label="Execution Logs", lines=20)
132
 
 
 
 
 
133
  input_components = [
134
  jsonl_file, split, subset, save_pass_rate, parallel,
135
  min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
 
137
  ]
138
 
139
  for component in input_components:
140
+ component.change(generate_command, inputs=input_components, outputs=command_output)
141
+
142
 
143
+ def start_evaluation(command, jsonl_file, subset, split):
144
+ extra = subset + "_" if subset != "full" else ""
145
+ result_path = os.path.basename(jsonl_file.name).replace(".jsonl", f"_{extra}eval_results.json")
146
+ with open(result_path, "w") as f:
147
+ f.write("")
148
+
149
  for log in stream_logs(command, jsonl_file):
150
+ yield log, gr.update(value=result_path, label=result_path), gr.update()
151
 
152
  result_file = find_result_file()
153
  if result_file:
154
+ return gr.update(label="Evaluation completed. Result file found."), gr.update(value=result_file)
155
+ # gr.Button(visible=False)#,
156
+ # gr.DownloadButton(label="Download Result", value=result_file, visible=True))
 
157
  else:
158
+ return gr.update(label="Evaluation completed. No result file found."), gr.update(value=result_path)
159
+ # gr.Button("Run Evaluation", visible=True),
160
+ # gr.DownloadButton(visible=False))
 
161
  submit_btn.click(start_evaluation,
162
+ inputs=[command_output, jsonl_file, subset, split],
163
+ outputs=[log_output, download_btn])
164
 
165
  if __name__ == "__main__":
166
+ demo.queue(max_size=300).launch(share=True, server_name="0.0.0.0", server_port=7860)
167
  scheduler = BackgroundScheduler()