bigcodebench-evaluator-1

Running

App Files Files Community

terryyz committed on Aug 7, 2024

Commit

af3bf93

verified ·

1 Parent(s): a2c88c6

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -31

app.py CHANGED Viewed

@@ -50,12 +50,12 @@ def generate_command(
     return " ".join(command)
-def cleanup_previous_files(jsonl_file=None):
-    for file in glob.glob("*.json") + glob.glob("*.log") + glob.glob("*.jsonl"):
         try:
-            if jsonl_file is not None and file == jsonl_file:
-                continue
-            os.remove(file)
         except Exception as e:
             print(f"Error during cleanup of {file}: {e}")
@@ -91,15 +91,12 @@ def run_bigcodebench(command):
 def stream_logs(command, jsonl_file=None):
     global is_running
-    local_filename = None
-    if jsonl_file is not None:
-        local_filename = os.path.basename(jsonl_file.name)
     if is_running:
         yield "A command is already running. Please wait for it to finish.\n"
         return
-    cleanup_previous_files(local_filename)
     yield "Cleaned up previous files.\n"
     log_content = []
@@ -107,7 +104,6 @@ def stream_logs(command, jsonl_file=None):
         log_content.append(log_line)
         yield "".join(log_content)
 with gr.Blocks() as demo:
     gr.Markdown("# BigCodeBench Evaluator")
@@ -131,13 +127,9 @@ with gr.Blocks() as demo:
     command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
     with gr.Row():
         submit_btn = gr.Button("Run Evaluation")
-        download_btn = gr.DownloadButton(label="Download Result", visible=False)
     log_output = gr.Textbox(label="Execution Logs", lines=20)
-    def update_command(*args):
-        return generate_command(*args)
     input_components = [
         jsonl_file, split, subset, save_pass_rate, parallel,
         min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
@@ -145,29 +137,31 @@ with gr.Blocks() as demo:
     ]
     for component in input_components:
-        component.change(update_command, inputs=input_components, outputs=command_output)
-    def start_evaluation(command, jsonl_file):
         for log in stream_logs(command, jsonl_file):
-            yield log, gr.update(), gr.update()
         result_file = find_result_file()
         if result_file:
-            print(f"Result file: {result_file}")
-            return (gr.update(label="Evaluation completed. Result file found."),
-                    gr.Button(visible=False),
-                    gr.DownloadButton(label="Download Result", value=result_file, visible=True))
         else:
-            return (gr.update(label="Evaluation completed. No result file found."),
-                    gr.Button("Run Evaluation", visible=True),
-                    gr.DownloadButton(visible=False))
     submit_btn.click(start_evaluation,
-                 inputs=[command_output, jsonl_file],
-                 outputs=[log_output, submit_btn, download_btn])
 if __name__ == "__main__":
-    demo.queue(max_size=300).launch(server_name="0.0.0.0", server_port=7860)
     scheduler = BackgroundScheduler()
-    scheduler.add_job(restart_space, "interval", hours=3) # restarted every 3h as backup in case automatic updates are not working
-    scheduler.start()

     return " ".join(command)
+def cleanup_previous_files(jsonl_file):
+    file_list = ['Dockerfile', 'app.py', 'README.md', os.path.basename(jsonl_file.name), "__pycache__"]
+    for file in glob.glob("*"):
         try:
+            if file not in file_list:
+                os.remove(file)
         except Exception as e:
             print(f"Error during cleanup of {file}: {e}")
 def stream_logs(command, jsonl_file=None):
     global is_running
     if is_running:
         yield "A command is already running. Please wait for it to finish.\n"
         return
+    cleanup_previous_files(jsonl_file)
     yield "Cleaned up previous files.\n"
     log_content = []
         log_content.append(log_line)
         yield "".join(log_content)
 with gr.Blocks() as demo:
     gr.Markdown("# BigCodeBench Evaluator")
     command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
     with gr.Row():
         submit_btn = gr.Button("Run Evaluation")
+        download_btn = gr.DownloadButton(label="Download Result")
     log_output = gr.Textbox(label="Execution Logs", lines=20)
     input_components = [
         jsonl_file, split, subset, save_pass_rate, parallel,
         min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
     ]
     for component in input_components:
+        component.change(generate_command, inputs=input_components, outputs=command_output)
+    def start_evaluation(command, jsonl_file, subset, split):
+        extra = subset + "_" if subset != "full" else ""
+        result_path = os.path.basename(jsonl_file.name).replace(".jsonl", f"_{extra}eval_results.json")
+        with open(result_path, "w") as f:
+            f.write("")
         for log in stream_logs(command, jsonl_file):
+            yield log, gr.update(value=result_path, label=result_path), gr.update()
         result_file = find_result_file()
         if result_file:
+            return gr.update(label="Evaluation completed. Result file found."), gr.update(value=result_file)
+                    # gr.Button(visible=False)#,
+                    # gr.DownloadButton(label="Download Result", value=result_file, visible=True))
         else:
+            return gr.update(label="Evaluation completed. No result file found."), gr.update(value=result_path)
+                    # gr.Button("Run Evaluation", visible=True),
+                    # gr.DownloadButton(visible=False))
     submit_btn.click(start_evaluation,
+                 inputs=[command_output, jsonl_file, subset, split],
+                 outputs=[log_output, download_btn])
 if __name__ == "__main__":
+    demo.queue(max_size=300).launch(share=True, server_name="0.0.0.0", server_port=7860)
     scheduler = BackgroundScheduler()