File size: 4,550 Bytes
55f4d70 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import gradio as gr
import subprocess
import sys
import os
import threading
import time
class Logger:
    """Tee-style stdout replacement that mirrors every write to a log file.

    An instance is installed as ``sys.stdout`` so that everything the app
    prints (including the evaluation subprocess output echoed via ``print``)
    is also persisted for the UI's log viewer.
    """

    def __init__(self, filename):
        # Keep a handle to the real stdout so terminal output still appears.
        self.terminal = sys.stdout
        # Explicit utf-8: the default locale encoding can fail on non-ASCII
        # subprocess output (e.g. cp1252 on Windows).
        self.log = open(filename, "w", encoding="utf-8")

    def write(self, message):
        """Write *message* to both the terminal and the log file."""
        self.terminal.write(message)
        self.log.write(message)
        self.log.flush()  # keep the file current so the UI sees live logs

    def flush(self):
        """Flush both underlying streams (file-like API requirement)."""
        self.terminal.flush()
        self.log.flush()

    def isatty(self):
        # Some libraries probe sys.stdout.isatty(); report non-interactive.
        return False
# Path of the shared execution log; read back by read_logs() for the UI.
log_file = "bigcodebench_output.log"
# Module-level side effect: replace stdout with the tee logger so all
# subsequent print() output is mirrored into log_file.
sys.stdout = Logger(log_file)
# CLI entry point for the evaluator, and a simple re-entrancy guard flag.
default_command = "bigcodebench.evaluate"
is_running = False


def generate_command(
    jsonl_file, split, subset, save_pass_rate, parallel,
    min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
    check_gt_only, no_gt
):
    """Assemble the ``bigcodebench.evaluate`` command line from the UI inputs.

    Returns the full command as a single space-joined string suitable for
    display in the command preview textbox.
    """
    parts = [default_command]
    if jsonl_file is not None:
        # Only the basename is passed; the evaluator resolves it itself.
        parts += ["--samples", os.path.basename(jsonl_file.name)]
    parts += ["--split", split, "--subset", subset]
    if save_pass_rate:
        parts.append("--save_pass_rate")
    if parallel not in (None, 0):
        parts += ["--parallel", str(int(parallel))]
    parts += ["--min-time-limit", str(min_time_limit)]
    parts += ["--max-as-limit", str(int(max_as_limit))]
    parts += ["--max-data-limit", str(int(max_data_limit))]
    parts += ["--max-stack-limit", str(int(max_stack_limit))]
    if check_gt_only:
        parts.append("--check-gt-only")
    if no_gt:
        parts.append("--no-gt")
    return " ".join(parts)
def run_bigcodebench(command):
    """Run the evaluation command, tee its output to the log, then clean up.

    Streams the subprocess's combined stdout/stderr line by line through the
    Logger installed on ``sys.stdout`` so the UI log file stays current.
    Sets the module-level ``is_running`` flag for the duration of the run.
    """
    import shlex  # local import: only needed here; not imported at file top

    global is_running
    is_running = True
    print(f"Executing command: {command}")
    # shlex.split (not str.split) so quoted arguments containing spaces
    # survive; the context manager closes the stdout pipe and waits.
    with subprocess.Popen(
        shlex.split(command),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    ) as process:
        for line in process.stdout:
            print(line, end='')
        process.wait()
        if process.returncode != 0:
            print(f"Error: Command exited with status {process.returncode}")
    # Best-effort cleanup: kill leftover bigcodebench workers and clear /tmp.
    # NOTE(review): `rm -rf /tmp/*` is very aggressive — confirm this app
    # only runs on a dedicated/containerized host.
    cleanup_command = "pids=$(ps -u $(id -u) -o pid,comm | grep 'bigcodebench' | awk '{print $1}'); if [ -n \"$pids\" ]; then echo $pids | xargs -r kill; fi; rm -rf /tmp/*"
    subprocess.run(cleanup_command, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    is_running = False
def read_logs(path=None):
    """Return the current contents of the execution log.

    Args:
        path: Optional log file path; when ``None`` (the default, preserving
            the original call signature) the module-level ``log_file`` is read.

    Returns:
        The full text of the log file.
    """
    with open(path if path is not None else log_file, "r") as f:
        return f.read()
with gr.Blocks() as demo:
    gr.Markdown("# BigCodeBench Evaluation App")

    with gr.Row():
        jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
        split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
        subset = gr.Dropdown(choices=["full", "hard"], label="Subset", value="full")
    with gr.Row():
        save_pass_rate = gr.Checkbox(label="Save Pass Rate")
        parallel = gr.Number(label="Parallel (optional)", precision=0)
        min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
        max_as_limit = gr.Number(label="Max AS Limit", value=128*1024, precision=0)
    with gr.Row():
        max_data_limit = gr.Number(label="Max Data Limit", value=4*1024, precision=0)
        max_stack_limit = gr.Number(label="Max Stack Limit", value=5, precision=0)
        check_gt_only = gr.Checkbox(label="Check GT Only")
        no_gt = gr.Checkbox(label="No GT")

    command_output = gr.Textbox(label="Command", lines=2, value=default_command, interactive=False)
    submit_btn = gr.Button("Run Evaluation")
    log_output = gr.Textbox(label="Execution Logs", lines=10)
    refresh_btn = gr.Button("Refresh Logs")

    def update_command(*args):
        # Re-render the command preview whenever any input changes.
        return generate_command(*args)

    input_components = [
        jsonl_file, split, subset, save_pass_rate, parallel,
        min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
        check_gt_only, no_gt
    ]
    for component in input_components:
        component.change(update_command, inputs=input_components, outputs=command_output)

    def on_submit(command):
        """Start the evaluation in a background thread so the UI stays responsive.

        Returns a status string for the log textbox.
        """
        global is_running
        if is_running:
            return "A command is already running. Please wait for it to finish."
        # BUG FIX: the original returned a two-element tuple in one branch and
        # a single string in the other, and stuffed a *function object* into a
        # textbox value (`gr.update(value=run_and_update)`) — so the evaluation
        # never actually ran. Launch it on a daemon thread instead; the user
        # refreshes the log view via the "Refresh Logs" button.
        threading.Thread(target=run_bigcodebench, args=(command,), daemon=True).start()
        return "Evaluation started. Please wait for the logs to update..."

    submit_btn.click(on_submit, inputs=[command_output], outputs=[log_output])
    refresh_btn.click(read_logs, inputs=None, outputs=log_output)

if __name__ == "__main__":
    demo.queue().launch()