Spaces:

bigcode
/

bigcodebench-interaction

Runtime error

App Files Files Community

Terry Zhuo committed on Oct 26, 2024

Commit

158d594

1 Parent(s): 380859d

update

Browse files

Files changed (1) hide show

app.py +11 -17

app.py CHANGED Viewed

@@ -26,35 +26,28 @@ HF_TOKEN = os.environ.get("HF_TOKEN", None)
 API = HfApi(token=HF_TOKEN)
 Result = Tuple[str, List[bool]]
-def run_code(code: str) -> str:
     # Create string buffers to capture output
     stdout_buffer = io.StringIO()
     stderr_buffer = io.StringIO()
-    # Create a dictionary for local variables
-    local_dict = {}
     # Capture both stdout and stderr
     with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
         try:
             # Execute the code
-            exec(code, globals(), local_dict)
             # Get the output
             output = stdout_buffer.getvalue()
             errors = stderr_buffer.getvalue()
-            # If there's a return value in the last expression, capture it
-            last_line = code.strip().split('\n')[-1]
-            if not (last_line.startswith('print') or last_line.strip() == ''):
-                try:
-                    result = eval(last_line, globals(), local_dict)
-                    if result is not None:
-                        output += f"\n>>> {result}"
-                except:
-                    pass
             # Combine stdout and stderr
             result = output
             if errors:
@@ -64,13 +57,14 @@ def run_code(code: str) -> str:
             # Capture any execution errors
             result = f"Error: {str(e)}"
-    return result if result.strip() else "Code executed successfully (no output)"
 # Create the Gradio interface with better styling
 interface = gr.Interface(
     fn=run_code,
     inputs=[
         gr.Code(label="Python Code", language="python"),
     ],
     outputs=[
         gr.Textbox(label="Output")

 API = HfApi(token=HF_TOKEN)
 Result = Tuple[str, List[bool]]
+dataset = load_dataset("bigcode/bigcodebench-tool")
+tasks = {
+    _id: task["mixed_tool_implementation"]
+    for _id, task in dataset.items()
+}
+def run_code(code: str, _id: str) -> str:
     # Create string buffers to capture output
     stdout_buffer = io.StringIO()
     stderr_buffer = io.StringIO()
+    pre_code = tasks[_id]
     # Capture both stdout and stderr
     with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
         try:
             # Execute the code
+            exec(pre_code + code)
             # Get the output
             output = stdout_buffer.getvalue()
             errors = stderr_buffer.getvalue()
             # Combine stdout and stderr
             result = output
             if errors:
             # Capture any execution errors
             result = f"Error: {str(e)}"
+    return result
 # Create the Gradio interface with better styling
 interface = gr.Interface(
     fn=run_code,
     inputs=[
         gr.Code(label="Python Code", language="python"),
+        gr.Dropdown(label="Task", choices=list(tasks.keys())),
     ],
     outputs=[
         gr.Textbox(label="Output")