Terry Zhuo
committed on
Commit
•
5d7ffc1
1
Parent(s):
f71fb26
update
Browse files
app.py
CHANGED
@@ -156,7 +156,7 @@ def evaluate(
|
|
156 |
if "solution" in sample
|
157 |
else problems[task_id]["complete_prompt"] + sample["completion"]
|
158 |
)
|
159 |
-
if "
|
160 |
solution = problems[task_id]["code_prompt"] + "\n pass\n" + solution
|
161 |
remainings.add(sample["_identifier"])
|
162 |
args = (
|
@@ -223,7 +223,7 @@ def evaluate(
|
|
223 |
pass_at_k["model"] = os.path.basename(samples).split("--bigcodebench-")[0]
|
224 |
pass_at_k["split"] = split
|
225 |
pass_at_k["subset"] = subset
|
226 |
-
pass_at_k["calibrated"] = "
|
227 |
pass_at_k["gt_pass_rate"] = gt_pass_rate
|
228 |
pass_at_k["failed_tasks"] = failed_tasks
|
229 |
|
|
|
156 |
if "solution" in sample
|
157 |
else problems[task_id]["complete_prompt"] + sample["completion"]
|
158 |
)
|
159 |
+
if "sanitized_calibrated" in samples:
|
160 |
solution = problems[task_id]["code_prompt"] + "\n pass\n" + solution
|
161 |
remainings.add(sample["_identifier"])
|
162 |
args = (
|
|
|
223 |
pass_at_k["model"] = os.path.basename(samples).split("--bigcodebench-")[0]
|
224 |
pass_at_k["split"] = split
|
225 |
pass_at_k["subset"] = subset
|
226 |
+
pass_at_k["calibrated"] = "sanitized_calibrated" in samples
|
227 |
pass_at_k["gt_pass_rate"] = gt_pass_rate
|
228 |
pass_at_k["failed_tasks"] = failed_tasks
|
229 |
|