Spaces:
Running
Update app.py
Browse files
app.py
CHANGED
@@ -132,7 +132,8 @@ def add_new_eval(
|
|
132 |
json.dumps({
|
133 |
"id": task_id,
|
134 |
"model_answer": answer,
|
135 |
-
"score": score
|
|
|
136 |
}) + "\n"
|
137 |
)
|
138 |
|
@@ -153,18 +154,23 @@ def add_new_eval(
|
|
153 |
token=TOKEN
|
154 |
)
|
155 |
|
|
|
|
|
|
|
|
|
|
|
156 |
eval_entry = {
|
157 |
"Model Name": model_name,
|
158 |
"Base Model": model_family,
|
159 |
"URL": url,
|
160 |
"Organization": organization,
|
161 |
-
"Accuracy":
|
162 |
"Accuracy (easy)": accuracy_easy,
|
163 |
"Accuracy (medium)": accuracy_medium,
|
164 |
"Accuracy (hard)": accuracy_hard,
|
165 |
-
"Answer rate":
|
166 |
-
"Precision":
|
167 |
-
"EM":
|
168 |
}
|
169 |
eval_results["test"] = eval_results["test"].add_item(eval_entry)
|
170 |
eval_results.push_to_hub(RESULTS_DATASET, config_name=YEAR_VERSION, token=TOKEN)
|
|
|
132 |
json.dumps({
|
133 |
"id": task_id,
|
134 |
"model_answer": answer,
|
135 |
+
"score": score,
|
136 |
+
"has_ans": has_ans
|
137 |
}) + "\n"
|
138 |
)
|
139 |
|
|
|
154 |
token=TOKEN
|
155 |
)
|
156 |
|
157 |
+
accuracy = float("{:.1f}".format(np.average([x["acc"] for x in scored_file]) * 100))
|
158 |
+
coverage = float("{:.1f}".format(np.average([x["has_ans"] for x in scored_file])))
|
159 |
+
em = float("{:.1f}".format(np.average([1 if x["acc"] == 1 else 0 for x in scored_file])))
|
160 |
+
precision = float("{:.1f}".format(np.average([x["acc"] for x in scored_file if x["has_ans"] == 1])))
|
161 |
+
|
162 |
eval_entry = {
|
163 |
"Model Name": model_name,
|
164 |
"Base Model": model_family,
|
165 |
"URL": url,
|
166 |
"Organization": organization,
|
167 |
+
"Accuracy": accuracy,
|
168 |
"Accuracy (easy)": accuracy_easy,
|
169 |
"Accuracy (medium)": accuracy_medium,
|
170 |
"Accuracy (hard)": accuracy_hard,
|
171 |
+
"Answer rate": coverage,
|
172 |
+
"Precision": precision,
|
173 |
+
"EM": em
|
174 |
}
|
175 |
eval_results["test"] = eval_results["test"].add_item(eval_entry)
|
176 |
eval_results.push_to_hub(RESULTS_DATASET, config_name=YEAR_VERSION, token=TOKEN)
|