Spaces:
Restarting
on
CPU Upgrade
Restarting
on
CPU Upgrade
Sean Cho
committed on
Commit
•
9afcf15
1
Parent(s):
a86ccea
update evaluation columns
Browse files
- app.py +2 -2
- src/display_models/read_results.py +3 -1
- src/display_models/utils.py +6 -4
app.py
CHANGED
@@ -258,8 +258,8 @@ NUMERIC_INTERVALS = {
|
|
258 |
"~3B": (1.5, 5),
|
259 |
"~7B": (6, 11),
|
260 |
"~13B": (12, 15),
|
261 |
-
"~35B": (16, 55),
|
262 |
-
"60B+": (55, 10000),
|
263 |
}
|
264 |
|
265 |
def filter_models(
|
|
|
258 |
"~3B": (1.5, 5),
|
259 |
"~7B": (6, 11),
|
260 |
"~13B": (12, 15),
|
261 |
+
# "~35B": (16, 55),
|
262 |
+
# "60B+": (55, 10000),
|
263 |
}
|
264 |
|
265 |
def filter_models(
|
src/display_models/read_results.py
CHANGED
@@ -9,12 +9,14 @@ import numpy as np
|
|
9 |
from src.display_models.utils import AutoEvalColumn, make_clickable_model
|
10 |
|
11 |
METRICS = ["acc_norm", "acc_norm", "acc", "mc2"]
|
12 |
-
BENCHMARKS = ["arc:challenge", "hellaswag", "hendrycksTest", "truthfulqa:mc"]
|
13 |
BENCH_TO_NAME = {
|
14 |
"arc:challenge": AutoEvalColumn.arc.name,
|
15 |
"hellaswag": AutoEvalColumn.hellaswag.name,
|
16 |
"hendrycksTest": AutoEvalColumn.mmlu.name,
|
17 |
"truthfulqa:mc": AutoEvalColumn.truthfulqa.name,
|
|
|
|
|
18 |
}
|
19 |
|
20 |
|
|
|
9 |
from src.display_models.utils import AutoEvalColumn, make_clickable_model
|
10 |
|
11 |
METRICS = ["acc_norm", "acc_norm", "acc", "mc2"]
|
12 |
+
BENCHMARKS = ["arc:challenge", "hellaswag", "hendrycksTest", "truthfulqa:mc", "commongen", "ethicalverification"]
|
13 |
BENCH_TO_NAME = {
|
14 |
"arc:challenge": AutoEvalColumn.arc.name,
|
15 |
"hellaswag": AutoEvalColumn.hellaswag.name,
|
16 |
"hendrycksTest": AutoEvalColumn.mmlu.name,
|
17 |
"truthfulqa:mc": AutoEvalColumn.truthfulqa.name,
|
18 |
+
"commongen": AutoEvalColumn.commongen.name,
|
19 |
+
"ethicalverification": AutoEvalColumn.ethicalverification.name,
|
20 |
}
|
21 |
|
22 |
|
src/display_models/utils.py
CHANGED
@@ -25,10 +25,12 @@ class AutoEvalColumn: # Auto evals column
|
|
25 |
model_type_symbol = ColumnContent("T", "str", True)
|
26 |
model = ColumnContent("Model", "markdown", True)
|
27 |
average = ColumnContent("Average ⬆️", "number", True)
|
28 |
-
arc = ColumnContent("ARC", "number", True)
|
29 |
-
hellaswag = ColumnContent("HellaSwag", "number", True)
|
30 |
-
mmlu = ColumnContent("MMLU", "number", True)
|
31 |
-
truthfulqa = ColumnContent("TruthfulQA", "number", True)
|
|
|
|
|
32 |
model_type = ColumnContent("Type", "str", False)
|
33 |
precision = ColumnContent("Precision", "str", False) # , True)
|
34 |
license = ColumnContent("Hub License", "str", False)
|
|
|
25 |
model_type_symbol = ColumnContent("T", "str", True)
|
26 |
model = ColumnContent("Model", "markdown", True)
|
27 |
average = ColumnContent("Average ⬆️", "number", True)
|
28 |
+
arc = ColumnContent("Ko-ARC", "number", True)
|
29 |
+
hellaswag = ColumnContent("Ko-HellaSwag", "number", True)
|
30 |
+
mmlu = ColumnContent("Ko-MMLU", "number", True)
|
31 |
+
truthfulqa = ColumnContent("Ko-TruthfulQA", "number", True)
|
32 |
+
commongen = ColumnContent("Ko-CommonGen", "number", True)
|
33 |
+
ethicalverification = ColumnContent("EthicalVerification", "number", True)
|
34 |
model_type = ColumnContent("Type", "str", False)
|
35 |
precision = ColumnContent("Precision", "str", False) # , True)
|
36 |
license = ColumnContent("Hub License", "str", False)
|