Sean Cho committed on
Commit
6313532
1 Parent(s): 150c99b

comment out evaluation fields not ready

Browse files
app.py CHANGED
@@ -77,8 +77,9 @@ BENCHMARK_COLS = [
77
  AutoEvalColumn.hellaswag,
78
  AutoEvalColumn.mmlu,
79
  AutoEvalColumn.truthfulqa,
80
- AutoEvalColumn.commongen,
81
- AutoEvalColumn.ethicalverification,
 
82
  ]
83
  ]
84
 
 
77
  AutoEvalColumn.hellaswag,
78
  AutoEvalColumn.mmlu,
79
  AutoEvalColumn.truthfulqa,
80
+ # TODO: Uncomment when we have results for these
81
+ # AutoEvalColumn.commongen,
82
+ # AutoEvalColumn.ethicalverification,
83
  ]
84
  ]
85
 
src/display_models/read_results.py CHANGED
@@ -9,14 +9,15 @@ import numpy as np
9
  from src.display_models.utils import AutoEvalColumn, make_clickable_model
10
 
11
  METRICS = ["acc_norm", "acc_norm", "acc", "mc2"]
12
- BENCHMARKS = ["ko_arc_challenge", "ko_hellaswag", "ko_mmlu", "ko_truthfulqa:mc", "ko_commongen", "ethicalverification"]
13
  BENCH_TO_NAME = {
14
  "ko_arc_challenge": AutoEvalColumn.arc.name,
15
  "ko_hellaswag": AutoEvalColumn.hellaswag.name,
16
  "ko_mmlu": AutoEvalColumn.mmlu.name,
17
- "ko_truthfulqa:mc": AutoEvalColumn.truthfulqa.name,
18
- "ko_commongen": AutoEvalColumn.commongen.name,
19
- "ethicalverification": AutoEvalColumn.ethicalverification.name,
 
20
  }
21
 
22
 
 
9
  from src.display_models.utils import AutoEvalColumn, make_clickable_model
10
 
11
  METRICS = ["acc_norm", "acc_norm", "acc", "mc2"]
12
+ BENCHMARKS = ["ko_arc_challenge", "ko_hellaswag", "ko_mmlu", "ko_truthfulqa_mc"] #, "ko_commongen", "ethicalverification"]
13
  BENCH_TO_NAME = {
14
  "ko_arc_challenge": AutoEvalColumn.arc.name,
15
  "ko_hellaswag": AutoEvalColumn.hellaswag.name,
16
  "ko_mmlu": AutoEvalColumn.mmlu.name,
17
+ "ko_truthfulqa_mc": AutoEvalColumn.truthfulqa.name,
18
+ # TODO: Uncomment when we have results for these
19
+ # "ko_commongen": AutoEvalColumn.commongen.name,
20
+ # "ethicalverification": AutoEvalColumn.ethicalverification.name,
21
  }
22
 
23
 
src/display_models/utils.py CHANGED
@@ -29,8 +29,9 @@ class AutoEvalColumn: # Auto evals column
29
  hellaswag = ColumnContent("Ko-HellaSwag", "number", True)
30
  mmlu = ColumnContent("Ko-MMLU", "number", True)
31
  truthfulqa = ColumnContent("Ko-TruthfulQA", "number", True)
32
- commongen = ColumnContent("Ko-CommonGen", "number", True)
33
- ethicalverification = ColumnContent("EthicalVerification", "number", True)
 
34
  model_type = ColumnContent("Type", "str", False)
35
  precision = ColumnContent("Precision", "str", False) # , True)
36
  license = ColumnContent("Hub License", "str", False)
 
29
  hellaswag = ColumnContent("Ko-HellaSwag", "number", True)
30
  mmlu = ColumnContent("Ko-MMLU", "number", True)
31
  truthfulqa = ColumnContent("Ko-TruthfulQA", "number", True)
32
+ # TODO: Uncomment when we have results for these
33
+ # commongen = ColumnContent("Ko-CommonGen", "number", True)
34
+ # ethicalverification = ColumnContent("EthicalVerification", "number", True)
35
  model_type = ColumnContent("Type", "str", False)
36
  precision = ColumnContent("Precision", "str", False) # , True)
37
  license = ColumnContent("Hub License", "str", False)