future-xy committed on
Commit
82a6ed1
1 Parent(s): a4a186c

fix result display bug

Browse files
Files changed (2) hide show
  1. src/backend/envs.py +1 -1
  2. src/display/utils.py +16 -18
src/backend/envs.py CHANGED
@@ -43,7 +43,7 @@ class Tasks(Enum):
43
 
44
  # task13 = Task("ifeval", "prompt_level_strict_acc", "IFEval", 0)
45
 
46
- task14 = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT", 0)
47
 
48
  # task15 = Task("fever10", "acc", "FEVER", 16)
49
  # task15_1 = Task("fever11", "acc", "FEVER", 8)
 
43
 
44
  # task13 = Task("ifeval", "prompt_level_strict_acc", "IFEval", 0)
45
 
46
+ # task14 = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT", 0)
47
 
48
  # task15 = Task("fever10", "acc", "FEVER", 16)
49
  # task15_1 = Task("fever11", "acc", "FEVER", 8)
src/display/utils.py CHANGED
@@ -45,8 +45,8 @@ class Tasks(Enum):
45
  # halueval_dial = Task("halueval_dialogue", "acc", "HaluDial/Acc")
46
 
47
  # # XXX include me back at some point
48
- selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
49
- mmlu = Task("hendrycksTest", "acc", "MMLU")
50
 
51
 
52
  # These classes are for user facing column names,
@@ -63,11 +63,9 @@ class ColumnContent:
63
 
64
 
65
  auto_eval_column_dict = []
66
- auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "str", True, never_hidden=True)])
67
- auto_eval_column_dict.append(["hardware", ColumnContent, ColumnContent("Hardware", "str", True, never_hidden=True)])
68
  # Init
69
- # auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
70
- # auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
71
 
72
  # #Scores
73
  # # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
@@ -75,18 +73,18 @@ auto_eval_column_dict.append(["hardware", ColumnContent, ColumnContent("Hardware
75
  for task in Tasks:
76
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
77
 
78
- # # Model information
79
- # auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
80
- # auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
81
- # auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
82
- # auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
83
- # auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
84
- # auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
85
- # auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
86
- # auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
87
- # auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
88
- # # Dummy column for the search bar (hidden by the custom CSS)
89
- # auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
90
 
91
  # We use make dataclass to dynamically fill the scores from Tasks
92
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
45
  # halueval_dial = Task("halueval_dialogue", "acc", "HaluDial/Acc")
46
 
47
  # # XXX include me back at some point
48
+ # selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
49
+ mmlu = Task("mmlu", "acc", "MMLU/Acc")
50
 
51
 
52
  # These classes are for user facing column names,
 
63
 
64
 
65
  auto_eval_column_dict = []
 
 
66
  # Init
67
+ auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
68
+ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
69
 
70
  # #Scores
71
  # # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
 
73
  for task in Tasks:
74
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
75
 
76
+ # Model information
77
+ auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
78
+ auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
79
+ auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
80
+ auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
81
+ auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
82
+ auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
83
+ auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
84
+ auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
85
+ auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
86
+ # Dummy column for the search bar (hidden by the custom CSS)
87
+ auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
88
 
89
  # We use make dataclass to dynamically fill the scores from Tasks
90
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)