Bram Vanroy committed on
Commit
809ba3d
·
1 Parent(s): 9739433

improve missing repr

Browse files
Files changed (1) hide show
  1. app.py +17 -12
app.py CHANGED
@@ -38,17 +38,18 @@ class Result:
38
  model_type: Literal["pretrained", "fine-tuned", "instruction-tuned", "RL-tuned"]
39
  dutch_coverage: Literal["none", "pretrained", "fine-tuned"]
40
  num_parameters: int
41
- arc: float = field(default=0.0)
42
- average: float = field(default=0.0, init=False)
43
- hellaswag: float = field(default=0.0)
44
- mmlu: float = field(default=0.0)
45
- truthfulqa: float = field(default=0.0)
46
  num_parameters_kmb: str = field(init=False)
47
 
48
  def __post_init__(self):
49
  if self.model_type not in ["pretrained", "fine-tuned", "instruction-tuned", "RL-tuned", "not-given"]:
50
  raise ValueError(
51
- f"Model type {self.model_type} must be one of 'pretrained', 'fine-tuned', 'instruction-tuned', 'RL-tuned', 'not-given"
 
52
  )
53
  if self.dutch_coverage not in ["none", "pretrained", "fine-tuned", "not-given"]:
54
  raise ValueError(
@@ -60,7 +61,10 @@ class Result:
60
  if task_name not in field_names:
61
  raise ValueError(f"Task name {task_name} not found in Result class fields so cannot create DataFrame")
62
 
63
- self.average = (self.arc + self.hellaswag + self.mmlu + self.truthfulqa) / 4
 
 
 
64
  self.num_parameters_kmb = convert_number_to_kmb(self.num_parameters)
65
 
66
 
@@ -145,23 +149,22 @@ class ResultSet:
145
  df = pd.DataFrame(data)
146
  df = df.sort_values(by=self.column_names["average"], ascending=False)
147
  number_cols = [col for attr, col in self.column_names.items() if attr in TASK_METRICS or attr == "average"]
148
- styler = df.style.format("{:.2f}", subset=number_cols)
149
 
150
  def highlight_max(col):
151
  return np.where(col == np.nanmax(col.to_numpy()), "font-weight: bold;", None)
152
 
153
  styler = styler.apply(highlight_max, axis=0, subset=number_cols)
154
-
155
  num_params_col = self.column_names["num_parameters"]
156
  styler = styler.format(convert_number_to_kmb, subset=num_params_col)
157
-
158
  styler = styler.hide()
159
  return styler
160
 
161
  @cached_property
162
  def latex_df(self) -> Styler:
163
  number_cols = [col for attr, col in self.column_names.items() if attr in TASK_METRICS or attr == "average"]
164
- styler = self.df.style.format("{:.2f}", subset=number_cols)
165
 
166
  def highlight_max(col):
167
  return np.where(col == np.nanmax(col.to_numpy()), "font-weight: bold;", None)
@@ -169,6 +172,7 @@ class ResultSet:
169
  styler = styler.apply(highlight_max, axis=0, subset=number_cols)
170
  num_params_col = self.column_names["num_parameters"]
171
  styler = styler.format(convert_number_to_kmb, subset=num_params_col)
 
172
  styler = styler.hide()
173
  return styler
174
 
@@ -244,7 +248,8 @@ with gr.Blocks() as demo:
244
 
245
  gr.Markdown(
246
  f"## Leaderboard\nOnly representative for the Dutch version (`*_nl`) of the benchmarks!"
247
- " All models have been benchmarked in 8-bit."
 
248
  )
249
 
250
  results = collect_results()
 
38
  model_type: Literal["pretrained", "fine-tuned", "instruction-tuned", "RL-tuned"]
39
  dutch_coverage: Literal["none", "pretrained", "fine-tuned"]
40
  num_parameters: int
41
+ arc: float = field(default=np.nan)
42
+ average: float = field(default=np.nan, init=False)
43
+ hellaswag: float = field(default=np.nan)
44
+ mmlu: float = field(default=np.nan)
45
+ truthfulqa: float = field(default=np.nan)
46
  num_parameters_kmb: str = field(init=False)
47
 
48
  def __post_init__(self):
49
  if self.model_type not in ["pretrained", "fine-tuned", "instruction-tuned", "RL-tuned", "not-given"]:
50
  raise ValueError(
51
+ f"Model type {self.model_type} must be one of 'pretrained', 'fine-tuned',"
52
+ f" 'instruction-tuned', 'RL-tuned', 'not-given"
53
  )
54
  if self.dutch_coverage not in ["none", "pretrained", "fine-tuned", "not-given"]:
55
  raise ValueError(
 
61
  if task_name not in field_names:
62
  raise ValueError(f"Task name {task_name} not found in Result class fields so cannot create DataFrame")
63
 
64
+ if any([np.isnan(getattr(self, task_name)) for task_name in TASK_METRICS]):
65
+ self.average = np.nan
66
+ else:
67
+ self.average = sum([getattr(self, task_name) for task_name in TASK_METRICS]) / 4
68
  self.num_parameters_kmb = convert_number_to_kmb(self.num_parameters)
69
 
70
 
 
149
  df = pd.DataFrame(data)
150
  df = df.sort_values(by=self.column_names["average"], ascending=False)
151
  number_cols = [col for attr, col in self.column_names.items() if attr in TASK_METRICS or attr == "average"]
152
+ styler = df.style.format("{:.2f}", subset=number_cols, na_rep="<missing>")
153
 
154
  def highlight_max(col):
155
  return np.where(col == np.nanmax(col.to_numpy()), "font-weight: bold;", None)
156
 
157
  styler = styler.apply(highlight_max, axis=0, subset=number_cols)
 
158
  num_params_col = self.column_names["num_parameters"]
159
  styler = styler.format(convert_number_to_kmb, subset=num_params_col)
160
+ styler.set_caption("Leaderboard on Dutch benchmarks.")
161
  styler = styler.hide()
162
  return styler
163
 
164
  @cached_property
165
  def latex_df(self) -> Styler:
166
  number_cols = [col for attr, col in self.column_names.items() if attr in TASK_METRICS or attr == "average"]
167
+ styler = self.df.style.format("{:.2f}", subset=number_cols, na_rep="<missing>")
168
 
169
  def highlight_max(col):
170
  return np.where(col == np.nanmax(col.to_numpy()), "font-weight: bold;", None)
 
172
  styler = styler.apply(highlight_max, axis=0, subset=number_cols)
173
  num_params_col = self.column_names["num_parameters"]
174
  styler = styler.format(convert_number_to_kmb, subset=num_params_col)
175
+ styler.set_caption("Leaderboard on Dutch benchmarks.")
176
  styler = styler.hide()
177
  return styler
178
 
 
248
 
249
  gr.Markdown(
250
  f"## Leaderboard\nOnly representative for the Dutch version (`*_nl`) of the benchmarks!"
251
+ " All models have been benchmarked in 8-bit. `<missing>` values indicate that those benchmarks are still"
252
+ " pending."
253
  )
254
 
255
  results = collect_results()