facat committed on
Commit
33af91b
1 Parent(s): c1cde4c
Files changed (2) hide show
  1. .gitignore +2 -0
  2. tlem.py +20 -19
.gitignore CHANGED
@@ -2,3 +2,5 @@ __pycache__
2
  *.ju.py
3
  tests
4
 
 
 
 
2
  *.ju.py
3
  tests
4
 
5
+ README_files
6
+ .ipynb_checkpoints
tlem.py CHANGED
@@ -126,6 +126,7 @@ class Suite(EvaluationSuite):
126
 
127
  def get_suite(self, name) -> dict[str, Task]:
128
  chat = False
 
129
  match name:
130
  case _ if "chat" in name:
131
  chat = True
@@ -171,7 +172,6 @@ class Suite(EvaluationSuite):
171
  )
172
 
173
  case "open-leaderboard":
174
- suite = {}
175
  for name in [
176
  "arc",
177
  "hellaswag",
@@ -181,23 +181,24 @@ class Suite(EvaluationSuite):
181
  # "truthful_qa",
182
  "drop",
183
  ]:
184
- suite[name] = self.get_suite(name)
185
  case "tlem":
186
- suite = {
187
- name: self.get_suite(name)
188
- for name in [
189
- "arc",
190
- "hellaswag",
191
- "mmlu-chat",
192
- "winogrande",
193
- "gsm8k",
194
- "cmmlu-chat",
195
- "ceval-chat",
196
- "bbh",
197
- ]
198
- }
199
  case "all":
200
- suite = {name: self.get_suite(name) for name in self.supported_datasets}
 
201
  case _:
202
  raise NotImplementedError(
203
  f"{name} is not supported in {self.supported_datasets}"
@@ -205,8 +206,7 @@ class Suite(EvaluationSuite):
205
 
206
  if isinstance(suite, Task):
207
  suite = [suite]
208
- if isinstance(suite, list):
209
- suite = {name: suite}
210
 
211
  return suite
212
 
@@ -231,7 +231,8 @@ class Suite(EvaluationSuite):
231
  return suite
232
 
233
  def load(self, name):
234
- self.suite.update(self.get_suite(name))
 
235
  self.suite = self.drop_duplicates(self.suite)
236
  # return self
237
 
 
126
 
127
  def get_suite(self, name) -> dict[str, Task]:
128
  chat = False
129
+ suite={}
130
  match name:
131
  case _ if "chat" in name:
132
  chat = True
 
172
  )
173
 
174
  case "open-leaderboard":
 
175
  for name in [
176
  "arc",
177
  "hellaswag",
 
181
  # "truthful_qa",
182
  "drop",
183
  ]:
184
+ suite.update(self.get_suite(name))
185
  case "tlem":
186
+ for name in [
187
+ "arc",
188
+ "hellaswag",
189
+ "mmlu-chat",
190
+ "winogrande",
191
+ "gsm8k",
192
+ # "truthful_qa",
193
+ "cmmlu-chat",
194
+ "ceval-chat",
195
+ "bbh",
196
+ ]:
197
+ suite.update(self.get_suite(name))
198
+
199
  case "all":
200
+ for name in self.supported_datasets:
201
+ suite.update(self.get_suite(name))
202
  case _:
203
  raise NotImplementedError(
204
  f"{name} is not supported in {self.supported_datasets}"
 
206
 
207
  if isinstance(suite, Task):
208
  suite = [suite]
209
+ suite = {name: suite}
 
210
 
211
  return suite
212
 
 
231
  return suite
232
 
233
  def load(self, name):
234
+ sub_suite = self.get_suite(name)
235
+ self.suite.update(sub_suite)
236
  self.suite = self.drop_duplicates(self.suite)
237
  # return self
238