yentinglin committed
Commit be645c2
1 Parent(s): 9fc574b
Files changed (1)
  1. src/leaderboard/read_evals.py +6 -6
src/leaderboard/read_evals.py CHANGED
@@ -44,7 +44,7 @@ class EvalResult:
         precision = Precision.from_str(config.get("model_dtype"))
 
         # Get model and org
-        org_and_model = config.get("model_name", config.get("model_args", None))
+        org_and_model = data.get("model_name", config.get("model_name", config.get("model_args", None)))
         org_and_model = org_and_model.split("/", 1)
 
         if len(org_and_model) == 1:
@@ -66,7 +66,7 @@ class EvalResult:
         if architectures:
             architecture = ";".join(architectures)
 
-        print(data["results"])
+        # print(data["results"])
         # Extract results available in this file (some results are split in several files)
         results = {}
         for task in Tasks:
@@ -74,7 +74,7 @@ class EvalResult:
 
             # We average all scores of a given metric (not all metrics are present in all files)
            accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
-            print(f"{task}: {accs}")
+            # print(f"{task}: {accs}")
            if accs.size == 0 or any([acc is None for acc in accs]):
                continue
 
@@ -177,12 +177,12 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
-        print(f"Model result filepath: {model_result_filepath}")
+        # print(f"Model result filepath: {model_result_filepath}")
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
-        print(eval_result.results)
+        # print(eval_result.results)
         # print(eval_result)
         eval_result.update_with_request_file(requests_path)
-        print(eval_result.results)
+        # print(eval_result.results)
 
         # Store results of same eval together
         eval_name = eval_result.eval_name
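
The only functional change is the model-name lookup at line 47: it now checks the top level of the results JSON (data) before falling back to the config block, with the raw model_args string as a last resort. Below is a minimal sketch of that fallback chain; the resolve_org_and_model helper and the example payloads are hypothetical, not real leaderboard result files.

# Sketch of the nested .get() fallback introduced at line 47.
# resolve_org_and_model and the payloads below are illustrative only.

def resolve_org_and_model(data: dict) -> str:
    config = data.get("config", {})
    # Prefer a top-level "model_name", then the config's "model_name",
    # then the raw "model_args" string as a last resort.
    return data.get("model_name", config.get("model_name", config.get("model_args", None)))

print(resolve_org_and_model({"model_name": "org/model-a", "config": {"model_name": "org/model-b"}}))
# -> org/model-a  (top-level key wins)

print(resolve_org_and_model({"config": {"model_args": "pretrained=org/model-c"}}))
# -> pretrained=org/model-c  (falls back to model_args)

The remaining hunks only comment out debug print statements; they do not change behavior.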