Alina Lozovskaia committed on
Commit
79b2cd5
1 Parent(s): dadbd30

wip improvement

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +27 -25
src/leaderboard/read_evals.py CHANGED
@@ -1,6 +1,6 @@
1
  import json
2
  from pathlib import Path
3
- import dateutil.parser
4
  from json import JSONDecodeError
5
  import logging
6
  import math
@@ -189,39 +189,40 @@ def get_request_file_for_model(requests_path, model_name, precision):
189
  # Return empty string if no file found that matches criteria
190
  return request_file
191
 
192
- def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
193
- """From the path of the results folder root, extract all needed info for results"""
194
- model_result_filepaths = []
195
-
196
- results_path = Path(results_path)
197
 
198
- for root in results_path.rglob('*'):
199
- # root is now a Path object representing directories
200
- files = list(root.glob('*.json')) # Collect all .json files directly
201
-
202
- # Check if the directory is empty or contains no .json files
203
- if not files:
204
- continue
205
-
206
- # Sort the files by extracting the datetime from filenames assumed to be of the form "results_YYYYMMDD.json"
207
  try:
208
- files.sort(key=lambda x: x.stem.removeprefix("results_"))
209
- except dateutil.parser._parser.ParserError:
210
- files = [files[-1]]
 
 
 
211
 
212
- for file in files:
213
- # Construct file path correctly, ensuring no duplication of path parts
214
- model_result_filepath = file.resolve()
215
- model_result_filepaths.append(model_result_filepath)
216
 
 
 
217
  with open(dynamic_path) as f:
218
  dynamic_data = json.load(f)
 
 
 
 
 
219
 
220
  eval_results = {}
221
- for model_result_filepath in model_result_filepaths:
222
  # Creation of result
223
  eval_result = EvalResult.init_from_json_file(model_result_filepath)
224
  eval_result.update_with_request_file(requests_path)
 
225
  if eval_result.full_model in dynamic_data:
226
  eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
227
  # Hardcoding because of gating problem
@@ -236,12 +237,13 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
236
  eval_results[eval_name] = eval_result
237
 
238
  results = []
239
- for v in eval_results.values():
240
  try:
241
  if v.status == "FINISHED":
242
  v.to_dict() # we test if the dict version is complete
243
  results.append(v)
244
- except KeyError: # not all eval values present
 
245
  continue
246
 
247
  return results
 
1
  import json
2
  from pathlib import Path
3
+ from datetime import datetime
4
  from json import JSONDecodeError
5
  import logging
6
  import math
 
189
  # Return empty string if no file found that matches criteria
190
  return request_file
191
 
 
 
 
 
 
192
 
193
def parse_datetime(datetime_str: str) -> datetime:
    """Parse a result-file timestamp string into a naive ``datetime``.

    The timestamp is expected to look like ``YYYY-MM-DDTHH?MM?SS[.ffffff]``,
    where the time separator ``?`` is a dash, a colon, or whitespace. The
    fractional-seconds part is optional: second-precision timestamps are
    valid dates and must not be collapsed to the fallback value, otherwise
    they would sort before every correctly stamped file.

    Args:
        datetime_str: Timestamp text, e.g. a result file stem with its
            ``results_`` prefix removed.

    Returns:
        The parsed ``datetime``. If no known layout matches, an error is
        logged and ``datetime(1970, 1, 1)`` is returned so that callers
        sorting by this value place unparseable (legacy) entries first.
    """
    # Time-of-day layouts, in the order they have always been tried.
    time_layouts = (
        "%H-%M-%S",  # Format with dashes
        "%H:%M:%S",  # Standard format with colons
        "%H %M %S",  # Spaces as separator
    )

    for time_layout in time_layouts:
        # Try the fractional-seconds variant first so every input that was
        # parseable before still resolves through the same layout.
        for fraction in (".%f", ""):
            try:
                return datetime.strptime(datetime_str, f"%Y-%m-%dT{time_layout}{fraction}")
            except ValueError:
                continue

    # In rare cases set unix start time for files with incorrect time (legacy files).
    logging.error("No valid date format found for: %s", datetime_str)
    return datetime(1970, 1, 1)
208
 
 
 
 
 
209
 
210
+ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
211
+ """From the path of the results folder root, extract all needed info for results"""
212
  with open(dynamic_path) as f:
213
  dynamic_data = json.load(f)
214
+
215
+ results_path = Path(results_path)
216
+
217
+ model_files = list(results_path.rglob('results_*.json'))
218
+ model_files.sort(key=lambda file: parse_datetime(file.stem.removeprefix("results_")))
219
 
220
  eval_results = {}
221
+ for model_result_filepath in model_files:
222
  # Creation of result
223
  eval_result = EvalResult.init_from_json_file(model_result_filepath)
224
  eval_result.update_with_request_file(requests_path)
225
+
226
  if eval_result.full_model in dynamic_data:
227
  eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
228
  # Hardcoding because of gating problem
 
237
  eval_results[eval_name] = eval_result
238
 
239
  results = []
240
+ for k, v in eval_results.items():
241
  try:
242
  if v.status == "FINISHED":
243
  v.to_dict() # we test if the dict version is complete
244
  results.append(v)
245
+ except KeyError as e:
246
+ logging.error(f"Error while checking model {k} dict, no key: {e}") # not all eval values present
247
  continue
248
 
249
  return results