yentinglin committed
Commit: be645c2 • 1 Parent(s): 9fc574b
add
src/leaderboard/read_evals.py  CHANGED
@@ -44,7 +44,7 @@ class EvalResult:
         precision = Precision.from_str(config.get("model_dtype"))
 
         # Get model and org
-        org_and_model = config.get("model_name", config.get("model_args", None))
+        org_and_model = data.get("model_name", config.get("model_name", config.get("model_args", None)))
         org_and_model = org_and_model.split("/", 1)
 
         if len(org_and_model) == 1:
@@ -66,7 +66,7 @@ class EvalResult:
         if architectures:
             architecture = ";".join(architectures)
 
-        print(data["results"])
+        # print(data["results"])
         # Extract results available in this file (some results are split in several files)
         results = {}
         for task in Tasks:
@@ -74,7 +74,7 @@ class EvalResult:
 
             # We average all scores of a given metric (not all metrics are present in all files)
            accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
-            print(f"{task}: {accs}")
+            # print(f"{task}: {accs}")
             if accs.size == 0 or any([acc is None for acc in accs]):
                 continue
 
@@ -177,12 +177,12 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
-        print(f"Model result filepath: {model_result_filepath}")
+        # print(f"Model result filepath: {model_result_filepath}")
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
-        print(eval_result.results)
+        # print(eval_result.results)
         # print(eval_result)
         eval_result.update_with_request_file(requests_path)
-        print(eval_result.results)
+        # print(eval_result.results)
 
         # Store results of same eval together
         eval_name = eval_result.eval_name
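The only behavioral change in this commit is the model-name lookup: the new line prefers a top-level "model_name" field in the result file and only then falls back to the request config, while the remaining edits just comment out debug prints. A minimal sketch of that fallback chain, using hypothetical result/config dicts (not taken from the repository), behaves as follows:

# Sketch of the fallback chain introduced above; the example dicts are hypothetical.
data = {"model_name": "someorg/some-model-7b", "results": {}}                      # parsed result JSON
config = {"model_dtype": "bfloat16", "model_args": "pretrained=someorg/some-model-7b"}

# Prefer data["model_name"], then config["model_name"], then config["model_args"].
# Note: dict.get only falls through when the key is absent, not when its value is None.
org_and_model = data.get("model_name", config.get("model_name", config.get("model_args", None)))

parts = org_and_model.split("/", 1)
org, model = ("", parts[0]) if len(parts) == 1 else (parts[0], parts[1])
print(org, model)  # -> someorg some-model-7b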