meg-huggingface committed • Commit 9b340a8 • 1 Parent(s): eee2949

Print statements to help explain what's happening

Files changed:
- app.py +1 -0
- src/leaderboard/read_evals.py +9 -6
- src/populate.py +3 -0
app.py CHANGED

@@ -53,6 +53,7 @@ except Exception:
 raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 leaderboard_df = original_df.copy()
 
+
 (
     finished_eval_queue_df,
     running_eval_queue_df,
src/leaderboard/read_evals.py CHANGED

@@ -157,7 +157,8 @@ def get_request_file_for_model(requests_path, model_name, precision):
 def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
     model_result_filepaths = []
-
+    print('looking in results_path: %s' % results_path)
+    print('looking in requests_path: %s' % requests_path)
     for root, _, files in os.walk(results_path):
         # We should only have json files in model results
         if len(files) == 0 or any([not f.endswith(".json") for f in files]):

@@ -184,13 +185,15 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
             eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
         else:
             eval_results[eval_name] = eval_result
+    print("eval results is")
+    print(eval_results)
 
     results = []
     for v in eval_results.values():
-        try:
-            v.to_dict()  # we test if the dict version is complete
-            results.append(v)
-        except KeyError:  # not all eval values present
-            continue
+        #try:
+        v.to_dict()  # we test if the dict version is complete
+        results.append(v)
+        #except KeyError:  # not all eval values present
+        #    continue
 
     return results
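The second hunk disables the try/except so that an incomplete result now raises a KeyError loudly instead of being silently dropped from the leaderboard. For comparison only, a hypothetical variant (not part of this commit) that keeps the filtering but reports what it skips could look like:

    # Hypothetical sketch, not in this commit: keep dropping incomplete
    # results but print which entry was skipped and which key was missing.
    # Assumes each EvalResult exposes eval_name, as the code above does.
    results = []
    for v in eval_results.values():
        try:
            v.to_dict()  # we test if the dict version is complete
            results.append(v)
        except KeyError as err:  # not all eval values present
            print("skipping %s: missing key %s" % (v.eval_name, err))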
src/populate.py CHANGED

@@ -11,9 +11,12 @@ from src.leaderboard.read_evals import get_raw_eval_results
 def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(results_path, requests_path)
+    print(raw_data)
     all_data_json = [v.to_dict() for v in raw_data]
 
     df = pd.DataFrame.from_records(all_data_json)
+    print('df is')
+    print(df)
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=2)
 
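To inspect the same debug output without launching the full Gradio app, a minimal standalone driver is one option. This is a sketch under assumptions: the import paths below follow the standard Hugging Face leaderboard template layout (constants in src.envs, column lists in src.display.utils) and are not confirmed by this commit; the two-value unpacking matches the call shown in app.py above.

    # Minimal debugging driver; module paths are assumptions based on the
    # standard leaderboard template, not confirmed by this commit.
    from src.display.utils import BENCHMARK_COLS, COLS
    from src.envs import EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH
    from src.populate import get_leaderboard_df

    raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
    print(original_df.head())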