Spaces:
Runtime error
Runtime error
Add N/A table
Browse files
app.py
CHANGED
@@ -52,9 +52,10 @@ def mc2(doc):
|
|
52 |
return sum(p_true)
|
53 |
|
54 |
final_total_results = []
|
|
|
|
|
55 |
for validator in validators:
|
56 |
results_dir_file_list = os.listdir(f"""_results/few-shot/{validator}""")
|
57 |
-
final_split_results = []
|
58 |
number_of_nas, number_of_results, inference_total = 0,0,0
|
59 |
for task in tasks:
|
60 |
task_results_files = [result_file for result_file in results_dir_file_list if task in result_file]
|
@@ -66,7 +67,8 @@ for validator in validators:
|
|
66 |
results += json.load(f)
|
67 |
|
68 |
results = [clean_result(result, task) if "result" in result else result for result in results]
|
69 |
-
|
|
|
70 |
# Total results
|
71 |
number_of_nas += len([1 for result in results if ('cleaned_result' in result) and ('N/A' in result['cleaned_result'])])
|
72 |
inference_total += np.array([result['inference_time'] for result in results if 'inference_time' in result]).sum()
|
@@ -104,12 +106,17 @@ for validator in validators:
|
|
104 |
})
|
105 |
# Average the four benchmark scores of the most recent leaderboard entry.
# The scores are read from the same entry ([-1]) that receives the average;
# reading them from [0] would give every later entry the first entry's average.
final_total_results[-1]["Average ⬆️"] = np.array([final_total_results[-1]["ARC (25-shot) ⬆️"], final_total_results[-1]["HellaSwag (10-shot) ⬆️"], final_total_results[-1]["TruthfulQA (0-shot) ⬆️"], final_total_results[-1]["MMLU (5-shot) ⬆️"]]).mean()
|
106 |
|
|
|
|
|
|
|
107 |
demo = gr.Blocks()
|
108 |
with demo:
|
109 |
with gr.Row():
|
110 |
title = gr.Markdown(value=f"""# <p style="text-align: center;"> Bittensor LMEH Leaderboard</p>""")
|
111 |
with gr.Row():
|
112 |
table_1 = gr.Dataframe(pd.DataFrame(final_total_results))
|
|
|
|
|
113 |
# with gr.Row(visible = False):
|
114 |
# table_2 = gr.Dataframe(pd.DataFrame(final_split_results))
|
115 |
|
|
|
52 |
return sum(p_true)
|
53 |
|
54 |
final_total_results = []
|
55 |
+
final_split_results = []
|
56 |
+
results_cumulative = []
|
57 |
for validator in validators:
|
58 |
results_dir_file_list = os.listdir(f"""_results/few-shot/{validator}""")
|
|
|
59 |
number_of_nas, number_of_results, inference_total = 0,0,0
|
60 |
for task in tasks:
|
61 |
task_results_files = [result_file for result_file in results_dir_file_list if task in result_file]
|
|
|
67 |
results += json.load(f)
|
68 |
|
69 |
results = [clean_result(result, task) if "result" in result else result for result in results]
|
70 |
+
results_cumulative += results
|
71 |
+
|
72 |
# Total results
|
73 |
number_of_nas += len([1 for result in results if ('cleaned_result' in result) and ('N/A' in result['cleaned_result'])])
|
74 |
inference_total += np.array([result['inference_time'] for result in results if 'inference_time' in result]).sum()
|
|
|
106 |
})
|
107 |
# Average the four benchmark scores of the most recent leaderboard entry.
# The scores are read from the same entry ([-1]) that receives the average;
# reading them from [0] would give every later entry the first entry's average.
final_total_results[-1]["Average ⬆️"] = np.array([final_total_results[-1]["ARC (25-shot) ⬆️"], final_total_results[-1]["HellaSwag (10-shot) ⬆️"], final_total_results[-1]["TruthfulQA (0-shot) ⬆️"], final_total_results[-1]["MMLU (5-shot) ⬆️"]]).mean()
|
108 |
|
109 |
+
# Build the table of the most frequent raw answers that were cleaned to "N/A".
all_results_frame = pd.DataFrame(results_cumulative)
# Keep only the rows whose cleaned answer is exactly "N/A".
na_rows = all_results_frame[all_results_frame["cleaned_result"] == "N/A"]
# Per distinct raw `result`, count the non-null values in every other column.
na_counts = na_rows.groupby("result", as_index=False).count()
# Rank by the count in the `id` column, highest first, and keep the top five.
ranked_na_counts = na_counts.sort_values(by=["id"], ascending=False)
df = ranked_na_counts.head(5)[["result", "id"]]
|
111 |
+
|
112 |
demo = gr.Blocks()
|
113 |
with demo:
|
114 |
with gr.Row():
|
115 |
title = gr.Markdown(value=f"""# <p style="text-align: center;"> Bittensor LMEH Leaderboard</p>""")
|
116 |
with gr.Row():
|
117 |
table_1 = gr.Dataframe(pd.DataFrame(final_total_results))
|
118 |
+
with gr.Row():
|
119 |
+
table_2 = gr.Dataframe(df)
|
120 |
# with gr.Row(visible = False):
|
121 |
# table_2 = gr.Dataframe(pd.DataFrame(final_split_results))
|
122 |
|