kmfoda committed on
Commit
b4e9b7d
1 Parent(s): 0c8e76f

Add N/A table

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -52,9 +52,10 @@ def mc2(doc):
52
  return sum(p_true)
53
 
54
  final_total_results = []
 
 
55
  for validator in validators:
56
  results_dir_file_list = os.listdir(f"""_results/few-shot/{validator}""")
57
- final_split_results = []
58
  number_of_nas, number_of_results, inference_total = 0,0,0
59
  for task in tasks:
60
  task_results_files = [result_file for result_file in results_dir_file_list if task in result_file]
@@ -66,7 +67,8 @@ for validator in validators:
66
  results += json.load(f)
67
 
68
  results = [clean_result(result, task) if "result" in result else result for result in results]
69
-
 
70
  # Total results
71
  number_of_nas += len([1 for result in results if ('cleaned_result' in result) and ('N/A' in result['cleaned_result'])])
72
  inference_total += np.array([result['inference_time'] for result in results if 'inference_time' in result]).sum()
@@ -104,12 +106,17 @@ for validator in validators:
104
  })
105
  final_total_results[-1]["Average ⬆️"] = np.array([final_total_results[0]["ARC (25-shot) ⬆️"], final_total_results[0]["HellaSwag (10-shot) ⬆️"],final_total_results[0]["TruthfulQA (0-shot) ⬆️"], final_total_results[0]["MMLU (5-shot) ⬆️"]]).mean()
106
 
 
 
 
107
  demo = gr.Blocks()
108
  with demo:
109
  with gr.Row():
110
  title = gr.Markdown(value=f"""# <p style="text-align: center;"> Bittensor LMEH Leaderboard</p>""")
111
  with gr.Row():
112
  table_1 = gr.Dataframe(pd.DataFrame(final_total_results))
 
 
113
  # with gr.Row(visible = False):
114
  # table_2 = gr.Dataframe(pd.DataFrame(final_split_results))
115
 
 
52
  return sum(p_true)
53
 
54
  final_total_results = []
55
+ final_split_results = []
56
+ results_cumulative = []
57
  for validator in validators:
58
  results_dir_file_list = os.listdir(f"""_results/few-shot/{validator}""")
 
59
  number_of_nas, number_of_results, inference_total = 0,0,0
60
  for task in tasks:
61
  task_results_files = [result_file for result_file in results_dir_file_list if task in result_file]
 
67
  results += json.load(f)
68
 
69
  results = [clean_result(result, task) if "result" in result else result for result in results]
70
+ results_cumulative += results
71
+
72
  # Total results
73
  number_of_nas += len([1 for result in results if ('cleaned_result' in result) and ('N/A' in result['cleaned_result'])])
74
  inference_total += np.array([result['inference_time'] for result in results if 'inference_time' in result]).sum()
 
106
  })
107
  final_total_results[-1]["Average ⬆️"] = np.array([final_total_results[0]["ARC (25-shot) ⬆️"], final_total_results[0]["HellaSwag (10-shot) ⬆️"],final_total_results[0]["TruthfulQA (0-shot) ⬆️"], final_total_results[0]["MMLU (5-shot) ⬆️"]]).mean()
108
 
109
+ df = pd.DataFrame(results_cumulative)
110
+ df = df[df["cleaned_result"] == "N/A"].groupby("result", as_index=False).count().sort_values(by = ["id"], ascending = False).head(5)[["result","id"]]
111
+
112
  demo = gr.Blocks()
113
  with demo:
114
  with gr.Row():
115
  title = gr.Markdown(value=f"""# <p style="text-align: center;"> Bittensor LMEH Leaderboard</p>""")
116
  with gr.Row():
117
  table_1 = gr.Dataframe(pd.DataFrame(final_total_results))
118
+ with gr.Row():
119
+ table_2 = gr.Dataframe(df)
120
  # with gr.Row(visible = False):
121
  # table_2 = gr.Dataframe(pd.DataFrame(final_split_results))
122