zhuohan-7 committed on
Commit
4babe21
1 Parent(s): 4ee9f29

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app/draw_diagram.py +14 -13
  2. app/pages.py +10 -7
app/draw_diagram.py CHANGED
@@ -65,19 +65,20 @@ def draw(folder_name, category_one, category_two, sort, num_sort, model_size_ran
65
  max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
66
 
67
  display_names = {
68
- 'cross_mmlu' : 'Cross-MMLU',
69
- 'cross_mmlu_no_prompt' : 'Cross-MMLU-No-Prompt',
70
- 'cross_logiqa' : 'Cross-LogiQA',
71
- 'cross_logiqa_no_prompt': 'Cross-LogiQA-No-Prompt',
72
- 'cross_xquad' : 'Cross-XQUAD',
73
- 'cross_xquad_no_prompt' : 'Cross-XQUAD-No-Prompt',
74
- 'sg_eval' : 'SG EVAL',
75
- 'sg_eval_v1_cleaned' : 'SG EVAL V1 Cleaned',
76
- 'sg_eval_v2_mcq' : 'SG EVAL V2 MCQ',
77
- 'sg_eval_v2_open' : 'SG EVAL V2 Open Ended',
78
- 'us_eval' : 'US EVAL',
79
- 'cn_eval' : 'CN EVAL',
80
- 'ph_eval' : 'PH EVAL'
 
81
  }
82
 
83
  data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
 
65
  max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
66
 
67
  display_names = {
68
+ 'cross_mmlu' : 'Cross-MMLU',
69
+ 'cross_mmlu_no_prompt' : 'Cross-MMLU-No-Prompt',
70
+ 'cross_logiqa' : 'Cross-LogiQA',
71
+ 'cross_logiqa_no_prompt' : 'Cross-LogiQA-No-Prompt',
72
+ 'cross_xquad' : 'Cross-XQUAD',
73
+ 'cross_xquad_no_prompt' : 'Cross-XQUAD-No-Prompt',
74
+ 'sg_eval' : 'SG EVAL',
75
+ 'sg_eval_v1_cleaned' : 'SG EVAL V1 Cleaned',
76
+ 'sg_eval_v2_mcq' : 'SG EVAL V2 MCQ',
77
+ 'sg_eval_v2_mcq_no_prompt': 'SG EVAL V2 MCQ No Prompt',
78
+ 'sg_eval_v2_open' : 'SG EVAL V2 Open Ended',
79
+ 'us_eval' : 'US EVAL',
80
+ 'cn_eval' : 'CN EVAL',
81
+ 'ph_eval' : 'PH EVAL'
82
  }
83
 
84
  data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
app/pages.py CHANGED
@@ -126,6 +126,7 @@ def cultural_reasoning():
126
  filters_levelone = ['Zero Shot', 'Few Shot']
127
  filters_leveltwo = [
128
  'SG EVAL V2 MCQ',
 
129
  'SG EVAL V2 Open Ended',
130
  'SG EVAL',
131
  'SG EVAL V1 Cleaned',
@@ -138,13 +139,15 @@ def cultural_reasoning():
138
  'Few Shot': 'few_shot'
139
  }
140
 
141
- category_two_dict = {'SG EVAL': 'sg_eval',
142
- 'SG EVAL V1 Cleaned' : 'sg_eval_v1_cleaned',
143
- 'SG EVAL V2 MCQ' : 'sg_eval_v2_mcq',
144
- 'SG EVAL V2 Open Ended': 'sg_eval_v2_open',
145
- 'US EVAL' : 'us_eval',
146
- 'CN EVAL' : 'cn_eval',
147
- 'PH EVAL' : 'ph_eval'
 
 
148
  }
149
 
150
  left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
 
126
  filters_levelone = ['Zero Shot', 'Few Shot']
127
  filters_leveltwo = [
128
  'SG EVAL V2 MCQ',
129
+ 'SG EVAL V2 MCQ No Prompt',
130
  'SG EVAL V2 Open Ended',
131
  'SG EVAL',
132
  'SG EVAL V1 Cleaned',
 
139
  'Few Shot': 'few_shot'
140
  }
141
 
142
+ category_two_dict = {
143
+ 'SG EVAL' : 'sg_eval',
144
+ 'SG EVAL V1 Cleaned' : 'sg_eval_v1_cleaned',
145
+ 'SG EVAL V2 MCQ' : 'sg_eval_v2_mcq',
146
+ 'SG EVAL V2 MCQ No Prompt': 'sg_eval_v2_mcq_no_prompt',
147
+ 'SG EVAL V2 Open Ended' : 'sg_eval_v2_open',
148
+ 'US EVAL' : 'us_eval',
149
+ 'CN EVAL' : 'cn_eval',
150
+ 'PH EVAL' : 'ph_eval'
151
  }
152
 
153
  left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])