Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- app/draw_diagram.py +14 -13
- app/pages.py +10 -7
app/draw_diagram.py
CHANGED
@@ -65,19 +65,20 @@ def draw(folder_name, category_one, category_two, sort, num_sort, model_size_ran
|
|
65 |
max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
|
66 |
|
67 |
display_names = {
|
68 |
-
'cross_mmlu'
|
69 |
-
'cross_mmlu_no_prompt'
|
70 |
-
'cross_logiqa'
|
71 |
-
'cross_logiqa_no_prompt': 'Cross-LogiQA-No-Prompt',
|
72 |
-
'cross_xquad'
|
73 |
-
'cross_xquad_no_prompt'
|
74 |
-
'sg_eval'
|
75 |
-
'sg_eval_v1_cleaned'
|
76 |
-
'sg_eval_v2_mcq'
|
77 |
-
'
|
78 |
-
'
|
79 |
-
'
|
80 |
-
'
|
|
|
81 |
}
|
82 |
|
83 |
data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
|
|
|
65 |
max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
|
66 |
|
67 |
display_names = {
|
68 |
+
'cross_mmlu' : 'Cross-MMLU',
|
69 |
+
'cross_mmlu_no_prompt' : 'Cross-MMLU-No-Prompt',
|
70 |
+
'cross_logiqa' : 'Cross-LogiQA',
|
71 |
+
'cross_logiqa_no_prompt' : 'Cross-LogiQA-No-Prompt',
|
72 |
+
'cross_xquad' : 'Cross-XQUAD',
|
73 |
+
'cross_xquad_no_prompt' : 'Cross-XQUAD-No-Prompt',
|
74 |
+
'sg_eval' : 'SG EVAL',
|
75 |
+
'sg_eval_v1_cleaned' : 'SG EVAL V1 Cleaned',
|
76 |
+
'sg_eval_v2_mcq' : 'SG EVAL V2 MCQ',
|
77 |
+
'sg_eval_v2_mcq_no_prompt': 'SG EVAL V2 MCQ No Prompt',
|
78 |
+
'sg_eval_v2_open' : 'SG EVAL V2 Open Ended',
|
79 |
+
'us_eval' : 'US EVAL',
|
80 |
+
'cn_eval' : 'CN EVAL',
|
81 |
+
'ph_eval' : 'PH EVAL'
|
82 |
}
|
83 |
|
84 |
data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
|
app/pages.py
CHANGED
@@ -126,6 +126,7 @@ def cultural_reasoning():
|
|
126 |
filters_levelone = ['Zero Shot', 'Few Shot']
|
127 |
filters_leveltwo = [
|
128 |
'SG EVAL V2 MCQ',
|
|
|
129 |
'SG EVAL V2 Open Ended',
|
130 |
'SG EVAL',
|
131 |
'SG EVAL V1 Cleaned',
|
@@ -138,13 +139,15 @@ def cultural_reasoning():
|
|
138 |
'Few Shot': 'few_shot'
|
139 |
}
|
140 |
|
141 |
-
category_two_dict = {
|
142 |
-
'SG EVAL
|
143 |
-
'SG EVAL
|
144 |
-
'SG EVAL V2
|
145 |
-
'
|
146 |
-
'
|
147 |
-
'
|
|
|
|
|
148 |
}
|
149 |
|
150 |
left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
|
|
|
126 |
filters_levelone = ['Zero Shot', 'Few Shot']
|
127 |
filters_leveltwo = [
|
128 |
'SG EVAL V2 MCQ',
|
129 |
+
'SG EVAL V2 MCQ No Prompt',
|
130 |
'SG EVAL V2 Open Ended',
|
131 |
'SG EVAL',
|
132 |
'SG EVAL V1 Cleaned',
|
|
|
139 |
'Few Shot': 'few_shot'
|
140 |
}
|
141 |
|
142 |
+
category_two_dict = {
|
143 |
+
'SG EVAL' : 'sg_eval',
|
144 |
+
'SG EVAL V1 Cleaned' : 'sg_eval_v1_cleaned',
|
145 |
+
'SG EVAL V2 MCQ' : 'sg_eval_v2_mcq',
|
146 |
+
'SG EVAL V2 MCQ No Prompt': 'sg_eval_v2_mcq_no_prompt',
|
147 |
+
'SG EVAL V2 Open Ended' : 'sg_eval_v2_open',
|
148 |
+
'US EVAL' : 'us_eval',
|
149 |
+
'CN EVAL' : 'cn_eval',
|
150 |
+
'PH EVAL' : 'ph_eval'
|
151 |
}
|
152 |
|
153 |
left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
|