Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- app/draw_diagram.py +13 -11
- app/pages.py +9 -5
app/draw_diagram.py
CHANGED
@@ -65,17 +65,19 @@ def draw(folder_name, category_one, category_two, sort, num_sort, model_size_ran
|
|
65 |
max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
|
66 |
|
67 |
display_names = {
|
68 |
-
'cross_mmlu'
|
69 |
-
'
|
70 |
-
'
|
71 |
-
'
|
72 |
-
'
|
73 |
-
'
|
74 |
-
'
|
75 |
-
'
|
76 |
-
'
|
77 |
-
'
|
78 |
-
'
|
|
|
|
|
79 |
}
|
80 |
|
81 |
data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
|
|
|
65 |
max_value = round(max(chart_data.iloc[:, 1]) + 0.1*max(chart_data.iloc[:, 1]), 1)
|
66 |
|
67 |
display_names = {
|
68 |
+
'cross_mmlu' : 'Cross-MMLU',
|
69 |
+
'cross_mmlu_no_prompt' : 'Cross-MMLU-No-Prompt',
|
70 |
+
'cross_logiqa' : 'Cross-LogiQA',
|
71 |
+
'cross_logiqa_no_prompt': 'Cross-LogiQA-No-Prompt',
|
72 |
+
'cross_xquad' : 'Cross-XQUAD',
|
73 |
+
'cross_xquad_no_prompt' : 'Cross-XQUAD-No-Prompt',
|
74 |
+
'sg_eval' : 'SG EVAL',
|
75 |
+
'sg_eval_v1_cleaned' : 'SG EVAL V1 Cleaned',
|
76 |
+
'sg_eval_v2_mcq' : 'SG EVAL V2 MCQ',
|
77 |
+
'sg_eval_v2_open' : 'SG EVAL V2 Open Ended',
|
78 |
+
'us_eval' : 'US EVAL',
|
79 |
+
'cn_eval' : 'CN EVAL',
|
80 |
+
'ph_eval' : 'PH EVAL'
|
81 |
}
|
82 |
|
83 |
data_columns = [i for i in chart_data.columns if i not in ['Model', 'model_show']]
|
app/pages.py
CHANGED
@@ -78,9 +78,11 @@ def cross_lingual_consistency():
|
|
78 |
filters_levelone = ['Zero Shot', 'Few Shot']
|
79 |
filters_leveltwo = [
|
80 |
'Cross-MMLU',
|
|
|
81 |
'Cross-XQUAD',
|
|
|
82 |
'Cross-LogiQA',
|
83 |
-
'Cross-
|
84 |
]
|
85 |
|
86 |
category_one_dict = {
|
@@ -89,10 +91,12 @@ def cross_lingual_consistency():
|
|
89 |
}
|
90 |
|
91 |
category_two_dict = {
|
92 |
-
'Cross-MMLU'
|
93 |
-
'Cross-
|
94 |
-
'Cross-
|
95 |
-
'Cross-
|
|
|
|
|
96 |
}
|
97 |
|
98 |
left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
|
|
|
78 |
filters_levelone = ['Zero Shot', 'Few Shot']
|
79 |
filters_leveltwo = [
|
80 |
'Cross-MMLU',
|
81 |
+
'Cross-MMLU-No-Prompt',
|
82 |
'Cross-XQUAD',
|
83 |
+
'Cross-XQUAD-No-Prompt',
|
84 |
'Cross-LogiQA',
|
85 |
+
'Cross-LogiQA-No-Prompt',
|
86 |
]
|
87 |
|
88 |
category_one_dict = {
|
|
|
91 |
}
|
92 |
|
93 |
category_two_dict = {
|
94 |
+
'Cross-MMLU' : 'cross_mmlu',
|
95 |
+
'Cross-MMLU-No-Prompt' : 'cross_mmlu_no_prompt'
|
96 |
+
'Cross-XQUAD' : 'cross_xquad',
|
97 |
+
'Cross-XQUAD-No-Prompt' : 'cross_xquad_no_prompt',
|
98 |
+
'Cross-LogiQA' : 'cross_logiqa',
|
99 |
+
'Cross-LogiQA-No-Prompt': 'cross_logiqa_no_prompt',
|
100 |
}
|
101 |
|
102 |
left, center, middle, _, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
|