zhuohan-7 committed on
Commit
6d0d847
·
verified ·
1 Parent(s): 1dd96df

Upload functions

Browse files
Files changed (2) hide show
  1. app/draw_diagram.py +556 -0
  2. app/pages.py +191 -0
app/draw_diagram.py ADDED
@@ -0,0 +1,556 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from streamlit_echarts import st_echarts
5
+ # from streamlit_echarts import JsCode
6
+ from streamlit_javascript import st_javascript
7
+ # from PIL import Image
8
+
9
# Model name -> Hugging Face model page. The draw_* functions look a clicked
# chart value up in this mapping and pass the resulting URL to nav_to().
links_dic = dict(
    [
        (
            "Meta-Llama-3-8B-Instruct",
            "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct",
        ),
        (
            "Meta-Llama-3-70B-Instruct",
            "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct",
        ),
        (
            "Meta-Llama-3-8B",
            "https://huggingface.co/meta-llama/Meta-Llama-3-8B",
        ),
    ]
)
14
+
15
+
16
+ # huggingface_image = Image.open('style/huggingface.jpg')
17
+
18
def nav_to(url):
    """Open ``url`` in a new browser tab by running JavaScript in the client.

    Args:
        url: Address to open. It is serialised with ``json.dumps`` so that
            quotes or backslashes in it cannot break out of the JavaScript
            string literal.
    """
    import json

    # window.open() returns a window handle (or null when the popup is
    # blocked), not a Promise, so the previous `.then(...)` chained onto it
    # raised a TypeError in the browser. The snippet now evaluates to a plain
    # string instead.
    js = f'window.open({json.dumps(url)}, "_blank") ? "opened" : "blocked"'
    st_javascript(js)

    # Alternative kept for reference (navigates the current tab instead):
    # nav_script = """
    #     <meta http-equiv="refresh" content="0; url='%s'">
    # """ % (url)
    # st.write(nav_script, unsafe_allow_html=True)
27
+
28
def highlight_table_line(model_name):
    """Write ``model_name`` to the page using ``st.write``.

    Args:
        model_name: Value to display.
    """
    st.write(model_name)
31
+
32
+
33
def draw_cross_lingual(category_one, category_two, sort, sorted):
    """Draw the cross-lingual line chart followed by the results table.

    Reads ``./results/cross_lingual/<category_one>/<category_two>.csv``, drops
    every column that contains a missing value, rounds to two decimals and
    orders the rows by the ``sort`` column.

    Args:
        category_one: Sub-folder of the results directory holding the CSV.
        category_two: CSV base name. A chart is drawn for ``'cross_mmlu'``,
            ``'cross_logiqa'`` and ``'cross_xquad'``; for any other value only
            the table is rendered.
        sort: Name of the column to sort by.
        sorted: ``'Ascending'`` sorts ascending, anything else descending.
            (The name shadows the builtin but is kept for existing callers.)
    """
    folder = "./results/cross_lingual/"
    data_path = f'{folder}/{category_one}/{category_two}.csv'
    chart_data = pd.read_csv(data_path).dropna(axis='columns').round(2)
    chart_data = chart_data.sort_values(by=[sort], ascending=(sorted == 'Ascending'))

    # Pad the y-axis range slightly beyond the data (first column is skipped).
    min_value = round(chart_data.iloc[:, 1::].min().min() - 0.1, 1)
    max_value = round(chart_data.iloc[:, 1::].max().max() + 0.1, 1)

    subtitles = {
        'cross_mmlu': 'Cross-MMLU',
        'cross_logiqa': 'Cross-LogiQA',
        'cross_xquad': 'Cross-XQUAD',
    }

    # (series / legend name, CSV column) pairs plotted for every sub-category.
    series_columns = [
        ("Overall Accuracy", "Accuracy"),
        ("Cross-Lingual Consistency", "Cross-Lingual Consistency"),
        ("AC3", "AC3"),
        ("English", "English"),
        ("Chinese", "Chinese"),
        ("Spanish", "Spanish"),
        ("Vietnamese", "Vietnamese"),
    ]
    if category_two in ('cross_mmlu', 'cross_logiqa'):
        # These two sub-categories additionally plot three more languages.
        series_columns += [
            ("Indonesian", "Indonesian"),
            ("Malay", "Malay"),
            ("Filipino", "Filipino"),
        ]

    if category_two in subtitles:
        options = {
            "title": {"text": subtitles[category_two]},
            "tooltip": {
                "trigger": "axis",
                "axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}},
                "triggerOn": 'mousemove',
            },
            # The legend lists exactly the series that are drawn.
            "legend": {"data": [name for name, _ in series_columns]},
            "toolbox": {"feature": {"saveAsImage": {}}},
            "grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True},
            "xAxis": [
                {
                    "type": "category",
                    "boundaryGap": False,
                    # Needed so that clicking a model label fires the click event.
                    "triggerEvent": True,
                    "data": chart_data['Model'].tolist(),
                }
            ],
            "yAxis": [{"type": "value",
                       "min": min_value,
                       "max": max_value,
                       }],
            "series": [
                {"name": name, "type": "line", "data": chart_data[column].tolist()}
                for name, column in series_columns
            ],
        }

        events = {
            "click": "function(params) { return params.value }",
        }

        clicked = st_echarts(options=options, events=events, height="500px")

        # The handler returns params.value for whatever was clicked; that is
        # not necessarily a model name (e.g. a data point yields a number), so
        # only navigate when the value maps to a known link.
        target_url = links_dic.get(clicked) if isinstance(clicked, str) else None
        if target_url is not None:
            nav_to(target_url)

    ### create table
    st.divider()
    # TODO: optionally expose a 'Link' column via chart_data['Model'].map(links_dic).
    st.dataframe(chart_data,
                 hide_index=True,
                 use_container_width=True)
243
+
244
+
245
+
246
def draw_only_acc(folder_name, category_one, category_two, sorted):
    """Draw an accuracy-only line chart followed by the results table.

    Used for Cultural Reasoning / General Reasoning / Emotion / Fundamental
    NLP Tasks. Reads ``./results/<folder_name>/<category_one>/<file>.csv``
    where ``<file>`` is looked up from the display name ``category_two``,
    rounds to two decimals and orders the rows by the ``Accuracy`` column.

    Args:
        folder_name: One of ``'cultural_reasoning'``, ``'general_reasoning'``,
            ``'emotion'`` or ``'fundamental_nlp_tasks'``.
        category_one: Sub-folder of ``folder_name`` holding the CSV.
        category_two: Display name of the sub-category (e.g. ``'SG EVAL'``);
            also used as the chart title.
        sorted: ``'Ascending'`` sorts ascending, anything else descending.
            (The name shadows the builtin but is kept for existing callers.)

    Raises:
        KeyError: If ``category_two`` is not a known sub-category of
            ``folder_name``.
    """
    folder = f"./results/{folder_name}/"

    # Display name -> CSV base name, per result folder.
    sub_categories = {
        'cultural_reasoning': {'SG EVAL': 'sg_eval',
                               'US EVAL': 'us_eval',
                               'CN EVAL': 'cn_eval',
                               'PH EVAL': 'ph_eval'},
        'general_reasoning': {'MMLU': 'mmlu',
                              'C Eval': 'c_eval',
                              'CMMLU': 'cmmlu',
                              'ZBench': 'zbench',
                              'IndoMMLU': 'indommlu'},
        'emotion': {'Indonesian Emotion Classification': 'ind_emotion',
                    'SST2': 'sst2'},
        'fundamental_nlp_tasks': {'OCNLI': 'ocnli',
                                  'C3': 'c3',
                                  'COLA': 'cola',
                                  'QQP': 'qqp',
                                  'MNLI': 'mnli',
                                  'QNLI': 'qnli',
                                  'WNLI': 'wnli',
                                  'RTE': 'rte',
                                  'MRPC': 'mrpc'},
    }
    category_two_dict = sub_categories.get(folder_name, {})

    try:
        subtitle = category_two_dict[category_two]
    except KeyError:
        # Same exception type as before, but with a message that names both
        # inputs instead of only the missing key.
        raise KeyError(
            f"unknown sub-category {category_two!r} for folder {folder_name!r}"
        ) from None

    data_path = f'{folder}/{category_one}/{subtitle}.csv'
    chart_data = pd.read_csv(data_path).round(2)
    chart_data = chart_data.sort_values(by=['Accuracy'], ascending=(sorted == 'Ascending'))

    # Pad the y-axis range slightly beyond the data (first column is skipped).
    min_value = round(chart_data.iloc[:, 1::].min().min() - 0.1, 1)
    max_value = round(chart_data.iloc[:, 1::].max().max() + 0.1, 1)

    options = {
        "title": {"text": f"{category_two}"},
        "tooltip": {
            "trigger": "axis",
            "axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}},
            "triggerOn": 'mousemove',
        },
        "legend": {"data": ['Overall Accuracy']},
        "toolbox": {"feature": {"saveAsImage": {}}},
        "grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True},
        "xAxis": [
            {
                "type": "category",
                "boundaryGap": False,
                "triggerEvent": True,
                "data": chart_data['Model'].tolist(),
            }
        ],
        "yAxis": [{"type": "value",
                   "min": min_value,
                   "max": max_value,
                   }],
        "series": [
            {
                "name": "Overall Accuracy",
                "type": "line",
                "data": chart_data['Accuracy'].tolist(),
            },
        ],
    }

    events = {
        "click": "function(params) { return params.value }"
    }

    clicked = st_echarts(options=options, events=events, height="500px")

    # The handler returns params.value for whatever was clicked; that is not
    # necessarily a model name (e.g. a data point yields a number), so only
    # navigate when the value maps to a known link.
    target_url = links_dic.get(clicked) if isinstance(clicked, str) else None
    if target_url is not None:
        nav_to(target_url)

    ### create table
    st.divider()
    # TODO: optionally expose a 'Link' column via chart_data['Model'].map(links_dic).
    st.dataframe(chart_data,
                 hide_index=True,
                 use_container_width=True)
350
+
351
def draw_flores_translation(category_one, category_two, sorted):
    """Draw the FLORES translation BLEU chart followed by the results table.

    Reads ``./results/flores_translation/<category_one>/<file>.csv`` where
    ``<file>`` is looked up from the display name ``category_two``, rounds to
    two decimals and orders the rows by the ``BLEU`` column.

    Args:
        category_one: Sub-folder of the results directory holding the CSV.
        category_two: Translation direction, e.g. ``'Indonesian to English'``.
            The historical misspellings ``'Vitenamese to English'`` and
            ``'Nalay to English'`` are still accepted and are shown with the
            corrected spelling in the chart title.
        sorted: ``'Ascending'`` sorts ascending, anything else descending.
            (The name shadows the builtin but is kept for existing callers.)

    Raises:
        KeyError: If ``category_two`` is not a known translation direction.
    """
    folder = "./results/flores_translation/"
    category_two_dict = {'Indonesian to English': 'ind2eng',
                         'Vietnamese to English': 'vie2eng',
                         'Chinese to English': 'zho2eng',
                         'Malay to English': 'zsm2eng'}

    # Earlier versions used misspelled labels as keys; map them onto the
    # corrected ones so existing callers keep working.
    legacy_labels = {'Vitenamese to English': 'Vietnamese to English',
                     'Nalay to English': 'Malay to English'}
    category_two = legacy_labels.get(category_two, category_two)

    subtitle = category_two_dict[category_two]

    data_path = f'{folder}/{category_one}/{subtitle}.csv'
    chart_data = pd.read_csv(data_path).round(2)
    chart_data = chart_data.sort_values(by=['BLEU'], ascending=(sorted == 'Ascending'))

    # Pad the y-axis range slightly beyond the data (first column is skipped).
    min_value = round(chart_data.iloc[:, 1::].min().min() - 0.1, 1)
    max_value = round(chart_data.iloc[:, 1::].max().max() + 0.1, 1)

    options = {
        "title": {"text": f"{category_two}"},
        "tooltip": {
            "trigger": "axis",
            "axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}},
            "triggerOn": 'mousemove',
        },
        "legend": {"data": ['BLEU']},
        "toolbox": {"feature": {"saveAsImage": {}}},
        "grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True},
        "xAxis": [
            {
                "type": "category",
                "boundaryGap": False,
                "triggerEvent": True,
                "data": chart_data['Model'].tolist(),
            }
        ],
        "yAxis": [{"type": "value",
                   "min": min_value,
                   "max": max_value,
                   }],
        "series": [
            {
                "name": "BLEU",
                "type": "line",
                "data": chart_data['BLEU'].tolist(),
            },
        ],
    }

    events = {
        "click": "function(params) { return params.value }"
    }

    clicked = st_echarts(options=options, events=events, height="500px")

    # The handler returns params.value for whatever was clicked; that is not
    # necessarily a model name (e.g. a data point yields a number), so only
    # navigate when the value maps to a known link.
    target_url = links_dic.get(clicked) if isinstance(clicked, str) else None
    if target_url is not None:
        nav_to(target_url)

    ### create table
    st.divider()
    # TODO: optionally expose a 'Link' column via chart_data['Model'].map(links_dic).
    st.dataframe(chart_data,
                 hide_index=True,
                 use_container_width=True)
428
+
429
+
430
def draw_dialogue(category_one, category_two, sort, sorted):
    """Draw the dialogue line chart followed by the results table.

    Reads ``./results/dialogue/<category_one>/<file>.csv`` where ``<file>`` is
    looked up from the display name ``category_two``, rounds to two decimals
    and orders the rows by the ``sort`` column.

    Args:
        category_one: Sub-folder of the results directory holding the CSV.
        category_two: ``'DREAM'`` (plots ``Accuracy``), ``'SAMSum'`` or
            ``'DialogSum'`` (both plot ``Average`` and the three ROUGE
            columns); also used as the chart title.
        sort: Name of the column to sort by.
        sorted: ``'Ascending'`` sorts ascending, anything else descending.
            (The name shadows the builtin but is kept for existing callers.)

    Raises:
        KeyError: If ``category_two`` is not one of the three known datasets.
    """
    folder = "./results/dialogue"
    category_two_dict = {'DREAM': 'dream',
                         'SAMSum': 'samsum',
                         'DialogSum': 'dialogsum'}

    subtitle = category_two_dict[category_two]

    data_path = f'{folder}/{category_one}/{subtitle}.csv'
    chart_data = pd.read_csv(data_path).round(2)
    chart_data = chart_data.sort_values(by=[sort], ascending=(sorted == 'Ascending'))

    # Pad the y-axis range slightly beyond the data (first column is skipped).
    min_value = round(chart_data.iloc[:, 1::].min().min() - 0.1, 1)
    max_value = round(chart_data.iloc[:, 1::].max().max() + 0.1, 1)

    # The lookup above guarantees category_two is one of the three datasets,
    # so the only distinction left is DREAM versus the two summarisation sets.
    if category_two == 'DREAM':
        series_names = ['Accuracy']
    else:
        series_names = ['Average', 'ROUGE-1', 'ROUGE-2', 'ROUGE-L']

    options = {
        "title": {"text": f"{category_two}"},
        "tooltip": {
            "trigger": "axis",
            "axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}},
            "triggerOn": 'mousemove',
        },
        # Only the plotted series are listed; previously every CSV column
        # (including 'Model', which is the x-axis, not a series) was passed.
        "legend": {"data": series_names},
        "toolbox": {"feature": {"saveAsImage": {}}},
        "grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True},
        "xAxis": [
            {
                "type": "category",
                "boundaryGap": False,
                "triggerEvent": True,
                "data": chart_data['Model'].tolist(),
            }
        ],
        "yAxis": [{"type": "value",
                   "min": min_value,
                   "max": max_value,
                   }],
        "series": [
            {"name": name, "type": "line", "data": chart_data[name].tolist()}
            for name in series_names
        ],
    }

    events = {
        "click": "function(params) { return params.value }"
    }

    clicked = st_echarts(options=options, events=events, height="500px")

    # The handler returns params.value for whatever was clicked; that is not
    # necessarily a model name (e.g. a data point yields a number), so only
    # navigate when the value maps to a known link.
    target_url = links_dic.get(clicked) if isinstance(clicked, str) else None
    if target_url is not None:
        nav_to(target_url)

    ### create table
    st.divider()
    # TODO: optionally expose a 'Link' column via chart_data['Model'].map(links_dic).
    st.dataframe(chart_data,
                 hide_index=True,
                 use_container_width=True)
app/pages.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from draw_diagram import *
3
+
4
def dashboard():
    """Render the landing page: title, GitHub stars badge and a short intro."""
    st.title("SeaEval")

    # Rendered explicitly: a bare string expression is only displayed by
    # Streamlit's "magic", which is documented not to apply to imported
    # modules, so the badge would otherwise be silently dropped.
    st.markdown(
        "[gh]: https://github.com/SeaEval/SeaEval\n"
        "[![GitHub Repo stars](https://img.shields.io/github/stars/SeaEval/SeaEval?style=social)][gh]"
    )

    seaeval_url = "https://seaeval.github.io/"
    st.markdown("[SeaEval](%s) is the new benchmark for multilingual foundation models consisting of 28 dataset." % seaeval_url)
    st.markdown(".... haven't finished yet ...")
15
+
16
def cross_lingual_consistency():
    """Render the Cross-Lingual Consistency page.

    Shows four select boxes (shot setting, sub-category, sort column, sort
    direction) and passes the selection to ``draw_cross_lingual``.
    """
    st.title("Cross-Lingual Consistency")

    # Display label -> folder / file name expected by draw_cross_lingual.
    category_one_dict = {'Zero Shot': 'zero_shot',
                         'Few Shot': 'few_shot'}
    category_two_dict = {'Cross-MMLU': 'cross_mmlu',
                         'Cross-XQUAD': 'cross_xquad',
                         'Cross-LogiQA': 'cross_logiqa'}

    left, center, _, middle, right = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])
    with left:
        category_one = st.selectbox('Select Zero / Few shot', list(category_one_dict))
    with center:
        category_two = st.selectbox('Select the sub-category', list(category_two_dict))
    with middle:
        sort = st.selectbox('Sort', ['Accuracy', 'Cross-Lingual Consistency', 'AC3',
                                     'English', 'Chinese', 'Spanish', 'Vietnamese'])
    with right:
        # Named sort_order rather than `sorted` to avoid shadowing the builtin.
        sort_order = st.selectbox('by', ['Ascending', 'Descending'])

    # Each select box has a non-empty option list and therefore always yields
    # one of its options, so the former "nothing selected" fallback branch
    # could never run and has been removed.
    draw_cross_lingual(category_one_dict[category_one],
                       category_two_dict[category_two],
                       sort,
                       sort_order)
46
+
47
def cultural_reasoning():
    """Render the Cultural Reasoning page.

    Shows three select boxes (shot setting, sub-category, sort direction) and
    passes the selection to ``draw_only_acc`` for the ``cultural_reasoning``
    result folder.
    """
    st.title("Cultural Reasoning")

    filters_leveltwo = ['SG EVAL', 'CN EVAL', 'PH EVAL', 'US EVAL']

    # Display label -> folder name expected by draw_only_acc.
    category_one_dict = {'Zero Shot': 'zero_shot',
                         'Few Shot': 'few_shot'}

    left, center, _, right = st.columns([0.2, 0.2, 0.4, 0.2])
    with left:
        category_one = st.selectbox('Select Zero / Few shot', list(category_one_dict))
    with center:
        category_two = st.selectbox('Select the sub-category', filters_leveltwo)
    with right:
        # Named sort_order rather than `sorted` to avoid shadowing the builtin.
        sort_order = st.selectbox('sorted by', ['Ascending', 'Descending'])

    # Each select box always yields one of its options, so the former
    # fallback branch was unreachable; it also passed 'sg_eval', which is not
    # one of the display names draw_only_acc looks up. It has been removed.
    draw_only_acc('cultural_reasoning', category_one_dict[category_one], category_two, sort_order)
69
+
70
+
71
def general_reasoning():
    """Render the General Reasoning page.

    Shows three select boxes (shot setting, sub-category, sort direction) and
    passes the selection to ``draw_only_acc`` for the ``general_reasoning``
    result folder.
    """
    st.title("General Reasoning")

    filters_leveltwo = ['MMLU', 'C Eval', 'CMMLU', 'ZBench', 'IndoMMLU']

    # Display label -> folder name expected by draw_only_acc.
    category_one_dict = {'Zero Shot': 'zero_shot',
                         'Few Shot': 'few_shot'}

    left, center, _, right = st.columns([0.2, 0.2, 0.4, 0.2])
    with left:
        category_one = st.selectbox('Select Zero / Few shot', list(category_one_dict))
    with center:
        category_two = st.selectbox('Select the sub-category', filters_leveltwo)
    with right:
        # Named sort_order rather than `sorted` to avoid shadowing the builtin.
        sort_order = st.selectbox('sorted by', ['Ascending', 'Descending'])

    # Each select box always yields one of its options, so the former
    # fallback branch was unreachable; it also passed 'MMLU Full', which is
    # not among the offered sub-categories. It has been removed.
    draw_only_acc('general_reasoning', category_one_dict[category_one], category_two, sort_order)
93
+
94
def flores():
    """Render the FLORES-Translation page.

    Shows three select boxes (shot setting, translation direction, sort
    direction) and passes the selection to ``draw_flores_translation``.
    """
    st.title("FLORES-Translation")

    # The option values are passed on verbatim as lookup keys, so the
    # historical spellings are kept as values and only the text shown to the
    # user is corrected through format_func.
    filters_leveltwo = ['Indonesian to English', 'Vitenamese to English', 'Chinese to English', 'Nalay to English']
    display_labels = {'Vitenamese to English': 'Vietnamese to English',
                      'Nalay to English': 'Malay to English'}

    # Display label -> folder name expected by draw_flores_translation.
    category_one_dict = {'Zero Shot': 'zero_shot',
                         'Few Shot': 'few_shot'}

    left, center, _, right = st.columns([0.2, 0.2, 0.4, 0.2])
    with left:
        category_one = st.selectbox('Select Zero / Few shot', list(category_one_dict))
    with center:
        category_two = st.selectbox(
            'Select the sub-category',
            filters_leveltwo,
            format_func=lambda option: display_labels.get(option, option),
        )
    with right:
        # Named sort_order rather than `sorted` to avoid shadowing the builtin.
        sort_order = st.selectbox('sorted by', ['Ascending', 'Descending'])

    # Each select box always yields one of its options, so the former
    # "nothing selected" fallback branch could never run and has been removed.
    draw_flores_translation(category_one_dict[category_one], category_two, sort_order)
117
+
118
def emotion():
    """Render the Emotion page.

    Shows three select boxes (shot setting, sub-category, sort direction) and
    passes the selection to ``draw_only_acc`` for the ``emotion`` result
    folder.
    """
    st.title("Emotion")

    filters_leveltwo = ['Indonesian Emotion Classification', 'SST2']

    # Display label -> folder name expected by draw_only_acc.
    category_one_dict = {'Zero Shot': 'zero_shot',
                         'Few Shot': 'few_shot'}

    left, center, _, right = st.columns([0.2, 0.2, 0.4, 0.2])
    with left:
        category_one = st.selectbox('Select Zero / Few shot', list(category_one_dict))
    with center:
        category_two = st.selectbox('Select the sub-category', filters_leveltwo)
    with right:
        # Named sort_order rather than `sorted` to avoid shadowing the builtin.
        sort_order = st.selectbox('sorted by', ['Ascending', 'Descending'])

    # Each select box always yields one of its options, so the former
    # "nothing selected" fallback branch could never run and has been removed.
    draw_only_acc('emotion', category_one_dict[category_one], category_two, sort_order)
140
+
141
def dialogue():
    """Render the Dialogue page.

    Shows four select boxes (shot setting, dataset, sort column, sort
    direction) and passes the selection to ``draw_dialogue``. The sort-column
    choices depend on the dataset: ``Accuracy`` for DREAM, the average and
    ROUGE columns otherwise.
    """
    st.title("Dialogue")

    filters_leveltwo = ['DREAM', 'SAMSum', 'DialogSum']

    # Display label -> folder name expected by draw_dialogue.
    category_one_dict = {'Zero Shot': 'zero_shot',
                         'Few Shot': 'few_shot'}

    left, center, _, middle, right = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])
    with left:
        category_one = st.selectbox('Select Zero / Few shot', list(category_one_dict))
    with center:
        category_two = st.selectbox('Select the sub-category', filters_leveltwo)
    with middle:
        if category_two == 'DREAM':
            sort_columns = ['Accuracy']
        else:
            sort_columns = ['Average', 'ROUGE-1', 'ROUGE-2', 'ROUGE-L']
        sort = st.selectbox('Sort', sort_columns)
    with right:
        # Named sort_order rather than `sorted` to avoid shadowing the builtin.
        sort_order = st.selectbox('by', ['Ascending', 'Descending'])

    # Each select box always yields one of its options, so the former
    # fallback branch was unreachable; it also passed sort[0], i.e. the first
    # character of the selected column name. It has been removed.
    draw_dialogue(category_one_dict[category_one], category_two, sort, sort_order)
169
+
170
def fundamental_nlp_tasks():
    """Render the Fundamental NLP Tasks page.

    Shows three select boxes (shot setting, task, sort direction) and passes
    the selection to ``draw_only_acc`` for the ``fundamental_nlp_tasks``
    result folder.
    """
    st.title("Fundamental NLP Tasks")

    filters_leveltwo = ['OCNLI', 'C3', 'COLA', 'QQP', 'MNLI', 'QNLI', 'WNLI', 'RTE', 'MRPC']

    # Display label -> folder name expected by draw_only_acc.
    category_one_dict = {'Zero Shot': 'zero_shot',
                         'Few Shot': 'few_shot'}

    left, center, _, right = st.columns([0.2, 0.2, 0.4, 0.2])
    with left:
        category_one = st.selectbox('Select Zero / Few shot', list(category_one_dict))
    with center:
        category_two = st.selectbox('Select the sub-category', filters_leveltwo)
    with right:
        # Named sort_order rather than `sorted` to avoid shadowing the builtin.
        sort_order = st.selectbox('sorted by', ['Ascending', 'Descending'])

    # Each select box always yields one of its options, so the former
    # "nothing selected" fallback branch could never run and has been removed.
    draw_only_acc('fundamental_nlp_tasks', category_one_dict[category_one], category_two, sort_order)