Spaces:
Running
Running
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
from streamlit_echarts import st_echarts | |
# from streamlit_echarts import JsCode | |
from streamlit_javascript import st_javascript | |
# from PIL import Image | |
# links_dic = {"random": "https://seaeval.github.io/", | |
# "meta_llama_3_8b": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", | |
# "mistral_7b_instruct_v0_2": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2", | |
# "sailor_0_5b": "https://huggingface.co/sail/Sailor-0.5B", | |
# "sailor_1_8b": "https://huggingface.co/sail/Sailor-1.8B", | |
# "sailor_4b": "https://huggingface.co/sail/Sailor-4B", | |
# "sailor_7b": "https://huggingface.co/sail/Sailor-7B", | |
# "sailor_0_5b_chat": "https://huggingface.co/sail/Sailor-0.5B-Chat", | |
# "sailor_1_8b_chat": "https://huggingface.co/sail/Sailor-1.8B-Chat", | |
# "sailor_4b_chat": "https://huggingface.co/sail/Sailor-4B-Chat", | |
# "sailor_7b_chat": "https://huggingface.co/sail/Sailor-7B-Chat", | |
# "sea_mistral_highest_acc_inst_7b": "https://seaeval.github.io/", | |
# "meta_llama_3_8b_instruct": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct", | |
# "flan_t5_base": "https://huggingface.co/google/flan-t5-base", | |
# "flan_t5_large": "https://huggingface.co/google/flan-t5-large", | |
# "flan_t5_xl": "https://huggingface.co/google/flan-t5-xl", | |
# "flan_t5_xxl": "https://huggingface.co/google/flan-t5-xxl", | |
# "flan_ul2": "https://huggingface.co/google/flan-t5-ul2", | |
# "flan_t5_small": "https://huggingface.co/google/flan-t5-small", | |
# "mt0_xxl": "https://huggingface.co/bigscience/mt0-xxl", | |
# "seallm_7b_v2": "https://huggingface.co/SeaLLMs/SeaLLM-7B-v2", | |
# "gpt_35_turbo_1106": "https://openai.com/blog/chatgpt", | |
# "meta_llama_3_70b": "https://huggingface.co/meta-llama/Meta-Llama-3-70B", | |
# "meta_llama_3_70b_instruct": "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct", | |
# "sea_lion_3b": "https://huggingface.co/aisingapore/sea-lion-3b", | |
# "sea_lion_7b": "https://huggingface.co/aisingapore/sea-lion-7b", | |
# "qwen1_5_110b": "https://huggingface.co/Qwen/Qwen1.5-110B", | |
# "qwen1_5_110b_chat": "https://huggingface.co/Qwen/Qwen1.5-110B-Chat", | |
# "llama_2_7b_chat": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf", | |
# "gpt4_1106_preview": "https://openai.com/blog/chatgpt", | |
# "gemma_2b": "https://huggingface.co/google/gemma-2b", | |
# "gemma_7b": "https://huggingface.co/google/gemma-7b", | |
# "gemma_2b_it": "https://huggingface.co/google/gemma-2b-it", | |
# "gemma_7b_it": "https://huggingface.co/google/gemma-7b-it", | |
# "qwen_1_5_7b": "https://huggingface.co/Qwen/Qwen1.5-7B", | |
# "qwen_1_5_7b_chat": "https://huggingface.co/Qwen/Qwen1.5-7B-Chat", | |
# "sea_lion_7b_instruct": "https://huggingface.co/aisingapore/sea-lion-7b-instruct", | |
# "sea_lion_7b_instruct_research": "https://huggingface.co/aisingapore/sea-lion-7b-instruct-research", | |
# "LLaMA_3_Merlion_8B": "https://seaeval.github.io/", | |
# "LLaMA_3_Merlion_8B_v1_1": "https://seaeval.github.io/"} | |
# links_dic = {k.lower().replace('_', '-') : v for k, v in links_dic.items()} | |
# # huggingface_image = Image.open('style/huggingface.jpg') | |
# def nav_to(value): | |
# try: | |
# url = links_dic[str(value).lower()] | |
# js = f'window.open("{url}", "_blank").then(r => window.parent.location.href);' | |
# st_javascript(js) | |
# except: | |
# pass | |
# # nav_script = """ | |
# # <meta http-equiv="refresh" content="0; url='%s'"> | |
# # """ % (url) | |
# # st.write(nav_script, unsafe_allow_html=True) | |
# def highlight_table_line(model_name): | |
# st.write(model_name) | |
def draw_cross_lingual(category_one, category_two, sort, sorted): | |
folder = "./results/cross_lingual/" | |
subtitle = '' | |
data_path = f'{folder}/{category_one}/{category_two}.csv' | |
chart_data = pd.read_csv(data_path).dropna(axis='columns').round(3) | |
if sorted == 'Ascending': | |
ascend = True | |
else: | |
ascend = False | |
chart_data = chart_data.sort_values(by=[sort], ascending=ascend) | |
min_value = round(chart_data.iloc[:, 1::].min().min() - 0.1, 1) | |
max_value = round(chart_data.iloc[:, 1::].max().max() + 0.1, 1) | |
st.markdown(""" | |
<style> | |
.stMultiSelect [data-baseweb=select] span{ | |
max-width: 800px; | |
font-size: 0.9rem; | |
} | |
</style> | |
""", unsafe_allow_html=True) | |
models = st.multiselect("Please choose the models", chart_data['Model'].tolist(), default = chart_data['Model'].tolist()) | |
if category_two in ['cross_mmlu', 'cross_logiqa']: | |
# print(category_two) | |
if category_two == 'cross_mmlu': | |
subtitle = 'Cross-MMLU' | |
elif category_two == 'cross_logiqa': | |
subtitle = 'Cross-LogiQA' | |
options = { | |
"title": {"text": f"{subtitle}"}, | |
"tooltip": { | |
"trigger": "axis", | |
"axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}}, | |
"triggerOn": 'mousemove', | |
}, | |
"legend": {"data": ['Overall Accuracy','Cross-Lingual Consistency', 'AC3', | |
'English', 'Chinese', 'Spanish', 'Vietnamese', 'Indonesian', 'Malay', 'Filipino']}, | |
"toolbox": {"feature": {"saveAsImage": {}}}, | |
"grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True}, | |
"xAxis": [ | |
{ | |
"type": "category", | |
"boundaryGap": True, | |
"triggerEvent": True, | |
"data": models, | |
} | |
], | |
"yAxis": [{"type": "value", | |
"min": min_value, | |
"max": max_value, | |
"boundaryGap": True | |
# "splitNumber": 10 | |
}], | |
"series": [ | |
{ | |
"name": "Overall Accuracy", | |
"type": "bar", # "line" | |
"data": chart_data['Accuracy'].tolist(), | |
}, | |
{ | |
"name": "Cross-Lingual Consistency", | |
"type": "bar", | |
"data": chart_data["Cross-Lingual Consistency"].tolist(), | |
}, | |
{ | |
"name": "AC3", | |
"type": "bar", | |
"data": chart_data["AC3"].tolist(), | |
}, | |
{ | |
"name": "English", | |
"type": "bar", | |
"data": chart_data["English"].tolist(), | |
}, | |
{ | |
"name": "Chinese", | |
"type": "bar", | |
"data": chart_data["Chinese"].tolist(), | |
}, | |
{ | |
"name": "Spanish", | |
"type": "bar", | |
"data": chart_data["Spanish"].tolist(), | |
}, | |
{ | |
"name": "Vietnamese", | |
"type": "bar", | |
"data": chart_data["Vietnamese"].tolist(), | |
}, | |
{ | |
"name": "Indonesian", | |
"type": "bar", | |
"data": chart_data["Indonesian"].tolist(), | |
}, | |
{ | |
"name": "Malay", | |
"type": "bar", | |
"data": chart_data["Malay"].tolist(), | |
}, | |
{ | |
"name": "Filipino", | |
"type": "bar", | |
"data": chart_data["Filipino"].tolist(), | |
}, | |
], | |
} | |
# events = { | |
# "click": "function(params) { return params.value }", | |
# # "dblclick": "function(params) { return params.value }" | |
# } | |
value = st_echarts(options=options, height="500px") #events=events, | |
# if value != None: | |
# # print(value) | |
# nav_to(value) | |
# if value != None: | |
# highlight_table_line(value) | |
elif category_two == 'cross_xquad': | |
subtitle = 'Cross-XQUAD' | |
options = { | |
"title": {"text": f"{subtitle}"}, | |
"tooltip": { | |
"trigger": "axis", | |
"axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}}, | |
"triggerOn": 'mousemove', | |
}, | |
"legend": {"data": ['Overall Accuracy','Cross-Lingual Consistency', 'AC3', | |
'English', 'Chinese', 'Spanish', 'Vietnamese', 'Indonesian', 'Malay', 'Filipino']}, | |
"toolbox": {"feature": {"saveAsImage": {}}}, | |
"grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True}, | |
"xAxis": [ | |
{ | |
"type": "category", | |
"boundaryGap": True, | |
"data": models, | |
} | |
], | |
"yAxis": [{"type": "value", | |
"min": min_value, | |
"max": max_value, | |
"boundaryGap": True | |
# "splitNumber": 10 | |
}], | |
"series": [ | |
{ | |
"name": "Overall Accuracy", | |
"type": "bar", | |
"data": chart_data['Accuracy'].tolist(), | |
}, | |
{ | |
"name": "Cross-Lingual Consistency", | |
"type": "bar", | |
"data": chart_data["Cross-Lingual Consistency"].tolist(), | |
}, | |
{ | |
"name": "AC3", | |
"type": "bar", | |
"data": chart_data["AC3"].tolist(), | |
}, | |
{ | |
"name": "English", | |
"type": "bar", | |
"data": chart_data["English"].tolist(), | |
}, | |
{ | |
"name": "Chinese", | |
"type": "bar", | |
"data": chart_data["Chinese"].tolist(), | |
}, | |
{ | |
"name": "Spanish", | |
"type": "bar", | |
"data": chart_data["Spanish"].tolist(), | |
}, | |
{ | |
"name": "Vietnamese", | |
"type": "bar", | |
"data": chart_data["Vietnamese"].tolist(), | |
}, | |
], | |
} | |
# events = { | |
# "click": "function(params) { return params.value }" | |
# } | |
value = st_echarts(options=options, height="500px") | |
# if value != None: | |
# # print(value) | |
# nav_to(value) | |
# if value != None: | |
# highlight_table_line(value) | |
### create table | |
st.divider() | |
# chart_data['Link'] = chart_data['Model'].map(links_dic) | |
st.dataframe(chart_data, | |
# column_config = { | |
# "Link": st.column_config.LinkColumn( | |
# display_text= st.image(huggingface_image) | |
# ), | |
# }, | |
hide_index = True, | |
use_container_width=True) | |
def draw_only_acc(folder_name, category_one, category_two, sorted): | |
# Cultural Reasonling / General Reasoning / Emotion / Fundamental NLP Tasks | |
folder = f"./results/{folder_name}/" | |
category_two_dict = {} | |
if folder_name == 'cultural_reasoning': | |
category_two_dict = {'SG EVAL': 'sg_eval', | |
'SG EVAL V1 Cleaned': 'sg_eval_v1_cleaned', | |
'SG EVAL V2 MCQ': 'sg_eval_v2_mcq', | |
'SG EVAL V2 Open Ended': 'sg_eval_v2_open', | |
'US EVAL': 'us_eval', | |
'CN EVAL': 'cn_eval', | |
'PH EVAL': 'ph_eval'} | |
elif folder_name == 'general_reasoning': | |
category_two_dict = {'MMLU': 'mmlu', | |
'C Eval': 'c_eval', | |
'CMMLU': 'cmmlu', | |
'ZBench': 'zbench', | |
'IndoMMLU': 'indommlu'} | |
elif folder_name == 'emotion': | |
category_two_dict = {'Indonesian Emotion Classification': 'ind_emotion', | |
'SST2': 'sst2'} | |
elif folder_name == 'fundamental_nlp_tasks': | |
category_two_dict = {'OCNLI': 'ocnli', | |
'C3': 'c3', | |
'COLA': 'cola', | |
'QQP': 'qqp', | |
'MNLI': 'mnli', | |
'QNLI': 'qnli', | |
'WNLI': 'wnli', | |
'RTE': 'rte', | |
'MRPC': 'mrpc'} | |
subtitle = category_two_dict[category_two] | |
data_path = f'{folder}/{category_one}/{subtitle}.csv' | |
chart_data = pd.read_csv(data_path).round(3) | |
st.markdown(""" | |
<style> | |
.stMultiSelect [data-baseweb=select] span{ | |
max-width: 800px; | |
font-size: 0.9rem; | |
} | |
</style> | |
""", unsafe_allow_html=True) | |
models = st.multiselect("Please choose the models", chart_data['Model'].tolist(), default = chart_data['Model'].tolist()) | |
if sorted == 'Ascending': | |
ascend = True | |
else: | |
ascend = False | |
chart_data = chart_data.sort_values(by=['Accuracy'], ascending=ascend) | |
min_value = round(chart_data.iloc[:, 1::].min().min() - 0.1, 1) | |
max_value = round(chart_data.iloc[:, 1::].max().max() + 0.1, 1) | |
options = { | |
"title": {"text": f"{category_two}"}, | |
"tooltip": { | |
"trigger": "axis", | |
"axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}}, | |
"triggerOn": 'mousemove', | |
}, | |
"legend": {"data": ['Overall Accuracy']}, | |
"toolbox": {"feature": {"saveAsImage": {}}}, | |
"grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True}, | |
"xAxis": [ | |
{ | |
"type": "category", | |
"boundaryGap": True, | |
"triggerEvent": True, | |
"data": models, | |
} | |
], | |
"yAxis": [{"type": "value", | |
"min": min_value, | |
"max": max_value, | |
"boundaryGap": True | |
# "splitNumber": 10 | |
}], | |
"series": [ | |
{ | |
"name": "Overall Accuracy", | |
"type": "bar", | |
"data": chart_data['Accuracy'].tolist(), | |
}, | |
], | |
} | |
# events = { | |
# "click": "function(params) { return params.value }" | |
# } | |
value = st_echarts(options=options, height="500px") | |
# if value != None: | |
# # print(value) | |
# nav_to(value) | |
# if value != None: | |
# highlight_table_line(value) | |
### create table | |
st.divider() | |
# chart_data['Link'] = chart_data['Model'].map(links_dic) | |
st.dataframe(chart_data, | |
# column_config = { | |
# "Link": st.column_config.LinkColumn( | |
# display_text= st.image(huggingface_image) | |
# ), | |
# }, | |
hide_index = True, | |
use_container_width=True) | |
def draw_flores_translation(category_one, category_two, sorted): | |
folder = "./results/flores_translation/" | |
category_two_dict = {'Indonesian to English': 'ind2eng', | |
'Vitenamese to English': 'vie2eng', | |
'Chinese to English': 'zho2eng', | |
'Malay to English': 'zsm2eng'} | |
subtitle = category_two_dict[category_two] | |
data_path = f'{folder}/{category_one}/{subtitle}.csv' | |
chart_data = pd.read_csv(data_path).round(3) | |
if sorted == 'Ascending': | |
ascend = True | |
else: | |
ascend = False | |
chart_data = chart_data.sort_values(by=['BLEU'], ascending=ascend) | |
min_value = round(chart_data.iloc[:, 1::].min().min() - 0.1, 1) | |
max_value = round(chart_data.iloc[:, 1::].max().max() + 0.1, 1) | |
st.markdown(""" | |
<style> | |
.stMultiSelect [data-baseweb=select] span{ | |
max-width: 800px; | |
font-size: 0.9rem; | |
} | |
</style> | |
""", unsafe_allow_html=True) | |
models = st.multiselect("Please choose the models", chart_data['Model'].tolist(), default = chart_data['Model'].tolist()) | |
options = { | |
"title": {"text": f"{category_two}"}, | |
"tooltip": { | |
"trigger": "axis", | |
"axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}}, | |
"triggerOn": 'mousemove', | |
}, | |
"legend": {"data": ['BLEU']}, | |
"toolbox": {"feature": {"saveAsImage": {}}}, | |
"grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True}, | |
"xAxis": [ | |
{ | |
"type": "category", | |
"boundaryGap": True, | |
"triggerEvent": True, | |
"data": models, | |
} | |
], | |
"yAxis": [{"type": "value", | |
"min": min_value, | |
"max": max_value, | |
"boundaryGap": True | |
# "splitNumber": 10 | |
}], | |
"series": [ | |
{ | |
"name": "BLEU", | |
"type": "bar", | |
"data": chart_data['BLEU'].tolist(), | |
}, | |
], | |
} | |
# events = { | |
# "click": "function(params) { return params.value }" | |
# } | |
value = st_echarts(options=options, height="500px") | |
# if value != None: | |
# # print(value) | |
# nav_to(value) | |
### create table | |
st.divider() | |
# chart_data['Link'] = chart_data['Model'].map(links_dic) | |
st.dataframe(chart_data, | |
# column_config = { | |
# "Link": st.column_config.LinkColumn( | |
# display_text= st.image(huggingface_image) | |
# ), | |
# }, | |
hide_index = True, | |
use_container_width=True) | |
def draw_dialogue(category_one, category_two, sort, sorted): | |
folder = "./results/dialogue" | |
category_two_dict = {'DREAM': 'dream', | |
'SAMSum': 'samsum', | |
'DialogSum': 'dialogsum'} | |
subtitle = category_two_dict[category_two] | |
data_path = f'{folder}/{category_one}/{subtitle}.csv' | |
chart_data = pd.read_csv(data_path).round(3) | |
st.markdown(""" | |
<style> | |
.stMultiSelect [data-baseweb=select] span{ | |
max-width: 800px; | |
font-size: 0.9rem; | |
} | |
</style> | |
""", unsafe_allow_html=True) | |
models = st.multiselect("Please choose the models", chart_data['Model'].tolist(), default = chart_data['Model'].tolist()) | |
if sorted == 'Ascending': | |
ascend = True | |
else: | |
ascend = False | |
chart_data = chart_data.sort_values(by=[sort], ascending=ascend) | |
min_value = round(chart_data.iloc[:, 1::].min().min() - 0.1, 1) | |
max_value = round(chart_data.iloc[:, 1::].max().max() + 0.1, 1) | |
options = {} | |
if category_two in ['SAMSum', 'DialogSum']: | |
options = { | |
"title": {"text": f"{category_two}"}, | |
"tooltip": { | |
"trigger": "axis", | |
"axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}}, | |
"triggerOn": 'mousemove', | |
}, | |
"legend": {"data": list(chart_data.columns)}, | |
"toolbox": {"feature": {"saveAsImage": {}}}, | |
"grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True}, | |
"xAxis": [ | |
{ | |
"type": "category", | |
"boundaryGap": True, | |
"triggerEvent": True, | |
"data": models, | |
} | |
], | |
"yAxis": [{"type": "value", | |
"min": min_value, | |
"max": max_value, | |
"boundaryGap": True | |
# "splitNumber": 10 | |
}], | |
"series": [ | |
{ | |
"name": "Average", | |
"type": "bar", | |
"data": chart_data['Average'].tolist(), | |
}, | |
{ | |
"name": "ROUGE-1", | |
"type": "bar", | |
"data": chart_data["ROUGE-1"].tolist(), | |
}, | |
{ | |
"name": "ROUGE-2", | |
"type": "bar", | |
"data": chart_data["ROUGE-2"].tolist(), | |
}, | |
{ | |
"name": "ROUGE-L", | |
"type": "bar", | |
"data": chart_data["ROUGE-L"].tolist(), | |
}, | |
], | |
} | |
elif category_two == 'DREAM': | |
options = { | |
"title": {"text": f"{category_two}"}, | |
"tooltip": { | |
"trigger": "axis", | |
"axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}}, | |
"triggerOn": 'mousemove', | |
}, | |
"legend": {"data": list(chart_data.columns)}, | |
"toolbox": {"feature": {"saveAsImage": {}}}, | |
"grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True}, | |
"xAxis": [ | |
{ | |
"type": "category", | |
"boundaryGap": True, | |
"triggerEvent": True, | |
"data": models, | |
} | |
], | |
"yAxis": [{"type": "value", | |
"min": min_value, | |
"max": max_value, | |
# "splitNumber": 10 | |
"boundaryGap": True | |
}], | |
"series": [ | |
{ | |
"name": "Accuracy", | |
"type": "bar", | |
"data": chart_data['Accuracy'].tolist(), | |
}, | |
], | |
} | |
# events = { | |
# "click": "function(params) { return params.value }" | |
# } | |
value = st_echarts(options=options, height="500px") | |
# if value != None: | |
# # print(value) | |
# nav_to(value) | |
### create table | |
st.divider() | |
# chart_data['Link'] = chart_data['Model'].map(links_dic) | |
st.dataframe(chart_data, | |
# column_config = { | |
# "Link": st.column_config.LinkColumn( | |
# display_text= st.image(huggingface_image) | |
# ), | |
# }, | |
hide_index = True, | |
use_container_width=True) |