SeaEval_Leaderboard / app /draw_diagram.py
zhuohan-7's picture
Upload functions
6d0d847 verified
raw
history blame
18.7 kB
import streamlit as st
import pandas as pd
import numpy as np
from streamlit_echarts import st_echarts
# from streamlit_echarts import JsCode
from streamlit_javascript import st_javascript
# from PIL import Image
# Model name -> Hugging Face model page. The chart click handlers look a
# clicked value up in this mapping to decide which page to open.
links_dic = {
    "Meta-Llama-3-8B-Instruct": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct",
    "Meta-Llama-3-70B-Instruct": "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct",
    "Meta-Llama-3-8B": "https://huggingface.co/meta-llama/Meta-Llama-3-8B",
}
# huggingface_image = Image.open('style/huggingface.jpg')
def nav_to(url):
    """Open ``url`` in a new browser tab through a ``st_javascript`` snippet.

    Args:
        url: Address to open. It is converted to ``str`` and serialised with
            ``json.dumps`` before being embedded in the script, so quotes,
            backslashes or line breaks inside it cannot terminate the
            JavaScript string literal early.
    """
    import json  # function-scope import keeps this block self-contained

    # json.dumps produces a double-quoted, fully escaped literal that is also a
    # valid JavaScript string; for ordinary URLs the generated script is
    # identical to interpolating the URL between double quotes.
    target = json.dumps(str(url))

    # NOTE(review): window.open() returns a Window (or null), not a Promise, so
    # the trailing `.then(...)` looks like it can never run successfully.
    # It is kept unchanged here - confirm before removing.
    js = f'window.open({target}, "_blank").then(r => window.parent.location.href);'
    st_javascript(js)
def highlight_table_line(model_name):
    """Write ``model_name`` to the Streamlit page.

    Despite the name, no table row is highlighted here; the value is only
    passed to ``st.write``.
    """
    st.write(model_name)
def draw_cross_lingual(category_one, category_two, sort, sorted):
    """Render the line chart and result table for a cross-lingual benchmark.

    The CSV for ``category_one`` / ``category_two`` is read from the
    cross-lingual results folder, columns containing missing values are
    dropped, values are rounded to two decimals and rows are ordered by
    ``sort``. A chart is drawn for the three known benchmarks; the table is
    always shown below a divider.

    Args:
        category_one: Sub-folder name inside ``./results/cross_lingual/``.
        category_two: CSV base name. ``'cross_mmlu'``, ``'cross_logiqa'`` and
            ``'cross_xquad'`` get a chart; any other value shows only the table.
        sort: Name of the column used to order the rows.
        sorted: ``'Ascending'`` sorts ascending, anything else descending.
            (The name shadows the builtin but is kept for existing callers.)

    Raises:
        KeyError: If ``sort`` or a column required by the chart is missing.
    """
    folder = "./results/cross_lingual/"
    data_path = f'{folder}/{category_one}/{category_two}.csv'
    chart_data = pd.read_csv(data_path).dropna(axis='columns').round(2)
    chart_data = chart_data.sort_values(by=[sort], ascending=(sorted == 'Ascending'))

    # Y-axis range: global min/max over every column except the first
    # (presumably the 'Model' name column), padded by 0.1 on each side.
    min_value = round(chart_data.iloc[:, 1::].min().min() - 0.1, 1)
    max_value = round(chart_data.iloc[:, 1::].max().max() + 0.1, 1)

    # Columns plotted by every cross-lingual chart.
    shared_columns = ['Accuracy', 'Cross-Lingual Consistency', 'AC3',
                      'English', 'Chinese', 'Spanish', 'Vietnamese']
    all_columns = shared_columns + ['Indonesian', 'Malay', 'Filipino']
    # category_two -> (chart title, columns to plot)
    chart_specs = {
        'cross_mmlu': ('Cross-MMLU', all_columns),
        'cross_logiqa': ('Cross-LogiQA', all_columns),
        'cross_xquad': ('Cross-XQUAD', shared_columns),
    }

    spec = chart_specs.get(category_two)
    if spec is not None:
        subtitle, columns = spec
        series = [
            {
                # The 'Accuracy' column is displayed as 'Overall Accuracy'.
                "name": "Overall Accuracy" if column == 'Accuracy' else column,
                "type": "line",
                "data": chart_data[column].tolist(),
            }
            for column in columns
        ]
        options = {
            "title": {"text": f"{subtitle}"},
            "tooltip": {
                "trigger": "axis",
                "axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}},
                "triggerOn": 'mousemove',
            },
            # Legend entries are taken from the plotted series so they can
            # never name a series that does not exist.
            "legend": {"data": [entry["name"] for entry in series]},
            "toolbox": {"feature": {"saveAsImage": {}}},
            "grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True},
            "xAxis": [
                {
                    "type": "category",
                    "boundaryGap": False,
                    # Makes the axis labels (model names) emit click events.
                    "triggerEvent": True,
                    "data": chart_data['Model'].tolist(),
                }
            ],
            "yAxis": [{"type": "value", "min": min_value, "max": max_value}],
            "series": series,
        }
        events = {
            "click": "function(params) { return params.value }",
        }
        value = st_echarts(options=options, events=events, height="500px")

        # A click may return a model name (axis label) or a numeric data
        # point; only names that have a registered link trigger navigation.
        if isinstance(value, str):
            url = links_dic.get(value)
            if url is not None:
                nav_to(url)

    # Result table
    st.divider()
    st.dataframe(chart_data, hide_index=True, use_container_width=True)
def draw_only_acc(folder_name, category_one, category_two, sorted):
    """Render an accuracy-only line chart and result table for one benchmark.

    Used for the cultural reasoning, general reasoning, emotion and
    fundamental NLP task groups. The display name ``category_two`` is mapped
    to a CSV base name, the CSV is read and rounded to two decimals, rows are
    ordered by the 'Accuracy' column, and the chart plus table are drawn.

    Args:
        folder_name: Result group folder under ``./results/``; one of
            ``'cultural_reasoning'``, ``'general_reasoning'``, ``'emotion'``
            or ``'fundamental_nlp_tasks'``.
        category_one: Sub-folder name inside the group folder.
        category_two: Display name of the benchmark (e.g. ``'MMLU'``); also
            used as the chart title.
        sorted: ``'Ascending'`` sorts ascending, anything else descending.
            (The name shadows the builtin but is kept for existing callers.)

    Raises:
        KeyError: If ``category_two`` is not known for ``folder_name``
            (including an unknown ``folder_name``), or the CSV lacks the
            'Accuracy' or 'Model' column.
    """
    folder = f"./results/{folder_name}/"

    # folder_name -> {display name -> CSV base name}
    file_names_by_folder = {
        'cultural_reasoning': {'SG EVAL': 'sg_eval',
                               'US EVAL': 'us_eval',
                               'CN EVAL': 'cn_eval',
                               'PH EVAL': 'ph_eval'},
        'general_reasoning': {'MMLU': 'mmlu',
                              'C Eval': 'c_eval',
                              'CMMLU': 'cmmlu',
                              'ZBench': 'zbench',
                              'IndoMMLU': 'indommlu'},
        'emotion': {'Indonesian Emotion Classification': 'ind_emotion',
                    'SST2': 'sst2'},
        'fundamental_nlp_tasks': {'OCNLI': 'ocnli',
                                  'C3': 'c3',
                                  'COLA': 'cola',
                                  'QQP': 'qqp',
                                  'MNLI': 'mnli',
                                  'QNLI': 'qnli',
                                  'WNLI': 'wnli',
                                  'RTE': 'rte',
                                  'MRPC': 'mrpc'},
    }
    # An unknown folder yields an empty mapping, so the lookup below still
    # raises KeyError for category_two as before.
    subtitle = file_names_by_folder.get(folder_name, {})[category_two]

    data_path = f'{folder}/{category_one}/{subtitle}.csv'
    chart_data = pd.read_csv(data_path).round(2)
    chart_data = chart_data.sort_values(by=['Accuracy'], ascending=(sorted == 'Ascending'))

    # Y-axis range: global min/max over every column except the first
    # (presumably the 'Model' name column), padded by 0.1 on each side.
    min_value = round(chart_data.iloc[:, 1::].min().min() - 0.1, 1)
    max_value = round(chart_data.iloc[:, 1::].max().max() + 0.1, 1)

    options = {
        "title": {"text": f"{category_two}"},
        "tooltip": {
            "trigger": "axis",
            "axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}},
            "triggerOn": 'mousemove',
        },
        "legend": {"data": ['Overall Accuracy']},
        "toolbox": {"feature": {"saveAsImage": {}}},
        "grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True},
        "xAxis": [
            {
                "type": "category",
                "boundaryGap": False,
                # Makes the axis labels (model names) emit click events.
                "triggerEvent": True,
                "data": chart_data['Model'].tolist(),
            }
        ],
        "yAxis": [{"type": "value", "min": min_value, "max": max_value}],
        "series": [
            {
                "name": "Overall Accuracy",
                "type": "line",
                "data": chart_data['Accuracy'].tolist(),
            },
        ],
    }
    events = {
        "click": "function(params) { return params.value }",
    }
    value = st_echarts(options=options, events=events, height="500px")

    # A click may return a model name (axis label) or a numeric data point;
    # only names that have a registered link trigger navigation.
    if isinstance(value, str):
        url = links_dic.get(value)
        if url is not None:
            nav_to(url)

    # Result table
    st.divider()
    st.dataframe(chart_data, hide_index=True, use_container_width=True)
def draw_flores_translation(category_one, category_two, sorted):
    """Render the BLEU line chart and result table for a FLORES translation pair.

    The display name ``category_two`` is mapped to a CSV base name, the CSV is
    read and rounded to two decimals, rows are ordered by the 'BLEU' column,
    and the chart plus table are drawn.

    Args:
        category_one: Sub-folder name inside ``./results/flores_translation/``.
        category_two: Display name of the translation direction, e.g.
            ``'Indonesian to English'``; also used as the chart title.
        sorted: ``'Ascending'`` sorts ascending, anything else descending.
            (The name shadows the builtin but is kept for existing callers.)

    Raises:
        KeyError: If ``category_two`` is not a known direction, or the CSV
            lacks the 'BLEU' or 'Model' column.
    """
    folder = "./results/flores_translation/"
    category_two_dict = {
        'Indonesian to English': 'ind2eng',
        'Vietnamese to English': 'vie2eng',
        'Chinese to English': 'zho2eng',
        'Malay to English': 'zsm2eng',
        # Historical misspellings, kept so callers that still pass these
        # labels keep working.
        'Vitenamese to English': 'vie2eng',
        'Nalay to English': 'zsm2eng',
    }
    subtitle = category_two_dict[category_two]

    data_path = f'{folder}/{category_one}/{subtitle}.csv'
    chart_data = pd.read_csv(data_path).round(2)
    chart_data = chart_data.sort_values(by=['BLEU'], ascending=(sorted == 'Ascending'))

    # Y-axis range: global min/max over every column except the first
    # (presumably the 'Model' name column), padded by 0.1 on each side.
    min_value = round(chart_data.iloc[:, 1::].min().min() - 0.1, 1)
    max_value = round(chart_data.iloc[:, 1::].max().max() + 0.1, 1)

    options = {
        "title": {"text": f"{category_two}"},
        "tooltip": {
            "trigger": "axis",
            "axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}},
            "triggerOn": 'mousemove',
        },
        "legend": {"data": ['BLEU']},
        "toolbox": {"feature": {"saveAsImage": {}}},
        "grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True},
        "xAxis": [
            {
                "type": "category",
                "boundaryGap": False,
                # Makes the axis labels (model names) emit click events.
                "triggerEvent": True,
                "data": chart_data['Model'].tolist(),
            }
        ],
        "yAxis": [{"type": "value", "min": min_value, "max": max_value}],
        "series": [
            {
                "name": "BLEU",
                "type": "line",
                "data": chart_data['BLEU'].tolist(),
            },
        ],
    }
    events = {
        "click": "function(params) { return params.value }",
    }
    value = st_echarts(options=options, events=events, height="500px")

    # A click may return a model name (axis label) or a numeric data point;
    # only names that have a registered link trigger navigation.
    if isinstance(value, str):
        url = links_dic.get(value)
        if url is not None:
            nav_to(url)

    # Result table
    st.divider()
    st.dataframe(chart_data, hide_index=True, use_container_width=True)
def draw_dialogue(category_one, category_two, sort, sorted):
    """Render the line chart and result table for a dialogue benchmark.

    The display name ``category_two`` is mapped to a CSV base name, the CSV is
    read and rounded to two decimals, rows are ordered by ``sort``, and the
    chart plus table are drawn. 'SAMSum' and 'DialogSum' plot the 'Average',
    'ROUGE-1', 'ROUGE-2' and 'ROUGE-L' columns; 'DREAM' plots 'Accuracy'.

    Args:
        category_one: Sub-folder name inside ``./results/dialogue``.
        category_two: ``'DREAM'``, ``'SAMSum'`` or ``'DialogSum'``; also used
            as the chart title.
        sort: Name of the column used to order the rows.
        sorted: ``'Ascending'`` sorts ascending, anything else descending.
            (The name shadows the builtin but is kept for existing callers.)

    Raises:
        KeyError: If ``category_two`` is unknown, or ``sort`` / a plotted
            column / 'Model' is missing from the CSV.
    """
    folder = "./results/dialogue"
    category_two_dict = {'DREAM': 'dream',
                         'SAMSum': 'samsum',
                         'DialogSum': 'dialogsum'}
    subtitle = category_two_dict[category_two]

    data_path = f'{folder}/{category_one}/{subtitle}.csv'
    chart_data = pd.read_csv(data_path).round(2)
    chart_data = chart_data.sort_values(by=[sort], ascending=(sorted == 'Ascending'))

    # Y-axis range: global min/max over every column except the first
    # (presumably the 'Model' name column), padded by 0.1 on each side.
    min_value = round(chart_data.iloc[:, 1::].min().min() - 0.1, 1)
    max_value = round(chart_data.iloc[:, 1::].max().max() + 0.1, 1)

    # Columns plotted per benchmark; the lookup above guarantees that
    # category_two is one of these keys.
    rouge_columns = ['Average', 'ROUGE-1', 'ROUGE-2', 'ROUGE-L']
    plotted_columns = {
        'SAMSum': rouge_columns,
        'DialogSum': rouge_columns,
        'DREAM': ['Accuracy'],
    }[category_two]

    series = [
        {"name": column, "type": "line", "data": chart_data[column].tolist()}
        for column in plotted_columns
    ]
    options = {
        "title": {"text": f"{category_two}"},
        "tooltip": {
            "trigger": "axis",
            "axisPointer": {"type": "cross", "label": {"backgroundColor": "#6a7985"}},
            "triggerOn": 'mousemove',
        },
        # Legend entries come from the plotted series rather than from every
        # DataFrame column, so 'Model' and unplotted columns are not listed.
        "legend": {"data": [entry["name"] for entry in series]},
        "toolbox": {"feature": {"saveAsImage": {}}},
        "grid": {"left": "3%", "right": "4%", "bottom": "3%", "containLabel": True},
        "xAxis": [
            {
                "type": "category",
                "boundaryGap": False,
                # Makes the axis labels (model names) emit click events.
                "triggerEvent": True,
                "data": chart_data['Model'].tolist(),
            }
        ],
        "yAxis": [{"type": "value", "min": min_value, "max": max_value}],
        "series": series,
    }
    events = {
        "click": "function(params) { return params.value }",
    }
    value = st_echarts(options=options, events=events, height="500px")

    # A click may return a model name (axis label) or a numeric data point;
    # only names that have a registered link trigger navigation.
    if isinstance(value, str):
        url = links_dic.get(value)
        if url is not None:
            nav_to(url)

    # Result table
    st.divider()
    st.dataframe(chart_data, hide_index=True, use_container_width=True)