import streamlit as st
from app.draw_diagram import *

def dashboard():
    """Render the SeaEval landing page.

    Emits, in order: the title with GitHub badges, a short description of
    the benchmark with two illustrations, and the citation entry. Takes no
    arguments and returns ``None``; all output goes through Streamlit calls.
    """
    with st.container():
        st.title("SeaEval")

        # Reference-style link: both badges resolve to the same [gh] target.
        st.markdown("""
            [gh]: https://github.com/SeaEval/SeaEval
            [![GitHub watchers](https://img.shields.io/github/watchers/SeaEval/SeaEval?style=social)][gh]
            [![GitHub Repo stars](https://img.shields.io/github/stars/SeaEval/SeaEval?style=social)][gh]
            """)

    seaeval_url = "https://seaeval.github.io/"

    st.divider()
    st.markdown("#### What is [SeaEval](%s)?" % seaeval_url)

    with st.container():
        # Only the middle of three equal columns is used, to centre the image.
        _, cent_co, _ = st.columns(3)
        with cent_co:
            st.image("./style/seaeval_overall.png", width=500)
        # Whitespace-only markdown element; presumably a vertical spacer
        # between the image and the heading below.
        st.markdown('''

                    ''')
        st.markdown(
            "##### A new benchmark for multilingual, multicultural foundation "
            "model evaluation, consisting of 28 datasets as the core and "
            "expanding over time."
        )
        # In the bullets below, the two spaces before "\n" are a Markdown
        # hard line break. They are written explicitly so that an editor
        # stripping trailing whitespace cannot silently merge the two lines.
        st.markdown(
            ":star: How models understand and reason with natural language?  \n"
            ":balloon: Languages: English, Chinese, Malay, Spanish, "
            "Indonesian, Vietnamese, Filipino."
        )
        st.markdown(
            ":star: How models comprehend cultural practices, nuances and values?  \n"
            ":balloon: 4 new datasets on Cultural Understanding."
        )
        st.markdown(
            ":star: How models perform across languages in terms of consistency?  \n"
            ":balloon: 2 new datasets with curated metrics for "
            "Cross-Lingual Consistency."
        )

    with st.container():
        _, cent_co, _ = st.columns(3)
        with cent_co:
            st.image("./style/consistency.png", width=500)
        st.markdown("##### Evaluation with enhanced cross-lingual capabilities.")
        st.markdown(
            ":star: How models perform according to different (paraphrased) "
            "instructions?  \n"
            ":balloon: Each dataset is equipped with 5 different prompts to "
            "avoid randomness introduced by instructions, which is "
            "non-negligible."
        )
        st.markdown(
            ":star: Multilingual accuracy and performance consistency across "
            "languages.  \n"
            ":balloon: If you can answer the question in your native language, "
            "can you answer the same question correctly in your second/third "
            "language?"
        )

    st.divider()
    with st.container():
        st.markdown("##### Citations")

        # The BibTeX lines are indented four spaces deeper than the heading
        # line so that Markdown renders them as a code block.
        st.markdown('''
                    :round_pushpin: SeaEval Paper \n
                        @article{SeaEval,
                        title={SeaEval for Multilingual Foundation Models: From Cross-Lingual Alignment to Cultural Reasoning},
                        author={Wang, Bin and Liu, Zhengyuan and Huang, Xin and Jiao, Fangkai and Ding, Yang and Aw, Ai Ti and Chen, Nancy F.},
                        journal={NAACL},
                        year={2024}
                        }
                    ''')

def cross_lingual_consistency():
    """Render the "Cross-Lingual Consistency" page.

    Shows four select boxes (shot setting, dataset, sort metric, sort
    direction), maps the two display labels to their internal keys and
    forwards the selection to ``draw`` under the ``'cross_lingual'`` section.
    Returns ``None``.
    """
    st.title("Cross-Lingual Consistency")

    # Display label -> internal key. Insertion order is the option order.
    shot_keys = {
        'Zero Shot': 'zero_shot',
        'Few Shot': 'few_shot',
    }
    dataset_keys = {
        'Cross-MMLU': 'cross_mmlu',
        'Cross-XQUAD': 'cross_xquad',
        'Cross-LogiQA': 'cross_logiqa',
    }
    metric_options = [
        'Accuracy', 'Cross-Lingual Consistency', 'AC3',
        'English', 'Chinese', 'Spanish', 'Vietnamese',
    ]

    # Five equal columns; the third one receives no widget.
    columns = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])
    category_one = columns[0].selectbox('Select Zero / Few shot', list(shot_keys))
    category_two = columns[1].selectbox('Select the sub-category', list(dataset_keys))
    sort = columns[3].selectbox('Sort', metric_options)
    sortby = columns[4].selectbox('by', ['Ascending', 'Descending'])

    # NOTE(review): every select box has a non-empty option list, so this
    # guard is presumably never taken - confirm before relying on it.
    if not (category_one or category_two or sort or sortby):
        return

    draw(
        'cross_lingual',
        shot_keys[category_one],
        dataset_keys[category_two],
        sort,
        sortby,
    )

def cultural_reasoning():
    """Render the "Cultural Reasoning" page.

    Offers a shot setting, a dataset and a sort direction, then calls
    ``draw`` for the ``'cultural_reasoning'`` section with the metric fixed
    to ``'Accuracy'``. Returns ``None``.
    """
    st.title("Cultural Reasoning")

    # Display label -> internal key. The dicts are written in the order the
    # options appear in the select boxes.
    shot_keys = {
        'Zero Shot': 'zero_shot',
        'Few Shot': 'few_shot',
    }
    dataset_keys = {
        'SG EVAL V2 MCQ': 'sg_eval_v2_mcq',
        'SG EVAL V2 Open Ended': 'sg_eval_v2_open',
        'SG EVAL': 'sg_eval',
        'SG EVAL V1 Cleaned': 'sg_eval_v1_cleaned',
        'CN EVAL': 'cn_eval',
        'PH EVAL': 'ph_eval',
        'US EVAL': 'us_eval',
    }

    # The wide third column receives no widget; it separates the two
    # category selectors from the sort-direction selector.
    shot_col, dataset_col, _, order_col = st.columns([0.2, 0.2, 0.4, 0.2])
    with shot_col:
        category_one = st.selectbox('Select Zero / Few shot', list(shot_keys))
    with dataset_col:
        category_two = st.selectbox('Select the sub-category', list(dataset_keys))
    with order_col:
        sortby = st.selectbox('sorted by', ['Ascending', 'Descending'])

    # NOTE(review): with non-empty option lists this is presumably always
    # true - confirm.
    if any((category_one, category_two, sortby)):
        shot_key = shot_keys[category_one]
        dataset_key = dataset_keys[category_two]
        draw('cultural_reasoning', shot_key, dataset_key, 'Accuracy', sortby)


def general_reasoning():
    """Render the "General Reasoning" page.

    Collects a shot setting, a dataset and a sort direction from three
    select boxes and passes them to ``draw`` for the ``'general_reasoning'``
    section, always using ``'Accuracy'`` as the metric. Returns ``None``.
    """
    st.title("General Reasoning")

    # Display label -> internal key, listed in select-box order.
    shot_keys = {'Zero Shot': 'zero_shot', 'Few Shot': 'few_shot'}
    dataset_keys = {
        'MMLU': 'mmlu',
        'CMMLU': 'cmmlu',
        'IndoMMLU': 'indommlu',
        'C Eval': 'c_eval',
        'ZBench': 'zbench',
    }

    # Four columns; index 2 receives no widget.
    columns = st.columns([0.2, 0.2, 0.4, 0.2])
    category_one = columns[0].selectbox('Select Zero / Few shot', list(shot_keys))
    category_two = columns[1].selectbox('Select the sub-category', list(dataset_keys))
    sortby = columns[3].selectbox('sorted by', ['Ascending', 'Descending'])

    # NOTE(review): presumably always passes, since every select box has
    # options - confirm.
    if not (category_one or category_two or sortby):
        return

    draw(
        'general_reasoning',
        shot_keys[category_one],
        dataset_keys[category_two],
        'Accuracy',
        sortby,
    )

def flores():
    """Render the "FLORES-Translation" page.

    Offers a shot setting, a translation direction and a sort direction,
    then calls ``draw`` for the ``'flores_translation'`` section with the
    metric fixed to ``'BLEU'``. Returns ``None``.
    """
    st.title("FLORES-Translation")

    # Display label -> internal key. The select-box options are taken from
    # these dicts, so a label cannot drift apart from its lookup key.
    category_one_dict = {
        'Zero Shot': 'zero_shot',
        'Few Shot': 'few_shot',
    }
    category_two_dict = {
        'Indonesian to English': 'ind2eng',
        # Label previously misspelled "Vitenamese"; the key passed on to
        # draw ('vie2eng') is unchanged.
        'Vietnamese to English': 'vie2eng',
        'Chinese to English': 'zho2eng',
        'Malay to English': 'zsm2eng',
    }

    # The wide third column receives no widget.
    left, center, _, right = st.columns([0.2, 0.2, 0.4, 0.2])
    with left:
        category_one = st.selectbox('Select Zero / Few shot', list(category_one_dict))
    with center:
        category_two = st.selectbox('Select the sub-category', list(category_two_dict))
    with right:
        sortby = st.selectbox('sorted by', ['Ascending', 'Descending'])

    # NOTE(review): with non-empty option lists this is presumably always
    # true - confirm.
    if category_one or category_two or sortby:
        category_one = category_one_dict[category_one]
        category_two = category_two_dict[category_two]
        draw('flores_translation', category_one, category_two, 'BLEU', sortby)

def emotion():
    """Render the "Emotion" page.

    Reads a shot setting, a dataset and a sort direction from three select
    boxes and calls ``draw`` for the ``'emotion'`` section with
    ``'Accuracy'`` as the metric. Returns ``None``.
    """
    st.title("Emotion")

    # Display label -> internal key, in select-box order.
    shot_keys = {
        'Zero Shot': 'zero_shot',
        'Few Shot': 'few_shot',
    }
    dataset_keys = {
        'Indonesian Emotion  Classification': 'ind_emotion',
        'SST2': 'sst2',
    }

    # Column at index 2 receives no widget.
    shot_col, dataset_col, _, order_col = st.columns([0.2, 0.2, 0.4, 0.2])
    with shot_col:
        category_one = st.selectbox('Select Zero / Few shot', list(shot_keys))
    with dataset_col:
        category_two = st.selectbox('Select the sub-category', list(dataset_keys))
    with order_col:
        sortby = st.selectbox('sorted by', ['Ascending', 'Descending'])

    # NOTE(review): presumably always true given the fixed option lists -
    # confirm.
    if any((category_one, category_two, sortby)):
        shot_key = shot_keys[category_one]
        dataset_key = dataset_keys[category_two]
        draw('emotion', shot_key, dataset_key, 'Accuracy', sortby)

def dialogue():
    """Render the "Dialogue" page.

    Shows select boxes for shot setting, dataset, sort metric and sort
    direction. The metric choices depend on the dataset: only ``'Accuracy'``
    for DREAM, the ROUGE variants plus their average otherwise. The
    selection is passed to ``draw`` for the ``'dialogue'`` section.
    Returns ``None``.
    """
    st.title("Dialogue")

    # Display label -> internal key, in select-box order.
    shot_keys = {'Zero Shot': 'zero_shot', 'Few Shot': 'few_shot'}
    dataset_keys = {
        'DREAM': 'dream',
        'SAMSum': 'samsum',
        'DialogSum': 'dialogsum',
    }

    # Five equal columns; the third one receives no widget.
    columns = st.columns([0.2, 0.2, 0.2, 0.2, 0.2])
    category_one = columns[0].selectbox('Select Zero / Few shot', list(shot_keys))
    category_two = columns[1].selectbox('Select the sub-category', list(dataset_keys))

    # The metric list is chosen from the dataset label picked above.
    if category_two == 'DREAM':
        metric_options = ['Accuracy']
    else:
        metric_options = ['Average', 'ROUGE-1', 'ROUGE-2', 'ROUGE-L']
    sort = columns[3].selectbox('Sort', metric_options)

    sortby = columns[4].selectbox('by', ['Ascending', 'Descending'])

    # NOTE(review): presumably never taken, since every select box has
    # options - confirm.
    if not (category_one or category_two or sort or sortby):
        return

    draw(
        'dialogue',
        shot_keys[category_one],
        dataset_keys[category_two],
        sort,
        sortby,
    )

def fundamental_nlp_tasks():
    """Render the "Fundamental NLP Tasks" page.

    Collects a shot setting, a task and a sort direction, then calls
    ``draw`` for the ``'fundamental_nlp_tasks'`` section with ``'Accuracy'``
    as the metric. Returns ``None``.
    """
    st.title("Fundamental NLP Tasks")

    shot_keys = {
        'Zero Shot': 'zero_shot',
        'Few Shot': 'few_shot',
    }
    # Each task's internal key is its display label in lower case; the
    # tuple order is the select-box order.
    task_labels = ('OCNLI', 'C3', 'COLA', 'QQP', 'MNLI', 'QNLI', 'WNLI', 'RTE', 'MRPC')
    task_keys = {label: label.lower() for label in task_labels}

    # Column at index 2 receives no widget.
    shot_col, task_col, _, order_col = st.columns([0.2, 0.2, 0.4, 0.2])
    with shot_col:
        category_one = st.selectbox('Select Zero / Few shot', list(shot_keys))
    with task_col:
        category_two = st.selectbox('Select the sub-category', list(task_keys))
    with order_col:
        sortby = st.selectbox('sorted by', ['Ascending', 'Descending'])

    # NOTE(review): presumably always true given the fixed option lists -
    # confirm.
    if any((category_one, category_two, sortby)):
        shot_key = shot_keys[category_one]
        task_key = task_keys[category_two]
        draw('fundamental_nlp_tasks', shot_key, task_key, 'Accuracy', sortby)