Spaces:

MERaLiON
/

SeaEval_Leaderboard

Running

App Files Files Community

binwang committed on Apr 25, 2024

Commit

fe2997e

1 Parent(s): 4687701

update leaderboard

Browse files

Files changed (2) hide show

all_results.json +0 -0
app.py +32 -29

all_results.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

app.py CHANGED Viewed

@@ -2031,89 +2031,92 @@ with block:
         with gr.TabItem("Cross-Lingual Consistency"):
             # dataset 1: cross-mmlu
-            with gr.TabItem("Cross-XQUAD"):
                 with gr.TabItem("Zero Shot"):
                     with gr.TabItem("Overall"):
                         with gr.Row():
-                            cross_xquad_zero_shot_overall = gr.components.Dataframe(
-                                CROSS_XQUAD_ZERO_SHOT_OVERALL,
-                                datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_ZERO_SHOT_OVERALL.columns),
                                 type="pandas",
                             )
                     with gr.TabItem("Language Performance"):
                         with gr.Row():
-                            cross_xquad_zero_shot_overall = gr.components.Dataframe(
-                                CROSS_XQUAD_ZERO_SHOT_LANGUAGE,
-                                datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_ZERO_SHOT_LANGUAGE.columns),
                                 type="pandas",
                             )
                 with gr.TabItem("Five Shot"):
                     with gr.TabItem("Overall"):
                         with gr.Row():
-                            cross_xquad_zero_shot_overall = gr.components.Dataframe(
-                                CROSS_XQUAD_FIVE_SHOT_OVERALL,
-                                datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_FIVE_SHOT_OVERALL.columns),
                                 type="pandas",
                             )
                     with gr.TabItem("Language Performance"):
                         with gr.Row():
                             gr.components.Dataframe(
-                                CROSS_XQUAD_FIVE_SHOT_LANGUAGE,
-                                datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_FIVE_SHOT_LANGUAGE.columns),
                                 type="pandas",
                             )
                 with gr.Row():
                     gr.Markdown("""
-                    **Cross-XQUAD Leaderboard** 🔮
                     - **Metric:** Cross-Lingual Consistency, Accuracy, AC3
-                    - **Languages:** English, Chinese, Spanish, Vietnamese
                     """)
-            # dataset 1: cross-mmlu
-            with gr.TabItem("Cross-MMLU"):
                 with gr.TabItem("Zero Shot"):
                     with gr.TabItem("Overall"):
                         with gr.Row():
-                            cross_mmlu_zero_shot_overall = gr.components.Dataframe(
-                                CROSS_MMLU_ZERO_SHOT_OVERALL,
-                                datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_ZERO_SHOT_OVERALL.columns),
                                 type="pandas",
                             )
                     with gr.TabItem("Language Performance"):
                         with gr.Row():
-                            cross_mmlu_zero_shot_overall = gr.components.Dataframe(
-                                CROSS_MMLU_ZERO_SHOT_LANGUAGE,
-                                datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_ZERO_SHOT_LANGUAGE.columns),
                                 type="pandas",
                             )
                 with gr.TabItem("Five Shot"):
                     with gr.TabItem("Overall"):
                         with gr.Row():
-                            cross_mmlu_zero_shot_overall = gr.components.Dataframe(
-                                CROSS_MMLU_FIVE_SHOT_OVERALL,
-                                datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_FIVE_SHOT_OVERALL.columns),
                                 type="pandas",
                             )
                     with gr.TabItem("Language Performance"):
                         with gr.Row():
                             gr.components.Dataframe(
-                                CROSS_MMLU_FIVE_SHOT_LANGUAGE,
-                                datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_FIVE_SHOT_LANGUAGE.columns),
                                 type="pandas",
                             )
                 with gr.Row():
                     gr.Markdown("""
-                    **Cross-MMLU Leaderboard** 🔮
                     - **Metric:** Cross-Lingual Consistency, Accuracy, AC3
-                    - **Languages:** English, Chinese, Malay, Indonesian, Spanish, Vietnamese, Filipino
                     """)

         with gr.TabItem("Cross-Lingual Consistency"):
             # dataset 1: cross-mmlu
+            # dataset 1: cross-mmlu
+            with gr.TabItem("Cross-MMLU"):
                 with gr.TabItem("Zero Shot"):
                     with gr.TabItem("Overall"):
                         with gr.Row():
+                            cross_mmlu_zero_shot_overall = gr.components.Dataframe(
+                                CROSS_MMLU_ZERO_SHOT_OVERALL,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_ZERO_SHOT_OVERALL.columns),
                                 type="pandas",
                             )
                     with gr.TabItem("Language Performance"):
                         with gr.Row():
+                            cross_mmlu_zero_shot_overall = gr.components.Dataframe(
+                                CROSS_MMLU_ZERO_SHOT_LANGUAGE,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_ZERO_SHOT_LANGUAGE.columns),
                                 type="pandas",
                             )
                 with gr.TabItem("Five Shot"):
                     with gr.TabItem("Overall"):
                         with gr.Row():
+                            cross_mmlu_zero_shot_overall = gr.components.Dataframe(
+                                CROSS_MMLU_FIVE_SHOT_OVERALL,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_FIVE_SHOT_OVERALL.columns),
                                 type="pandas",
                             )
                     with gr.TabItem("Language Performance"):
                         with gr.Row():
                             gr.components.Dataframe(
+                                CROSS_MMLU_FIVE_SHOT_LANGUAGE,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_FIVE_SHOT_LANGUAGE.columns),
                                 type="pandas",
                             )
                 with gr.Row():
                     gr.Markdown("""
+                    **Cross-MMLU Leaderboard** 🔮
                     - **Metric:** Cross-Lingual Consistency, Accuracy, AC3
+                    - **Languages:** English, Chinese, Malay, Indonesian, Spanish, Vietnamese, Filipino
                     """)
+            with gr.TabItem("Cross-XQUAD"):
                 with gr.TabItem("Zero Shot"):
                     with gr.TabItem("Overall"):
                         with gr.Row():
+                            cross_xquad_zero_shot_overall = gr.components.Dataframe(
+                                CROSS_XQUAD_ZERO_SHOT_OVERALL,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_ZERO_SHOT_OVERALL.columns),
                                 type="pandas",
                             )
                     with gr.TabItem("Language Performance"):
                         with gr.Row():
+                            cross_xquad_zero_shot_overall = gr.components.Dataframe(
+                                CROSS_XQUAD_ZERO_SHOT_LANGUAGE,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_ZERO_SHOT_LANGUAGE.columns),
                                 type="pandas",
                             )
                 with gr.TabItem("Five Shot"):
                     with gr.TabItem("Overall"):
                         with gr.Row():
+                            cross_xquad_zero_shot_overall = gr.components.Dataframe(
+                                CROSS_XQUAD_FIVE_SHOT_OVERALL,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_FIVE_SHOT_OVERALL.columns),
                                 type="pandas",
                             )
                     with gr.TabItem("Language Performance"):
                         with gr.Row():
                             gr.components.Dataframe(
+                                CROSS_XQUAD_FIVE_SHOT_LANGUAGE,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_FIVE_SHOT_LANGUAGE.columns),
                                 type="pandas",
                             )
                 with gr.Row():
                     gr.Markdown("""
+                    **Cross-XQUAD Leaderboard** 🔮
                     - **Metric:** Cross-Lingual Consistency, Accuracy, AC3
+                    - **Languages:** English, Chinese, Spanish, Vietnamese
                     """)