binwang committed on
Commit
fe2997e
·
1 Parent(s): 4687701

update leaderboard

Browse files
Files changed (2) hide show
  1. all_results.json +0 -0
  2. app.py +32 -29
all_results.json CHANGED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -2031,89 +2031,92 @@ with block:
2031
  with gr.TabItem("Cross-Lingual Consistency"):
2032
 
2033
  # dataset 1: cross-mmlu
2034
- with gr.TabItem("Cross-XQUAD"):
 
 
 
2035
  with gr.TabItem("Zero Shot"):
2036
  with gr.TabItem("Overall"):
2037
  with gr.Row():
2038
- cross_xquad_zero_shot_overall = gr.components.Dataframe(
2039
- CROSS_XQUAD_ZERO_SHOT_OVERALL,
2040
- datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_ZERO_SHOT_OVERALL.columns),
2041
  type="pandas",
2042
  )
2043
  with gr.TabItem("Language Performance"):
2044
 
2045
  with gr.Row():
2046
- cross_xquad_zero_shot_overall = gr.components.Dataframe(
2047
- CROSS_XQUAD_ZERO_SHOT_LANGUAGE,
2048
- datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_ZERO_SHOT_LANGUAGE.columns),
2049
  type="pandas",
2050
  )
2051
  with gr.TabItem("Five Shot"):
2052
  with gr.TabItem("Overall"):
2053
 
2054
  with gr.Row():
2055
- cross_xquad_zero_shot_overall = gr.components.Dataframe(
2056
- CROSS_XQUAD_FIVE_SHOT_OVERALL,
2057
- datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_FIVE_SHOT_OVERALL.columns),
2058
  type="pandas",
2059
  )
2060
  with gr.TabItem("Language Performance"):
2061
 
2062
  with gr.Row():
2063
  gr.components.Dataframe(
2064
- CROSS_XQUAD_FIVE_SHOT_LANGUAGE,
2065
- datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_FIVE_SHOT_LANGUAGE.columns),
2066
  type="pandas",
2067
  )
2068
 
2069
  with gr.Row():
2070
  gr.Markdown("""
2071
- **Cross-XQUAD Leaderboard** 🔮
2072
  - **Metric:** Cross-Lingual Consistency, Accuracy, AC3
2073
- - **Languages:** English, Chinese, Spanish, Vietnamese
2074
  """)
2075
 
2076
- # dataset 1: cross-mmlu
2077
- with gr.TabItem("Cross-MMLU"):
2078
  with gr.TabItem("Zero Shot"):
2079
  with gr.TabItem("Overall"):
2080
  with gr.Row():
2081
- cross_mmlu_zero_shot_overall = gr.components.Dataframe(
2082
- CROSS_MMLU_ZERO_SHOT_OVERALL,
2083
- datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_ZERO_SHOT_OVERALL.columns),
2084
  type="pandas",
2085
  )
2086
  with gr.TabItem("Language Performance"):
2087
 
2088
  with gr.Row():
2089
- cross_mmlu_zero_shot_overall = gr.components.Dataframe(
2090
- CROSS_MMLU_ZERO_SHOT_LANGUAGE,
2091
- datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_ZERO_SHOT_LANGUAGE.columns),
2092
  type="pandas",
2093
  )
2094
  with gr.TabItem("Five Shot"):
2095
  with gr.TabItem("Overall"):
2096
 
2097
  with gr.Row():
2098
- cross_mmlu_zero_shot_overall = gr.components.Dataframe(
2099
- CROSS_MMLU_FIVE_SHOT_OVERALL,
2100
- datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_FIVE_SHOT_OVERALL.columns),
2101
  type="pandas",
2102
  )
2103
  with gr.TabItem("Language Performance"):
2104
 
2105
  with gr.Row():
2106
  gr.components.Dataframe(
2107
- CROSS_MMLU_FIVE_SHOT_LANGUAGE,
2108
- datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_FIVE_SHOT_LANGUAGE.columns),
2109
  type="pandas",
2110
  )
2111
 
2112
  with gr.Row():
2113
  gr.Markdown("""
2114
- **Cross-MMLU Leaderboard** 🔮
2115
  - **Metric:** Cross-Lingual Consistency, Accuracy, AC3
2116
- - **Languages:** English, Chinese, Malay, Indonesian, Spanish, Vietnamese, Filipino
2117
  """)
2118
 
2119
 
 
2031
  with gr.TabItem("Cross-Lingual Consistency"):
2032
 
2033
  # dataset 1: cross-mmlu
2034
+
2035
+
2036
+ # dataset 1: cross-mmlu
2037
+ with gr.TabItem("Cross-MMLU"):
2038
  with gr.TabItem("Zero Shot"):
2039
  with gr.TabItem("Overall"):
2040
  with gr.Row():
2041
+ cross_mmlu_zero_shot_overall = gr.components.Dataframe(
2042
+ CROSS_MMLU_ZERO_SHOT_OVERALL,
2043
+ datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_ZERO_SHOT_OVERALL.columns),
2044
  type="pandas",
2045
  )
2046
  with gr.TabItem("Language Performance"):
2047
 
2048
  with gr.Row():
2049
+ cross_mmlu_zero_shot_overall = gr.components.Dataframe(
2050
+ CROSS_MMLU_ZERO_SHOT_LANGUAGE,
2051
+ datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_ZERO_SHOT_LANGUAGE.columns),
2052
  type="pandas",
2053
  )
2054
  with gr.TabItem("Five Shot"):
2055
  with gr.TabItem("Overall"):
2056
 
2057
  with gr.Row():
2058
+ cross_mmlu_zero_shot_overall = gr.components.Dataframe(
2059
+ CROSS_MMLU_FIVE_SHOT_OVERALL,
2060
+ datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_FIVE_SHOT_OVERALL.columns),
2061
  type="pandas",
2062
  )
2063
  with gr.TabItem("Language Performance"):
2064
 
2065
  with gr.Row():
2066
  gr.components.Dataframe(
2067
+ CROSS_MMLU_FIVE_SHOT_LANGUAGE,
2068
+ datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_FIVE_SHOT_LANGUAGE.columns),
2069
  type="pandas",
2070
  )
2071
 
2072
  with gr.Row():
2073
  gr.Markdown("""
2074
+ **Cross-MMLU Leaderboard** 🔮
2075
  - **Metric:** Cross-Lingual Consistency, Accuracy, AC3
2076
+ - **Languages:** English, Chinese, Malay, Indonesian, Spanish, Vietnamese, Filipino
2077
  """)
2078
 
2079
+
2080
+ with gr.TabItem("Cross-XQUAD"):
2081
  with gr.TabItem("Zero Shot"):
2082
  with gr.TabItem("Overall"):
2083
  with gr.Row():
2084
+ cross_xquad_zero_shot_overall = gr.components.Dataframe(
2085
+ CROSS_XQUAD_ZERO_SHOT_OVERALL,
2086
+ datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_ZERO_SHOT_OVERALL.columns),
2087
  type="pandas",
2088
  )
2089
  with gr.TabItem("Language Performance"):
2090
 
2091
  with gr.Row():
2092
+ cross_xquad_zero_shot_overall = gr.components.Dataframe(
2093
+ CROSS_XQUAD_ZERO_SHOT_LANGUAGE,
2094
+ datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_ZERO_SHOT_LANGUAGE.columns),
2095
  type="pandas",
2096
  )
2097
  with gr.TabItem("Five Shot"):
2098
  with gr.TabItem("Overall"):
2099
 
2100
  with gr.Row():
2101
+ cross_xquad_zero_shot_overall = gr.components.Dataframe(
2102
+ CROSS_XQUAD_FIVE_SHOT_OVERALL,
2103
+ datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_FIVE_SHOT_OVERALL.columns),
2104
  type="pandas",
2105
  )
2106
  with gr.TabItem("Language Performance"):
2107
 
2108
  with gr.Row():
2109
  gr.components.Dataframe(
2110
+ CROSS_XQUAD_FIVE_SHOT_LANGUAGE,
2111
+ datatype=["number", "markdown"] + ["number"] * len(CROSS_XQUAD_FIVE_SHOT_LANGUAGE.columns),
2112
  type="pandas",
2113
  )
2114
 
2115
  with gr.Row():
2116
  gr.Markdown("""
2117
+ **Cross-XQUAD Leaderboard** 🔮
2118
  - **Metric:** Cross-Lingual Consistency, Accuracy, AC3
2119
+ - **Languages:** English, Chinese, Spanish, Vietnamese
2120
  """)
2121
 
2122