Spaces:
Running
Running
new
Browse files
app.py
CHANGED
@@ -2193,7 +2193,6 @@ MRPC_FIVE_SHOT = get_data_mrpc(eval_mode="five_shot")
|
|
2193 |
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
|
2194 |
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
|
2195 |
|
2196 |
-
# block = gr.Blocks(theme=gr.themes.Soft())
|
2197 |
|
2198 |
theme = gr.themes.Soft().set(
|
2199 |
background_fill_primary='*secondary_50'
|
@@ -2204,13 +2203,13 @@ block = gr.Blocks(theme='rottenlittlecreature/Moon_Goblin')
|
|
2204 |
|
2205 |
with block:
|
2206 |
gr.Markdown(f"""
|
2207 |
-
|
2208 |
- **Number of Datasets**: > 30
|
2209 |
- **Number of Languages**: > 8
|
2210 |
- **Number of Models**: {NUM_MODELS}
|
2211 |
- **Mode of Evaluation**: Zero-Shot, Five-Shot
|
2212 |
|
2213 |
-
|
2214 |
- For base models, the output of base model is not truncated as no EOS detected. Evaluation could be affected, especially with length-aware metrics.
|
2215 |
|
2216 |
### The following table shows the performance of the models on the SeaEval benchmark.
|
@@ -2220,11 +2219,7 @@ with block:
|
|
2220 |
|
2221 |
""")
|
2222 |
|
2223 |
-
|
2224 |
-
|
2225 |
with gr.Tabs():
|
2226 |
-
|
2227 |
-
|
2228 |
with gr.TabItem("Cross-Lingual Consistency"):
|
2229 |
|
2230 |
# dataset 1: cross-mmlu
|
@@ -3225,8 +3220,8 @@ with block:
|
|
3225 |
""")
|
3226 |
|
3227 |
|
3228 |
-
gr.Markdown(
|
3229 |
-
|
3230 |
```bibtex
|
3231 |
@article{SeaEval,
|
3232 |
title={SeaEval for Multilingual Foundation Models: From Cross-Lingual Alignment to Cultural Reasoning},
|
@@ -3237,25 +3232,6 @@ with block:
|
|
3237 |
""")
|
3238 |
|
3239 |
|
3240 |
-
# Running the functions on page load in addition to when the button is clicked
|
3241 |
-
# This is optional - If deactivated the data loaded at "Build time" is shown like for Overall tab
|
3242 |
-
"""
|
3243 |
-
block.load(get_mteb_data, inputs=[task_bitext_mining], outputs=data_bitext_mining)
|
3244 |
-
"""
|
3245 |
-
|
3246 |
-
|
3247 |
-
|
3248 |
-
|
3249 |
-
|
3250 |
block.queue(max_size=10)
|
3251 |
-
block.launch(server_name="0.0.0.0", share=False)
|
3252 |
-
|
3253 |
-
|
3254 |
-
# Possible changes:
|
3255 |
-
# Could add graphs / other visual content
|
3256 |
-
# Could add verification marks
|
3257 |
-
|
3258 |
-
# Sources:
|
3259 |
-
# https://huggingface.co/spaces/gradio/leaderboard
|
3260 |
-
# https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard
|
3261 |
-
# https://getemoji.com/
|
|
|
2193 |
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
|
2194 |
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
|
2195 |
|
|
|
2196 |
|
2197 |
theme = gr.themes.Soft().set(
|
2198 |
background_fill_primary='*secondary_50'
|
|
|
2203 |
|
2204 |
with block:
|
2205 |
gr.Markdown(f"""
|
2206 |
+
### SeaEval Leaderboard. To submit, refer to the <a href="https://seaeval.github.io/" target="_blank" style="text-decoration: underline">SeaEval Website</a>. Refer to the [SeaEval paper](https://arxiv.org/abs/2309.04766) for details on metrics, tasks and models.
|
2207 |
- **Number of Datasets**: > 30
|
2208 |
- **Number of Languages**: > 8
|
2209 |
- **Number of Models**: {NUM_MODELS}
|
2210 |
- **Mode of Evaluation**: Zero-Shot, Five-Shot
|
2211 |
|
2212 |
+
### Known Issues:
|
2213 |
- For base models, the output of base model is not truncated as no EOS detected. Evaluation could be affected, especially with length-aware metrics.
|
2214 |
|
2215 |
### The following table shows the performance of the models on the SeaEval benchmark.
|
|
|
2219 |
|
2220 |
""")
|
2221 |
|
|
|
|
|
2222 |
with gr.Tabs():
|
|
|
|
|
2223 |
with gr.TabItem("Cross-Lingual Consistency"):
|
2224 |
|
2225 |
# dataset 1: cross-mmlu
|
|
|
3220 |
""")
|
3221 |
|
3222 |
|
3223 |
+
gr.Markdown(r"""
|
3224 |
+
### If our datasets and leaderboard are useful, please consider citing:
|
3225 |
```bibtex
|
3226 |
@article{SeaEval,
|
3227 |
title={SeaEval for Multilingual Foundation Models: From Cross-Lingual Alignment to Cultural Reasoning},
|
|
|
3232 |
""")
|
3233 |
|
3234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3235 |
block.queue(max_size=10)
|
3236 |
+
# block.launch(server_name="0.0.0.0", share=False)
|
3237 |
+
block.launch(server_name="0.0.0.0", share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|