|
from enum import Enum |
|
from dataclasses import dataclass |
|
|
|
|
|
@dataclass
class TaskInfo:
    """Bundle the three string attributes that describe one task.

    Instances are used as the values of the ``Tasks`` enum members.
    """

    # Benchmark name of the task, e.g. 'History'.
    benchmark: str
    # Column name for the task; presumably the header shown in the
    # leaderboard table -- TODO confirm against the code that reads it.
    col_name: str
    # Name of the metric reported for the task, e.g. 'accuracy'.
    metric: str
|
|
|
|
|
class Tasks(Enum):
    """Enumerate the subjects, one ``TaskInfo`` value per member.

    Every member uses the ``'accuracy'`` metric, and its ``benchmark`` and
    ``col_name`` are the same string. ``ComputerScience`` is the only member
    whose strings differ from its identifier (they contain a space).
    Definition order is the enum's iteration order, so keep it stable.
    """

    History = TaskInfo(benchmark='History', col_name='History', metric='accuracy')
    Mathematics = TaskInfo(benchmark='Mathematics', col_name='Mathematics', metric='accuracy')
    Science = TaskInfo(benchmark='Science', col_name='Science', metric='accuracy')
    Geography = TaskInfo(benchmark='Geography', col_name='Geography', metric='accuracy')
    Literature = TaskInfo(benchmark='Literature', col_name='Literature', metric='accuracy')
    Art = TaskInfo(benchmark='Art', col_name='Art', metric='accuracy')
    Physics = TaskInfo(benchmark='Physics', col_name='Physics', metric='accuracy')
    Chemistry = TaskInfo(benchmark='Chemistry', col_name='Chemistry', metric='accuracy')
    Biology = TaskInfo(benchmark='Biology', col_name='Biology', metric='accuracy')
    ComputerScience = TaskInfo(
        benchmark='Computer Science',
        col_name='Computer Science',
        metric='accuracy',
    )
|
|
|
|
|
|
|
# HTML heading for the leaderboard title.
# NOTE(review): the "π" on each side of the heading text looks like a
# mis-decoded emoji. The original character cannot be recovered from this
# file -- confirm the intended symbol and restore it.
TITLE = """
<h1 align="center">π LLM Leaderboard for MMMLU Evaluation π</h1>
"""
|
|
|
# Short welcome paragraph introducing the leaderboard.
INTRODUCTION_TEXT = """
Welcome to the LLM Leaderboard for the MMMLU dataset evaluation. This leaderboard displays the performance of various language models on the MMMLU dataset across different subjects.
"""
|
|
|
# Markdown text with three sections: about the benchmark, how to read the
# leaderboard columns, and how to submit a model.
# The arrow after "Average" was stored as mis-decoded bytes ("β¬οΈ"); it is
# restored here to the up-arrow emoji those bytes encode in UTF-8.
LLM_BENCHMARKS_TEXT = """
## About the MMMLU Benchmark

The Massive Multitask Multilingual Language Understanding (MMMLU) benchmark is designed to evaluate models on a wide range of subjects.

## How to Interpret the Leaderboard

- **Model**: The name of the model evaluated.
- **Average ⬆️**: The average accuracy across all subjects.
- **Subject Columns**: The accuracy (%) for each individual subject.

## How to Submit Your Model

Go to the **Submit here!** tab and provide your model details to have it evaluated and appear on the leaderboard.
"""
|
|
|
# Explanatory sentence describing the lists of evaluated, running, and
# pending models.
EVALUATION_QUEUE_TEXT = """
Below are the lists of models that have been evaluated, are currently being evaluated, or are pending evaluation.
"""
|
|
|
# Label and body text for the citation snippet.
# NOTE(review): the BibTeX entry below is still a placeholder
# ("your_citation_here", "Your Title", ...) -- replace it with the real
# reference before publishing.
CITATION_BUTTON_LABEL = "Citation"
CITATION_BUTTON_TEXT = """
If you use this leaderboard or the MMMLU dataset in your research, please cite:
@article{your_citation_here, title={Your Title}, author={Your Name}, journal={Your Journal}, year={2024} }
"""
|
|
|
|