# src/about.py
from enum import Enum
from dataclasses import dataclass


# Define TaskInfo dataclass
@dataclass
class TaskInfo:
    benchmark: str
    col_name: str
    metric: str

# Define the Tasks enum with the MMMLU subjects included in this leaderboard (some subjects are intentionally excluded)
class Tasks(Enum):
    Professional_Law = TaskInfo(benchmark='professional_law', col_name='Professional Law', metric='accuracy')
    Moral_Scenarios = TaskInfo(benchmark='moral_scenarios', col_name='Moral Scenarios', metric='accuracy')
    Miscellaneous = TaskInfo(benchmark='miscellaneous', col_name='Miscellaneous', metric='accuracy')
    High_School_Psychology = TaskInfo(benchmark='high_school_psychology', col_name='High School Psychology', metric='accuracy')
    High_School_Macroeconomics = TaskInfo(benchmark='high_school_macroeconomics', col_name='High School Macroeconomics', metric='accuracy')
    Elementary_Mathematics = TaskInfo(benchmark='elementary_mathematics', col_name='Elementary Mathematics', metric='accuracy')
    Prehistory = TaskInfo(benchmark='prehistory', col_name='Prehistory', metric='accuracy')
    Philosophy = TaskInfo(benchmark='philosophy', col_name='Philosophy', metric='accuracy')
    High_School_Biology = TaskInfo(benchmark='high_school_biology', col_name='High School Biology', metric='accuracy')
    Nutrition = TaskInfo(benchmark='nutrition', col_name='Nutrition', metric='accuracy')
    Professional_Accounting = TaskInfo(benchmark='professional_accounting', col_name='Professional Accounting', metric='accuracy')
    Professional_Medicine = TaskInfo(benchmark='professional_medicine', col_name='Professional Medicine', metric='accuracy')
    High_School_Mathematics = TaskInfo(benchmark='high_school_mathematics', col_name='High School Mathematics', metric='accuracy')
    Clinical_Knowledge = TaskInfo(benchmark='clinical_knowledge', col_name='Clinical Knowledge', metric='accuracy')
    Security_Studies = TaskInfo(benchmark='security_studies', col_name='Security Studies', metric='accuracy')
    High_School_Microeconomics = TaskInfo(benchmark='high_school_microeconomics', col_name='High School Microeconomics', metric='accuracy')
    High_School_World_History = TaskInfo(benchmark='high_school_world_history', col_name='High School World History', metric='accuracy')
    Conceptual_Physics = TaskInfo(benchmark='conceptual_physics', col_name='Conceptual Physics', metric='accuracy')
    Marketing = TaskInfo(benchmark='marketing', col_name='Marketing', metric='accuracy')
    High_School_Statistics = TaskInfo(benchmark='high_school_statistics', col_name='High School Statistics', metric='accuracy')
    High_School_US_History = TaskInfo(benchmark='high_school_us_history', col_name='High School US History', metric='accuracy')
    High_School_Chemistry = TaskInfo(benchmark='high_school_chemistry', col_name='High School Chemistry', metric='accuracy')
    Sociology = TaskInfo(benchmark='sociology', col_name='Sociology', metric='accuracy')
    High_School_Geography = TaskInfo(benchmark='high_school_geography', col_name='High School Geography', metric='accuracy')
    High_School_Government_and_Politics = TaskInfo(benchmark='high_school_government_and_politics', col_name='High School Government and Politics', metric='accuracy')
    College_Medicine = TaskInfo(benchmark='college_medicine', col_name='College Medicine', metric='accuracy')
    Virology = TaskInfo(benchmark='virology', col_name='Virology', metric='accuracy')
    High_School_European_History = TaskInfo(benchmark='high_school_european_history', col_name='High School European History', metric='accuracy')
    Logical_Fallacies = TaskInfo(benchmark='logical_fallacies', col_name='Logical Fallacies', metric='accuracy')
    Astronomy = TaskInfo(benchmark='astronomy', col_name='Astronomy', metric='accuracy')
    High_School_Physics = TaskInfo(benchmark='high_school_physics', col_name='High School Physics', metric='accuracy')
    Electrical_Engineering = TaskInfo(benchmark='electrical_engineering', col_name='Electrical Engineering', metric='accuracy')
    College_Biology = TaskInfo(benchmark='college_biology', col_name='College Biology', metric='accuracy')
    Anatomy = TaskInfo(benchmark='anatomy', col_name='Anatomy', metric='accuracy')
    Formal_Logic = TaskInfo(benchmark='formal_logic', col_name='Formal Logic', metric='accuracy')
    International_Law = TaskInfo(benchmark='international_law', col_name='International Law', metric='accuracy')
    Econometrics = TaskInfo(benchmark='econometrics', col_name='Econometrics', metric='accuracy')
    Machine_Learning = TaskInfo(benchmark='machine_learning', col_name='Machine Learning', metric='accuracy')
    Management = TaskInfo(benchmark='management', col_name='Management', metric='accuracy')
    College_Physics = TaskInfo(benchmark='college_physics', col_name='College Physics', metric='accuracy')
    US_Foreign_Policy = TaskInfo(benchmark='us_foreign_policy', col_name='US Foreign Policy', metric='accuracy')
    Business_Ethics = TaskInfo(benchmark='business_ethics', col_name='Business Ethics', metric='accuracy')
    College_Mathematics = TaskInfo(benchmark='college_mathematics', col_name='College Mathematics', metric='accuracy')
    College_Chemistry = TaskInfo(benchmark='college_chemistry', col_name='College Chemistry', metric='accuracy')
    College_Computer_Science = TaskInfo(benchmark='college_computer_science', col_name='College Computer Science', metric='accuracy')
    High_School_Computer_Science = TaskInfo(benchmark='high_school_computer_science', col_name='High School Computer Science', metric='accuracy')
    Computer_Security = TaskInfo(benchmark='computer_security', col_name='Computer Security', metric='accuracy')
    Global_Facts = TaskInfo(benchmark='global_facts', col_name='Global Facts', metric='accuracy')
    Medical_Genetics = TaskInfo(benchmark='medical_genetics', col_name='Medical Genetics', metric='accuracy')
    Abstract_Algebra = TaskInfo(benchmark='abstract_algebra', col_name='Abstract Algebra', metric='accuracy')
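
# Illustrative sketch, not code taken from app.py: an assumption about how the
# Tasks enum is typically consumed, e.g. to list the leaderboard's subject
# columns or to average a model's per-subject accuracies. The function names
# below are hypothetical and exist only to demonstrate the data structures.
def example_subject_columns() -> list[str]:
    """Return the display column name of every subject defined in Tasks."""
    return [task.value.col_name for task in Tasks]


def example_average_accuracy(scores_by_benchmark: dict[str, float]) -> float:
    """Average per-subject accuracies keyed by TaskInfo.benchmark."""
    values = [scores_by_benchmark[task.value.benchmark] for task in Tasks]
    return sum(values) / len(values)

# Example usage (hypothetical data): a model scoring 50.0 on every subject
# yields example_average_accuracy({t.value.benchmark: 50.0 for t in Tasks}) == 50.0.
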
# Now include the variables expected by app.py
TITLE = """
<h1 align="center">🌐 Arabic MMMLU Evaluation Leaderboard for LLMs 🌐</h1>
"""
INTRODUCTION_TEXT = """
Welcome to the Arabic MMMLU Evaluation Leaderboard for LLMs. This leaderboard displays the performance of various language models on the Arabic MMMLU dataset across different subjects.
"""
LLM_BENCHMARKS_TEXT = """
## About the MMMLU Benchmark
The Multilingual Massive Multitask Language Understanding (MMMLU) benchmark is designed to evaluate models on a wide range of subjects across multiple languages; this leaderboard reports results on the Arabic portion.
## How to Interpret the Leaderboard
- **Model**: The name of the model evaluated.
- **Average ⬆️**: The average accuracy across all subjects.
- **Subject Columns**: The accuracy (%) for each individual subject.
## How to Submit Your Model
Go to the **Submit here!** tab and provide your model details to have it evaluated and appear on the leaderboard.
"""
EVALUATION_QUEUE_TEXT = """
Below are the lists of models that have been evaluated, are currently being evaluated, or are pending evaluation.
"""
CITATION_BUTTON_LABEL = "Citation"
CITATION_BUTTON_TEXT = """
If you use this leaderboard or the MMMLU dataset in your research, please cite:
@misc{AMMMLU,
author = {Nacar, Omer},
title = {Arabic MMMLU Evaluation for LLMs Leaderboard},
year = {2024},
publisher = {Omartificial-Intelligence-Space}
}
"""