# src/about.py
from enum import Enum
from dataclasses import dataclass


# TaskInfo describes a single benchmark task: its internal benchmark key,
# the column name shown on the leaderboard, and the metric reported.
@dataclass
class TaskInfo:
    benchmark: str
    col_name: str
    metric: str
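
# Usage note: each Tasks member below wraps a TaskInfo payload, so callers
# (presumably app.py) read fields via `.value`, e.g.
# Tasks.Anatomy.value.col_name -> 'Anatomy'.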
# Tasks enum: the MMMLU subjects tracked on this leaderboard (a curated
# subset; some subjects are intentionally excluded).
class Tasks(Enum):
    Professional_Law = TaskInfo(benchmark='professional_law', col_name='Professional Law', metric='accuracy')
    Moral_Scenarios = TaskInfo(benchmark='moral_scenarios', col_name='Moral Scenarios', metric='accuracy')
    Miscellaneous = TaskInfo(benchmark='miscellaneous', col_name='Miscellaneous', metric='accuracy')
    High_School_Psychology = TaskInfo(benchmark='high_school_psychology', col_name='High School Psychology', metric='accuracy')
    High_School_Macroeconomics = TaskInfo(benchmark='high_school_macroeconomics', col_name='High School Macroeconomics', metric='accuracy')
    Elementary_Mathematics = TaskInfo(benchmark='elementary_mathematics', col_name='Elementary Mathematics', metric='accuracy')
    Prehistory = TaskInfo(benchmark='prehistory', col_name='Prehistory', metric='accuracy')
    Philosophy = TaskInfo(benchmark='philosophy', col_name='Philosophy', metric='accuracy')
    High_School_Biology = TaskInfo(benchmark='high_school_biology', col_name='High School Biology', metric='accuracy')
    Nutrition = TaskInfo(benchmark='nutrition', col_name='Nutrition', metric='accuracy')
    Professional_Accounting = TaskInfo(benchmark='professional_accounting', col_name='Professional Accounting', metric='accuracy')
    Professional_Medicine = TaskInfo(benchmark='professional_medicine', col_name='Professional Medicine', metric='accuracy')
    High_School_Mathematics = TaskInfo(benchmark='high_school_mathematics', col_name='High School Mathematics', metric='accuracy')
    Clinical_Knowledge = TaskInfo(benchmark='clinical_knowledge', col_name='Clinical Knowledge', metric='accuracy')
    Security_Studies = TaskInfo(benchmark='security_studies', col_name='Security Studies', metric='accuracy')
    High_School_Microeconomics = TaskInfo(benchmark='high_school_microeconomics', col_name='High School Microeconomics', metric='accuracy')
    High_School_World_History = TaskInfo(benchmark='high_school_world_history', col_name='High School World History', metric='accuracy')
    Conceptual_Physics = TaskInfo(benchmark='conceptual_physics', col_name='Conceptual Physics', metric='accuracy')
    Marketing = TaskInfo(benchmark='marketing', col_name='Marketing', metric='accuracy')
    High_School_Statistics = TaskInfo(benchmark='high_school_statistics', col_name='High School Statistics', metric='accuracy')
    High_School_US_History = TaskInfo(benchmark='high_school_us_history', col_name='High School US History', metric='accuracy')
    High_School_Chemistry = TaskInfo(benchmark='high_school_chemistry', col_name='High School Chemistry', metric='accuracy')
    Sociology = TaskInfo(benchmark='sociology', col_name='Sociology', metric='accuracy')
    High_School_Geography = TaskInfo(benchmark='high_school_geography', col_name='High School Geography', metric='accuracy')
    High_School_Government_and_Politics = TaskInfo(benchmark='high_school_government_and_politics', col_name='High School Government and Politics', metric='accuracy')
    College_Medicine = TaskInfo(benchmark='college_medicine', col_name='College Medicine', metric='accuracy')
    Virology = TaskInfo(benchmark='virology', col_name='Virology', metric='accuracy')
    High_School_European_History = TaskInfo(benchmark='high_school_european_history', col_name='High School European History', metric='accuracy')
    Logical_Fallacies = TaskInfo(benchmark='logical_fallacies', col_name='Logical Fallacies', metric='accuracy')
    Astronomy = TaskInfo(benchmark='astronomy', col_name='Astronomy', metric='accuracy')
    High_School_Physics = TaskInfo(benchmark='high_school_physics', col_name='High School Physics', metric='accuracy')
    Electrical_Engineering = TaskInfo(benchmark='electrical_engineering', col_name='Electrical Engineering', metric='accuracy')
    College_Biology = TaskInfo(benchmark='college_biology', col_name='College Biology', metric='accuracy')
    Anatomy = TaskInfo(benchmark='anatomy', col_name='Anatomy', metric='accuracy')
    Formal_Logic = TaskInfo(benchmark='formal_logic', col_name='Formal Logic', metric='accuracy')
    International_Law = TaskInfo(benchmark='international_law', col_name='International Law', metric='accuracy')
    Econometrics = TaskInfo(benchmark='econometrics', col_name='Econometrics', metric='accuracy')
    Machine_Learning = TaskInfo(benchmark='machine_learning', col_name='Machine Learning', metric='accuracy')
    Management = TaskInfo(benchmark='management', col_name='Management', metric='accuracy')
    College_Physics = TaskInfo(benchmark='college_physics', col_name='College Physics', metric='accuracy')
    US_Foreign_Policy = TaskInfo(benchmark='us_foreign_policy', col_name='US Foreign Policy', metric='accuracy')
    Business_Ethics = TaskInfo(benchmark='business_ethics', col_name='Business Ethics', metric='accuracy')
    College_Mathematics = TaskInfo(benchmark='college_mathematics', col_name='College Mathematics', metric='accuracy')
    College_Chemistry = TaskInfo(benchmark='college_chemistry', col_name='College Chemistry', metric='accuracy')
    College_Computer_Science = TaskInfo(benchmark='college_computer_science', col_name='College Computer Science', metric='accuracy')
    High_School_Computer_Science = TaskInfo(benchmark='high_school_computer_science', col_name='High School Computer Science', metric='accuracy')
    Computer_Security = TaskInfo(benchmark='computer_security', col_name='Computer Security', metric='accuracy')
    Global_Facts = TaskInfo(benchmark='global_facts', col_name='Global Facts', metric='accuracy')
    Medical_Genetics = TaskInfo(benchmark='medical_genetics', col_name='Medical Genetics', metric='accuracy')
    Abstract_Algebra = TaskInfo(benchmark='abstract_algebra', col_name='Abstract Algebra', metric='accuracy')
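
# A minimal sketch of how app.py might consume these tasks. This is an
# assumption for illustration: `leaderboard_columns` and `average_accuracy`
# are hypothetical helpers, not part of the actual app code.
def leaderboard_columns() -> list[str]:
    # One display column per subject, in enum declaration order.
    return [task.value.col_name for task in Tasks]


def average_accuracy(scores: dict[str, float]) -> float:
    # Mean accuracy over all subjects, matching the "Average" column described
    # in LLM_BENCHMARKS_TEXT below; `scores` maps benchmark key -> accuracy.
    return sum(scores[task.value.benchmark] for task in Tasks) / len(Tasks)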

# Module-level constants expected by app.py (UI text and citation strings).
TITLE = """
<div align="center">
  <a href="https://imgbb.com/">
    <img src="https://i.ibb.co/k1gQsTw/Blue-and-White-Modern-Technology-Company-Logo-2.png" alt="ILMAAM leaderboard logo" border="0" width="500" height="auto">
  </a>
</div>
"""
INTRODUCTION_TEXT = """
<div style="background-color:#001f3f; padding: 20px; border-radius: 10px;">
  <h1 style="color:#ffffff; font-family: Arial, sans-serif; text-align: center;">
    Welcome to <span style="color:#f39c12;">ILMAAM</span>: Index for Language Models for Arabic Assessment on Multitasks
  </h1>
  <p style="color:#d4d4d4; font-family: 'Verdana', sans-serif; font-size: 18px; text-align: center;">
    This leaderboard showcases the performance of Arabic LLMs on
    <strong style="color:#d4d4d4;">OpenAI's newly released MMMLU benchmark</strong> across the subjects listed below.
  </p>
</div>
"""
LLM_BENCHMARKS_TEXT = """
## About ILMAAM

ILMAAM builds on the Multilingual Massive Multitask Language Understanding (MMMLU) benchmark and evaluates Arabic models across a wide range of subjects.

## How to Interpret the Leaderboard

- **Model**: The name of the evaluated model.
- **Average ⬆️**: The mean accuracy across all subjects.
- **Subject Columns**: The accuracy (%) on each individual subject.

## How to Submit Your Model

Go to the **Submit here!** tab and provide your model details to have it evaluated and listed on the leaderboard.
"""
EVALUATION_QUEUE_TEXT = """
Below are the lists of models that have been evaluated, are currently being evaluated, or are pending evaluation.
"""
CITATION_BUTTON_LABEL = "Citation"
CITATION_BUTTON_TEXT = """
If you use this leaderboard or the MMMLU dataset in your research, please cite:

@misc{ILMAAM,
    author    = {Nacar, Omer},
    title     = {ILMAAM: Index for Language Models for Arabic Assessment on Multitasks},
    year      = {2024},
    publisher = {Robotics and Internet-of-Things Lab, Prince Sultan University, Riyadh}
}

Acknowledgment: Thanks to Prince Sultan University and the RIOTU Lab for their support.
"""