|
from enum import Enum |
|
from dataclasses import dataclass |
|
|
|
|
|
@dataclass
class TaskInfo:
    """Metadata for one leaderboard evaluation task (an MMMLU subject)."""

    benchmark: str  # machine key of the benchmark subset, e.g. 'professional_law'
    col_name: str   # human-readable column header shown on the leaderboard, e.g. 'Professional Law'
    metric: str     # name of the reported metric (always 'accuracy' for the tasks below)
|
|
|
|
|
|
|
|
|
from enum import Enum |
|
from dataclasses import dataclass |
|
|
|
|
|
# NOTE(review): this is an exact duplicate of the TaskInfo definition earlier in
# this file (the later binding wins at import time) — consider removing one copy.
@dataclass
class TaskInfo:
    """Metadata for one leaderboard evaluation task (an MMMLU subject)."""

    benchmark: str  # machine key of the benchmark subset, e.g. 'professional_law'
    col_name: str   # human-readable column header shown on the leaderboard, e.g. 'Professional Law'
    metric: str     # name of the reported metric (always 'accuracy' for the tasks below)
|
|
|
|
|
def _accuracy_task(benchmark: str, col_name: str) -> TaskInfo:
    """Build a TaskInfo for an accuracy-scored MMMLU subject."""
    return TaskInfo(benchmark=benchmark, col_name=col_name, metric='accuracy')


class Tasks(Enum):
    """All MMMLU subjects tracked on the leaderboard.

    Each member's value is a TaskInfo carrying the benchmark key, the
    display column name, and the reported metric (accuracy for every
    subject).
    """

    Professional_Law = _accuracy_task('professional_law', 'Professional Law')
    Moral_Scenarios = _accuracy_task('moral_scenarios', 'Moral Scenarios')
    Miscellaneous = _accuracy_task('miscellaneous', 'Miscellaneous')
    High_School_Psychology = _accuracy_task('high_school_psychology', 'High School Psychology')
    High_School_Macroeconomics = _accuracy_task('high_school_macroeconomics', 'High School Macroeconomics')
    Elementary_Mathematics = _accuracy_task('elementary_mathematics', 'Elementary Mathematics')
    Prehistory = _accuracy_task('prehistory', 'Prehistory')
    Philosophy = _accuracy_task('philosophy', 'Philosophy')
    High_School_Biology = _accuracy_task('high_school_biology', 'High School Biology')
    Nutrition = _accuracy_task('nutrition', 'Nutrition')
    Professional_Accounting = _accuracy_task('professional_accounting', 'Professional Accounting')
    Professional_Medicine = _accuracy_task('professional_medicine', 'Professional Medicine')
    High_School_Mathematics = _accuracy_task('high_school_mathematics', 'High School Mathematics')
    Clinical_Knowledge = _accuracy_task('clinical_knowledge', 'Clinical Knowledge')
    Security_Studies = _accuracy_task('security_studies', 'Security Studies')
    High_School_Microeconomics = _accuracy_task('high_school_microeconomics', 'High School Microeconomics')
    High_School_World_History = _accuracy_task('high_school_world_history', 'High School World History')
    Conceptual_Physics = _accuracy_task('conceptual_physics', 'Conceptual Physics')
    Marketing = _accuracy_task('marketing', 'Marketing')
    High_School_Statistics = _accuracy_task('high_school_statistics', 'High School Statistics')
    High_School_US_History = _accuracy_task('high_school_us_history', 'High School US History')
    High_School_Chemistry = _accuracy_task('high_school_chemistry', 'High School Chemistry')
    Sociology = _accuracy_task('sociology', 'Sociology')
    High_School_Geography = _accuracy_task('high_school_geography', 'High School Geography')
    High_School_Government_and_Politics = _accuracy_task('high_school_government_and_politics', 'High School Government and Politics')
    College_Medicine = _accuracy_task('college_medicine', 'College Medicine')
    Virology = _accuracy_task('virology', 'Virology')
    High_School_European_History = _accuracy_task('high_school_european_history', 'High School European History')
    Logical_Fallacies = _accuracy_task('logical_fallacies', 'Logical Fallacies')
    Astronomy = _accuracy_task('astronomy', 'Astronomy')
    High_School_Physics = _accuracy_task('high_school_physics', 'High School Physics')
    Electrical_Engineering = _accuracy_task('electrical_engineering', 'Electrical Engineering')
    College_Biology = _accuracy_task('college_biology', 'College Biology')
    Anatomy = _accuracy_task('anatomy', 'Anatomy')
    Formal_Logic = _accuracy_task('formal_logic', 'Formal Logic')
    International_Law = _accuracy_task('international_law', 'International Law')
    Econometrics = _accuracy_task('econometrics', 'Econometrics')
    Machine_Learning = _accuracy_task('machine_learning', 'Machine Learning')
    Management = _accuracy_task('management', 'Management')
    College_Physics = _accuracy_task('college_physics', 'College Physics')
    US_Foreign_Policy = _accuracy_task('us_foreign_policy', 'US Foreign Policy')
    Business_Ethics = _accuracy_task('business_ethics', 'Business Ethics')
    College_Mathematics = _accuracy_task('college_mathematics', 'College Mathematics')
    College_Chemistry = _accuracy_task('college_chemistry', 'College Chemistry')
    College_Computer_Science = _accuracy_task('college_computer_science', 'College Computer Science')
    High_School_Computer_Science = _accuracy_task('high_school_computer_science', 'High School Computer Science')
    Computer_Security = _accuracy_task('computer_security', 'Computer Security')
    Global_Facts = _accuracy_task('global_facts', 'Global Facts')
    Medical_Genetics = _accuracy_task('medical_genetics', 'Medical Genetics')
    Abstract_Algebra = _accuracy_task('abstract_algebra', 'Abstract Algebra')
|
|
|
|
|
|
|
|
|
# HTML banner rendered at the top of the leaderboard page (logo hosted on imgbb).
TITLE = """

<div align="center">

<a href="https://imgbb.com/">

<img src="https://i.ibb.co/k1gQsTw/Blue-and-White-Modern-Technology-Company-Logo-2.png" alt="Blue-and-White-Modern-Technology-Company-Logo-2" border="0" width="500" height="auto">

</a>

</div>

"""
|
|
|
|
|
# HTML welcome card shown below the title, introducing the ILMAAM leaderboard.
INTRODUCTION_TEXT = """

<div style="background-color:#001f3f; padding: 20px; border-radius: 10px;">

<h1 style="color:#ffffff; font-family: Arial, sans-serif; text-align: center;">

Welcome to <span style="color:#f39c12;">ILMAAM</span>: Benchmark for Arabic System in Multitask Assessment

</h1>

<p style="color:#d4d4d4; font-family: 'Verdana', sans-serif; font-size: 18px; text-align: center;">

This leaderboard showcases the performance of various Arabic LLMs on the

<strong style="color:#d4d4d4;">newly released MMMLU OpenAI Benchmark</strong> across different subjects.

</p>

</div>

"""
|
|
|
|
|
# Markdown body for the "About" tab: what ILMAAM is and how to read/submit to the leaderboard.
LLM_BENCHMARKS_TEXT = """

## About ILMAAM



ILMAAM is based on The Massive Multitask Multilingual Language Understanding benchmark which is designed to evaluate Arabic models on a wide range of subjects.



## How to Interpret the Leaderboard



- **Model**: The name of the model evaluated.

- **Average ⬆️**: The average accuracy across all subjects.

- **Subject Columns**: The accuracy (%) for each individual subject.



## How to Submit Your Model



Go to the **Submit here!** tab and provide your model details to have it evaluated and appear on the leaderboard.

"""
|
|
|
# Caption shown above the finished / running / pending evaluation queue tables.
EVALUATION_QUEUE_TEXT = """

Below are the lists of models that have been evaluated, are currently being evaluated, or are pending evaluation.

"""
|
|
|
# Label and body for the citation widget at the bottom of the leaderboard page.
CITATION_BUTTON_LABEL = "Citation"
# Fixed: the BibTeX entry previously had a stray '"' after the publisher field,
# and the acknowledgment paragraph sat inside the @misc{...} braces, producing
# an invalid entry. The acknowledgment now follows the closed entry.
CITATION_BUTTON_TEXT = """
If you use this leaderboard or the MMMLU dataset in your research, please cite:

@misc{ILMAAM,
  author = {Nacar, Omer},
  title = {ILMAAM: Index for Language Models For Arabic Assessment on Multitasks},
  year = {2024},
  publisher = {Robotics and Internet-of-Things Lab, Prince Sultan University, Riyadh}
}

Acknowledgment:

Thanks to Prince Sultan University and RIOTU Lab for their support.
"""
|
|
|
|