# src/about.py
from enum import Enum
from dataclasses import dataclass


# Define the TaskInfo dataclass
@dataclass
class TaskInfo:
    benchmark: str
    col_name: str
    metric: str


# Define the Tasks enum with your specific subjects, excluding the unwanted ones
class Tasks(Enum):
    Professional_Law = TaskInfo(benchmark='professional_law', col_name='Professional Law', metric='accuracy')
    Moral_Scenarios = TaskInfo(benchmark='moral_scenarios', col_name='Moral Scenarios', metric='accuracy')
    Miscellaneous = TaskInfo(benchmark='miscellaneous', col_name='Miscellaneous', metric='accuracy')
    High_School_Psychology = TaskInfo(benchmark='high_school_psychology', col_name='High School Psychology', metric='accuracy')
    High_School_Macroeconomics = TaskInfo(benchmark='high_school_macroeconomics', col_name='High School Macroeconomics', metric='accuracy')
    Elementary_Mathematics = TaskInfo(benchmark='elementary_mathematics', col_name='Elementary Mathematics', metric='accuracy')
    Prehistory = TaskInfo(benchmark='prehistory', col_name='Prehistory', metric='accuracy')
    Philosophy = TaskInfo(benchmark='philosophy', col_name='Philosophy', metric='accuracy')
    High_School_Biology = TaskInfo(benchmark='high_school_biology', col_name='High School Biology', metric='accuracy')
    Nutrition = TaskInfo(benchmark='nutrition', col_name='Nutrition', metric='accuracy')
    Professional_Accounting = TaskInfo(benchmark='professional_accounting', col_name='Professional Accounting', metric='accuracy')
    Professional_Medicine = TaskInfo(benchmark='professional_medicine', col_name='Professional Medicine', metric='accuracy')
    High_School_Mathematics = TaskInfo(benchmark='high_school_mathematics', col_name='High School Mathematics', metric='accuracy')
    Clinical_Knowledge = TaskInfo(benchmark='clinical_knowledge', col_name='Clinical Knowledge', metric='accuracy')
    Security_Studies = TaskInfo(benchmark='security_studies', col_name='Security Studies', metric='accuracy')
    High_School_Microeconomics = TaskInfo(benchmark='high_school_microeconomics', col_name='High School Microeconomics', metric='accuracy')
    High_School_World_History = TaskInfo(benchmark='high_school_world_history', col_name='High School World History', metric='accuracy')
    Conceptual_Physics = TaskInfo(benchmark='conceptual_physics', col_name='Conceptual Physics', metric='accuracy')
    Marketing = TaskInfo(benchmark='marketing', col_name='Marketing', metric='accuracy')
    High_School_Statistics = TaskInfo(benchmark='high_school_statistics', col_name='High School Statistics', metric='accuracy')
    High_School_US_History = TaskInfo(benchmark='high_school_us_history', col_name='High School US History', metric='accuracy')
    High_School_Chemistry = TaskInfo(benchmark='high_school_chemistry', col_name='High School Chemistry', metric='accuracy')
    Sociology = TaskInfo(benchmark='sociology', col_name='Sociology', metric='accuracy')
    High_School_Geography = TaskInfo(benchmark='high_school_geography', col_name='High School Geography', metric='accuracy')
    High_School_Government_and_Politics = TaskInfo(benchmark='high_school_government_and_politics', col_name='High School Government and Politics', metric='accuracy')
    College_Medicine = TaskInfo(benchmark='college_medicine', col_name='College Medicine', metric='accuracy')
    Virology = TaskInfo(benchmark='virology', col_name='Virology', metric='accuracy')
    High_School_European_History = TaskInfo(benchmark='high_school_european_history', col_name='High School European History', metric='accuracy')
    Logical_Fallacies = TaskInfo(benchmark='logical_fallacies', col_name='Logical Fallacies', metric='accuracy')
    Astronomy = TaskInfo(benchmark='astronomy', col_name='Astronomy', metric='accuracy')
    High_School_Physics = TaskInfo(benchmark='high_school_physics', col_name='High School Physics', metric='accuracy')
    Electrical_Engineering = TaskInfo(benchmark='electrical_engineering', col_name='Electrical Engineering', metric='accuracy')
    College_Biology = TaskInfo(benchmark='college_biology', col_name='College Biology', metric='accuracy')
    Anatomy = TaskInfo(benchmark='anatomy', col_name='Anatomy', metric='accuracy')
    Formal_Logic = TaskInfo(benchmark='formal_logic', col_name='Formal Logic', metric='accuracy')
    International_Law = TaskInfo(benchmark='international_law', col_name='International Law', metric='accuracy')
    Econometrics = TaskInfo(benchmark='econometrics', col_name='Econometrics', metric='accuracy')
    Machine_Learning = TaskInfo(benchmark='machine_learning', col_name='Machine Learning', metric='accuracy')
    Management = TaskInfo(benchmark='management', col_name='Management', metric='accuracy')
    College_Physics = TaskInfo(benchmark='college_physics', col_name='College Physics', metric='accuracy')
    US_Foreign_Policy = TaskInfo(benchmark='us_foreign_policy', col_name='US Foreign Policy', metric='accuracy')
    Business_Ethics = TaskInfo(benchmark='business_ethics', col_name='Business Ethics', metric='accuracy')
    College_Mathematics = TaskInfo(benchmark='college_mathematics', col_name='College Mathematics', metric='accuracy')
    College_Chemistry = TaskInfo(benchmark='college_chemistry', col_name='College Chemistry', metric='accuracy')
    College_Computer_Science = TaskInfo(benchmark='college_computer_science', col_name='College Computer Science', metric='accuracy')
    High_School_Computer_Science = TaskInfo(benchmark='high_school_computer_science', col_name='High School Computer Science', metric='accuracy')
    Computer_Security = TaskInfo(benchmark='computer_security', col_name='Computer Security', metric='accuracy')
    Global_Facts = TaskInfo(benchmark='global_facts', col_name='Global Facts', metric='accuracy')
    Medical_Genetics = TaskInfo(benchmark='medical_genetics', col_name='Medical Genetics', metric='accuracy')
    Abstract_Algebra = TaskInfo(benchmark='abstract_algebra', col_name='Abstract Algebra', metric='accuracy')
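

# Illustrative sketch, not part of the leaderboard code itself: how downstream
# code such as app.py can consume the Tasks enum. Each member's TaskInfo value
# carries the raw benchmark key and the human-readable column name.
def subject_columns() -> list[str]:
    """Return the display column name of every subject, in enum order."""
    return [task.value.col_name for task in Tasks]


def benchmark_keys() -> list[str]:
    """Return the raw benchmark identifiers used in result files."""
    return [task.value.benchmark for task in Tasks]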

# Now include the variables expected by app.py
TITLE = """
Blue-and-White-Modern-Technology-Company-Logo-2
"""
""" INTRODUCTION_TEXT = """

Welcome to ILMAAM: Index for Language Models For Arabic Assessment on Multitasks

This leaderboard showcases the performance of various Arabic LLMs on the newly released OpenAI MMMLU benchmark across different subjects.

""" LLM_BENCHMARKS_TEXT = """ ## About ILMAAM ILMAAM is based on The Massive Multitask Multilingual Language Understanding benchmark which is designed to evaluate Arabic models on a wide range of subjects. ## How to Interpret the Leaderboard - **Model**: The name of the model evaluated. - **Average ⬆️**: The average accuracy across all subjects. - **Subject Columns**: The accuracy (%) for each individual subject. ## How to Submit Your Model Go to the **Submit here!** tab and provide your model details to have it evaluated and appear on the leaderboard. """ EVALUATION_QUEUE_TEXT = """ Below are the lists of models that have been evaluated, are currently being evaluated, or are pending evaluation. """ CITATION_BUTTON_LABEL = "Citation" CITATION_BUTTON_TEXT = """ If you use this leaderboard or the MMMLU dataset in your research, please cite: @misc{ILMAAM, author = {Nacar, Omer}, title = {ILMAAM: Index for Language Models For Arabic Assessment on Multitasks}, year = {2024}, publisher = {Robotics and Internet-of-Things Lab, Prince Sultan University, Riyadh}" Acknowledgment: Thanks for Prince Sultan University and RIOTU Lab for their support. }"""