# src/about.py
from enum import Enum
from dataclasses import dataclass


# Define the TaskInfo dataclass that describes a single benchmark subject
@dataclass
class TaskInfo:
    benchmark: str
    col_name: str
    metric: str


# Define the Tasks enum with your specific subjects, excluding the unwanted ones
class Tasks(Enum):
    Professional_Law = TaskInfo(benchmark='professional_law', col_name='Professional Law', metric='accuracy')
    Moral_Scenarios = TaskInfo(benchmark='moral_scenarios', col_name='Moral Scenarios', metric='accuracy')
    Miscellaneous = TaskInfo(benchmark='miscellaneous', col_name='Miscellaneous', metric='accuracy')
    High_School_Psychology = TaskInfo(benchmark='high_school_psychology', col_name='High School Psychology', metric='accuracy')
    High_School_Macroeconomics = TaskInfo(benchmark='high_school_macroeconomics', col_name='High School Macroeconomics', metric='accuracy')
    Elementary_Mathematics = TaskInfo(benchmark='elementary_mathematics', col_name='Elementary Mathematics', metric='accuracy')
    Prehistory = TaskInfo(benchmark='prehistory', col_name='Prehistory', metric='accuracy')
    Philosophy = TaskInfo(benchmark='philosophy', col_name='Philosophy', metric='accuracy')
    High_School_Biology = TaskInfo(benchmark='high_school_biology', col_name='High School Biology', metric='accuracy')
    Nutrition = TaskInfo(benchmark='nutrition', col_name='Nutrition', metric='accuracy')
    Professional_Accounting = TaskInfo(benchmark='professional_accounting', col_name='Professional Accounting', metric='accuracy')
    Professional_Medicine = TaskInfo(benchmark='professional_medicine', col_name='Professional Medicine', metric='accuracy')
    High_School_Mathematics = TaskInfo(benchmark='high_school_mathematics', col_name='High School Mathematics', metric='accuracy')
    Clinical_Knowledge = TaskInfo(benchmark='clinical_knowledge', col_name='Clinical Knowledge', metric='accuracy')
    Security_Studies = TaskInfo(benchmark='security_studies', col_name='Security Studies', metric='accuracy')
    High_School_Microeconomics = TaskInfo(benchmark='high_school_microeconomics', col_name='High School Microeconomics', metric='accuracy')
    High_School_World_History = TaskInfo(benchmark='high_school_world_history', col_name='High School World History', metric='accuracy')
    Conceptual_Physics = TaskInfo(benchmark='conceptual_physics', col_name='Conceptual Physics', metric='accuracy')
    Marketing = TaskInfo(benchmark='marketing', col_name='Marketing', metric='accuracy')
    High_School_Statistics = TaskInfo(benchmark='high_school_statistics', col_name='High School Statistics', metric='accuracy')
    High_School_US_History = TaskInfo(benchmark='high_school_us_history', col_name='High School US History', metric='accuracy')
    High_School_Chemistry = TaskInfo(benchmark='high_school_chemistry', col_name='High School Chemistry', metric='accuracy')
    Sociology = TaskInfo(benchmark='sociology', col_name='Sociology', metric='accuracy')
    High_School_Geography = TaskInfo(benchmark='high_school_geography', col_name='High School Geography', metric='accuracy')
    High_School_Government_and_Politics = TaskInfo(benchmark='high_school_government_and_politics', col_name='High School Government and Politics', metric='accuracy')
    College_Medicine = TaskInfo(benchmark='college_medicine', col_name='College Medicine', metric='accuracy')
    Virology = TaskInfo(benchmark='virology', col_name='Virology', metric='accuracy')
    High_School_European_History = TaskInfo(benchmark='high_school_european_history', col_name='High School European History', metric='accuracy')
    Logical_Fallacies = TaskInfo(benchmark='logical_fallacies', col_name='Logical Fallacies', metric='accuracy')
    Astronomy = TaskInfo(benchmark='astronomy', col_name='Astronomy', metric='accuracy')
    High_School_Physics = TaskInfo(benchmark='high_school_physics', col_name='High School Physics', metric='accuracy')
    Electrical_Engineering = TaskInfo(benchmark='electrical_engineering', col_name='Electrical Engineering', metric='accuracy')
    College_Biology = TaskInfo(benchmark='college_biology', col_name='College Biology', metric='accuracy')
    Anatomy = TaskInfo(benchmark='anatomy', col_name='Anatomy', metric='accuracy')
    Formal_Logic = TaskInfo(benchmark='formal_logic', col_name='Formal Logic', metric='accuracy')
    International_Law = TaskInfo(benchmark='international_law', col_name='International Law', metric='accuracy')
    Econometrics = TaskInfo(benchmark='econometrics', col_name='Econometrics', metric='accuracy')
    Machine_Learning = TaskInfo(benchmark='machine_learning', col_name='Machine Learning', metric='accuracy')
    Management = TaskInfo(benchmark='management', col_name='Management', metric='accuracy')
    College_Physics = TaskInfo(benchmark='college_physics', col_name='College Physics', metric='accuracy')
    US_Foreign_Policy = TaskInfo(benchmark='us_foreign_policy', col_name='US Foreign Policy', metric='accuracy')
    Business_Ethics = TaskInfo(benchmark='business_ethics', col_name='Business Ethics', metric='accuracy')
    College_Mathematics = TaskInfo(benchmark='college_mathematics', col_name='College Mathematics', metric='accuracy')
    College_Chemistry = TaskInfo(benchmark='college_chemistry', col_name='College Chemistry', metric='accuracy')
    College_Computer_Science = TaskInfo(benchmark='college_computer_science', col_name='College Computer Science', metric='accuracy')
    High_School_Computer_Science = TaskInfo(benchmark='high_school_computer_science', col_name='High School Computer Science', metric='accuracy')
    Computer_Security = TaskInfo(benchmark='computer_security', col_name='Computer Security', metric='accuracy')
    Global_Facts = TaskInfo(benchmark='global_facts', col_name='Global Facts', metric='accuracy')
    Medical_Genetics = TaskInfo(benchmark='medical_genetics', col_name='Medical Genetics', metric='accuracy')
    Abstract_Algebra = TaskInfo(benchmark='abstract_algebra', col_name='Abstract Algebra', metric='accuracy')


# Now include the variables expected by app.py
TITLE = """

🌐 Arabic MMMLU Evaluation Leaderboard for LLMs 🌐

""" INTRODUCTION_TEXT = """ Welcome to the Arabic MMMLU Evaluation for LLMs Leaderboard for the MMMLU dataset evaluation. This leaderboard displays the performance of various language models on the MMMLU dataset across different subjects. """ LLM_BENCHMARKS_TEXT = """ ## About the MMMLU Benchmark The Massive Multitask Multilingual Language Understanding (MMMLU) benchmark is designed to evaluate models on a wide range of subjects. ## How to Interpret the Leaderboard - **Model**: The name of the model evaluated. - **Average ⬆️**: The average accuracy across all subjects. - **Subject Columns**: The accuracy (%) for each individual subject. ## How to Submit Your Model Go to the **Submit here!** tab and provide your model details to have it evaluated and appear on the leaderboard. """ EVALUATION_QUEUE_TEXT = """ Below are the lists of models that have been evaluated, are currently being evaluated, or are pending evaluation. """ CITATION_BUTTON_LABEL = "Citation" CITATION_BUTTON_TEXT = """ If you use this leaderboard or the MMMLU dataset in your research, please cite: @misc{AMMMLU, author = {Nacar, Omer}, title = {Arabic MMMLU Evaluation for LLMs Leaderboard}, year = {2024}, publisher = {Omartificial-Intelligence-Space}}" }"""