from enum import Enum, auto
# from dataclasses import dataclass

SubmissionType = Enum(
    "SubmissionType",
    ["Automatic", "SemiAutomatic", "Manual", "Closed", "Arena"],
)

Evaluators = Enum(
    "Evaluators",
    [
        "Humans",  # Arena
        "Automatic",
        "Model",
    ],
)

TestSet = Enum(
    "TestSet",
    ["Private", "Public", "Mix", "Rolling", "N/A"],
)

Categories = Enum(
    "Categories",
    [
        "Text",
        "Image",
        "Audio",
        "Video",
        "Multimodal",
        "Generation",
        "Math",
        "Code",
        "LanguageSpecific",
        "Performance",
        "Safety",
        "VibeCheck",
        "Tools",
        "Artefacts",
    ],
)

Languages = Enum(
    "Languages",
    ["Chinese", "Korean", "Dutch", "Portuguese", "Italian", "Malay", "Polish", "Turkish"],
)

leaderboard_to_tags = {
    "HuggingFaceH4/open_llm_leaderboard": [SubmissionType.Automatic, Evaluators.Automatic, TestSet.Public, Categories.Text, Categories.Math],
    "bigcode/bigcode-models-leaderboard": [SubmissionType.SemiAutomatic, Evaluators.Automatic, TestSet.Public, Categories.Code],
    "optimum/llm-perf-leaderboard": [SubmissionType.Manual, Evaluators.Automatic, Categories.Performance],
    "lmsys/chatbot-arena-leaderboard": [SubmissionType.Arena, Evaluators.Humans, Categories.Text, Categories.Generation],
    "llmonitor/benchmarks": [SubmissionType.Manual, Evaluators.Humans, Categories.Text, Categories.VibeCheck],
    "mteb/leaderboard": [SubmissionType.SemiAutomatic, Categories.Text, "Embeddings", Categories.Artefacts],
    "gaia-benchmark/leaderboard": [SubmissionType.Automatic, TestSet.Private, Evaluators.Automatic, Categories.Text, Categories.Tools, Categories.Multimodal],
    "opencompass/opencompass-llm-leaderboard": [SubmissionType.Manual, Categories.Text, Categories.LanguageSpecific, Languages.Chinese],
    "upstage/open-ko-llm-leaderboard": [SubmissionType.Automatic, Evaluators.Automatic, TestSet.Mix, Categories.Text, Languages.Korean],
    "BramVanroy/open_dutch_llm_leaderboard": [SubmissionType.Manual, Evaluators.Automatic, Categories.Text, Languages.Dutch],
    "vectara/leaderboard": [SubmissionType.SemiAutomatic, Evaluators.Model, Categories.Text, "Hallucinations"],
    "facebook/CyberSecEval": [SubmissionType.Closed, Categories.Code, Categories.Safety],
    "mlabonne/Yet_Another_LLM_Leaderboard": [SubmissionType.Manual, Categories.Text, Evaluators.Automatic],
    "AI-Secure/llm-trustworthy-leaderboard": [SubmissionType.Automatic, Categories.Safety, Categories.Text],
    "AILab-CVC/EvalCrafter": [SubmissionType.Closed, Categories.Video, Categories.Generation],
    "mike-ravkine/can-ai-code-results": [SubmissionType.Closed, Categories.Code],
    "echo840/ocrbench-leaderboard": [SubmissionType.Closed, Categories.Image, "OCR"],
    "NPHardEval/NPHardEval-leaderboard": [SubmissionType.Closed, Categories.Text, Categories.Math, TestSet.Rolling],
    "HaizeLabs/red-teaming-resistance-benchmark": [SubmissionType.Manual, Categories.Safety, Categories.Text],
    "devingulliver/subquadratic-llm-leaderboard": [SubmissionType.SemiAutomatic, Categories.Text, Categories.Math],
    "WildVision/vision-arena": [SubmissionType.Arena, Categories.Image, Categories.Multimodal],
    "Vchitect/VBench_Leaderboard": [SubmissionType.SemiAutomatic, Categories.Video, Categories.Generation],
    "eduagarcia/open_pt_llm_leaderboard": [Categories.Text, Categories.LanguageSpecific, Languages.Portuguese],
    "FinancialSupport/open_ita_llm_leaderboard": [Categories.Text, Categories.LanguageSpecific, Languages.Italian],
    "mesolitica/malay-llm-leaderboard": [Categories.Text, Categories.LanguageSpecific, Languages.Malay],
    "TIGER-Lab/GenAI-Arena": [Categories.Image, Categories.Generation, Evaluators.Humans, SubmissionType.Arena],
    "q-future/Q-Bench-Leaderboard": [Categories.Image, Evaluators.Automatic, SubmissionType.Closed],
    "OpenGenAI/parti-prompts-leaderboard": [Categories.Image, Categories.Generation, SubmissionType.Arena, Evaluators.Humans],
    "speakleash/open_pl_llm_leaderboard": [Categories.LanguageSpecific, Categories.Text, Languages.Polish],
    "malhajar/OpenLLMTurkishLeaderboard": [Categories.LanguageSpecific, Categories.Text, Languages.Turkish],
    "allenai/WildBench": [Evaluators.Humans, SubmissionType.Arena, Evaluators.Model, Categories.Text, Categories.Generation],
    "hf-audio/open_asr_leaderboard": [Evaluators.Automatic, Categories.Audio],
    "opencompass/open_vlm_leaderboard": [Evaluators.Automatic, Categories.Generation, Categories.Image],
    "livecodebench/benchmarks": [Evaluators.Automatic, Categories.Code],
    "allenai/reward-bench": [Evaluators.Automatic, Categories.Artefacts, "Models", Categories.Text],
    "TTS-AGI/TTS-Arena": [Evaluators.Humans, Categories.Audio],
}
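
# --- Illustrative usage (not part of the original tag data) ---
# A minimal sketch of how the mapping above could be queried, assuming the goal
# is to list the leaderboards that carry a given tag. The helper name
# `leaderboards_with_tag` is hypothetical, not an existing API.
def leaderboards_with_tag(tag):
    """Return the ids of leaderboards whose tag list contains `tag`."""
    return [name for name, tags in leaderboard_to_tags.items() if tag in tags]


# Example: arena-style leaderboards such as "lmsys/chatbot-arena-leaderboard"
# and "WildVision/vision-arena" share the SubmissionType.Arena tag.
# print(leaderboards_with_tag(SubmissionType.Arena))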