-from transformers import AutoConfig, AutoModel, AutoTokenizer
-
-config = AutoConfig.from_pretrained("your-username/your-model", revision="main")
-model = AutoModel.from_pretrained("your-username/your-model", revision="main")
-tokenizer = AutoTokenizer.from_pretrained("your-username/your-model", revision="main")
-
-{citation}
+def styled_error(error):
+    return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
+
+
+def styled_warning(warn):
+    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{warn}</p>"
+
+
+def styled_message(message):
+    return f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"
+
+
+def has_no_nan_values(df, columns):
+    return df[columns].notna().all(axis=1)
+
+
+def has_nan_values(df, columns):
+    return df[columns].isna().any(axis=1)
diff --git a/src/display/utils.py b/src/display/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..1cb6fb1e89cf4397c3d48ba647a75131bb27b8c4
--- /dev/null
+++ b/src/display/utils.py
@@ -0,0 +1,193 @@
+from dataclasses import dataclass, make_dataclass
+from enum import Enum
+
+import pandas as pd
+
+def fields(raw_class):
+    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
+
+
+@dataclass
+class Task:
+    benchmark: str
+    metric: str
+    col_name: str
+
+class Tasks(Enum):
+    arc = Task("arc:challenge", "acc_norm", "ARC")
+    hellaswag = Task("hellaswag", "acc_norm", "HellaSwag")
+    mmlu = Task("hendrycksTest", "acc", "MMLU")
+    truthfulqa = Task("truthfulqa:mc", "mc2", "TruthfulQA")
+    winogrande = Task("winogrande", "acc", "Winogrande")
+    gsm8k = Task("gsm8k", "acc", "GSM8K")
+
+# These classes hold the user-facing column names,
+# to avoid having to change them all around the code
+# when a modification is needed
+@dataclass
+class ColumnContent:
+    name: str
+    type: str
+    displayed_by_default: bool
+    hidden: bool = False
+    never_hidden: bool = False
+    dummy: bool = False
+
+auto_eval_column_dict = []
+# Init
+auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
+auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
+# Scores
+auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
+for task in Tasks:
+    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
+# Model information
+auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
+auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
+auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
+auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
+auto_eval_column_dict.append(["merged", ColumnContent, ColumnContent("Merged", "bool", False)])
+auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
+auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
+auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
+auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False, hidden=True)])
+auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+auto_eval_column_dict.append(["flagged", ColumnContent, ColumnContent("Flagged", "bool", False, hidden=True)])
+auto_eval_column_dict.append(["moe", ColumnContent, ColumnContent("MoE", "bool", False, hidden=True)])
+# Dummy column for the search bar (hidden by the custom CSS)
+auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
+
+# We use make_dataclass to dynamically fill the scores from Tasks
+AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
+
+@dataclass(frozen=True)
+class EvalQueueColumn:  # Queue column
+    model = ColumnContent("model", "markdown", True)
+    revision = ColumnContent("revision", "str", True)
+    private = ColumnContent("private", "bool", True)
+    precision = ColumnContent("precision", "str", True)
+    weight_type = ColumnContent("weight_type", "str", True)
+    status = ColumnContent("status", "str", True)
+
+
+baseline_row = {
+    AutoEvalColumn.model.name: "<p>Baseline</p>",
+    AutoEvalColumn.revision.name: "N/A",
+    AutoEvalColumn.precision.name: None,
+    AutoEvalColumn.merged.name: False,
+    AutoEvalColumn.average.name: 31.0,
+    AutoEvalColumn.arc.name: 25.0,
+    AutoEvalColumn.hellaswag.name: 25.0,
+    AutoEvalColumn.mmlu.name: 25.0,
+    AutoEvalColumn.truthfulqa.name: 25.0,
+    AutoEvalColumn.winogrande.name: 50.0,
+    AutoEvalColumn.gsm8k.name: 0.21,
+    AutoEvalColumn.dummy.name: "baseline",
+    AutoEvalColumn.model_type.name: "",
+    AutoEvalColumn.flagged.name: False,
+}
+
+# Average ⬆️ human baseline is 0.897 (source: averaging human baselines below)
+# ARC human baseline is 0.80 (source: https://lab42.global/arc/)
+# HellaSwag human baseline is 0.95 (source: https://deepgram.com/learn/hellaswag-llm-benchmark-guide)
+# MMLU human baseline is 0.898 (source: https://openreview.net/forum?id=d7KBjmI3GmQ)
+# TruthfulQA human baseline is 0.94 (source: https://arxiv.org/pdf/2109.07958.pdf)
+# Winogrande: https://leaderboard.allenai.org/winogrande/submissions/public
+# GSM8K: paper
+# Define the human baselines
+human_baseline_row = {
+    AutoEvalColumn.model.name: "<p>Human performance</p>",
+    AutoEvalColumn.revision.name: "N/A",
+    AutoEvalColumn.precision.name: None,
+    AutoEvalColumn.average.name: 92.75,
+    AutoEvalColumn.merged.name: False,
+    AutoEvalColumn.arc.name: 80.0,
+    AutoEvalColumn.hellaswag.name: 95.0,
+    AutoEvalColumn.mmlu.name: 89.8,
+    AutoEvalColumn.truthfulqa.name: 94.0,
+    AutoEvalColumn.winogrande.name: 94.0,
+    AutoEvalColumn.gsm8k.name: 100,
+    AutoEvalColumn.dummy.name: "human_baseline",
+    AutoEvalColumn.model_type.name: "",
+    AutoEvalColumn.flagged.name: False,
+}
+
+@dataclass
+class ModelDetails:
+    name: str
+    symbol: str = ""  # emoji, only for the model type
+
+
+class ModelType(Enum):
+    PT = ModelDetails(name="pretrained", symbol="🟢")
+    CPT = ModelDetails(name="continuously pretrained", symbol="🟩")
+    FT = ModelDetails(name="fine-tuned on domain-specific datasets", symbol="🔶")
+    chat = ModelDetails(name="chat models (RLHF, DPO, IFT, ...)", symbol="💬")
+    merges = ModelDetails(name="base merges and moerges", symbol="🤝")
+    Unknown = ModelDetails(name="", symbol="?")
+
+    def to_str(self, separator=" "):
+        return f"{self.value.symbol}{separator}{self.value.name}"
+
+    @staticmethod
+    def from_str(type):
+        if "fine-tuned" in type or "🔶" in type:
+            return ModelType.FT
+        if "continuously pretrained" in type or "🟩" in type:
+            return ModelType.CPT
+        if "pretrained" in type or "🟢" in type:
+            return ModelType.PT
+        if any([k in type for k in ["instruction-tuned", "RL-tuned", "chat", "🟦", "⭕", "💬"]]):
+            return ModelType.chat
+        if "merge" in type or "🤝" in type:
+            return ModelType.merges
+        return ModelType.Unknown
+
+class WeightType(Enum):
+    Adapter = ModelDetails("Adapter")
+    Original = ModelDetails("Original")
+    Delta = ModelDetails("Delta")
+
+class Precision(Enum):
+    float16 = ModelDetails("float16")
+    bfloat16 = ModelDetails("bfloat16")
+    qt_8bit = ModelDetails("8bit")
+    qt_4bit = ModelDetails("4bit")
+    qt_GPTQ = ModelDetails("GPTQ")
+    Unknown = ModelDetails("?")
+
+    @staticmethod
+    def from_str(precision):
+        if precision in ["torch.float16", "float16"]:
+            return Precision.float16
+        if precision in ["torch.bfloat16", "bfloat16"]:
+            return Precision.bfloat16
+        if precision in ["8bit"]:
+            return Precision.qt_8bit
+        if precision in ["4bit"]:
+            return Precision.qt_4bit
+        if precision in ["GPTQ", "None"]:
+            return Precision.qt_GPTQ
+        return Precision.Unknown
+
+
+# Column selection
+COLS = [c.name for c in fields(AutoEvalColumn)]
+TYPES = [c.type for c in fields(AutoEvalColumn)]
+
+EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
+EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
+
+BENCHMARK_COLS = [t.value.col_name for t in Tasks]
+
+NUMERIC_INTERVALS = {
+    "?": pd.Interval(-1, 0, closed="right"),
+    "~1.5": pd.Interval(0, 2, closed="right"),
+    "~3": pd.Interval(2, 4, closed="right"),
+    "~7": pd.Interval(4, 9, closed="right"),
+    "~13": pd.Interval(9, 20, closed="right"),
+    "~35": pd.Interval(20, 45, closed="right"),
+    "~60": pd.Interval(45, 70, closed="right"),
+    "70+": pd.Interval(70, 10000, closed="right"),
+}
diff --git a/src/envs.py b/src/envs.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3780993ef093473d1d3248cf99eab03e2154907
--- /dev/null
+++ b/src/envs.py
@@ -0,0 +1,35 @@
+import os
+
+from huggingface_hub import HfApi
+
+# clone / pull the lmeh eval data
+H4_TOKEN = os.environ.get("H4_TOKEN", None)
+
+REPO_ID = "HuggingFaceH4/open_llm_leaderboard"
+QUEUE_REPO = "open-llm-leaderboard/requests"
+DYNAMIC_INFO_REPO = "open-llm-leaderboard/dynamic_model_information"
+RESULTS_REPO = "open-llm-leaderboard/results"
+
+PRIVATE_QUEUE_REPO =
"open-llm-leaderboard/private-requests" +PRIVATE_RESULTS_REPO = "open-llm-leaderboard/private-results" + +IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", True)) + +CACHE_PATH=os.getenv("HF_HOME", ".") + +EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue") +EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results") +DYNAMIC_INFO_PATH = os.path.join(CACHE_PATH, "dynamic-info") +DYNAMIC_INFO_FILE_PATH = os.path.join(DYNAMIC_INFO_PATH, "model_infos.json") + +EVAL_REQUESTS_PATH_PRIVATE = "eval-queue-private" +EVAL_RESULTS_PATH_PRIVATE = "eval-results-private" + +PATH_TO_COLLECTION = "open-llm-leaderboard/llm-leaderboard-best-models-652d6c7965a4619fb5c27a03" + +# Rate limit variables +RATE_LIMIT_PERIOD = 7 +RATE_LIMIT_QUOTA = 5 +HAS_HIGHER_RATE_LIMIT = ["TheBloke"] + +API = HfApi(token=H4_TOKEN) diff --git a/src/leaderboard/filter_models.py b/src/leaderboard/filter_models.py new file mode 100644 index 0000000000000000000000000000000000000000..41836c174a3efa35a85bbb9b93de7786987f5ecc --- /dev/null +++ b/src/leaderboard/filter_models.py @@ -0,0 +1,166 @@ +from src.display.formatting import model_hyperlink +from src.display.utils import AutoEvalColumn + +# Models which have been flagged by users as being problematic for a reason or another +# (Model name to forum discussion link) +FLAGGED_MODELS = { + "merged": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "Voicelab/trurl-2-13b": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/202", + "deepnight-research/llama-2-70B-inst": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/207", + "Aspik101/trurl-2-13b-pl-instruct_unload": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/213", + "Fredithefish/ReasonixPajama-3B-HF": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/236", + "TigerResearch/tigerbot-7b-sft-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/237", + "gaodrew/gaodrew-gorgonzola-13b": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/215", + "AIDC-ai-business/Marcoroni-70B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/287", + "AIDC-ai-business/Marcoroni-13B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/287", + "AIDC-ai-business/Marcoroni-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/287", + "fblgit/una-xaberius-34b-v1beta": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/444", + "jan-hq/trinity-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "rwitz2/go-bruins-v2.1.1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "rwitz2/go-bruins-v2.1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "GreenNode/GreenNodeLM-v3olet-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "GreenNode/GreenNodeLM-7B-v4leo": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "GreenNode/LeoScorpius-GreenNode-7B-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "viethq188/LeoScorpius-7B-Chat-DPO": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "GreenNode/GreenNodeLM-7B-v2leo": 
"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "janai-hq/trinity-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "ignos/LeoScorpius-GreenNode-Alpaca-7B-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "fblgit/una-cybertron-7b-v3-OMA": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "mncai/mistral-7b-dpo-merge-v1.1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "mncai/mistral-7b-dpo-v6": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "Toten5/LeoScorpius-GreenNode-7B-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "GreenNode/GreenNodeLM-7B-v1olet": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "quantumaikr/quantum-dpo-v0.1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "quantumaikr/quantum-v0.01": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "quantumaikr/quantum-trinity-v0.1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "mncai/mistral-7b-dpo-v5": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "cookinai/BruinHermes": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "jan-ai/Pandora-10.7B-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "v1olet/v1olet_marcoroni-go-bruins-merge-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "v1olet/v1olet_merged_dpo_7B_v3": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "rwitz2/pee": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "zyh3826 / GML-Mistral-merged-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/503", + "dillfrescott/trinity-medium": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474", + "udkai/Garrulus": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/526", + "dfurman/GarrulusMarcoro-7B-v0.1": "https://huggingface.co/dfurman/GarrulusMarcoro-7B-v0.1/discussions/1", + "udkai/Turdus": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548", + "eren23/slerp-test-turdus-beagle": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548", + "abideen/NexoNimbus-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548", + "alnrg2arg/test2_3": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548", + "nfaheem/Marcoroni-7b-DPO-Merge": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548", + "CultriX/MergeTrix-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548", + "liminerity/Blur-7b-v1.21": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/548", + # Merges not indicated + "gagan3012/MetaModelv2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "gagan3012/MetaModelv3": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "kyujinpy/Sakura-SOLRCA-Math-Instruct-DPO-v2": 
"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "kyujinpy/Sakura-SOLAR-Instruct-DPO-v2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "kyujinpy/Sakura-SOLRCA-Math-Instruct-DPO-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "kyujinpy/Sakura-SOLRCA-Instruct-DPO": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "fblgit/LUNA-SOLARkrautLM-Instruct": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "perlthoughts/Marcoroni-8x7B-v3-MoE": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "rwitz/go-bruins-v2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "rwitz/go-bruins": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "Walmart-the-bag/Solar-10.7B-Cato": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "aqweteddy/mistral_tv-neural-marconroni": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "NExtNewChattingAI/shark_tank_ai_7_b": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "Q-bert/MetaMath-Cybertron": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "OpenPipe/mistral-ft-optimized-1227": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "perlthoughts/Falkor-7b": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "v1olet/v1olet_merged_dpo_7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "Ba2han/BruinsV2-OpHermesNeu-11B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "DopeorNope/You_can_cry_Snowman-13B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "PistachioAlt/Synatra-MCS-7B-v0.3-RP-Slerp": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "Weyaxi/MetaMath-una-cybertron-v2-bf16-Ties": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "Weyaxi/OpenHermes-2.5-neural-chat-7b-v3-2-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "perlthoughts/Falkor-8x7B-MoE": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "elinas/chronos007-70b": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "Weyaxi/MetaMath-NeuralHermes-2.5-Mistral-7B-Linear": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "Weyaxi/MetaMath-neural-chat-7b-v3-2-Ties": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "diffnamehard/Mistral-CatMacaroni-slerp-uncensored-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "Weyaxi/neural-chat-7b-v3-1-OpenHermes-2.5-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "Weyaxi/MetaMath-NeuralHermes-2.5-Mistral-7B-Ties": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "Walmart-the-bag/Misted-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "garage-bAInd/Camel-Platypus2-70B": 
"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "Weyaxi/OpenOrca-Zephyr-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "uukuguy/speechless-mistral-7b-dare-0.85": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510", + "DopeorNope/SOLARC-M-10.7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511", + "cloudyu/Mixtral_11Bx2_MoE_19B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511", + "DopeorNope/SOLARC-MOE-10.7Bx6 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511", + "DopeorNope/SOLARC-MOE-10.7Bx4": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511", + "gagan3012/MetaModelv2 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511", + "udkai/Turdus": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "kodonho/Solar-OrcaDPO-Solar-Instruct-SLERP": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "kodonho/SolarM-SakuraSolar-SLERP": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "Yhyu13/LMCocktail-10.7B-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "mlabonne/NeuralMarcoro14-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "Neuronovo/neuronovo-7B-v0.2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "ryandt/MusingCaterpillar": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "Neuronovo/neuronovo-7B-v0.3": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "SanjiWatsuki/Lelantos-DPO-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "bardsai/jaskier-7b-dpo": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "cookinai/OpenCM-14": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "bardsai/jaskier-7b-dpo-v2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "jan-hq/supermario-v2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + # MoErges + "cloudyu/Yi-34Bx2-MoE-60B":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "cloudyu/Mixtral_34Bx2_MoE_60B":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "gagan3012/MetaModel_moe":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "macadeliccc/SOLAR-math-2x10.7b-v0.2":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "cloudyu/Mixtral_7Bx2_MoE":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "macadeliccc/SOLAR-math-2x10.7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "macadeliccc/Orca-SOLAR-4x10.7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "macadeliccc/piccolo-8x7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "cloudyu/Mixtral_7Bx4_MOE_24B":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + 
"macadeliccc/laser-dolphin-mixtral-2x7b-dpo":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + "macadeliccc/polyglot-math-4x7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540", + # Other - contamination mostly + "DopeorNope/COKAL-v1-70B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/566", + "CultriX/MistralTrix-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/556", +} + +# Models which have been requested by orgs to not be submitted on the leaderboard +DO_NOT_SUBMIT_MODELS = [ + "Voicelab/trurl-2-13b", # trained on MMLU + "TigerResearch/tigerbot-70b-chat", # per authors request + "TigerResearch/tigerbot-70b-chat-v2", # per authors request + "TigerResearch/tigerbot-70b-chat-v4-4k", # per authors request +] + + +def flag_models(leaderboard_data: list[dict]): + for model_data in leaderboard_data: + # Merges and moes are flagged automatically + if model_data[AutoEvalColumn.flagged.name] == True: + flag_key = "merged" + else: + flag_key = model_data["model_name_for_query"] + + if flag_key in FLAGGED_MODELS: + issue_num = FLAGGED_MODELS[flag_key].split("/")[-1] + issue_link = model_hyperlink( + FLAGGED_MODELS[flag_key], + f"See discussion #{issue_num}", + ) + model_data[ + AutoEvalColumn.model.name + ] = f"{model_data[AutoEvalColumn.model.name]} has been flagged! {issue_link}" + model_data[AutoEvalColumn.flagged.name] = True + else: + model_data[AutoEvalColumn.flagged.name] = False + + +def remove_forbidden_models(leaderboard_data: list[dict]): + indices_to_remove = [] + for ix, model in enumerate(leaderboard_data): + if model["model_name_for_query"] in DO_NOT_SUBMIT_MODELS: + indices_to_remove.append(ix) + + for ix in reversed(indices_to_remove): + leaderboard_data.pop(ix) + return leaderboard_data + + +def filter_models_flags(leaderboard_data: list[dict]): + leaderboard_data = remove_forbidden_models(leaderboard_data) + flag_models(leaderboard_data) diff --git a/src/leaderboard/read_evals.py b/src/leaderboard/read_evals.py new file mode 100644 index 0000000000000000000000000000000000000000..6606c991641a0971cbb289ddcf7c4e7a1e4b1a02 --- /dev/null +++ b/src/leaderboard/read_evals.py @@ -0,0 +1,227 @@ +import glob +import json +import math +import os +from dataclasses import dataclass + +import dateutil +import numpy as np + +from huggingface_hub import ModelCard + +from src.display.formatting import make_clickable_model +from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType + + +@dataclass +class EvalResult: + # Also see src.display.utils.AutoEvalColumn for what will be displayed. + eval_name: str # org_model_precision (uid) + full_model: str # org/model (path on hub) + org: str + model: str + revision: str # commit hash, "" if main + results: dict + precision: Precision = Precision.Unknown + model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ... + weight_type: WeightType = WeightType.Original # Original or Adapter + architecture: str = "Unknown" # From config file + license: str = "?" 
+ likes: int = 0 + num_params: int = 0 + date: str = "" # submission date of request file + still_on_hub: bool = True + is_merge: bool = False + flagged: bool = False + status: str = "FINISHED" + tags: list = None + + @classmethod + def init_from_json_file(self, json_filepath): + """Inits the result from the specific model result file""" + with open(json_filepath) as fp: + data = json.load(fp) + + # We manage the legacy config format + config = data.get("config_general") + + # Precision + precision = Precision.from_str(config.get("model_dtype")) + + # Get model and org + org_and_model = config.get("model_name") + org_and_model = org_and_model.split("/", 1) + + if len(org_and_model) == 1: + org = None + model = org_and_model[0] + result_key = f"{model}_{precision.value.name}" + else: + org = org_and_model[0] + model = org_and_model[1] + result_key = f"{org}_{model}_{precision.value.name}" + full_model = "/".join(org_and_model) + + # Extract results available in this file (some results are split in several files) + results = {} + for task in Tasks: + task = task.value + # We skip old mmlu entries + wrong_mmlu_version = False + if task.benchmark == "hendrycksTest": + for mmlu_k in ["harness|hendrycksTest-abstract_algebra|5", "hendrycksTest-abstract_algebra"]: + if mmlu_k in data["versions"] and data["versions"][mmlu_k] == 0: + wrong_mmlu_version = True + + if wrong_mmlu_version: + continue + + # Some truthfulQA values are NaNs + if task.benchmark == "truthfulqa:mc" and "harness|truthfulqa:mc|0" in data["results"]: + if math.isnan(float(data["results"]["harness|truthfulqa:mc|0"][task.metric])): + results[task.benchmark] = 0.0 + continue + + # We average all scores of a given metric (mostly for mmlu) + accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark in k]) + if accs.size == 0 or any([acc is None for acc in accs]): + continue + + mean_acc = np.mean(accs) * 100.0 + results[task.benchmark] = mean_acc + + return self( + eval_name=result_key, + full_model=full_model, + org=org, + model=model, + results=results, + precision=precision, + revision= config.get("model_sha", ""), + ) + + def update_with_request_file(self, requests_path): + """Finds the relevant request file for the current model and updates info with it""" + request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name) + + try: + with open(request_file, "r") as f: + request = json.load(f) + self.model_type = ModelType.from_str(request.get("model_type", "Unknown")) + self.weight_type = WeightType[request.get("weight_type", "Original")] + self.num_params = request.get("params", 0) + self.date = request.get("submitted_time", "") + self.architecture = request.get("architectures", "Unknown") + self.status = request.get("status", "FAILED") + except Exception as e: + self.status = "FAILED" + print(f"Could not find request file for {self.org}/{self.model}") + + def update_with_dynamic_file_dict(self, file_dict): + self.license = file_dict.get("license", "?") + self.likes = file_dict.get("likes", 0) + self.still_on_hub = file_dict["still_on_hub"] + self.tags = file_dict.get("tags", []) + self.flagged = any("flagged" in tag for tag in self.tags) + + + def to_dict(self): + """Converts the Eval Result to a dict compatible with our dataframe display""" + average = sum([v for v in self.results.values() if v is not None]) / len(Tasks) + data_dict = { + "eval_name": self.eval_name, # not a column, just a save name, + AutoEvalColumn.precision.name: 
self.precision.value.name, + AutoEvalColumn.model_type.name: self.model_type.value.name, + AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol, + AutoEvalColumn.weight_type.name: self.weight_type.value.name, + AutoEvalColumn.architecture.name: self.architecture, + AutoEvalColumn.model.name: make_clickable_model(self.full_model), + AutoEvalColumn.dummy.name: self.full_model, + AutoEvalColumn.revision.name: self.revision, + AutoEvalColumn.average.name: average, + AutoEvalColumn.license.name: self.license, + AutoEvalColumn.likes.name: self.likes, + AutoEvalColumn.params.name: self.num_params, + AutoEvalColumn.still_on_hub.name: self.still_on_hub, + AutoEvalColumn.merged.name: "merge" in self.tags if self.tags else False, + AutoEvalColumn.moe.name: ("moe" in self.tags if self.tags else False) or "moe" in self.full_model.lower(), + AutoEvalColumn.flagged.name: self.flagged + } + + for task in Tasks: + data_dict[task.value.col_name] = self.results[task.value.benchmark] + + return data_dict + + +def get_request_file_for_model(requests_path, model_name, precision): + """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED""" + request_files = os.path.join( + requests_path, + f"{model_name}_eval_request_*.json", + ) + request_files = glob.glob(request_files) + + # Select correct request file (precision) + request_file = "" + request_files = sorted(request_files, reverse=True) + for tmp_request_file in request_files: + with open(tmp_request_file, "r") as f: + req_content = json.load(f) + if ( + req_content["status"] in ["FINISHED"] + and req_content["precision"] == precision.split(".")[-1] + ): + request_file = tmp_request_file + return request_file + + +def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]: + """From the path of the results folder root, extract all needed info for results""" + model_result_filepaths = [] + + for root, _, files in os.walk(results_path): + # We should only have json files in model results + if len(files) == 0 or any([not f.endswith(".json") for f in files]): + continue + + # Sort the files by date + try: + files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7]) + except dateutil.parser._parser.ParserError: + files = [files[-1]] + + for file in files: + model_result_filepaths.append(os.path.join(root, file)) + + with open(dynamic_path) as f: + dynamic_data = json.load(f) + + eval_results = {} + for model_result_filepath in model_result_filepaths: + # Creation of result + eval_result = EvalResult.init_from_json_file(model_result_filepath) + eval_result.update_with_request_file(requests_path) + if eval_result.full_model in dynamic_data: + eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model]) + # Hardcoding because of gating problem + if "meta-llama" in eval_result.full_model: + eval_result.still_on_hub = True + + # Store results of same eval together + eval_name = eval_result.eval_name + if eval_name in eval_results.keys(): + eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None}) + else: + eval_results[eval_name] = eval_result + + results = [] + for v in eval_results.values(): + try: + if v.status == "FINISHED": + v.to_dict() # we test if the dict version is complete + results.append(v) + except KeyError: # not all eval values present + continue + + return results diff --git a/src/populate.py b/src/populate.py new file mode 100644 index 
0000000000000000000000000000000000000000..cd31069a765087800c96e7a30f5de081fd0ab80b --- /dev/null +++ b/src/populate.py @@ -0,0 +1,59 @@ +import json +import os + +import pandas as pd + +from src.display.formatting import has_no_nan_values, make_clickable_model +from src.display.utils import AutoEvalColumn, EvalQueueColumn, baseline_row +from src.leaderboard.filter_models import filter_models_flags +from src.leaderboard.read_evals import get_raw_eval_results + + +def get_leaderboard_df(results_path: str, requests_path: str, dynamic_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame: + raw_data = get_raw_eval_results(results_path=results_path, requests_path=requests_path, dynamic_path=dynamic_path) + all_data_json = [v.to_dict() for v in raw_data] + all_data_json.append(baseline_row) + filter_models_flags(all_data_json) + + df = pd.DataFrame.from_records(all_data_json) + df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False) + df = df[cols].round(decimals=2) + + # filter out if any of the benchmarks have not been produced + df = df[has_no_nan_values(df, benchmark_cols)] + return raw_data, df + + +def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]: + entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")] + all_evals = [] + + for entry in entries: + if ".json" in entry: + file_path = os.path.join(save_path, entry) + with open(file_path) as fp: + data = json.load(fp) + + data[EvalQueueColumn.model.name] = make_clickable_model(data["model"]) + data[EvalQueueColumn.revision.name] = data.get("revision", "main") + + all_evals.append(data) + elif ".md" not in entry: + # this is a folder + sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if not e.startswith(".")] + for sub_entry in sub_entries: + file_path = os.path.join(save_path, entry, sub_entry) + with open(file_path) as fp: + data = json.load(fp) + + data[EvalQueueColumn.model.name] = make_clickable_model(data["model"]) + data[EvalQueueColumn.revision.name] = data.get("revision", "main") + all_evals.append(data) + + pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]] + running_list = [e for e in all_evals if e["status"] == "RUNNING"] + finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"] + df_pending = pd.DataFrame.from_records(pending_list, columns=cols) + df_running = pd.DataFrame.from_records(running_list, columns=cols) + df_finished = pd.DataFrame.from_records(finished_list, columns=cols) + return df_finished[cols], df_running[cols], df_pending[cols] diff --git a/src/scripts/create_request_file.py b/src/scripts/create_request_file.py new file mode 100644 index 0000000000000000000000000000000000000000..23f690e59b28b94d15b76b12b05d05e5708f8aaa --- /dev/null +++ b/src/scripts/create_request_file.py @@ -0,0 +1,92 @@ +import json +import os +import pprint +from datetime import datetime, timezone + +import click +from colorama import Fore +from huggingface_hub import HfApi, snapshot_download + +from src.submission.check_validity import get_model_size +from src.display.utils import ModelType, WeightType + +EVAL_REQUESTS_PATH = "eval-queue" +QUEUE_REPO = "open-llm-leaderboard/requests" + +precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ") +model_types = [e.name for e in ModelType] +weight_types = [e.name for e in WeightType] + + +def main(): + api = HfApi() + current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + 
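+    # refresh the local copy of the requests dataset so the new file is written against the latest queue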
snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH, repo_type="dataset") + + model_name = click.prompt("Enter model name") + revision = click.prompt("Enter revision", default="main") + precision = click.prompt("Enter precision", default="float16", type=click.Choice(precisions)) + model_type = click.prompt("Enter model type", type=click.Choice(model_types)) + weight_type = click.prompt("Enter weight type", default="Original", type=click.Choice(weight_types)) + base_model = click.prompt("Enter base model", default="") + status = click.prompt("Enter status", default="FINISHED") + + try: + model_info = api.model_info(repo_id=model_name, revision=revision) + except Exception as e: + print(f"{Fore.RED}Could not find model info for {model_name} on the Hub\n{e}{Fore.RESET}") + return 1 + + model_size = get_model_size(model_info=model_info, precision=precision) + + try: + license = model_info.cardData["license"] + except Exception: + license = "?" + + eval_entry = { + "model": model_name, + "base_model": base_model, + "revision": revision, + "private": False, + "precision": precision, + "weight_type": weight_type, + "status": status, + "submitted_time": current_time, + "model_type": model_type, + "likes": model_info.likes, + "params": model_size, + "license": license, + } + + user_name = "" + model_path = model_name + if "/" in model_name: + user_name = model_name.split("/")[0] + model_path = model_name.split("/")[1] + + pprint.pprint(eval_entry) + + if click.confirm("Do you want to continue? This request file will be pushed to the hub"): + click.echo("continuing...") + + out_dir = f"{EVAL_REQUESTS_PATH}/{user_name}" + os.makedirs(out_dir, exist_ok=True) + out_path = f"{out_dir}/{model_path}_eval_request_{False}_{precision}_{weight_type}.json" + + with open(out_path, "w") as f: + f.write(json.dumps(eval_entry)) + + api.upload_file( + path_or_fileobj=out_path, + path_in_repo=out_path.split(f"{EVAL_REQUESTS_PATH}/")[1], + repo_id=QUEUE_REPO, + repo_type="dataset", + commit_message=f"Add {model_name} to eval queue", + ) + else: + click.echo("aborting...") + + +if __name__ == "__main__": + main() diff --git a/src/scripts/update_all_request_files.py b/src/scripts/update_all_request_files.py new file mode 100644 index 0000000000000000000000000000000000000000..9ed902a287973f8eae9e61d5054f6821227413ed --- /dev/null +++ b/src/scripts/update_all_request_files.py @@ -0,0 +1,128 @@ +from huggingface_hub import ModelFilter, snapshot_download +from huggingface_hub import ModelCard + +import json +import os +import time + +from src.submission.check_validity import is_model_on_hub, check_model_card, get_model_tags +from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH, DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, API, H4_TOKEN + +def update_one_model(model_id, data, models_on_the_hub): + # Model no longer on the hub at all + if model_id not in models_on_the_hub: + data['still_on_hub'] = False + data['likes'] = 0 + data['downloads'] = 0 + data['created_at'] = "" + data["tags"] = [] + return data + + # Grabbing model parameters + model_cfg = models_on_the_hub[model_id] + data['likes'] = model_cfg.likes + data['downloads'] = model_cfg.downloads + data['created_at'] = str(model_cfg.created_at) + data['license'] = model_cfg.card_data.license if model_cfg.card_data is not None else "" + + # Grabbing model details + model_name = model_id + if model_cfg.card_data is not None and model_cfg.card_data.base_model is not None: + if isinstance(model_cfg.card_data.base_model, str): 
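+            # card metadata may also hold a list of base models; only the plain-string case is handled here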
+ model_name = model_cfg.card_data.base_model # for adapters, we look at the parent model + still_on_hub, _, _ = is_model_on_hub( + model_name=model_name, revision=data.get("revision"), trust_remote_code=True, test_tokenizer=False, token=H4_TOKEN + ) + # If the model doesn't have a model card or a license, we consider it's deleted + if still_on_hub: + try: + status, _, model_card = check_model_card(model_id) + if status is False: + still_on_hub = False + except Exception: + model_card = None + still_on_hub = False + data['still_on_hub'] = still_on_hub + + tags = get_model_tags(model_card, model_id) if still_on_hub else [] + + data["tags"] = tags + return data + +def update_models(file_path, models_on_the_hub): + """ + Search through all JSON files in the specified root folder and its subfolders, + and update the likes key in JSON dict from value of input dict + """ + seen_models = [] + with open(file_path, "r") as f: + model_infos = json.load(f) + for model_id in model_infos.keys(): + seen_models.append(model_id) + model_infos[model_id] = update_one_model( + model_id = model_id, + data=model_infos[model_id], + models_on_the_hub=models_on_the_hub + ) + + # If new requests files have been created since we started all this + # we grab them + all_models = [] + try: + for ix, (root, _, files) in enumerate(os.walk(EVAL_REQUESTS_PATH)): + if ix == 0: continue + for file in files: + if "eval_request" in file: + path = root.split("/")[-1] + "/" + file.split("_eval_request")[0] + all_models.append(path) + except Exception as e: + print(e) + pass + + for model_id in all_models: + if model_id not in seen_models: + model_infos[model_id] = update_one_model( + model_id = model_id, + data={}, + models_on_the_hub=models_on_the_hub + ) + + with open(file_path, 'w') as f: + json.dump(model_infos, f, indent=2) + +def update_dynamic_files(): + """ This will only update metadata for models already linked in the repo, not add missing ones. + """ + snapshot_download( + repo_id=DYNAMIC_INFO_REPO, local_dir=DYNAMIC_INFO_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30 + ) + + print("UPDATE_DYNAMIC: Loaded snapshot") + # Get models + start = time.time() + + models = list(API.list_models( + #filter=ModelFilter(task="text-generation"), + full=False, + cardData=True, + fetch_config=True, + )) + id_to_model = {model.id : model for model in models} + + print(f"UPDATE_DYNAMIC: Downloaded list of models in {time.time() - start:.2f} seconds") + + start = time.time() + + update_models(DYNAMIC_INFO_FILE_PATH, id_to_model) + + print(f"UPDATE_DYNAMIC: updated in {time.time() - start:.2f} seconds") + + API.upload_file( + path_or_fileobj=DYNAMIC_INFO_FILE_PATH, + path_in_repo=DYNAMIC_INFO_FILE_PATH.split("/")[-1], + repo_id=DYNAMIC_INFO_REPO, + repo_type="dataset", + commit_message=f"Daily request file update.", + ) + print(f"UPDATE_DYNAMIC: pushed to hub") + diff --git a/src/submission/check_validity.py b/src/submission/check_validity.py new file mode 100644 index 0000000000000000000000000000000000000000..115de5e50a09ffdb51f46e25ca1097775a265afc --- /dev/null +++ b/src/submission/check_validity.py @@ -0,0 +1,157 @@ +import json +import os +import re +from collections import defaultdict +from datetime import datetime, timedelta, timezone + +import huggingface_hub +from huggingface_hub import ModelCard +from huggingface_hub.hf_api import ModelInfo, get_safetensors_metadata +from transformers import AutoConfig, AutoTokenizer + +from src.envs import HAS_HIGHER_RATE_LIMIT + + +# ht to @Wauplin, thank you for the snippet! 
+# See https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/317
+def check_model_card(repo_id: str) -> tuple[bool, str, ModelCard]:
+    # Returns operation status, error message, and the loaded model card (None on failure)
+    try:
+        card = ModelCard.load(repo_id)
+    except huggingface_hub.utils.EntryNotFoundError:
+        return False, "Please add a model card to your model to explain how you trained/fine-tuned it.", None
+
+    # Enforce license metadata
+    if card.data.license is None:
+        if not ("license_name" in card.data and "license_link" in card.data):
+            return False, (
+                "License not found. Please add a license to your model card using the `license` metadata or a"
+                " `license_name`/`license_link` pair."
+            ), None
+
+    # Enforce card content
+    if len(card.text) < 200:
+        return False, "Please add a description to your model card, it is too short.", None
+
+    return True, "", card
+
+
+def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str, AutoConfig]:
+    try:
+        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)  # , force_download=True)
+        if test_tokenizer:
+            try:
+                AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+            except ValueError as e:
+                return (
+                    False,
+                    f"uses a tokenizer which is not in a transformers release: {e}",
+                    None
+                )
+            except Exception as e:
+                return (False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None)
+        return True, None, config
+
+    except ValueError as e:
+        return (
+            False,
+            "needs to be launched with `trust_remote_code=True`. For safety reasons, we do not allow these models to be automatically submitted to the leaderboard.",
+            None
+        )
+
+    except Exception as e:
+        if "You are trying to access a gated repo." in str(e):
+            return True, "uses a gated model.", None
+        return False, f"was not found or misconfigured on the hub! Error raised was {e.args[0]}", None
+
+def get_model_size(model_info: ModelInfo, precision: str):
+    size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
+    safetensors = None
+    try:
+        safetensors = get_safetensors_metadata(model_info.id)
+    except Exception as e:
+        print(e)
+
+    if safetensors is not None:
+        model_size = round(sum(safetensors.parameter_count.values()) / 1e9, 3)
+    else:
+        try:
+            size_match = re.search(size_pattern, model_info.id.lower())
+            model_size = size_match.group(0)
+            model_size = round(float(model_size[:-1]) if model_size[-1] == "b" else float(model_size[:-1]) / 1e3, 3)
+        except AttributeError:
+            return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
+
+    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
+    model_size = size_factor * model_size
+    return model_size
+
+def get_model_arch(model_info: ModelInfo):
+    return model_info.config.get("architectures", "Unknown")
+
+def user_submission_permission(org_or_user, users_to_submission_dates, rate_limit_period, rate_limit_quota):
+    if org_or_user not in users_to_submission_dates:
+        return True, ""
+    submission_dates = sorted(users_to_submission_dates[org_or_user])
+
+    time_limit = (datetime.now(timezone.utc) - timedelta(days=rate_limit_period)).strftime("%Y-%m-%dT%H:%M:%SZ")
+    submissions_after_timelimit = [d for d in submission_dates if d > time_limit]
+
+    num_models_submitted_in_period = len(submissions_after_timelimit)
+    if org_or_user in HAS_HIGHER_RATE_LIMIT:
+        rate_limit_quota = 2 * rate_limit_quota
+
+    if num_models_submitted_in_period > rate_limit_quota:
+        error_msg = f"Organisation or user `{org_or_user}` "
+        error_msg += f"already has {num_models_submitted_in_period} model requests submitted to the leaderboard "
+        error_msg += f"in the last {rate_limit_period} days.\n"
+        error_msg += (
+            "Please wait a couple of days before resubmitting, so that everybody can enjoy using the leaderboard 🤗"
+        )
+        return False, error_msg
+    return True, ""
+
+
+def already_submitted_models(requested_models_dir: str) -> tuple[set[str], dict[str, list[str]]]:
+    depth = 1
+    file_names = []
+    users_to_submission_dates = defaultdict(list)
+
+    for root, _, files in os.walk(requested_models_dir):
+        current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
+        if current_depth == depth:
+            for file in files:
+                if not file.endswith(".json"):
+                    continue
+                with open(os.path.join(root, file), "r") as f:
+                    info = json.load(f)
+                    file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")
+
+                    # Select organisation
+                    if info["model"].count("/") == 0 or "submitted_time" not in info:
+                        continue
+                    organisation, _ = info["model"].split("/")
+                    users_to_submission_dates[organisation].append(info["submitted_time"])
+
+    return set(file_names), users_to_submission_dates
+
+def get_model_tags(model_card, model: str):
+    is_merge_from_metadata = False
+    is_moe_from_metadata = False
+
+    tags = []
+    if model_card is None:
+        return tags
+    if model_card.data.tags:
+        is_merge_from_metadata = any([tag in model_card.data.tags for tag in ["merge", "moerge", "mergekit", "lazymergekit"]])
+        is_moe_from_metadata = any([tag in model_card.data.tags for tag in ["moe", "moerge"]])
+
+    is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in ["merged model", "merge model", "moerge"])
+    if is_merge_from_model_card or is_merge_from_metadata:
+        tags.append("merge")
+    is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in ["moe", "mixtral"])
+    is_moe_from_name = "moe" in
model.lower().replace("/", "-").replace("_", "-").split("-") + if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata: + tags.append("moe") + + return tags diff --git a/src/submission/submit.py b/src/submission/submit.py new file mode 100644 index 0000000000000000000000000000000000000000..667d864cb9d7aa0fe209e36f95631ef3ac11c5c3 --- /dev/null +++ b/src/submission/submit.py @@ -0,0 +1,182 @@ +import json +import os +from datetime import datetime, timezone + +from huggingface_hub import ModelCard, snapshot_download + +from src.display.formatting import styled_error, styled_message, styled_warning +from src.envs import API, EVAL_REQUESTS_PATH, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_REPO, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA +from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS +from src.submission.check_validity import ( + already_submitted_models, + check_model_card, + get_model_size, + is_model_on_hub, + user_submission_permission, + get_model_tags +) + +REQUESTED_MODELS = None +USERS_TO_SUBMISSION_DATES = None + +def add_new_eval( + model: str, + base_model: str, + revision: str, + precision: str, + private: bool, + weight_type: str, + model_type: str, +): + global REQUESTED_MODELS + global USERS_TO_SUBMISSION_DATES + if not REQUESTED_MODELS: + REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH) + + user_name = "" + model_path = model + if "/" in model: + user_name = model.split("/")[0] + model_path = model.split("/")[1] + + precision = precision.split(" ")[0] + current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + if model_type is None or model_type == "": + return styled_error("Please select a model type.") + + # Is the user rate limited? + if user_name != "": + user_can_submit, error_msg = user_submission_permission( + user_name, USERS_TO_SUBMISSION_DATES, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA + ) + if not user_can_submit: + return styled_error(error_msg) + + # Did the model authors forbid its submission to the leaderboard? + if model in DO_NOT_SUBMIT_MODELS or base_model in DO_NOT_SUBMIT_MODELS: + return styled_warning("Model authors have requested that their model be not submitted on the leaderboard.") + + # Does the model actually exist? + if revision == "": + revision = "main" + + # Is the model on the hub? + if weight_type in ["Delta", "Adapter"]: + base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=H4_TOKEN, test_tokenizer=True) + if not base_model_on_hub: + return styled_error(f'Base model "{base_model}" {error}') + + architecture = "?" + downloads = 0 + created_at = "" + if not weight_type == "Adapter": + model_on_hub, error, model_config = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True) + if not model_on_hub or model_config is None: + return styled_error(f'Model "{model}" {error}') + if model_config is not None: + architectures = getattr(model_config, "architectures", None) + if architectures: + architecture = ";".join(architectures) + downloads = getattr(model_config, 'downloads', 0) + created_at = getattr(model_config, 'created_at', '') + + + + # Is the model info correctly filled? + try: + model_info = API.model_info(repo_id=model, revision=revision) + except Exception: + return styled_error("Could not get your model information. Please fill it up properly.") + + model_size = get_model_size(model_info=model_info, precision=precision) + + # Were the model card and license filled? 
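+    # cardData may not contain a "license" key at all, hence the broad try/except below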
+ try: + license = model_info.cardData["license"] + except Exception: + return styled_error("Please select a license for your model") + + modelcard_OK, error_msg, model_card = check_model_card(model) + if not modelcard_OK: + return styled_error(error_msg) + + tags = get_model_tags(model_card, model) + + # Seems good, creating the eval + print("Adding new eval") + + eval_entry = { + "model": model, + "base_model": base_model, + "revision": revision, + "private": private, + "precision": precision, + "params": model_size, + "architectures": architecture, + "weight_type": weight_type, + "status": "PENDING", + "submitted_time": current_time, + "model_type": model_type, + "job_id": -1, + "job_start_time": None, + } + + supplementary_info = { + "likes": model_info.likes, + "license": license, + "still_on_hub": True, + "tags": tags, + "downloads": downloads, + "created_at": created_at + } + + # Check for duplicate submission + if f"{model}_{revision}_{precision}" in REQUESTED_MODELS: + return styled_warning("This model has been already submitted.") + + print("Creating eval file") + OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}" + os.makedirs(OUT_DIR, exist_ok=True) + out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json" + + with open(out_path, "w") as f: + f.write(json.dumps(eval_entry)) + + print("Uploading eval file") + API.upload_file( + path_or_fileobj=out_path, + path_in_repo=out_path.split("eval-queue/")[1], + repo_id=QUEUE_REPO, + repo_type="dataset", + commit_message=f"Add {model} to eval queue", + ) + + # We want to grab the latest version of the submission file to not accidentally overwrite it + snapshot_download( + repo_id=DYNAMIC_INFO_REPO, local_dir=DYNAMIC_INFO_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30 + ) + + with open(DYNAMIC_INFO_FILE_PATH) as f: + all_supplementary_info = json.load(f) + + all_supplementary_info[model] = supplementary_info + with open(DYNAMIC_INFO_FILE_PATH, "w") as f: + json.dump(all_supplementary_info, f, indent=2) + + API.upload_file( + path_or_fileobj=DYNAMIC_INFO_FILE_PATH, + path_in_repo=DYNAMIC_INFO_FILE_PATH.split("/")[-1], + repo_id=DYNAMIC_INFO_REPO, + repo_type="dataset", + commit_message=f"Add {model} to dynamic info queue", + ) + + + + # Remove the local file + os.remove(out_path) + + return styled_message( + "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list." 
+ ) diff --git a/src/tools/collections.py b/src/tools/collections.py new file mode 100644 index 0000000000000000000000000000000000000000..fd8f218cd95e0cc9e5e0aca9a584d057f05f22e5 --- /dev/null +++ b/src/tools/collections.py @@ -0,0 +1,83 @@ +import os + +import pandas as pd +from huggingface_hub import add_collection_item, delete_collection_item, get_collection, update_collection_item +from huggingface_hub.utils._errors import HfHubHTTPError +from pandas import DataFrame + +from src.display.utils import AutoEvalColumn, ModelType +from src.envs import H4_TOKEN, PATH_TO_COLLECTION + +# Specific intervals for the collections +intervals = { + "1B": pd.Interval(0, 1.5, closed="right"), + "3B": pd.Interval(2.5, 3.5, closed="neither"), + "7B": pd.Interval(6, 8, closed="neither"), + "13B": pd.Interval(10, 14, closed="neither"), + "30B": pd.Interval(25, 35, closed="neither"), + "65B": pd.Interval(60, 70, closed="neither"), +} + + +def update_collections(df: DataFrame): + """This function updates the Open LLM Leaderboard model collection with the latest best models for + each size category and type. + """ + collection = get_collection(collection_slug=PATH_TO_COLLECTION, token=H4_TOKEN) + params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce") + + cur_best_models = [] + + ix = 0 + for type in ModelType: + if type.value.name == "": + continue + for size in intervals: + # We filter the df to gather the relevant models + type_emoji = [t[0] for t in type.value.symbol] + filtered_df = df[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)] + + numeric_interval = pd.IntervalIndex([intervals[size]]) + mask = params_column.apply(lambda x: any(numeric_interval.contains(x))) + filtered_df = filtered_df.loc[mask] + + best_models = list( + filtered_df.sort_values(AutoEvalColumn.average.name, ascending=False)[AutoEvalColumn.dummy.name] + ) + print(type.value.symbol, size, best_models[:10]) + + # We add them one by one to the leaderboard + for model in best_models: + ix += 1 + cur_len_collection = len(collection.items) + try: + collection = add_collection_item( + PATH_TO_COLLECTION, + item_id=model, + item_type="model", + exists_ok=True, + note=f"Best {type.to_str(' ')} model of around {size} on the leaderboard today!", + token=H4_TOKEN, + ) + if ( + len(collection.items) > cur_len_collection + ): # we added an item - we make sure its position is correct + item_object_id = collection.items[-1].item_object_id + update_collection_item( + collection_slug=PATH_TO_COLLECTION, item_object_id=item_object_id, position=ix + ) + cur_len_collection = len(collection.items) + cur_best_models.append(model) + break + except HfHubHTTPError: + continue + + collection = get_collection(PATH_TO_COLLECTION, token=H4_TOKEN) + for item in collection.items: + if item.item_id not in cur_best_models: + try: + delete_collection_item( + collection_slug=PATH_TO_COLLECTION, item_object_id=item.item_object_id, token=H4_TOKEN + ) + except HfHubHTTPError: + continue diff --git a/src/tools/model_backlinks.py b/src/tools/model_backlinks.py new file mode 100644 index 0000000000000000000000000000000000000000..e1601174d8eae6052c65575d3b4c268f09a80208 --- /dev/null +++ b/src/tools/model_backlinks.py @@ -0,0 +1,1309 @@ +models = [ + "uni-tianyan/Uni-TianYan", + "fangloveskari/ORCA_LLaMA_70B_QLoRA", + "garage-bAInd/Platypus2-70B-instruct", + "upstage/Llama-2-70b-instruct-v2", + "fangloveskari/Platypus_QLoRA_LLaMA_70b", + "yeontaek/llama-2-70B-ensemble-v5", + "TheBloke/Genz-70b-GPTQ", + "TheBloke/Platypus2-70B-Instruct-GPTQ", 
+ "psmathur/model_007", + "yeontaek/llama-2-70B-ensemble-v4", + "psmathur/orca_mini_v3_70b", + "ehartford/Samantha-1.11-70b", + "MayaPH/GodziLLa2-70B", + "psmathur/model_007_v2", + "chargoddard/MelangeA-70b", + "ehartford/Samantha-1.1-70b", + "psmathur/model_009", + "upstage/Llama-2-70b-instruct", + "yeontaek/llama-2-70B-ensemble-v7", + "yeontaek/llama-2-70B-ensemble-v6", + "chargoddard/MelangeB-70b", + "yeontaek/llama-2-70B-ensemble-v3", + "chargoddard/MelangeC-70b", + "garage-bAInd/Camel-Platypus2-70B", + "yeontaek/llama-2-70B-ensemble-v2", + "garage-bAInd/Camel-Platypus2-70B", + "migtissera/Synthia-70B-v1.2", + "v2ray/LLaMA-2-Wizard-70B-QLoRA", + "quantumaikr/llama-2-70b-fb16-orca-chat-10k", + "v2ray/LLaMA-2-Wizard-70B-QLoRA", + "stabilityai/StableBeluga2", + "quantumaikr/llama-2-70b-fb16-guanaco-1k", + "garage-bAInd/Camel-Platypus2-70B", + "migtissera/Synthia-70B-v1.1", + "migtissera/Synthia-70B", + "psmathur/model_101", + "augtoma/qCammel70", + "augtoma/qCammel-70", + "augtoma/qCammel-70v1", + "augtoma/qCammel-70x", + "augtoma/qCammel-70-x", + "jondurbin/airoboros-l2-70b-gpt4-1.4.1", + "dfurman/llama-2-70b-dolphin-peft", + "jondurbin/airoboros-l2-70b-2.1", + "TheBloke/llama-2-70b-Guanaco-QLoRA-fp16", + "quantumaikr/QuantumLM-llama2-70B-Korean-LoRA", + "quantumaikr/quantumairk-llama-2-70B-instruct", + "psmathur/model_420", + "psmathur/model_51", + "garage-bAInd/Camel-Platypus2-70B", + "TheBloke/Airoboros-L2-70B-2.1-GPTQ", + "OpenAssistant/llama2-70b-oasst-sft-v10", + "garage-bAInd/Platypus2-70B", + "liuxiang886/llama2-70B-qlora-gpt4", + "upstage/llama-65b-instruct", + "quantumaikr/llama-2-70b-fb16-korean", + "NousResearch/Nous-Hermes-Llama2-70b", + "v2ray/LLaMA-2-Jannie-70B-QLoRA", + "jondurbin/airoboros-l2-70b-gpt4-m2.0", + "jondurbin/airoboros-l2-70b-gpt4-m2.0", + "OpenAssistant/llama2-70b-oasst-sft-v10", + "yeontaek/llama-2-70B-ensemble-v8", + "jondurbin/airoboros-l2-70b-gpt4-2.0", + "jarradh/llama2_70b_chat_uncensored", + "WizardLM/WizardMath-70B-V1.0", + "jordiclive/Llama-2-70b-oasst-1-200", + "WizardLM/WizardMath-70B-V1.0", + "jondurbin/airoboros-l2-70b-gpt4-2.0", + "OpenLemur/lemur-70b-chat-v1", + "tiiuae/falcon-180B", + "tiiuae/falcon-180B", + "stabilityai/StableBeluga1-Delta", + "psmathur/model_42_70b", + "psmathur/test_42_70b", + "TheBloke/fiction.live-Kimiko-V2-70B-fp16", + "tiiuae/falcon-180B", + "WizardLM/WizardMath-70B-V1.0", + "tiiuae/falcon-180B-chat", + "jondurbin/airoboros-l2-70b-gpt4-2.0", + "ehartford/samantha-1.1-llama-33b", + "ajibawa-2023/scarlett-33b", + "ddobokki/Llama-2-70b-orca-200k", + "TheBloke/gpt4-alpaca-lora_mlp-65B-HF", + "tiiuae/falcon-180B-chat", + "tiiuae/falcon-180B-chat", + "tiiuae/falcon-180B", + "TheBloke/Lemur-70B-Chat-v1-GPTQ", + "NousResearch/Nous-Puffin-70B", + "WizardLM/WizardLM-70B-V1.0", + "WizardLM/WizardMath-70B-V1.0", + "meta-llama/Llama-2-70b-hf", + "TheBloke/Llama-2-70B-fp16", + "Weyaxi/llama-2-alpacagpt4-1000step", + "WizardLM/WizardLM-70B-V1.0", + "simsim314/WizardLM-70B-V1.0-HF", + "simsim314/WizardLM-70B-V1.0-HF", + "WizardLM/WizardLM-70B-V1.0", + "openbmb/UltraLM-65b", + "psmathur/model_420_preview", + "WizardLM/WizardLM-70B-V1.0", + "simsim314/WizardLM-70B-V1.0-HF", + "OpenBuddy/openbuddy-llama2-70b-v10.1-bf16", + "upstage/llama-30b-instruct-2048", + "jondurbin/airoboros-65b-gpt4-1.2", + "TheBloke/guanaco-65B-HF", + "jondurbin/airoboros-65b-gpt4-1.3", + "meta-llama/Llama-2-70b-chat-hf", + "ValiantLabs/ShiningValiant", + "Faradaylab/Aria-70B", + "lilloukas/GPlatty-30B", + "TheBloke/VicUnlocked-alpaca-65B-QLoRA-fp16", + 
"jondurbin/airoboros-65b-gpt4-1.4-peft", + "jondurbin/airoboros-65b-gpt4-1.4", + "jondurbin/airoboros-65b-gpt4-2.0", + "TheBloke/WizardLM-70B-V1.0-GPTQ", + "TheBloke/WizardLM-70B-V1.0-GPTQ", + "ariellee/SuperPlatty-30B", + "jondurbin/airoboros-65b-gpt4-1.4", + "jondurbin/airoboros-65b-gpt4-2.0", + "yeontaek/llama-2-70b-IA3-guanaco", + "CalderaAI/30B-Lazarus", + "Aspik101/trurl-2-13b-pl-instruct_unload", + "ehartford/WizardLM-33B-V1.0-Uncensored", + "ehartford/WizardLM-33B-V1.0-Uncensored", + "OpenBuddy/openbuddy-llama-65b-v8-bf16", + "Aspik101/llama-30b-instruct-2048-PL-lora", + "h2oai/h2ogpt-research-oasst1-llama-65b", + "Aspik101/llama-30b-instruct-2048-PL-lora", + "CalderaAI/30B-Epsilon", + "Aspik101/llama-30b-2048-instruct-PL-lora_unload", + "jondurbin/airoboros-65b-gpt4-m2.0", + "jondurbin/airoboros-65b-gpt4-m2.0", + "Aeala/Alpaca-elina-65b", + "TheBloke/robin-65b-v2-fp16", + "TheBloke/gpt4-alpaca-lora-30b-HF", + "TheBloke/Llama-2-70B-chat-GPTQ", + "upstage/llama-30b-instruct", + "OpenLemur/lemur-70b-v1", + "lmsys/vicuna-33b-v1.3", + "ausboss/llama-30b-supercot", + "ai-business/Luban-13B", + "Henk717/airochronos-33B", + "lmsys/vicuna-33b-v1.3", + "Henk717/airochronos-33B", + "bavest/fin-llama-33b-merged", + "jondurbin/airoboros-33b-gpt4-1.4", + "YeungNLP/firefly-llama-30b", + "Aspik101/30B-Lazarus-instruct-PL-lora_unload", + "uukuguy/speechless-llama2-luban-orca-platypus-13b", + "xxyyy123/test_merge_p_ov1_w0.66_w0.5_n1", + "jondurbin/airoboros-33b-gpt4-1.2", + "TheBloke/alpaca-lora-65B-HF", + "bofenghuang/vigogne-33b-instruct", + "yeontaek/llama-2-13B-ensemble-v5", + "garage-bAInd/Platypus-30B", + "Open-Orca/OpenOrca-Platypus2-13B", + "kajdun/viwaai-30b_v4", + "lilloukas/Platypus-30B", + "Open-Orca/OpenOrca-Platypus2-13B", + "Henk717/chronoboros-33B", + "jondurbin/airoboros-33b-2.1", + "HiTZ/alpaca-lora-65b-en-pt-es-ca", + "quantumaikr/QuantumLM-70B-hf", + "uukuguy/speechless-llama2-13b", + "uukuguy/speechless-llama2-hermes-orca-platypus-13b", + "openaccess-ai-collective/manticore-30b-chat-pyg-alpha", + "LLMs/WizardLM-30B-V1.0", + "TheBloke/WizardLM-30B-fp16", + "openaccess-ai-collective/hippogriff-30b-chat", + "concedo/Vicuzard-30B-Uncensored", + "TFLai/OpenOrca-Platypus2-13B-QLoRA-0.80-epoch", + "huggingface/llama-65b", + "huggyllama/llama-65b", + "gaodrew/gaodrew-llama-30b-instruct-2048-Open-Platypus-100steps", + "uukuguy/speechless-llama2-hermes-orca-platypus-wizardlm-13b", + "Sao10K/Mythical-Destroyer-V2-L2-13B", + "camel-ai/CAMEL-33B-Combined-Data", + "dsvv-cair/alpaca-cleaned-llama-30b-bf16", + "MetaIX/GPT4-X-Alpasta-30b", + "garage-bAInd/Stable-Platypus2-13B", + "TFLai/Luban-Platypus2-13B-QLora-0.80-epoch", + "TheBloke/OpenOrca-Platypus2-13B-GPTQ", + "IkariDev/Athena-tmp", + "OpenBuddyEA/openbuddy-llama-30b-v7.1-bf16", + "OpenBuddyEA/openbuddy-llama-30b-v7.1-bf16", + "Open-Orca/OpenOrcaxOpenChat-Preview2-13B", + "psmathur/model_007_13b_v2", + "Aspik101/Vicuzard-30B-Uncensored-instruct-PL-lora_unload", + "jondurbin/airoboros-33b-gpt4-m2.0", + "Sao10K/Mythical-Destroyer-L2-13B", + "TheBloke/Wizard-Vicuna-30B-Uncensored-fp16", + "ehartford/Wizard-Vicuna-30B-Uncensored", + "TFLai/Nova-13B", + "TheBloke/robin-33B-v2-fp16", + "totally-not-an-llm/PuddleJumper-13b", + "Aeala/VicUnlocked-alpaca-30b", + "Yhyu13/oasst-rlhf-2-llama-30b-7k-steps-hf", + "jondurbin/airoboros-33b-gpt4", + "jondurbin/airoboros-33b-gpt4-m2.0", + "tiiuae/falcon-40b-instruct", + "psmathur/orca_mini_v3_13b", + "Aeala/GPT4-x-AlpacaDente-30b", + "MayaPH/GodziLLa-30B", + "jondurbin/airoboros-33b-gpt4-m2.0", + 
"TFLai/SpeechlessV1-Nova-13B", + "yeontaek/llama-2-13B-ensemble-v4", + "ajibawa-2023/carl-33b", + "jondurbin/airoboros-33b-gpt4-2.0", + "TFLai/Stable-Platypus2-13B-QLoRA-0.80-epoch", + "jondurbin/airoboros-33b-gpt4-1.3", + "TehVenom/oasst-sft-6-llama-33b-xor-MERGED-16bit", + "TFLai/OrcaMini-Platypus2-13B-QLoRA-0.80-epoch", + "jondurbin/airoboros-33b-gpt4-2.0", + "chargoddard/Chronorctypus-Limarobormes-13b", + "jondurbin/airoboros-33b-gpt4-1.3", + "Open-Orca/OpenOrca-Platypus2-13B", + "FelixChao/vicuna-33b-coder", + "FelixChao/vicuna-33b-coder", + "Gryphe/MythoMix-L2-13b", + "Aeala/Enterredaas-33b", + "yeontaek/llama-2-13B-ensemble-v1", + "TFLai/OpenOrcaPlatypus2-Platypus2-13B-QLora-0.80-epoch", + "TFLai/Ensemble5-Platypus2-13B-QLora-0.80-epoch", + "yeontaek/llama-2-13B-ensemble-v3", + "TFLai/MythoMix-Platypus2-13B-QLoRA-0.80-epoch", + "yihan6324/llama2-13b-instructmining-40k-sharegpt", + "timdettmers/guanaco-33b-merged", + "TFLai/EnsembleV5-Nova-13B", + "circulus/Llama-2-13b-orca-v1", + "Undi95/ReMM-SLERP-L2-13B", + "Gryphe/MythoMax-L2-13b", + "stabilityai/StableBeluga-13B", + "circulus/Llama-2-13b-orca-v1", + "ehartford/WizardLM-30B-Uncensored", + "The-Face-Of-Goonery/huginnv1.2", + "TheBloke/OpenOrcaxOpenChat-Preview2-13B-GPTQ", + "Sao10K/Stheno-L2-13B", + "bofenghuang/vigogne-2-13b-instruct", + "The-Face-Of-Goonery/Huginn-13b-FP16", + "grimpep/L2-MythoMax22b-instruct-Falseblock", + "TFLai/Nous-Hermes-Platypus2-13B-QLoRA-0.80-epoch", + "yeontaek/Platypus2xOpenOrca-13B-IA3-v4", + "yeontaek/Platypus2xOpenOrca-13B-IA3", + "yeontaek/Platypus2xOpenOrca-13B-IA3-ensemble", + "Open-Orca/LlongOrca-13B-16k", + "Sao10K/Stheno-Inverted-L2-13B", + "garage-bAInd/Camel-Platypus2-13B", + "digitous/Alpacino30b", + "NousResearch/Nous-Hermes-Llama2-13b", + "yeontaek/Platypus2xOpenOrca-13B-IA3-v3", + "TFLai/MythicalDestroyerV2-Platypus2-13B-QLora-0.80-epoch", + "TheBloke/VicUnlocked-30B-LoRA-HF", + "Undi95/Nous-Hermes-13B-Code", + "The-Face-Of-Goonery/Chronos-Beluga-v2-13bfp16", + "NousResearch/Nous-Hermes-Llama2-13b", + "Monero/WizardLM-Uncensored-SuperCOT-StoryTelling-30b", + "TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ", + "Open-Orca/OpenOrcaxOpenChat-Preview2-13B", + "Austism/chronos-hermes-13b-v2", + "yeontaek/Platypus2xOpenOrca-13B-IA3-v2.1", + "yeontaek/Platypus2xOpenOrca-13B-IA3-v2", + "Gryphe/MythoLogic-L2-13b", + "augtoma/qCammel-13", + "YeungNLP/firefly-llama2-13b-v1.2", + "Aspik101/StableBeluga-13B-instruct-PL-lora_unload", + "andreaskoepf/llama2-13b-megacode2_min100", + "rombodawg/LosslessMegaCoder-llama2-13b-mini", + "yulan-team/YuLan-Chat-2-13b-fp16", + "elinas/chronos-33b", + "YeungNLP/firefly-llama2-13b", + "Sao10K/Medusa-13b", + "OptimalScale/robin-65b-v2-delta", + "minlik/chinese-alpaca-33b-merged", + "OpenAssistant/llama2-13b-megacode2-oasst", + "TheBloke/OpenAssistant-SFT-7-Llama-30B-HF", + "Undi95/UndiMix-v1-13b", + "ehartford/Samantha-1.11-13b", + "beaugogh/Llama2-13b-sharegpt4", + "Aeala/GPT4-x-AlpacaDente2-30b", + "luffycodes/nash-vicuna-13b-v1dot5-ep2-w-rag-w-simple", + "WizardLM/WizardLM-13B-V1.1", + "uukuguy/speechless-orca-platypus-coig-lite-2k-0.6e-13b", + "huggyllama/llama-30b", + "Undi95/ReMM-L2-13B-PIPPA", + "Undi95/ReMM-L2-13B", + "gaodrew/gaodrew-gorgonzola-13b", + "lmsys/vicuna-13b-v1.5", + "yeontaek/Platypus2xOpenOrca-13B-LoRa", + "Yhyu13/llama-30B-hf-openassitant", + "huggingface/llama-30b", + "lmsys/vicuna-13b-v1.5", + "TFLai/Athena-Platypus2-13B-QLora-0.80-epoch", + "TheBloke/dromedary-65b-lora-HF", + "yeontaek/llama-2-13b-Beluga-QLoRA", + 
"The-Face-Of-Goonery/Huginn-13b-V4", + "The-Face-Of-Goonery/Huginn-13b-v4.5", + "The-Face-Of-Goonery/Huginn-v3-13b", + "tiiuae/falcon-40b", + "WhoTookMyAmogusNickname/NewHope_HF_not_official", + "gaodrew/OpenOrca-Platypus2-13B-thera-1250", + "SLAM-group/NewHope", + "garage-bAInd/Platypus2-13B", + "migtissera/Synthia-13B", + "elinas/chronos-13b-v2", + "mosaicml/mpt-30b-chat", + "CHIH-HUNG/llama-2-13b-OpenOrca_5w", + "uukuguy/speechless-hermes-coig-lite-13b", + "TheBloke/tulu-30B-fp16", + "uukuguy/speechless-hermes-coig-lite-13b", + "xDAN-AI/xDAN_13b_l2_lora", + "lmsys/vicuna-13b-v1.5-16k", + "openchat/openchat_v3.1", + "CHIH-HUNG/llama-2-13b-dolphin_5w", + "Aspik101/vicuna-13b-v1.5-PL-lora_unload", + "Undi95/MLewd-L2-13B", + "ehartford/minotaur-llama2-13b-qlora", + "kajdun/iubaris-13b-v3", + "TFLai/Limarp-Platypus2-13B-QLoRA-0.80-epoch", + "openchat/openchat_v3.1", + "uukuguy/speechless-orca-platypus-coig-lite-4k-0.6e-13b", + "ziqingyang/chinese-alpaca-2-13b", + "TFLai/Airboros2.1-Platypus2-13B-QLora-0.80-epoch", + "yeontaek/llama-2-13b-Guanaco-QLoRA", + "lmsys/vicuna-13b-v1.5-16k", + "ehartford/based-30b", + "kingbri/airolima-chronos-grad-l2-13B", + "openchat/openchat_v3.2", + "uukuguy/speechless-orca-platypus-coig-lite-4k-0.5e-13b", + "yeontaek/Platypus2-13B-LoRa", + "kingbri/chronolima-airo-grad-l2-13B", + "openchat/openchat_v3.2", + "TFLai/PuddleJumper-Platypus2-13B-QLoRA-0.80-epoch", + "shareAI/llama2-13b-Chinese-chat", + "ehartford/WizardLM-1.0-Uncensored-Llama2-13b", + "Aspik101/Redmond-Puffin-13B-instruct-PL-lora_unload", + "yeontaek/llama-2-13B-ensemble-v6", + "WizardLM/WizardLM-13B-V1.2", + "TheBloke/WizardLM-13B-V1.1-GPTQ", + "bhenrym14/airophin-13b-pntk-16k-fp16", + "ehartford/WizardLM-1.0-Uncensored-Llama2-13b", + "Mikael110/llama-2-13b-guanaco-fp16", + "yeontaek/airoboros-2.1-llama-2-13B-QLoRa", + "CalderaAI/13B-Legerdemain-L2", + "grimpep/llama2-22b-wizard_vicuna", + "grimpep/llama2-22B-GPLATTY", + "bhenrym14/airophin-13b-pntk-16k-fp16", + "yeontaek/llama-2-13b-QLoRA", + "OpenAssistant/llama2-13b-orca-8k-3319", + "TheBloke/WizardLM-13B-V1-1-SuperHOT-8K-fp16", + "duliadotio/dulia-13b-8k-alpha", + "Undi95/LewdEngine", + "OpenBuddy/openbuddy-llama2-13b-v8.1-fp16", + "CHIH-HUNG/llama-2-13b-open_orca_20w", + "bhenrym14/airoboros-33b-gpt4-1.4.1-lxctx-PI-16384-fp16", + "FlagAlpha/Llama2-Chinese-13b-Chat", + "LLMs/WizardLM-13B-V1.0", + "chansung/gpt4-alpaca-lora-13b-decapoda-1024", + "TheBloke/wizardLM-13B-1.0-fp16", + "digitous/13B-Chimera", + "yeontaek/Platypus2xOpenOrcaxGuanaco-13B-LoRa", + "jondurbin/airoboros-l2-13b-2.1", + "Monero/WizardLM-30B-Uncensored-Guanaco-SuperCOT-30b", + "TheBloke/UltraLM-13B-fp16", + "openaccess-ai-collective/minotaur-13b-fixed", + "NousResearch/Redmond-Puffin-13B", + "KoboldAI/LLaMA2-13B-Holomax", + "Lajonbot/WizardLM-13B-V1.2-PL-lora_unload", + "yeontaek/Platypus2-13B-LoRa-v2", + "TheBloke/airoboros-13B-HF", + "jondurbin/airoboros-13b", + "jjaaaww/posi_13b", + "CoolWP/llama-2-13b-guanaco-fp16", + "yeontaek/Platypus2-13B-QLoRa", + "h2oai/h2ogpt-research-oig-oasst1-512-30b", + "dfurman/llama-2-13b-guanaco-peft", + "NousResearch/Redmond-Puffin-13B", + "pe-nlp/llama-2-13b-platypus-vicuna-wizard", + "CHIH-HUNG/llama-2-13b-dolphin_20w", + "NousResearch/Nous-Hermes-13b", + "NobodyExistsOnTheInternet/GiftedConvo13bLoraNoEconsE4", + "ehartford/Wizard-Vicuna-13B-Uncensored", + "TheBloke/Wizard-Vicuna-13B-Uncensored-HF", + "openchat/openchat_v3.2_super", + "bhenrym14/airophin-v2-13b-PI-8k-fp16", + "openaccess-ai-collective/manticore-13b", + 
"The-Face-Of-Goonery/Huginn-22b-Prototype", + "jphme/Llama-2-13b-chat-german", + "grimpep/llama2-28B-Airo03", + "TheBloke/Kimiko-v2-13B-fp16", + "FPHam/Free_Sydney_13b_HF", + "lmsys/vicuna-13b-v1.3", + "FelixChao/llama2-13b-math1.1", + "CalderaAI/13B-BlueMethod", + "meta-llama/Llama-2-13b-chat-hf", + "deepse/CodeUp-Llama-2-13b-chat-hf", + "WizardLM/WizardMath-13B-V1.0", + "WizardLM/WizardMath-13B-V1.0", + "HyperbeeAI/Tulpar-7b-v0", + "xxyyy123/test_qkvo_adptor", + "xxyyy123/mc_data_30k_from_platpus_orca_7b_10k_v1_lora_qkvo_rank14_v2", + "openchat/openchat_v2_w", + "FelixChao/llama2-13b-math1.1", + "psmathur/orca_mini_v3_7b", + "TehVenom/Metharme-13b-Merged", + "xxyyy123/10k_v1_lora_qkvo_rank14_v3", + "OpenAssistant/llama2-13b-orca-v2-8k-3166", + "openaccess-ai-collective/wizard-mega-13b", + "jondurbin/airoboros-13b-gpt4-1.4", + "jondurbin/airoboros-13b-gpt4-1.4-fp16", + "Monero/Manticore-13b-Chat-Pyg-Guanaco", + "FelixChao/llama2-13b-math1.2", + "chargoddard/platypus-2-22b-relora", + "FelixChao/llama2-13b-math1.2", + "Gryphe/MythoBoros-13b", + "CalderaAI/13B-Ouroboros", + "OpenAssistant/llama2-13b-orca-v2-8k-3166", + "heegyu/LIMA2-13b-hf", + "digitous/13B-HyperMantis", + "Gryphe/MythoLogic-13b", + "TheBloke/Airoboros-L2-13B-2.1-GPTQ", + "chargoddard/platypus2-22b-relora", + "openchat/openchat_v2", + "yeontaek/Platypus2-13B-IA3", + "stabilityai/StableBeluga-7B", + "circulus/Llama-2-7b-orca-v1", + "budecosystem/genz-13b-v2", + "TheBloke/gpt4-x-vicuna-13B-HF", + "NobodyExistsOnTheInternet/GiftedConvo13bLoraNoEcons", + "zarakiquemparte/zarafusionex-1.1-l2-7b", + "Lajonbot/tableBeluga-7B-instruct-pl-lora_unload", + "jondurbin/airoboros-13b-gpt4", + "gaodrew/gaodrew-gorgonzola-13b", + "jondurbin/airoboros-13b-gpt4-1.1", + "TheBloke/gpt4-alpaca-lora-13B-HF", + "zarakiquemparte/zarablendex-vq-l2-7b", + "openaccess-ai-collective/manticore-13b-chat-pyg", + "Lajonbot/Llama-2-13b-hf-instruct-pl-lora_unload", + "NobodyExistsOnTheInternet/PuffedLIMA13bQLORA", + "xxyyy123/10k_v1_lora_qkvo_rank28_v2", + "jondurbin/airoboros-l2-13b-gpt4-1.4.1", + "dhmeltzer/Llama-2-13b-hf-eli5-wiki-1024_r_64_alpha_16", + "NobodyExistsOnTheInternet/PuffedConvo13bLoraE4", + "yihan6324/llama2-7b-instructmining-40k-sharegpt", + "CHIH-HUNG/llama-2-13b-Open_Platypus_and_ccp_2.6w", + "Aeala/GPT4-x-Alpasta-13b", + "psmathur/orca_mini_v2_13b", + "YeungNLP/firefly-llama-13b", + "psmathur/orca_mini_v2_13b", + "zarakiquemparte/zarafusionix-l2-7b", + "yihan6324/llama2-7b-instructmining-60k-sharegpt", + "yihan6324/llama-2-7b-instructmining-60k-sharegpt", + "layoric/llama-2-13b-code-alpaca", + "bofenghuang/vigogne-13b-instruct", + "Lajonbot/vicuna-13b-v1.3-PL-lora_unload", + "lvkaokao/llama2-7b-hf-chat-lora-v3", + "ehartford/dolphin-llama-13b", + "YeungNLP/firefly-llama-13b-v1.2", + "TheBloke/Kimiko-13B-fp16", + "kevinpro/Vicuna-13B-CoT", + "eachadea/vicuna-13b-1.1", + "pillowtalks-ai/delta13b", + "TheBloke/vicuna-13B-1.1-HF", + "TheBloke/Vicuna-13B-CoT-fp16", + "lmsys/vicuna-13b-delta-v1.1", + "lmsys/vicuna-13b-v1.1", + "xxyyy123/20k_v1_lora_qkvo_rank14_v2", + "TheBloke/guanaco-13B-HF", + "TheBloke/vicuna-13b-v1.3.0-GPTQ", + "edor/Stable-Platypus2-mini-7B", + "totally-not-an-llm/EverythingLM-13b-V2-16k", + "zarakiquemparte/zaraxe-l2-7b", + "beaugogh/Llama2-7b-openorca-mc-v2", + "TheBloke/Nous-Hermes-13B-SuperHOT-8K-fp16", + "quantumaikr/QuantumLM", + "jondurbin/airoboros-13b-gpt4-1.2", + "TheBloke/robin-13B-v2-fp16", + "TFLai/llama-2-13b-4bit-alpaca-gpt4", + "yihan6324/llama2-7b-instructmining-orca-40k", + 
"dvruette/oasst-llama-13b-2-epochs", + "Open-Orca/LlongOrca-7B-16k", + "Aspik101/Nous-Hermes-13b-pl-lora_unload", + "ehartford/Samantha-1.11-CodeLlama-34b", + "nkpz/llama2-22b-chat-wizard-uncensored", + "bofenghuang/vigogne-13b-chat", + "beaugogh/Llama2-7b-openorca-mc-v1", + "OptimalScale/robin-13b-v2-delta", + "pe-nlp/llama-2-13b-vicuna-wizard", + "chargoddard/llama2-22b", + "gywy/llama2-13b-chinese-v1", + "frank098/Wizard-Vicuna-13B-juniper", + "IGeniusDev/llama13B-quant8-testv1-openorca-customdataset", + "CHIH-HUNG/llama-2-13b-huangyt_Fintune_1_17w-gate_up_down_proj", + "eachadea/vicuna-13b", + "yihan6324/llama2-7b-instructmining-orca-90k", + "chargoddard/llama2-22b-blocktriangular", + "luffycodes/mcq-vicuna-13b-v1.5", + "Yhyu13/chimera-inst-chat-13b-hf", + "luffycodes/mcq-vicuna-13b-v1.5", + "chargoddard/ypotryll-22b-epoch2-qlora", + "totally-not-an-llm/EverythingLM-13b-16k", + "luffycodes/mcq-hal-vicuna-13b-v1.5", + "openaccess-ai-collective/minotaur-13b", + "IGeniusDev/llama13B-quant8-testv1-openorca-customdataset", + "chargoddard/llama2-22b-blocktriangular", + "TFLai/Platypus2-13B-QLoRA-0.80-epoch", + "meta-llama/Llama-2-13b-hf", + "CHIH-HUNG/llama-2-13b-huangyt_FINETUNE2_3w-gate_up_down_proj", + "luffycodes/mcq-hal-vicuna-13b-v1.5", + "TheBloke/Llama-2-13B-fp16", + "TaylorAI/Flash-Llama-13B", + "shareAI/bimoGPT-llama2-13b", + "wahaha1987/llama_13b_sharegpt94k_fastchat", + "openchat/openchat_8192", + "CHIH-HUNG/llama-2-13b-huangyt_Fintune_1_17w-q_k_v_o_proj", + "dvruette/llama-13b-pretrained-sft-do2", + "CHIH-HUNG/llama-2-13b-alpaca-test", + "OpenBuddy/openbuddy-llama2-13b-v11.1-bf16", + "CHIH-HUNG/llama-2-13b-FINETUNE2_TEST_2.2w", + "project-baize/baize-v2-13b", + "jondurbin/airoboros-l2-13b-gpt4-m2.0", + "yeontaek/Platypus2xOpenOrca-13B-LoRa-v2", + "CHIH-HUNG/llama-2-13b-huangyt_FINETUNE2_3w", + "xzuyn/Alpacino-SuperCOT-13B", + "jondurbin/airoboros-l2-13b-gpt4-2.0", + "aiplanet/effi-13b", + "clibrain/Llama-2-13b-ft-instruct-es", + "CHIH-HUNG/llama-2-13b-huangyt_Fintune_1_17w", + "bofenghuang/vigogne-2-7b-instruct", + "CHIH-HUNG/llama-2-13b-huangyt_FINETUNE2_3w-q_k_v_o_proj", + "bofenghuang/vigogne-2-7b-chat", + "aiplanet/effi-13b", + "haonan-li/bactrian-x-llama-13b-merged", + "beaugogh/Llama2-7b-sharegpt4", + "HWERI/Llama2-7b-sharegpt4", + "jondurbin/airoboros-13b-gpt4-1.3", + "jondurbin/airoboros-c34b-2.1", + "junelee/wizard-vicuna-13b", + "TheBloke/wizard-vicuna-13B-HF", + "Open-Orca/OpenOrca-Preview1-13B", + "TheBloke/h2ogpt-oasst1-512-30B-HF", + "TheBloke/Llama-2-13B-GPTQ", + "camel-ai/CAMEL-13B-Combined-Data", + "lmsys/vicuna-7b-v1.5", + "lmsys/vicuna-7b-v1.5-16k", + "lmsys/vicuna-7b-v1.5", + "ausboss/llama-13b-supercot", + "TheBloke/tulu-13B-fp16", + "NousResearch/Nous-Hermes-llama-2-7b", + "jlevin/guanaco-13b-llama-2", + "lmsys/vicuna-7b-v1.5-16k", + "dvruette/llama-13b-pretrained", + "nkpz/llama2-22b-daydreamer-v3", + "dvruette/llama-13b-pretrained-dropout", + "jondurbin/airoboros-l2-13b-2.1", + "LLMs/Stable-Vicuna-13B", + "64bits/LexPodLM-13B", + "lizhuang144/llama_mirror_13b_v1.0", + "TheBloke/stable-vicuna-13B-HF", + "zarakiquemparte/zaraxls-l2-7b", + "TheBloke/Llama-2-13B-GPTQ", + "Kiddyz/testlm-3", + "migtissera/Synthia-7B", + "zarakiquemparte/zarablend-l2-7b", + "mosaicml/mpt-30b-instruct", + "PocketDoc/Dans-PileOfSets-Mk1-llama-13b-merged", + "vonjack/Qwen-LLaMAfied-HFTok-7B-Chat", + "l3utterfly/llama2-7b-layla", + "Lajonbot/vicuna-7b-v1.5-PL-lora_unload", + "heegyu/LIMA-13b-hf", + "frank098/WizardLM_13B_juniper", + "ashercn97/manatee-7b", + 
"chavinlo/gpt4-x-alpaca", + "PocketDoc/Dans-PersonalityEngine-13b", + "ehartford/WizardLM-1.0-Uncensored-CodeLlama-34b", + "digitous/Alpacino13b", + "edor/Hermes-Platypus2-mini-7B", + "lvkaokao/llama2-7b-hf-chat-lora-v2", + "Kiddyz/testlm-1-1", + "Kiddyz/testlm", + "Kiddyz/testlm-1", + "Kiddyz/testlm2", + "radm/Philosophy-Platypus2-13b", + "aiplanet/effi-13b", + "Harshvir/Llama-2-7B-physics", + "YeungNLP/firefly-ziya-13b", + "LinkSoul/Chinese-Llama-2-7b", + "PeanutJar/LLaMa-2-PeanutButter_v10-7B", + "OpenBuddy/openbuddy-llama2-13b-v11-bf16", + "StudentLLM/Alpagasus-2-13B-QLoRA-pipeline", + "meta-llama/Llama-2-13b-hf", + "WizardLM/WizardCoder-Python-34B-V1.0", + "dvruette/llama-13b-pretrained-sft-epoch-1", + "camel-ai/CAMEL-13B-Role-Playing-Data", + "ziqingyang/chinese-llama-2-13b", + "rombodawg/LosslessMegaCoder-llama2-7b-mini", + "TheBloke/koala-13B-HF", + "lmsys/vicuna-7b-delta-v1.1", + "eachadea/vicuna-7b-1.1", + "Ejafa/vicuna_7B_vanilla_1.1", + "lvkaokao/llama2-7b-hf-chat-lora", + "OpenBuddy/openbuddy-atom-13b-v9-bf16", + "Norquinal/llama-2-7b-claude-chat-rp", + "Danielbrdz/Barcenas-7b", + "heegyu/WizardVicuna2-13b-hf", + "meta-llama/Llama-2-7b-chat-hf", + "PeanutJar/LLaMa-2-PeanutButter_v14-7B", + "PeanutJar/LLaMa-2-PeanutButter_v4-7B", + "davzoku/cria-llama2-7b-v1.3", + "OpenBuddy/openbuddy-atom-13b-v9-bf16", + "lvkaokao/llama2-7b-hf-instruction-lora", + "Tap-M/Luna-AI-Llama2-Uncensored", + "ehartford/Samantha-1.11-7b", + "WizardLM/WizardCoder-Python-34B-V1.0", + "TheBloke/Manticore-13B-Chat-Pyg-Guanaco-SuperHOT-8K-GPTQ", + "Mikael110/llama-2-7b-guanaco-fp16", + "garage-bAInd/Platypus2-7B", + "PeanutJar/LLaMa-2-PeanutButter_v18_B-7B", + "mosaicml/mpt-30b", + "garage-bAInd/Platypus2-7B", + "huggingface/llama-13b", + "dvruette/oasst-llama-13b-1000-steps", + "jordiclive/gpt4all-alpaca-oa-codealpaca-lora-13b", + "huggyllama/llama-13b", + "Voicelab/trurl-2-7b", + "TFLai/llama-13b-4bit-alpaca", + "gywy/llama2-13b-chinese-v2", + "lmsys/longchat-13b-16k", + "Aspik101/trurl-2-7b-pl-instruct_unload", + "WizardLM/WizardMath-7B-V1.0", + "Norquinal/llama-2-7b-claude-chat", + "TheTravellingEngineer/llama2-7b-chat-hf-dpo", + "HuggingFaceH4/starchat-beta", + "joehuangx/spatial-vicuna-7b-v1.5-LoRA", + "conceptofmind/LLongMA-2-13b-16k", + "tianyil1/denas-llama2", + "lmsys/vicuna-7b-v1.3", + "conceptofmind/LLongMA-2-13b-16k", + "openchat/opencoderplus", + "ajibawa-2023/scarlett-7b", + "dhmeltzer/llama-7b-SFT_eli5_wiki65k_1024_r_64_alpha_16_merged", + "psyche/kollama2-7b-v2", + "heegyu/LIMA2-7b-hf", + "dhmeltzer/llama-7b-SFT-qlora-eli5-wiki_DPO_ds_RM_top_2_1024_r_64_alpha_16", + "abhishek/llama2guanacotest", + "jondurbin/airoboros-l2-7b-2.1", + "llama-anon/instruct-13b", + "FelixChao/vicuna-7B-physics", + "Aspik101/Llama-2-7b-hf-instruct-pl-lora_unload", + "shibing624/chinese-alpaca-plus-13b-hf", + "davzoku/cria-llama2-7b-v1.3_peft", + "quantumaikr/llama-2-7b-hf-guanaco-1k", + "togethercomputer/Llama-2-7B-32K-Instruct", + "sia-ai/llama-2-7b-1-percent-open-orca-1000-steps-v0", + "TheTravellingEngineer/llama2-7b-hf-guanaco", + "Lajonbot/Llama-2-7b-chat-hf-instruct-pl-lora_unload", + "jondurbin/airoboros-l2-7b-gpt4-1.4.1", + "wahaha1987/llama_7b_sharegpt94k_fastchat", + "FelixChao/vicuna-7B-chemical", + "TinyPixel/llama2-7b-oa", + "chaoyi-wu/MedLLaMA_13B", + "edor/Platypus2-mini-7B", + "RoversX/llama-2-7b-hf-small-shards-Samantha-V1-SFT", + "venkycs/llama-v2-7b-32kC-Security", + "psyche/kollama2-7b", + "Fredithefish/Guanaco-7B-Uncensored", + "TheTravellingEngineer/llama2-7b-chat-hf-guanaco", + 
"ehartford/WizardLM-13B-Uncensored", + "PocketDoc/Dans-CreepingSenseOfDoom", + "wenge-research/yayi-7b-llama2", + "georgesung/llama2_7b_chat_uncensored", + "TinyPixel/llama2-7b-instruct", + "quantumaikr/QuantumLM-7B", + "xzuyn/MedicWizard-7B", + "wenge-research/yayi-7b-llama2", + "TinyPixel/lima-test", + "elyza/ELYZA-japanese-Llama-2-7b-instruct", + "lgaalves/llama-2-7b-hf_open-platypus", + "ziqingyang/chinese-alpaca-2-7b", + "TehVenom/Pygmalion-Vicuna-1.1-7b", + "meta-llama/Llama-2-7b-hf", + "bongchoi/test-llama2-7b", + "TaylorAI/Flash-Llama-7B", + "TheTravellingEngineer/llama2-7b-chat-hf-v2", + "TheTravellingEngineer/llama2-7b-chat-hf-v4", + "kashif/stack-llama-2", + "PeanutJar/LLaMa-2-PeanutButter_v18_A-7B", + "ToolBench/ToolLLaMA-7b-LoRA", + "Monero/WizardLM-13b-OpenAssistant-Uncensored", + "TheTravellingEngineer/llama2-7b-chat-hf-v2", + "TheTravellingEngineer/llama2-7b-chat-hf-v4", + "mrm8488/llama-2-coder-7b", + "elyza/ELYZA-japanese-Llama-2-7b-fast-instruct", + "clibrain/Llama-2-7b-ft-instruct-es", + "medalpaca/medalpaca-7b", + "TheBloke/tulu-7B-fp16", + "OpenBuddy/openbuddy-openllama-13b-v7-fp16", + "TaylorAI/FLAN-Llama-7B-2_Llama2-7B-Flash_868_full_model", + "Aspik101/vicuna-7b-v1.3-instruct-pl-lora_unload", + "jondurbin/airoboros-l2-7b-gpt4-2.0", + "dhmeltzer/llama-7b-SFT_ds_eli5_1024_r_64_alpha_16_merged", + "GOAT-AI/GOAT-7B-Community", + "AtomEchoAI/AtomGPT_56k", + "julianweng/Llama-2-7b-chat-orcah", + "TehVenom/Pygmalion-13b-Merged", + "jondurbin/airoboros-7b-gpt4-1.1", + "dhmeltzer/llama-7b-SFT_ds_wiki65k_1024_r_64_alpha_16_merged", + "bofenghuang/vigogne-7b-chat", + "lmsys/longchat-7b-v1.5-32k", + "jondurbin/airoboros-l2-7b-gpt4-m2.0", + "synapsoft/Llama-2-7b-chat-hf-flan2022-1.2M", + "jondurbin/airoboros-7b-gpt4-1.4", + "Charlie911/vicuna-7b-v1.5-lora-mctaco", + "yihan6324/instructmining-platypus-15k", + "meta-llama/Llama-2-7b-hf", + "TheTravellingEngineer/llama2-7b-chat-hf-v3", + "quantumaikr/KoreanLM-hf", + "openthaigpt/openthaigpt-1.0.0-alpha-7b-chat-ckpt-hf", + "TheBloke/Llama-2-7B-GPTQ", + "TheBloke/Llama-2-7B-GPTQ", + "LLMs/AlpacaGPT4-7B-elina", + "ehartford/Wizard-Vicuna-7B-Uncensored", + "TheBloke/Wizard-Vicuna-7B-Uncensored-HF", + "TheTravellingEngineer/llama2-7b-chat-hf-v3", + "golaxy/gowizardlm", + "ehartford/dolphin-llama2-7b", + "CHIH-HUNG/llama-2-7b-dolphin_10w-test", + "mncai/chatdoctor", + "psyche/kollama2-7b-v3", + "jondurbin/airoboros-7b-gpt4", + "jondurbin/airoboros-7b", + "TheBloke/airoboros-7b-gpt4-fp16", + "mosaicml/mpt-7b-8k-chat", + "elyza/ELYZA-japanese-Llama-2-7b", + "bofenghuang/vigogne-7b-instruct", + "jxhong/CAlign-alpaca-7b", + "golaxy/goims", + "jondurbin/airoboros-7b-gpt4-1.2", + "jphme/orca_mini_v2_ger_7b", + "psmathur/orca_mini_v2_7b", + "notstoic/PygmalionCoT-7b", + "golaxy/gogpt2-13b", + "golaxy/gogpt2-13b-chat", + "togethercomputer/LLaMA-2-7B-32K", + "TheBloke/wizardLM-7B-HF", + "keyfan/vicuna-chinese-replication-v1.1", + "golaxy/gogpt2-7b", + "aiplanet/effi-7b", + "arver/llama7b-qlora", + "titan087/OpenLlama13B-Guanaco", + "chavinlo/alpaca-native", + "project-baize/baize-healthcare-lora-7B", + "AlpinDale/pygmalion-instruct", + "openlm-research/open_llama_13b", + "jondurbin/airoboros-7b-gpt4-1.3", + "elyza/ELYZA-japanese-Llama-2-7b-fast", + "jondurbin/airoboros-gpt-3.5-turbo-100k-7b", + "uukuguy/speechless-codellama-orca-13b", + "bigcode/starcoderplus", + "TheBloke/guanaco-7B-HF", + "Neko-Institute-of-Science/metharme-7b", + "TigerResearch/tigerbot-7b-base", + "golaxy/gogpt-7b", + "togethercomputer/LLaMA-2-7B-32K", + 
"yhyhy3/open_llama_7b_v2_med_instruct", + "ajibawa-2023/carl-7b", + "stabilityai/stablelm-base-alpha-7b-v2", + "conceptofmind/LLongMA-2-7b-16k", + "TehVenom/Pygmalion_AlpacaLora-7b", + "jondurbin/airoboros-7b-gpt4-1.4.1-qlora", + "wannaphong/openthaigpt-0.1.0-beta-full-model_for_open_llm_leaderboard", + "ausboss/llama7b-wizardlm-unfiltered", + "project-baize/baize-v2-7b", + "LMFlow/Robin-v2", + "HanningZhang/Robin-v2", + "LMFlow/Robin-7b-v2", + "OptimalScale/robin-7b-v2-delta", + "uukuguy/speechless-codellama-platypus-13b", + "jerryjalapeno/nart-100k-7b", + "wenge-research/yayi-13b-llama2", + "fireballoon/baichuan-vicuna-chinese-7b", + "jlevin/guanaco-unchained-llama-2-7b", + "csitfun/llama-7b-logicot", + "DevaMalla/llama7b_alpaca_1gpu_bf16", + "WeOpenML/PandaLM-Alpaca-7B-v1", + "illuin/test-custom-llama", + "yeontaek/WizardCoder-Python-13B-LoRa", + "ashercn97/giraffe-7b", + "mosaicml/mpt-7b-chat", + "abhishek/autotrain-llama-alpaca-peft-52508123785", + "Neko-Institute-of-Science/pygmalion-7b", + "TFLai/llama-7b-4bit-alpaca", + "huggingface/llama-7b", + "TheBloke/Planner-7B-fp16", + "shibing624/chinese-llama-plus-13b-hf", + "AGI-inc/lora_moe_7b_baseline", + "DevaMalla/llama-base-7b", + "AGI-inc/lora_moe_7b", + "togethercomputer/GPT-JT-6B-v0", + "ehartford/WizardLM-7B-Uncensored", + "shibing624/chinese-alpaca-plus-7b-hf", + "beomi/llama-2-ko-7b", + "mosaicml/mpt-7b-8k-instruct", + "Enno-Ai/ennodata-7b", + "mosaicml/mpt-7b-instruct", + "facebook/opt-iml-max-30b", + "WeOpenML/Alpaca-7B-v1", + "TheBloke/Project-Baize-v2-7B-GPTQ", + "codellama/CodeLlama-13b-Instruct-hf", + "TheBloke/CodeLlama-13B-Instruct-fp16", + "facebook/galactica-30b", + "FreedomIntelligence/phoenix-inst-chat-7b", + "openlm-research/open_llama_7b_v2", + "GeorgiaTechResearchInstitute/galpaca-30b", + "THUDM/chatglm2-6b", + "togethercomputer/GPT-JT-6B-v1", + "TheBloke/koala-7B-HF", + "nathan0/mpt_delta_tuned_model_v3", + "nathan0/mpt_delta_tuned_model_v2", + "GeorgiaTechResearchInstitute/galpaca-30b", + "JosephusCheung/Guanaco", + "shareAI/CodeLLaMA-chat-13b-Chinese", + "TigerResearch/tigerbot-7b-sft", + "Writer/InstructPalmyra-20b", + "OpenAssistant/codellama-13b-oasst-sft-v10", + "bigscience/bloomz-7b1-mt", + "nathan0/mpt_delta_tuned_model_v3", + "VMware/open-llama-7b-open-instruct", + "baichuan-inc/Baichuan-7B", + "anas-awadalla/mpt-7b", + "mosaicml/mpt-7b", + "bigscience/bloomz-7b1", + "ziqingyang/chinese-llama-2-7b", + "OpenAssistant/codellama-13b-oasst-sft-v10", + "wenge-research/yayi-7b", + "tiiuae/falcon-7b", + "togethercomputer/RedPajama-INCITE-Instruct-7B-v0.1", + "togethercomputer/RedPajama-INCITE-7B-Instruct", + "TheBloke/landmark-attention-llama7b-fp16", + "togethercomputer/GPT-JT-Moderation-6B", + "h2oai/h2ogpt-gm-oasst1-en-1024-20b", + "dvruette/gpt-neox-20b-full-precision", + "TehVenom/Moderator-Chan_GPT-JT-6b", + "dvruette/oasst-gpt-neox-20b-1000-steps", + "AlekseyKorshuk/pygmalion-6b-vicuna-chatml", + "facebook/opt-66b", + "Salesforce/codegen-16B-nl", + "Vmware/open-llama-7b-v2-open-instruct", + "mosaicml/mpt-7b-storywriter", + "acrastt/Marx-3B-V2", + "openlm-research/open_llama_7b", + "Fredithefish/ReasonixPajama-3B-HF", + "togethercomputer/GPT-NeoXT-Chat-Base-20B", + "psmathur/orca_mini_13b", + "RWKV/rwkv-raven-14b", + "h2oai/h2ogpt-oasst1-512-20b", + "acrastt/Marx-3B", + "klosax/open_llama_13b_600bt_preview", + "synapsoft/Llama-2-7b-hf-flan2022-1.2M", + "OpenAssistant/oasst-sft-1-pythia-12b", + "golaxy/gogpt-7b-bloom", + "Writer/palmyra-large", + "psmathur/orca_mini_7b", + 
"dvruette/oasst-pythia-12b-6000-steps", + "NousResearch/CodeLlama-13b-hf", + "codellama/CodeLlama-13b-hf", + "h2oai/h2ogpt-gm-oasst1-multilang-1024-20b", + "VMware/open-llama-0.7T-7B-open-instruct-v1.1", + "dvruette/oasst-pythia-12b-flash-attn-5000-steps", + "dvruette/oasst-gpt-neox-20b-3000-steps", + "RobbeD/OpenLlama-Platypus-3B", + "facebook/opt-30b", + "acrastt/Puma-3B", + "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", + "dvruette/oasst-pythia-12b-pretrained-sft", + "digitous/GPT-R", + "acrastt/Griffin-3B", + "togethercomputer/RedPajama-INCITE-Base-7B-v0.1", + "togethercomputer/RedPajama-INCITE-7B-Base", + "CobraMamba/mamba-gpt-3b-v3", + "Danielbrdz/CodeBarcenas-7b", + "l3utterfly/open-llama-3b-v2-layla", + "CobraMamba/mamba-gpt-3b-v2", + "OpenAssistant/pythia-12b-sft-v8-7k-steps", + "KoboldAI/GPT-NeoX-20B-Erebus", + "RobbeD/Orca-Platypus-3B", + "h2oai/h2ogpt-gm-oasst1-en-1024-12b", + "OpenAssistant/pythia-12b-sft-v8-2.5k-steps", + "AlekseyKorshuk/chatml-pyg-v1", + "togethercomputer/RedPajama-INCITE-Chat-7B-v0.1", + "togethercomputer/RedPajama-INCITE-7B-Chat", + "digitous/Javelin-R", + "dvruette/oasst-pythia-12b-reference", + "EleutherAI/gpt-neox-20b", + "KoboldAI/fairseq-dense-13B", + "OpenAssistant/pythia-12b-sft-v8-rlhf-2k-steps", + "codellama/CodeLlama-7b-Instruct-hf", + "digitous/Javelin-GPTJ", + "KoboldAI/GPT-NeoX-20B-Skein", + "digitous/Javalion-R", + "h2oai/h2ogpt-oasst1-512-12b", + "acrastt/Bean-3B", + "KoboldAI/GPT-J-6B-Skein", + "nomic-ai/gpt4all-j", + "databricks/dolly-v2-12b", + "TehVenom/Dolly_Shygmalion-6b-Dev_V8P2", + "databricks/dolly-v2-7b", + "Aspik101/WizardVicuna-Uncensored-3B-instruct-PL-lora_unload", + "digitous/Adventien-GPTJ", + "openlm-research/open_llama_3b_v2", + "RWKV/rwkv-4-14b-pile", + "Lazycuber/Janemalion-6B", + "OpenAssistant/pythia-12b-pre-v8-12.5k-steps", + "digitous/Janin-R", + "kfkas/Llama-2-ko-7b-Chat", + "heegyu/WizardVicuna-Uncensored-3B-0719", + "h2oai/h2ogpt-gm-oasst1-en-1024-open-llama-7b-preview-400bt", + "TaylorAI/Flash-Llama-3B", + "kfkas/Llama-2-ko-7b-Chat", + "digitous/Skegma-GPTJ", + "digitous/Javalion-GPTJ", + "Pirr/pythia-13b-deduped-green_devil", + "TehVenom/PPO_Shygmalion-V8p4_Dev-6b", + "dvruette/oasst-pythia-6.9b-4000-steps", + "heegyu/WizardVicuna-3B-0719", + "psmathur/orca_mini_3b", + "OpenAssistant/galactica-6.7b-finetuned", + "frank098/orca_mini_3b_juniper", + "PygmalionAI/pygmalion-6b", + "TehVenom/PPO_Pygway-V8p4_Dev-6b", + "TFLai/gpt-neox-20b-4bit-alpaca", + "Corianas/gpt-j-6B-Dolly", + "TehVenom/Dolly_Shygmalion-6b", + "digitous/Janin-GPTJ", + "TehVenom/GPT-J-Pyg_PPO-6B-Dev-V8p4", + "EleutherAI/gpt-j-6b", + "KoboldAI/GPT-J-6B-Shinen", + "TehVenom/Dolly_Malion-6b", + "TehVenom/ChanMalion", + "Salesforce/codegen-6B-nl", + "Fredithefish/RedPajama-INCITE-Chat-3B-Instruction-Tuning-with-GPT-4", + "KoboldAI/GPT-J-6B-Janeway", + "togethercomputer/RedPajama-INCITE-Chat-3B-v1", + "togethercomputer/Pythia-Chat-Base-7B", + "heegyu/RedTulu-Uncensored-3B-0719", + "KoboldAI/PPO_Pygway-6b-Mix", + "KoboldAI/OPT-13B-Erebus", + "KoboldAI/fairseq-dense-6.7B", + "EleutherAI/pythia-12b-deduped", + "pszemraj/pythia-6.9b-HC3", + "Fredithefish/Guanaco-3B-Uncensored-v2", + "facebook/opt-13b", + "TehVenom/GPT-J-Pyg_PPO-6B", + "EleutherAI/pythia-6.9b-deduped", + "Devio/test-1400", + "Fredithefish/Guanaco-3B-Uncensored", + "codellama/CodeLlama-7b-hf", + "acrastt/RedPajama-INCITE-Chat-Instruct-3B-V1", + "Fredithefish/ScarletPajama-3B-HF", + "KoboldAI/OPT-13B-Nerybus-Mix", + "YeungNLP/firefly-bloom-7b1", + 
"DanielSc4/RedPajama-INCITE-Chat-3B-v1-RL-LoRA-8bit-test1", + "klosax/open_llama_7b_400bt_preview", + "KoboldAI/OPT-13B-Nerys-v2", + "TehVenom/PPO_Shygmalion-6b", + "amazon/LightGPT", + "KnutJaegersberg/black_goo_recipe_c", + "NousResearch/CodeLlama-7b-hf", + "togethercomputer/RedPajama-INCITE-Instruct-3B-v1", + "heegyu/WizardVicuna-open-llama-3b-v2", + "bigscience/bloom-7b1", + "Devio/test-22B", + "RWKV/rwkv-raven-7b", + "hakurei/instruct-12b", + "CobraMamba/mamba-gpt-3b", + "KnutJaegersberg/black_goo_recipe_a", + "acrastt/OmegLLaMA-3B", + "codellama/CodeLlama-7b-Instruct-hf", + "h2oai/h2ogpt-oig-oasst1-512-6_9b", + "KoboldAI/OPT-6.7B-Erebus", + "facebook/opt-6.7b", + "KnutJaegersberg/black_goo_recipe_d", + "KnutJaegersberg/LLongMA-3b-LIMA", + "KnutJaegersberg/black_goo_recipe_b", + "KoboldAI/OPT-6.7B-Nerybus-Mix", + "health360/Healix-3B", + "EleutherAI/pythia-12b", + "Fredithefish/RedPajama-INCITE-Chat-3B-ShareGPT-11K", + "GeorgiaTechResearchInstitute/galactica-6.7b-evol-instruct-70k", + "h2oai/h2ogpt-oig-oasst1-256-6_9b", + "ikala/bloom-zh-3b-chat", + "Taekyoon/llama2-ko-7b-test", + "anhnv125/pygmalion-6b-roleplay", + "TehVenom/DiffMerge_Pygmalion_Main-onto-V8P4", + "KoboldAI/OPT-6B-nerys-v2", + "Lazycuber/pyg-instruct-wizardlm", + "Devio/testC", + "KoboldAI/OPT-30B-Erebus", + "Fredithefish/CrimsonPajama", + "togethercomputer/RedPajama-INCITE-Base-3B-v1", + "bigscience/bloomz-3b", + "conceptofmind/Open-LLongMA-3b", + "RWKV/rwkv-4-7b-pile", + "openlm-research/open_llama_3b", + "ewof/koishi-instruct-3b", + "DanielSc4/RedPajama-INCITE-Chat-3B-v1-FT-LoRA-8bit-test1", + "cerebras/Cerebras-GPT-13B", + "EleutherAI/pythia-6.7b", + "aisquared/chopt-2_7b", + "Azure99/blossom-v1-3b", + "PSanni/Deer-3b", + "bertin-project/bertin-gpt-j-6B-alpaca", + "OpenBuddy/openbuddy-openllama-3b-v10-bf16", + "KoboldAI/fairseq-dense-2.7B", + "ehartford/CodeLlama-34b-Instruct-hf", + "codellama/CodeLlama-34b-Instruct-hf", + "TheBloke/CodeLlama-34B-Instruct-fp16", + "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2", + "openlm-research/open_llama_7b_700bt_preview", + "NbAiLab/nb-gpt-j-6B-alpaca", + "KoboldAI/OPT-2.7B-Erebus", + "Writer/camel-5b-hf", + "EleutherAI/pythia-2.7b", + "facebook/xglm-7.5B", + "EleutherAI/pythia-2.8b-deduped", + "klosax/open_llama_3b_350bt_preview", + "klosax/openllama-3b-350bt", + "KoboldAI/OPT-2.7B-Nerybus-Mix", + "KoboldAI/GPT-J-6B-Adventure", + "cerebras/Cerebras-GPT-6.7B", + "TFLai/pythia-2.8b-4bit-alpaca", + "facebook/opt-2.7b", + "KoboldAI/OPT-2.7B-Nerys-v2", + "bigscience/bloom-3b", + "Devio/test100", + "RWKV/rwkv-raven-3b", + "Azure99/blossom-v2-3b", + "codellama/CodeLlama-34b-Python-hf", + "bhenrym14/airoboros-33b-gpt4-1.4.1-PI-8192-fp16", + "EleutherAI/gpt-neo-2.7B", + "danielhanchen/open_llama_3b_600bt_preview", + "HuggingFaceH4/starchat-alpha", + "pythainlp/wangchanglm-7.5B-sft-en-sharded", + "beaugogh/pythia-1.4b-deduped-sharegpt", + "HWERI/pythia-1.4b-deduped-sharegpt", + "OpenAssistant/stablelm-7b-sft-v7-epoch-3", + "codellama/CodeLlama-7b-Python-hf", + "aisquared/chopt-1_3b", + "PygmalionAI/metharme-1.3b", + "Linly-AI/Chinese-LLaMA-2-13B-hf", + "chargoddard/llama-2-34b-uncode", + "RWKV/rwkv-4-3b-pile", + "pythainlp/wangchanglm-7.5B-sft-enth", + "MBZUAI/LaMini-GPT-1.5B", + "Writer/palmyra-base", + "KoboldAI/fairseq-dense-1.3B", + "EleutherAI/pythia-1.4b-deduped", + "MBZUAI/lamini-neo-1.3b", + "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt", + "sartmis1/starcoder-finetune-openapi", + "MayaPH/opt-flan-iml-6.7b", + "facebook/xglm-4.5B", + 
"WizardLM/WizardCoder-15B-V1.0", + "facebook/opt-iml-max-1.3b", + "stabilityai/stablelm-tuned-alpha-7b", + "aisquared/dlite-v2-1_5b", + "stabilityai/stablelm-base-alpha-7b", + "sartmis1/starcoder-finetune-selfinstruct", + "lizhuang144/starcoder_mirror", + "bigcode/starcoder", + "TheBloke/CodeLlama-34B-Python-fp16", + "open-llm-leaderboard/bloomz-1b7-4bit-alpaca-auto-eval-adapter-applied", + "ehartford/CodeLlama-34b-Python-hf", + "codellama/CodeLlama-7b-Python-hf", + "GeorgiaTechResearchInstitute/starcoder-gpteacher-code-instruct", + "LoupGarou/WizardCoder-Guanaco-15B-V1.0", + "golaxy/gogpt-3b-bloom", + "EleutherAI/pythia-1.3b", + "codellama/CodeLlama-13b-Python-hf", + "hakurei/lotus-12B", + "NYTK/PULI-GPTrio", + "facebook/opt-1.3b", + "TheBloke/CodeLlama-13B-Python-fp16", + "codellama/CodeLlama-13b-Python-hf", + "RWKV/rwkv-raven-1b5", + "PygmalionAI/pygmalion-2.7b", + "bigscience/bloom-1b7", + "gpt2-xl", + "LoupGarou/WizardCoder-Guanaco-15B-V1.1", + "RWKV/rwkv-4-1b5-pile", + "codellama/CodeLlama-34b-hf", + "NousResearch/CodeLlama-34b-hf", + "rinna/bilingual-gpt-neox-4b-8k", + "lxe/Cerebras-GPT-2.7B-Alpaca-SP", + "cerebras/Cerebras-GPT-2.7B", + "jzjiao/opt-1.3b-rlhf", + "EleutherAI/gpt-neo-1.3B", + "aisquared/dlite-v1-1_5b", + "Corianas/Quokka_2.7b", + "MrNJK/gpt2-xl-sft", + "facebook/galactica-1.3b", + "aisquared/dlite-v2-774m", + "EleutherAI/pythia-1b-deduped", + "Kunhao/pile-7b-250b-tokens", + "w601sxs/b1ade-1b", + "rinna/bilingual-gpt-neox-4b", + "shaohang/SparseOPT-1.3B", + "shaohang/Sparse0.5_OPT-1.3", + "EleutherAI/polyglot-ko-12.8b", + "Salesforce/codegen-6B-multi", + "bigscience/bloom-1b1", + "TFLai/gpt-neo-1.3B-4bit-alpaca", + "FabbriSimo01/Bloom_1b_Quantized", + "MBZUAI/LaMini-GPT-774M", + "Locutusque/gpt2-large-conversational", + "Devio/test-3b", + "stabilityai/stablelm-tuned-alpha-3b", + "PygmalionAI/pygmalion-1.3b", + "KoboldAI/fairseq-dense-355M", + "Rachneet/gpt2-xl-alpaca", + "gpt2-large", + "Mikivis/gpt2-large-lora-sft", + "stabilityai/stablelm-base-alpha-3b", + "gpt2-medium", + "Kunhao/pile-7b", + "aisquared/dlite-v1-774m", + "aisquared/dlite-v2-355m", + "YeungNLP/firefly-bloom-2b6-v2", + "KnutJaegersberg/gpt-2-xl-EvolInstruct", + "KnutJaegersberg/galactica-orca-wizardlm-1.3b", + "cerebras/Cerebras-GPT-1.3B", + "FabbriSimo01/Cerebras_1.3b_Quantized", + "facebook/xglm-1.7B", + "EleutherAI/pythia-410m-deduped", + "TheBloke/GPlatty-30B-SuperHOT-8K-fp16", + "DataLinguistic/DataLinguistic-34B-V1.0", + "Corianas/Quokka_1.3b", + "TheTravellingEngineer/bloom-560m-RLHF-v2", + "Corianas/1.3b", + "RWKV/rwkv-4-430m-pile", + "porkorbeef/Llama-2-13b-sf", + "xhyi/PT_GPTNEO350_ATG", + "TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ", + "bigscience/bloomz-560m", + "TheBloke/medalpaca-13B-GPTQ-4bit", + "TheBloke/Vicuna-33B-1-3-SuperHOT-8K-fp16", + "aisquared/dlite-v1-355m", + "uukuguy/speechless-codellama-orca-airoboros-13b-0.10e", + "yhyhy3/med-orca-instruct-33b", + "TheBloke/Wizard-Vicuna-30B-Superhot-8K-fp16", + "TheTravellingEngineer/bloom-1b1-RLHF", + "MBZUAI/lamini-cerebras-1.3b", + "IDEA-CCNL/Ziya-LLaMA-13B-Pretrain-v1", + "TheBloke/WizardLM-7B-uncensored-GPTQ", + "TheBloke/EverythingLM-13B-16K-GPTQ", + "quantumaikr/open_llama_7b_hf", + "TheBloke/chronos-wizardlm-uc-scot-st-13B-GPTQ", + "TheBloke/WizardLM-30B-Uncensored-GPTQ", + "IDEA-CCNL/Ziya-LLaMA-13B-v1", + "Phind/Phind-CodeLlama-34B-v1", + "robowaifudev/megatron-gpt2-345m", + "MayaPH/GodziLLa-30B-instruct", + "TheBloke/CAMEL-33B-Combined-Data-SuperHOT-8K-fp16", + "uukuguy/speechless-codellama-orca-platypus-13b-0.10e", + 
"doas/test2", + "BreadAi/PM_modelV2", + "bigcode/santacoder", + "TheBloke/wizard-vicuna-13B-GPTQ", + "porkorbeef/Llama-2-13b", + "TehVenom/DiffMerge-DollyGPT-Pygmalion", + "PygmalionAI/pygmalion-350m", + "TheBloke/orca_mini_v3_7B-GPTQ", + "TheBloke/WizardLM-Uncensored-SuperCOT-StoryTelling-30B-GPTQ", + "TheBloke/WizardLM-30B-GPTQ", + "bigscience/bloom-560m", + "TFLai/gpt2-turkish-uncased", + "TheBloke/guanaco-33B-GPTQ", + "TheBloke/openchat_v2_openorca_preview-GPTQ", + "porkorbeef/Llama-2-13b-public", + "TheBloke/LongChat-13B-GPTQ", + "yhyhy3/med-orca-instruct-33b", + "TheBloke/airoboros-33B-gpt4-1-4-SuperHOT-8K-fp16", + "TheBloke/Chinese-Alpaca-33B-SuperHOT-8K-fp16", + "MayaPH/FinOPT-Franklin", + "TheBloke/WizardLM-33B-V1.0-Uncensored-GPTQ", + "TheBloke/Project-Baize-v2-13B-GPTQ", + "malhajar/Platypus2-70B-instruct-4bit-gptq", + "KoboldAI/OPT-350M-Erebus", + "rishiraj/bloom-560m-guanaco", + "Panchovix/WizardLM-33B-V1.0-Uncensored-SuperHOT-8k", + "doas/test5", + "vicgalle/alpaca-7b", + "beomi/KoAlpaca-Polyglot-5.8B", + "Phind/Phind-CodeLlama-34B-Python-v1", + "timdettmers/guanaco-65b-merged", + "TheBloke/wizard-mega-13B-GPTQ", + "MayaPH/GodziLLa-30B-plus", + "TheBloke/Platypus-30B-SuperHOT-8K-fp16", + "facebook/opt-350m", + "KoboldAI/OPT-350M-Nerys-v2", + "TheBloke/robin-33B-v2-GPTQ", + "jaspercatapang/Echidna-30B", + "TheBloke/llama-30b-supercot-SuperHOT-8K-fp16", + "marcchew/test1", + "Harshvir/LaMini-Neo-1.3B-Mental-Health_lora", + "golaxy/gogpt-560m", + "TheBloke/orca_mini_13B-GPTQ", + "Panchovix/airoboros-33b-gpt4-1.2-SuperHOT-8k", + "Aspik101/tulu-7b-instruct-pl-lora_unload", + "Phind/Phind-CodeLlama-34B-v2", + "BreadAi/MusePy-1-2", + "cerebras/Cerebras-GPT-590M", + "microsoft/CodeGPT-small-py", + "victor123/WizardLM-13B-1.0", + "OptimalScale/robin-65b-v2-delta", + "voidful/changpt-bart", + "FabbriSimo01/GPT_Large_Quantized", + "MayaPH/FinOPT-Lincoln", + "KoboldAI/fairseq-dense-125M", + "SebastianSchramm/Cerebras-GPT-111M-instruction", + "TheTravellingEngineer/bloom-560m-RLHF", + "breadlicker45/dough-instruct-base-001", + "WizardLM/WizardLM-30B-V1.0", + "WizardLM/WizardLM-30B-V1.0", + "WizardLM/WizardLM-30B-V1.0", + "TaylorAI/Flash-Llama-30M-20001", + "porkorbeef/Llama-2-13b-12_153950", + "huggingtweets/bladeecity-jerma985", + "KnutJaegersberg/megatron-GPT-2-345m-EvolInstruct", + "bhenrym14/airoboros-33b-gpt4-1.4.1-lxctx-PI-16384-fp16", + "microsoft/DialoGPT-small", + "Corianas/590m", + "facebook/xglm-564M", + "EleutherAI/gpt-neo-125m", + "EleutherAI/pythia-160m-deduped", + "klosax/pythia-160m-deduped-step92k-193bt", + "MBZUAI/lamini-neo-125m", + "bigcode/tiny_starcoder_py", + "concedo/OPT-19M-ChatSalad", + "anton-l/gpt-j-tiny-random", + "grantprice/Cerebras-GPT-590M-finetuned-DND", + "deepnight-research/zsc-text", + "WangZeJun/bloom-820m-chat", + "cerebras/Cerebras-GPT-256M", + "ai-forever/rugpt3large_based_on_gpt2", + "alibidaran/medical_transcription_generator", + "Deci/DeciCoder-1b", + "microsoft/DialoGPT-medium", + "ogimgio/gpt-neo-125m-neurallinguisticpioneers", + "open-llm-leaderboard/bloom-560m-4bit-alpaca-auto-eval-adapter-applied", + "BreadAi/gpt-YA-1-1_160M", + "microsoft/DialoGPT-large", + "facebook/opt-125m", + "huggingtweets/jerma985", + "Locutusque/gpt2-conversational-or-qa", + "concedo/Pythia-70M-ChatSalad", + "roneneldan/TinyStories-1M", + "BreadAi/DiscordPy", + "bigcode/gpt_bigcode-santacoder", + "Tincando/fiction_story_generator", + "klosax/pythia-70m-deduped-step44k-92bt", + "Quake24/easyTermsSummerizer", + "BreadAi/gpt-YA-1-1_70M", + "EleutherAI/pythia-160m", + 
"euclaise/gpt-neox-122m-minipile-digits", + "MBZUAI/lamini-cerebras-590m", + "nicholasKluge/Aira-124M", + "MayaPH/FinOPT-Washington", + "cyberagent/open-calm-large", + "BreadAi/StoryPy", + "EleutherAI/pythia-70m", + "BreadAi/gpt-Youtube", + "roneneldan/TinyStories-33M", + "EleutherAI/pythia-70m-deduped", + "lgaalves/gpt2_guanaco-dolly-platypus", + "Corianas/Quokka_590m", + "lgaalves/gpt2_platypus-dolly-guanaco", + "cyberagent/open-calm-7b", + "RWKV/rwkv-4-169m-pile", + "gpt2", + "roneneldan/TinyStories-28M", + "lgaalves/gpt2_open-platypus", + "gpt2", + "SaylorTwift/gpt2_test", + "roneneldan/TinyStories-3M", + "nthngdy/pythia-owt2-70m-50k", + "Corianas/256_5epoch", + "roneneldan/TinyStories-8M", + "lgaalves/gpt2-dolly", + "nthngdy/pythia-owt2-70m-100k", + "aisquared/dlite-v2-124m", + "mncai/SGPT-1.3B-insurance-epoch10", + "huggingtweets/gladosystem", + "abhiramtirumala/DialoGPT-sarcastic-medium", + "MBZUAI/lamini-cerebras-256m", + "cerebras/Cerebras-GPT-111M", + "uberkie/metharme-1.3b-finetuned", + "MBZUAI/lamini-cerebras-111m", + "psyche/kogpt", + "Corianas/Quokka_256m", + "vicgalle/gpt2-alpaca-gpt4", + "aisquared/dlite-v1-124m", + "Mikivis/xuanxuan", + "MBZUAI/LaMini-GPT-124M", + "vicgalle/gpt2-alpaca", + "huashiyiqike/testmodel", + "Corianas/111m", + "baseline", +] diff --git a/src/tools/plots.py b/src/tools/plots.py new file mode 100644 index 0000000000000000000000000000000000000000..1b81e583246dadf02438c4d61c93fa71b25be187 --- /dev/null +++ b/src/tools/plots.py @@ -0,0 +1,156 @@ +import pandas as pd +import numpy as np +import plotly.express as px +from plotly.graph_objs import Figure + +from src.leaderboard.filter_models import FLAGGED_MODELS +from src.display.utils import human_baseline_row as HUMAN_BASELINE, AutoEvalColumn, Tasks, Task, BENCHMARK_COLS +from src.leaderboard.read_evals import EvalResult + + + +def create_scores_df(raw_data: list[EvalResult]) -> pd.DataFrame: + """ + Generates a DataFrame containing the maximum scores until each date. + + :param results_df: A DataFrame containing result information including metric scores and dates. + :return: A new DataFrame containing the maximum scores until each date for every metric. 
+ """ + # Step 1: Ensure 'date' is in datetime format and sort the DataFrame by it + results_df = pd.DataFrame(raw_data) + #results_df["date"] = pd.to_datetime(results_df["date"], format="mixed", utc=True) + results_df.sort_values(by="date", inplace=True) + + # Step 2: Initialize the scores dictionary + scores = {k: [] for k in BENCHMARK_COLS + [AutoEvalColumn.average.name]} + + # Step 3: Iterate over the rows of the DataFrame and update the scores dictionary + for task in [t.value for t in Tasks] + [Task("Average", "avg", AutoEvalColumn.average.name)]: + current_max = 0 + last_date = "" + column = task.col_name + for _, row in results_df.iterrows(): + current_model = row["full_model"] + # We ignore models that are flagged/no longer on the hub/not finished + to_ignore = not row["still_on_hub"] or row["flagged"] or current_model in FLAGGED_MODELS or row["status"] != "FINISHED" + if to_ignore: + continue + + current_date = row["date"] + if task.benchmark == "Average": + current_score = np.mean(list(row["results"].values())) + else: + current_score = row["results"][task.benchmark] + + if current_score > current_max: + if current_date == last_date and len(scores[column]) > 0: + scores[column][-1] = {"model": current_model, "date": current_date, "score": current_score} + else: + scores[column].append({"model": current_model, "date": current_date, "score": current_score}) + current_max = current_score + last_date = current_date + + # Step 4: Return all dictionaries as DataFrames + return {k: pd.DataFrame(v) for k, v in scores.items()} + + +def create_plot_df(scores_df: dict[str: pd.DataFrame]) -> pd.DataFrame: + """ + Transforms the scores DataFrame into a new format suitable for plotting. + + :param scores_df: A DataFrame containing metric scores and dates. + :return: A new DataFrame reshaped for plotting purposes. + """ + # Initialize the list to store DataFrames + dfs = [] + + # Iterate over the cols and create a new DataFrame for each column + for col in BENCHMARK_COLS + [AutoEvalColumn.average.name]: + d = scores_df[col].reset_index(drop=True) + d["task"] = col + dfs.append(d) + + # Concatenate all the created DataFrames + concat_df = pd.concat(dfs, ignore_index=True) + + # Sort values by 'date' + concat_df.sort_values(by="date", inplace=True) + concat_df.reset_index(drop=True, inplace=True) + return concat_df + + +def create_metric_plot_obj( + df: pd.DataFrame, metrics: list[str], title: str +) -> Figure: + """ + Create a Plotly figure object with lines representing different metrics + and horizontal dotted lines representing human baselines. + + :param df: The DataFrame containing the metric values, names, and dates. + :param metrics: A list of strings representing the names of the metrics + to be included in the plot. + :param title: A string representing the title of the plot. + :return: A Plotly figure object with lines representing metrics and + horizontal dotted lines representing human baselines. + """ + + # Filter the DataFrame based on the specified metrics + df = df[df["task"].isin(metrics)] + + # Filter the human baselines based on the specified metrics + filtered_human_baselines = {k: v for k, v in HUMAN_BASELINE.items() if k in metrics} + + # Create a line figure using plotly express with specified markers and custom data + fig = px.line( + df, + x="date", + y="score", + color="task", + markers=True, + custom_data=["task", "score", "model"], + title=title, + ) + + # Update hovertemplate for better hover interaction experience + fig.update_traces( + hovertemplate="