# source: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/blob/main/src/utils_display.py
"""Display helpers: column definitions, HTML snippets and dataframe filters."""
from dataclasses import dataclass
from typing import Optional, Tuple

import plotly.graph_objects as go
from transformers import AutoConfig


# These classes are for user facing column names, to avoid having to change them
# all around the code when a modif is needed
@dataclass
class ColumnContent:
    """Description of one user-facing table column."""

    name: str  # header text shown for the column
    type: str  # datatype label, e.g. "str", "number", "markdown", "bool"
    displayed_by_default: bool
    hidden: bool = False


def fields(raw_class):
    """Return the values of all non-dunder attributes in ``raw_class.__dict__``.

    An attribute is skipped when its name starts *or* ends with a double
    underscore. Only the class's own ``__dict__`` is inspected, so inherited
    attributes are not included.
    """
    return [
        value
        for key, value in raw_class.__dict__.items()
        if not key.startswith("__") and not key.endswith("__")
    ]


@dataclass(frozen=True)
class AutoEvalColumn:  # Auto evals column
    """Columns of the evaluation results table.

    The attributes carry no annotations, so they are plain class attributes
    rather than dataclass fields; ``fields()`` reads them from the class
    ``__dict__``.
    """

    model_type_symbol = ColumnContent("T", "str", True)
    model = ColumnContent("Models", "markdown", True)
    ARC = ColumnContent("ARC", "number", True)
    HellaSwag = ColumnContent("HellaSwag", "number", True)
    MMLU = ColumnContent("MMLU", "number", True)
    TruthfulQA = ColumnContent("TruthfulQA", "number", True)
    Winogrande = ColumnContent("Winogrande", "number", True)
    GSM8K = ColumnContent("GSM8K", "number", True)
    dummy = ColumnContent("Models", "str", True)
    ref_model = ColumnContent("Reference Model", "str", True)


def model_hyperlink(link, model_name):
    """Return an HTML anchor that shows ``model_name`` and points to ``link``.

    Neither argument is escaped; callers must pass trusted values.
    """
    # NOTE(review): the reviewed copy had lost its markup and returned only the
    # model name, leaving `link` unused. The anchor is reconstructed here;
    # confirm the exact attributes/styling against the deployed version.
    return f'<a target="_blank" href="{link}">{model_name}</a>'


def make_clickable_names(df):
    """Replace ``df["Models"]`` with hyperlinks built from ``df["Links"]``.

    The dataframe is modified in place and also returned.
    """
    df["Models"] = df.apply(
        lambda row: model_hyperlink(row["Links"], row["Models"]), axis=1
    )
    return df


# NOTE(review): the three styled_* helpers below had their wrapping markup
# stripped in the reviewed copy (the string literals were split across lines).
# The <p> wrappers are reconstructed; confirm colours/sizes before shipping.
def styled_error(error):
    """Wrap ``error`` in a centred red paragraph (the text is not escaped)."""
    return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"


def styled_warning(warn):
    """Wrap ``warn`` in a centred orange paragraph (the text is not escaped)."""
    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{warn}</p>"


def styled_message(message):
    """Wrap ``message`` in a centred green paragraph (the text is not escaped)."""
    return f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"


def has_no_nan_values(df, columns):
    """Return a boolean Series: True for rows with no NaN in ``columns``."""
    return df[columns].notna().all(axis=1)


def has_nan_values(df, columns):
    """Return a boolean Series: True for rows with at least one NaN in ``columns``."""
    return df[columns].isna().any(axis=1)


def is_model_on_hub(model_name: str, revision: str) -> Tuple[bool, Optional[str]]:
    """Check whether a model config can be loaded without remote code.

    Args:
        model_name: Identifier passed to ``AutoConfig.from_pretrained``.
        revision: Revision passed to ``AutoConfig.from_pretrained``.

    Returns:
        ``(True, None)`` when the config loads, otherwise ``(False, reason)``
        where ``reason`` is a sentence fragment describing the failure.
    """
    try:
        AutoConfig.from_pretrained(
            model_name, revision=revision, trust_remote_code=False
        )
    except ValueError:
        # A ValueError is reported as the model needing `trust_remote_code=True`.
        return (
            False,
            "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
        )
    except Exception as e:
        # Best-effort boundary: any other failure is logged and reported as
        # "not found" instead of propagating to the caller.
        print(f"Could not get the model config from the hub.: {e}")
        return False, "was not found on hub!"
    return True, None


@dataclass(frozen=True)
class EvalQueueColumn:  # Queue column
    """Columns of the evaluation queue table."""

    model = ColumnContent("model", "markdown", True)
    revision = ColumnContent("revision", "str", True)
    private = ColumnContent("private", "bool", True)
    precision = ColumnContent("precision", "str", True)
    # NOTE(review): "Original" lands in `displayed_by_default`, which is
    # declared as bool. Kept unchanged (it is truthy) in case callers rely on
    # it; confirm whether `True` was intended.
    weight_type = ColumnContent("weight_type", "str", "Original")
    status = ColumnContent("status", "str", True)


EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]