"""Helpers for loading, formatting and sorting model evaluation result tables."""

import os
import re
from pathlib import Path

import numpy as np
import pandas as pd

try:
    from datasets import load_dataset
except ImportError:
    # `datasets` is only needed by load_all_data(); keep the pure-pandas
    # helpers below importable when it is not installed.
    load_dataset = None

UNVERIFIED_MODELS = [
]

CONTAMINATED_MODELS = [
]

# Names longer than this are shortened to (limit - 3) characters plus "...".
_MAX_MODEL_NAME_LENGTH = 50

# Matches the first run of text enclosed between '>' and '<'.
_TAG_TEXT_RE = re.compile(r">([^<]+)<")

_JSON_SUFFIX = ".json"

# Raw result keys -> column headers shown to the user.
_DISPLAY_NAMES = {
    "model": "Model",
    "average": "Average",
    "brainstorm": "Brainstorm",
    "open_qa": "Open QA",
    "closed_qa": "Closed QA",
    "extract": "Extract",
    "generation": "Generation",
    "rewrite": "Rewrite",
    "summarize": "Summarize",
    "classify": "Classify",
    "reasoning_over_numerical_data": "Reasoning Over Numerical Data",
    "multi-document_synthesis": "Multi-Document Synthesis",
    "fact_checking_or_attributed_qa": "Fact Checking or Attributed QA",
}


def _move_column(df: pd.DataFrame, name: str, position: int) -> pd.DataFrame:
    """Return ``df`` with column ``name`` relocated to index ``position``.

    The column is removed first and then inserted, so ``position`` refers to
    the column list *without* ``name``. Raises ``ValueError`` if ``name`` is
    not a column of ``df``.
    """
    cols = list(df.columns)
    cols.insert(position, cols.pop(cols.index(name)))
    return df.loc[:, cols]


# From Open LLM Leaderboard
def model_hyperlink(link, model_name):
    """Return the display label for a model.

    Names longer than 50 characters are cut to their first 47 characters
    followed by ``"..."``. `` *`` is appended when the (possibly shortened)
    name is listed in ``UNVERIFIED_MODELS`` and `` ⚠️`` when it is listed in
    ``CONTAMINATED_MODELS``.

    Args:
        link: URL associated with the model. Currently unused (see note).
        model_name: Model identifier, e.g. ``"org/model"``.

    Returns:
        The label string.
    """
    if len(model_name) > _MAX_MODEL_NAME_LENGTH:
        model_name = model_name[: _MAX_MODEL_NAME_LENGTH - 3] + "..."

    # NOTE(review): this used to be an if/elif chain special-casing "random",
    # "Cohere March 2024" and the "openai/", "Anthropic/", "google/" and
    # "PoLL/" prefixes, but every branch produced the bare name and a trailing
    # unconditional assignment overwrote the result, so the chain was dead
    # code. `link` is never embedded in the output, which means
    # undo_hyperlink() cannot invert this function -- confirm whether anchor
    # markup is expected here.
    output = f"{model_name}"

    if model_name in UNVERIFIED_MODELS:
        output += " *"
    if model_name in CONTAMINATED_MODELS:
        output += " ⚠️"
    return output


def undo_hyperlink(html_string: str) -> str:
    """Extract the first piece of text found between ``>`` and ``<``.

    Args:
        html_string: String expected to contain ``>text<``.

    Returns:
        The enclosed text, or the literal ``"No text found"`` when the string
        contains no such span.
    """
    match = _TAG_TEXT_RE.search(html_string)
    if match is None:
        return "No text found"
    return match.group(1)


def load_all_data(data_repo, subdir: str, subsubsets=False):
    """Load every ``*.json`` file in ``data_repo/subdir`` into one DataFrame.

    Each file is read with ``datasets.load_dataset("json", ...)`` and tagged
    with a ``model`` column holding the file name without its ``.json``
    suffix.

    Args:
        data_repo: Path of the local directory holding the results.
        subdir: Sub-directory of ``data_repo`` to scan (not recursive).
        subsubsets: Unused; kept for backward compatibility.

    Returns:
        A DataFrame with the rows of all files, last-loaded file first, or an
        empty DataFrame when the directory contains no ``.json`` files.

    Raises:
        ImportError: If the ``datasets`` package is not installed.
    """
    if load_dataset is None:
        raise ImportError(
            "load_all_data requires the `datasets` package to be installed"
        )

    data_dir = Path(data_repo) / subdir

    # Strip only the trailing suffix so names that contain ".json" elsewhere
    # (e.g. "a.json.v2.json") still map back to the file they came from.
    model_names = [
        entry[: -len(_JSON_SUFFIX)]
        for entry in os.listdir(data_dir)
        if entry.endswith(_JSON_SUFFIX)
        and os.path.isfile(os.path.join(data_dir, entry))
    ]

    # Files are loaded one by one because they may not share the same fields;
    # pd.concat aligns the columns afterwards.
    frames = []
    for model_name in model_names:
        model_data = load_dataset(
            "json",
            data_files=os.path.join(data_dir, model_name + _JSON_SUFFIX),
            split="train",
        )
        # A single-element column is added, so this presumably relies on each
        # file yielding exactly one row -- TODO confirm against the data.
        model_data = model_data.add_column("model", [model_name])
        frames.append(pd.DataFrame(model_data))

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of inside the loop; reversing keeps the
    # historical row/column order (each new file was prepended).
    return pd.concat(frames[::-1])


def prep_df(df):
    """Turn the raw results frame into the table shown to users.

    Steps: sort columns alphabetically, move ``model`` to the front, replace
    each model name with ``model_hyperlink(...)`` built from the ``path``
    column, drop ``path``, multiply every score column by 100 (columns whose
    name contains ``"rank"`` or ``"confi"`` are left untouched), move
    ``average`` to second position and rename columns to display names.

    Args:
        df: Frame with at least ``model``, ``path`` and ``average`` columns.
            It is not modified.

    Returns:
        A new, formatted DataFrame.

    Raises:
        ValueError: If ``model`` or ``average`` is missing.
        KeyError: If ``path`` is missing.
    """
    df = df.reindex(sorted(df.columns), axis=1)
    df = _move_column(df, "model", 0)

    df["model"] = [
        model_hyperlink(f"https://huggingface.co/{path}", model)
        for path, model in zip(df["path"], df["model"])
    ]
    df = df.drop(columns=["path"])

    # Scores are stored as fractions; ranks and confidence intervals are not.
    score_cols = [
        c for c in df.columns
        if c != "model" and "rank" not in c and "confi" not in c
    ]
    df[score_cols] = df[score_cols] * 100

    df = _move_column(df, "average", 1)
    df = df.rename(columns=_DISPLAY_NAMES)

    # Disabled per-column string formatting, kept for reference:
    # if "Average" in df.columns:
    #     df["Average"] = np.array([f"{v:.2f}" for v in df["Average"].values])
    # # round all others to 1 decimal
    # for col in df.columns:
    #     if col not in ["Model", "Average"]:
    #         # replace any df[col].values == '' with np.nan
    #         df[col] = df[col].replace('', np.nan)
    #         df[col] = np.array([f"{v:.1f}" for v in df[col].values])
    return df


def sort_by_category(df, category):
    """Return ``df`` ranked by one category, with that category up front.

    The rank and confidence-interval columns are looked up as
    ``<category lower-cased, spaces -> underscores>_rank`` / ``_confi``.
    Rows are sorted by rank ascending, ties broken by the category score
    descending. The resulting column order is ``Rank``, the column that was
    first in ``df``, ``category``, ``95% CI``, then the remaining columns;
    every other ``*rank`` / ``*confi`` column is dropped.

    Args:
        df: Frame as produced by ``prep_df``. It is not modified.
        category: Display name of the score column, e.g. ``"Open QA"``.

    Returns:
        A new, sorted and re-ordered DataFrame.
    """
    key = category.lower().replace(" ", "_")
    col_rank = key + "_rank"
    col_confi = key + "_confi"

    ranked = df.sort_values(by=[col_rank, category], ascending=[True, False])

    ranked = _move_column(ranked, col_rank, 0)
    ranked = _move_column(ranked, category, 2)
    ranked = _move_column(ranked, col_confi, 3)
    ranked = ranked.rename(columns={col_rank: "Rank", col_confi: "95% CI"})

    # The selected rank/CI columns were renamed above, so only the ones
    # belonging to other categories still match these suffixes.
    leftovers = [c for c in ranked.columns if c.endswith(("rank", "confi"))]
    return ranked.drop(columns=leftovers)