Spaces:
Runtime error
Runtime error
File size: 5,983 Bytes
8ea42fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import pandas as pd
from utils import model_hyperlink
def add_model_readme(df):
    """Add the model ids referenced by ``df["Links"]`` to ``README.md``.

    For every link, the text between the first and second ``.co/`` (or up to
    the end of the link when there is only one) is taken as the model id and
    written as a ``- <model_id>`` line. New lines are inserted just before
    the current last line of the file (presumably the closing delimiter of
    the README front matter -- TODO confirm). Ids that already have a line
    in the file are skipped. Links that do not contain ``.co/`` (including
    missing values) are reported on stdout and ignored.

    Args:
        df: DataFrame with a ``"Links"`` column.

    Raises:
        FileNotFoundError: if ``README.md`` is not in the working directory.
        KeyError: if ``df`` has no ``"Links"`` column.
    """
    with open("README.md", "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Set of existing lines for O(1) duplicate checks; kept in sync with
    # ``lines`` as new entries are inserted.
    seen = set(lines)
    for raw_link in df["Links"]:
        # Convert per element so missing values (NaN/None) become plain
        # strings and are reported as invalid instead of breaking ``split``.
        link = str(raw_link)
        # Keep the try body limited to the only expression that can raise.
        try:
            model_id = link.split(".co/")[1]
        except IndexError:
            print(f"link {link} is not valid")
            continue
        entry = f"- {model_id}\n"
        if entry in seen:
            continue
        # insert(-1, ...) places the entry before the current last line.
        lines.insert(-1, entry)
        seen.add(entry)

    with open("README.md", "w", encoding="utf-8") as f:
        f.writelines(lines)
# Load the raw scores; COLS holds the column labels exactly as read from the CSV.
df = pd.read_csv("data/raw_scores.csv")
COLS = list(df.columns)

# Append a copy of the "Models" column, named "models_query", as the last column.
df.insert(loc=len(COLS), column="models_query", value=df["Models"])
print(f"all cols {df.columns.to_list()}")

# Columns that are averaged per model: positions 5 up to (not including) the
# last three columns.
# NOTE(review): which scores these positions cover depends on the CSV layout,
# which is not visible here -- confirm against data/raw_scores.csv.
mean_columns = df.iloc[:, 5:-3]
print("cols", mean_columns.columns.to_list())

# Row-wise mean of the selected columns, rounded to 2 decimals.
# NOTE(review): the insert position is the *number* of averaged columns --
# confirm this is the intended placement of "Average score".
df.insert(
    loc=mean_columns.shape[1],
    column="Average score",
    value=mean_columns.mean(axis=1).round(2),
)
# Win rate: give every model points per scored column based on its rank in
# that column, then average those points across the columns.
old_size = len(df.columns)

# NOTE(review): positions 6 up to the last two columns are presumably the
# per-language score columns -- confirm against the CSV layout.
for col in df.columns[6:-2]:
    # rank(ascending=False) assigns 1 to the highest score; flip it so the
    # highest score earns len(df) points and each lower rank one point less.
    df[col + " rank"] = len(df) - (df[col].rank(ascending=False) - 1)

# Everything from position old_size onward was added by the loop above.
df["Win Rate"] = df.iloc[:, old_size:].mean(axis=1).round(2)
# Drop the helper "<col> rank" columns; "Win Rate" (now last) is kept.
df = df.drop(columns=df.columns[old_size:-1])
# Move "Win Rate" right after "Models" and "Size (B)".
df = df[["Models", "Size (B)", "Win Rate", *df.columns[2:-1]]]
# Highest win rate first.
df = df.sort_values(by="Win Rate", ascending=False)
# Model display name -> Hugging Face model page. The table stores only the
# "<organisation>/<repository>" part; the shared host prefix is added once.
_HF_BASE_URL = "https://huggingface.co/"
_MODEL_REPOS = {
    "WizardCoder-15B-V1.0": "WizardLM/WizardCoder-15B-V1.0",
    "WizardCoder-3B-V1.0": "WizardLM/WizardCoder-3B-V1.0",
    "WizardCoder-1B-V1.0": "WizardLM/WizardCoder-1B-V1.0",
    "WizardCoder-Python-34B-V1.0": "WizardLM/WizardCoder-Python-34B-V1.0",
    "WizardCoder-Python-13B-V1.0": "WizardLM/WizardCoder-Python-13B-V1.0",
    "OctoCoder-15B": "bigcode/octocoder",
    "OctoGeeX-7B": "bigcode/octogeex",
    "StableCode-3B": "stabilityai/stablecode-completion-alpha-3b",
    "StarCoder-15B": "bigcode/starcoder",
    "StarCoderBase-15B": "bigcode/starcoderbase",
    "StarCoderBase-7B": "bigcode/starcoderbase-7b",
    "StarCoderBase-3B": "bigcode/starcoderbase-3b",
    "StarCoderBase-1.1B": "bigcode/starcoderbase-1b",
    "SantaCoder-1.1B": "bigcode/santacoder",
    "Replit-2.7B": "replit/replit-code-v1-3b",
    "CodeGeex2-6B": "THUDM/codegeex2-6b",
    "CodeGen25-7B-multi": "Salesforce/codegen25-7b-multi",
    "CodeGen25-7B-mono": "Salesforce/codegen25-7b-mono",
    "CodeGen-16B-Multi": "Salesforce/codegen-16B-multi",
    "DeciCoder-1B": "Deci/DeciCoder-1b",
    "Phind-CodeLlama-34B-v1": "phind/Phind-CodeLlama-34B-v1",
    "Phind-CodeLlama-34B-Python-v1": "phind/Phind-CodeLlama-34B-Python-v1",
    "Phind-CodeLlama-34B-v2": "phind/Phind-CodeLlama-34B-v2",
    "Falcon-180B": "tiiuae/falcon-180B",
    "Refact-1.6B": "smallcloudai/Refact-1_6B-fim",
    "Phi-1": "microsoft/phi-1",
    "CodeShell-7B": "WisdomShell/CodeShell-7B",
    "DeepSeek-Coder-1b-base": "deepseek-ai/deepseek-coder-1.3b-base",
    "DeepSeek-Coder-7b-base": "deepseek-ai/deepseek-coder-6.7b-base",
    "DeepSeek-Coder-33b-base": "deepseek-ai/deepseek-coder-33b-base",
}
links = {name: _HF_BASE_URL + repo for name, repo in _MODEL_REPOS.items()}
# CodeLlama model names: every size combined with every variant suffix
# ("" is the plain model), sizes in the outer loop.
codellamas = [
    f"CodeLlama-{size}{variant}"
    for size in ("7b", "13b", "34b")
    for variant in ("", "-Python", "-Instruct")
]
# Each CodeLlama link is the model name plus an "-hf" suffix under the
# "codellama" organisation.
for codellama in codellamas:
    links[codellama] = f"https://huggingface.co/codellama/{codellama}-hf"

# Look up every model's link by name; names missing from `links` map to NaN.
df["Links"] = df["Models"].map(links)
# Marker column "T" placed first: every model starts as 🟢 and is overridden
# below when its name matches one of the patterns. The match is a regex
# search on the "Models" column, so "a|b" means "contains a or b".
df.insert(0, "T", "🟢")
patterns = ["WizardCoder", "Octo", "Instruct", "Phind", "Refact"]
df.loc[df["Models"].str.contains("|".join(patterns)), "T"] = "🔶"
# Applied last, so it takes precedence over the 🔶 marker for matching names.
df.loc[df["Models"].str.contains("CodeShell|DeepSeek"), "T"] = "🔴"

# "Submission PR" is empty except for models whose name contains one of the
# keys below; those get the link to their leaderboard discussion.
df["Submission PR"] = ""
submission_prs = {
    "CodeShell": "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/16",
    "DeepSeek-Coder-1b-base": "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/33",
    "DeepSeek-Coder-7b-base": "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/32",
    "DeepSeek-Coder-33b-base": "https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/discussions/31",
}
for name_pattern, pr_url in submission_prs.items():
    df.loc[df["Models"].str.contains(name_pattern), "Submission PR"] = pr_url
# preview: print the first 5 rows with every column except the last one
print(df.iloc[:5, :-1])
# write the final table to CSV without the index column
df.to_csv("data/code_eval_board.csv", index=False)
# fill readme: add the model ids found in the "Links" column to README.md
add_model_readme(df)
print("Readme filled")
|