Track large files with Git LFS, and expand the app to include a data explorer and more length-based visualizations.
707a231
import os

import pandas as pd
import tiktoken
from alpaca_eval.metrics.glm_winrate import get_length_controlled_winrate
# Define the path to the top-level directory
TOP_LEVEL_DIRECTORY = "submodules/alpaca_eval/results"

# Initialize an empty dictionary to hold the model name to dataframe mapping
model_dataframes_outputs = {}

# Iterate through each subdirectory in the top-level directory
for model_name in os.listdir(TOP_LEVEL_DIRECTORY):
    model_dir = os.path.join(TOP_LEVEL_DIRECTORY, model_name)
    if os.path.isdir(model_dir):
        model_output_file = os.path.join(model_dir, "model_outputs.json")
        if os.path.exists(model_output_file):
            df = pd.read_json(model_output_file)
            df["model_name"] = model_name
            model_dataframes_outputs[model_name] = df
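
# Each model_outputs.json is loaded as a dataframe whose "output" column holds
# that model's generated responses; that column is what the length statistics
# below are computed over.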
def get_num_words(text):
    return len(text.split())


# cl100k_base is the tiktoken encoding used by the GPT-4 / GPT-3.5-turbo models.
ENCODING = tiktoken.get_encoding("cl100k_base")


def get_num_tokens(text):
    """Uses tiktoken to get the number of tokens in the text."""
    return len(ENCODING.encode(str(text)))
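
# Illustrative sanity check (the sample string is hypothetical, not taken from
# the AlpacaEval outputs): a four-word string splits into four words, and its
# cl100k_base token count is a positive integer.
assert get_num_words("The quick brown fox") == 4
assert get_num_tokens("The quick brown fox") > 0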
model_name_to_num_words = {}
model_name_to_num_tokens = {}
for model_name, model_dataframe in model_dataframes_outputs.items():
    print(f"model_name_to_num_words for {model_name}")
    model_dataframe["model_name"] = model_name
    model_dataframe["output_num_words"] = model_dataframe["output"].apply(get_num_words)
    model_dataframe["output_num_tokens"] = model_dataframe["output"].apply(get_num_tokens)
    model_name_to_num_words[model_name] = {
        "mean": int(model_dataframe["output_num_words"].mean()),
        "std": int(model_dataframe["output_num_words"].std()),
    }
    model_name_to_num_tokens[model_name] = {
        "mean": int(model_dataframe["output_num_tokens"].mean()),
        "std": int(model_dataframe["output_num_tokens"].std()),
    }

num_words_df = pd.DataFrame(model_name_to_num_words).T
num_tokens_df = pd.DataFrame(model_name_to_num_tokens).T
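
# num_words_df and num_tokens_df are indexed by model name, each with "mean"
# and "std" columns summarizing output length for that model.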
model_name_to_win_rate = {}
for model_name in os.listdir(TOP_LEVEL_DIRECTORY):
    print(f"model_name_to_win_rate for {model_name}")
    model_dir = os.path.join(TOP_LEVEL_DIRECTORY, model_name)
    if os.path.isdir(model_dir):
        model_output_file = os.path.join(
            model_dir, "weighted_alpaca_eval_gpt4_turbo", "annotations.json"
        )
        if os.path.exists(model_output_file):
            model_dataframe = pd.read_json(model_output_file)
            model_name_to_win_rate[model_name] = get_length_controlled_winrate(
                model_dataframe
            )
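
# get_length_controlled_winrate returns per-model metrics that include the
# "length_controlled_winrate" key used for filtering below (other keys, such as
# the raw win rate, may also be present depending on the alpaca_eval version).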
win_rate_df = pd.DataFrame(model_name_to_win_rate).T

# Combine length statistics with win rates, renaming the generic "mean"/"std"
# columns after each join so the two sets of statistics stay distinguishable.
df = num_words_df.join(win_rate_df, how="inner")
df = df.rename(
    columns={
        "mean": "num_words_mean",
        "std": "num_words_std",
    }
)
df = df.join(num_tokens_df, how="inner")
df = df.rename(
    columns={
        "mean": "num_tokens_mean",
        "std": "num_tokens_std",
    }
)
df["model_name"] = df.index
# Keep only models with a length-controlled win rate above 25.
df = df[df["length_controlled_winrate"] > 25]
df.to_json("data/model_win_rates.jsonl", orient="records", lines=True)
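
# Minimal sketch of how the app can read the exported file back (the variable
# name below is illustrative, not part of the original script):
exported = pd.read_json("data/model_win_rates.jsonl", orient="records", lines=True)
print(exported[["model_name", "num_words_mean", "length_controlled_winrate"]].head())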