|
from pathlib import Path |
|
import gradio as gr |
|
|
|
import pandas as pd |
|
import torch |
|
import json |
|
from gradio import ChatMessage |
|
import os |
|
import matplotlib.pyplot as plt |
|
|
|
# Truthy when the SPACE_AUTHOR_NAME environment variable holds a non-empty
# value (presumably set by the hosting Space - confirm against the deployment).
IN_SPACE = bool(os.getenv("SPACE_AUTHOR_NAME"))

# Prediction files, relative to the working directory. The experiment id is
# later parsed out of each file name, so the naming pattern matters.
files = [
    # "lmsys-ex<id>-model_oof_df" files
    "./lmsys-ex38-model_oof_df.parquet",
    "./lmsys-ex41-model_oof_df.parquet",
    "./lmsys-ex43-model_oof_df.parquet",
    # "lmsys-exp-llm-<id>-weight_preds" files
    "./lmsys-exp-llm-049-weight_preds.parquet",
    "./lmsys-exp-llm-053-weight_preds.parquet",
    "./lmsys-exp-llm-063-weight_preds.parquet",
    "./lmsys-exp-llm-065-weight_preds.parquet",
    "./lmsys-exp-llm-073-weight_preds.parquet",
    "./lmsys-exp-llm-078-weight_preds.parquet",
    "./lmsys-exp-llm-081-weight_preds.parquet",
    "./lmsys-exp-llm-085-weight_preds.parquet",
    # "lmsys-oof-exp<id>_preds" files
    "./lmsys-oof-exp2_preds.parquet",
    "./lmsys-oof-exp29_preds.parquet",
]

# Parquet file holding the ids, texts and ground-truth winner columns.
train_filepath = "./train.parquet"
|
|
|
# Local (non-Space) runs: point every path at ../../data and pull environment
# variables from the .env file two directories up.
if not IN_SPACE:
    from dotenv import load_dotenv

    train_filepath = "../../data/train.parquet"
    files = [name.replace("./", "../../data/oofs/") for name in files]

    # load_dotenv's return value is echoed so a missing .env is visible in the log.
    loaded = load_dotenv("../../.env")
    print("Loaded .env file:", loaded)
|
|
|
# Token used to authenticate against the Hugging Face Hub when the prediction
# files have to be downloaded.
HF_TOKEN = os.getenv("HF_READ_OOFS_TOKEN")

if not HF_TOKEN:
    print("be sure to set HF_READ_OOFS_TOKEN in .env file")

# Only contact the Hub when the first expected file is missing locally.
if not Path(files[0]).exists():
    from huggingface_hub import snapshot_download, login

    # login(token=None) falls back to an interactive prompt, which cannot be
    # answered by a headless process; authenticate only when a token exists
    # and otherwise let the download fail with its own auth error.
    if HF_TOKEN:
        login(token=HF_TOKEN)

    # NOTE(review): the snapshot is always written to "./", while `files` may
    # have been redirected to another directory - confirm the two agree.
    snapshot_download(
        "nbroad/lmsys-cahpp-oofs",
        repo_type="dataset",
        local_dir="./",
        local_dir_use_symlinks=False,
    )
|
|
|
|
|
def _experiment_id(path):
    """Return the experiment id embedded in a prediction file name.

    Recognised patterns are ``lmsys-exp-llm-<id>-...``, ``lmsys-ex<id>-...``
    and ``lmsys-oof-exp<id>_...``.

    Raises:
        ValueError: if ``path`` matches none of the patterns. Previously such
            a file silently inherited the id of the file processed before it
            (or raised ``NameError`` when it came first).
    """
    # Order matters: "lmsys-ex" is a prefix of "lmsys-exp-llm-", so the more
    # specific marker has to be tested first.
    if "lmsys-exp-llm-" in path:
        return path.split("lmsys-exp-llm-")[1].split("-")[0]
    if "lmsys-ex" in path:
        return path.split("lmsys-ex")[1].split("-")[0]
    if "lmsys-oof-exp" in path:
        return path.split("lmsys-oof-exp")[1].split("_")[0]
    raise ValueError(f"Cannot derive an experiment id from file name: {path!r}")


# Maps both the configured path and the bare file name to the experiment id,
# so lookups work with either form.
exps = {}

for f in files:
    exp = _experiment_id(f)
    exps[f] = exp
    exps[f.split("/")[-1]] = exp
|
|
|
|
|
def _pred_cols(exp):
    """Return the probability column names (A, B, tie) used for experiment ``exp``."""
    return [
        f"winner_model_a_prob_{exp}",
        f"winner_model_b_prob_{exp}",
        f"winner_tie_prob_{exp}",
    ]


def _load_predictions(path, exp):
    """Read one prediction file and return its three score columns indexed by ``id``.

    The files come in three column layouts ("0"/"1"/"2", bare
    ``winner_*`` names, or ``winner_*_prob`` names); whichever is present is
    renamed to the per-experiment names from ``_pred_cols``.
    """
    frame = pd.read_parquet(path)

    if "0" in frame.columns:
        source_cols = ["0", "1", "2"]
    elif "winner_tie_prob" not in frame.columns:
        source_cols = ["winner_model_a", "winner_model_b", "winner_tie"]
    else:
        source_cols = ["winner_model_a_prob", "winner_model_b_prob", "winner_tie_prob"]

    target_cols = _pred_cols(exp)
    frame = frame.rename(columns=dict(zip(source_cols, target_cols)))
    return frame[["id"] + target_cols].sort_values("id").set_index("id")


def make_df():
    """Build the table of per-experiment predictions, losses and metadata.

    Reads every path in ``files`` plus ``train_filepath``. The train file must
    provide ``id``, ``prompt``, ``response_a``, ``response_b`` and the one-hot
    ``winner_model_a`` / ``winner_model_b`` / ``winner_tie`` columns.

    Returns:
        tuple: ``(joined, id2texts)``.
            ``joined`` is a DataFrame with, per experiment, three probability
            columns and a ``loss_<exp>`` column, plus ``winner``, the length
            columns, ``avg_loss`` and the ``avg_winner_*`` columns. Probability
            and loss columns are stored as float16 and the text columns are
            dropped.
            ``id2texts`` maps each id to ``(prompt, response_a, response_b)``.
    """
    # Join the files on id. concat(axis=1) aligns on the index, so sorting
    # alone (as before) did not line rows up; indexing by id does.
    # Ids are assumed to be unique within each file.
    per_file = [_load_predictions(path, exps[path]) for path in files]
    joined = pd.concat(per_file, axis=1)

    # Keep "id" as the column after the predictions, then drop the id index so
    # the merge below sees an unambiguous "id" column.
    joined["id"] = joined.index.to_numpy()
    joined = joined.reset_index(drop=True)

    tdf = pd.read_parquet(train_filepath)
    joined = joined.merge(tdf, on="id", how="left")

    label_cols = ["winner_model_a", "winner_model_b", "winner_tie"]
    joined["winner"] = ""
    for col, name in zip(label_cols, ("A", "B", "Tie")):
        joined.loc[joined[col] == 1, "winner"] = name

    # Position of the true class per row; identical for every experiment, so
    # it is computed once instead of inside the loop.
    gt_idxs = torch.as_tensor(joined[label_cols].to_numpy().argmax(axis=1))
    row_idxs = torch.arange(len(joined))
    ones = torch.ones(len(joined), dtype=torch.float64)

    # `exps` stores each experiment under two keys (path and file name);
    # iterate over the distinct ids so every loss is computed only once.
    for exp in dict.fromkeys(exps[path] for path in files):
        pred_cols = _pred_cols(exp)

        # float64 keeps the dtype equal to the BCE target below regardless of
        # how the parquet file stored the values.
        scores = torch.tensor(joined[pred_cols].to_numpy(), dtype=torch.float64)

        # A row summing well above 1 means the file does not hold
        # probabilities (presumably raw logits), so normalise with softmax.
        if scores.sum(dim=-1).max() > 1.1:
            scores = scores.softmax(-1)

        joined[pred_cols] = scores.numpy()

        # BCE against a target of 1 on the true-class probability is the
        # per-row negative log-likelihood.
        true_class_prob = scores[row_idxs, gt_idxs]
        joined[f"loss_{exp}"] = torch.nn.functional.binary_cross_entropy(
            true_class_prob, ones, reduction="none"
        ).numpy()

    # Character lengths of the stored text values.
    joined["prompt_length"] = joined["prompt"].map(len)
    joined["response_a_length"] = joined["response_a"].map(len)
    joined["response_b_length"] = joined["response_b"].map(len)
    joined["total_length"] = (
        joined["prompt_length"]
        + joined["response_a_length"]
        + joined["response_b_length"]
    )

    # Averages across experiments.
    loss_cols = [c for c in joined.columns if "loss" in c]
    joined["avg_loss"] = joined[loss_cols].mean(axis=1)
    for side in ("model_a", "model_b", "tie"):
        side_cols = [c for c in joined.columns if f"winner_{side}_prob" in c]
        joined[f"avg_winner_{side}"] = joined[side_cols].mean(axis=1)

    # Shrink the numeric columns; loss_cols is rebuilt so it includes avg_loss.
    prob_cols = [c for c in joined.columns if "prob" in c]
    loss_cols = [c for c in joined.columns if "loss" in c]
    joined[prob_cols + loss_cols] = joined[prob_cols + loss_cols].astype("float16")

    # Texts are kept in a side lookup so the DataFrame stays small.
    text_cols = ["prompt", "response_a", "response_b"]
    id2texts = {
        row_id: (p, a, b)
        for row_id, p, a, b in joined[["id"] + text_cols].to_numpy()
    }
    joined = joined.drop(columns=text_cols)

    return joined, id2texts
|
|
|
|
|
# Built once at import time: filter_df reads MAIN_DF and show_chats reads
# id2texts, so neither callback has to touch the parquet files again.
MAIN_DF, id2texts = make_df()
|
|
|
|
|
def filter_df(lower_limit, upper_limit, file, all_check):
    """Select the rows of ``MAIN_DF`` whose loss lies strictly between two limits.

    Args:
        lower_limit: exclusive lower bound on the loss.
        upper_limit: exclusive upper bound on the loss.
        file: file name whose ``loss_<exp>`` column is used; ignored when
            ``all_check`` is truthy or ``file`` is ``None`` / empty.
        all_check: when truthy, filter on ``avg_loss`` instead.

    Returns:
        tuple: ``(0, frame)`` where ``frame`` holds the matching rows sorted by
        descending loss with a fresh index; the ``0`` is the row position to
        show first.
    """
    use_average = all_check or file is None or file == ""
    loss_col = "avg_loss" if use_average else f"loss_{exps[file]}"

    losses = MAIN_DF[loss_col]
    in_range = (losses > lower_limit) & (losses < upper_limit)
    selected = MAIN_DF[in_range].sort_values(loss_col, ascending=False)

    return 0, selected.reset_index(drop=True)
|
|
|
|
|
def make_chat(prompt, response, side, label):
    """Turn one side of a conversation into a list of chat messages.

    Args:
        prompt: JSON-encoded list of user turns.
        response: JSON-encoded list of model turns; entries may be ``null``.
        side: which model this transcript belongs to (``"A"`` or ``"B"``).
        label: the ground-truth outcome (``"A"``, ``"B"``, ``"Tie"`` or ``2``).

    Returns:
        list: alternating user / assistant ``ChatMessage`` objects, each
        prefixed with a Winner / Tie / Loser banner. Extra turns on the
        longer of the two lists are dropped by ``zip``.
    """
    prompts = json.loads(prompt)
    responses = json.loads(response)

    # The banner glyphs are written as escapes so they survive re-encoding;
    # the previous literals had been corrupted into mojibake that split the
    # string across lines.
    if side == label:
        header = "\u2705 Winner \u2705"  # white heavy check mark
    elif label == 2 or label == "Tie":
        header = "\U0001F7E8 Tie \U0001F7E8"  # yellow square
    else:
        header = "\u274C Loser \u274C"  # cross mark

    chat = []
    for p, r in zip(prompts, responses):
        chat.append(ChatMessage(role="user", content=header + "\n" + p))

        # A null response would break the string concatenation below.
        if r is None:
            r = ""

        chat.append(ChatMessage(role="assistant", content=header + "\n" + r))

    return chat
|
|
|
|
|
def show_chats(idx, df, file, all_check):
    """Build everything shown for the row at position ``idx`` of ``df``.

    Args:
        idx: row position; clamped into ``[0, len(df) - 1]``. May be ``None``.
        df: filtered frame with ``id``, ``winner`` and the score / loss columns.
        file: file name selecting the per-experiment columns; ignored when
            ``all_check`` is truthy or ``file`` is ``None`` / empty.
        all_check: when truthy, show the averaged scores and ``avg_loss``.

    Returns:
        tuple: ``(chat_a, chat_b, label, score_a, score_b, score_tie, loss)``.
        All seven entries are ``None`` when there is nothing to show, so the
        tuple always matches the seven outputs of the callback.
    """
    # Previously this returned two values (too few for seven outputs) and an
    # empty frame ended in an IndexError on iloc[0].
    if idx is None or df is None or df.shape[0] == 0:
        return (None,) * 7

    # idx may arrive as a float (e.g. from a numeric input widget), which
    # iloc rejects; convert first, then clamp into range.
    position = min(max(int(idx), 0), df.shape[0] - 1)

    row = df.iloc[position]
    label = row["winner"]

    p, a, b = id2texts[row["id"]]
    chat_a = make_chat(p, a, "A", label)
    chat_b = make_chat(p, b, "B", label)

    if all_check or file is None or file == "":
        score_cols = ["avg_winner_model_a", "avg_winner_model_b", "avg_winner_tie"]
        loss_col = "avg_loss"
    else:
        exp = exps[file]
        score_cols = [
            f"winner_model_a_prob_{exp}",
            f"winner_model_b_prob_{exp}",
            f"winner_tie_prob_{exp}",
        ]
        loss_col = f"loss_{exp}"

    scores = row[score_cols].to_list()
    loss = row[loss_col]

    return chat_a, chat_b, label, *scores, loss
|
|
|
|
|
def show_split(text):
    """Create one row per character of ``text`` with a textbox and a Clear button.

    An empty ``text`` produces a single "No Input Provided" heading instead.
    Components are created as a side effect; nothing is returned.
    NOTE(review): not referenced anywhere in the visible file - presumably
    meant to run inside a Gradio layout/render context; confirm before use.
    """
    if len(text) == 0:
        gr.Markdown("## No Input Provided")
        return

    for letter in text:
        with gr.Row():
            box = gr.Textbox(letter)
            clear_btn = gr.Button("Clear")
            # Clicking replaces the row's textbox with an empty one.
            clear_btn.click(lambda: gr.Textbox(value=""), None, box)
|
|
|
def update_plot(df, file, all_check):
    """Draw a 50-bin histogram of the selected loss column of ``df``.

    Args:
        df: frame containing ``avg_loss`` and/or ``loss_<exp>`` columns.
        file: file name selecting ``loss_<exp>``; ignored when ``all_check``
            is truthy or ``file`` is ``None`` / empty.
        all_check: when truthy, plot ``avg_loss``.

    Returns:
        Whatever ``plt.hist`` returns for that column.
    """
    # Debug output: available columns and whether avg_loss is among them.
    print(df.columns)
    print("avg_loss" in df.columns)

    use_average = all_check or file is None or file == ""
    loss_col = "avg_loss" if use_average else f"loss_{exps[file]}"

    histogram = plt.hist(df[loss_col], bins=50)
    return histogram
|
|
|
|
|
|
|
# UI layout and event wiring for the viewer.
with gr.Blocks() as demo:

    gr.Markdown(
        """
        # OOF Visualization

        This is a demo for visualizing the out-of-fold predictions of a model.
        It currently shows the predictions for the outputs of [this notebook](https://www.kaggle.com/code/kcotton21/lmsys-preds/notebook).
        """
    )

    # Which loss to filter on: one file's loss or the average over all files.
    with gr.Row():
        with gr.Column():
            file = gr.Dropdown(label="File", choices=[x.split("/")[-1] for x in files])
        with gr.Column():
            all_check = gr.Checkbox(label="Use average loss of all files", value=True)

    # Loss window for the rows to browse.
    with gr.Row():
        lower_limit = gr.Slider(
            label="Show samples with loss > this value", minimum=0, maximum=5, value=1
        )
        upper_limit = gr.Slider(
            label="Show samples with loss < this value", minimum=0, maximum=5, value=5
        )

    # Position inside the filtered frame, and the (hidden) filtered frame itself.
    idx = gr.Number(visible=True)
    hidden_df = gr.Dataframe(visible=False)

    with gr.Row():
        correct_label = gr.Textbox(label="Correct Label", interactive=False)
        score_a = gr.Textbox(label="Model A Score", interactive=False)
        score_b = gr.Textbox(label="Model B Score", interactive=False)
        score_tie = gr.Textbox(label="Tie Score", interactive=False)
        loss = gr.Textbox(label="Loss", interactive=False)

    with gr.Row():
        with gr.Column():
            prev_btn = gr.Button(value="Previous")
        with gr.Column():
            next_btn = gr.Button(value="Next")

    with gr.Row():
        with gr.Column():
            chat_a = gr.Chatbot(label="Model A", type="messages", height=1000)
        with gr.Column():
            chat_b = gr.Chatbot(label="Model B", type="messages", height=1000)

    filter_inputs = [lower_limit, upper_limit, file, all_check]
    chat_inputs = [idx, hidden_df, file, all_check]
    chat_outputs = [chat_a, chat_b, correct_label, score_a, score_b, score_tie, loss]

    # Every control that affects the selection re-runs the filter. Previously
    # only the sliders did, so picking a file or toggling the checkbox had no
    # effect. The chained refresh covers the case where idx is already 0 and
    # therefore emits no change event of its own; when idx does change the
    # view is simply rendered twice.
    for control in (lower_limit, upper_limit, file, all_check):
        control.change(
            filter_df,
            inputs=filter_inputs,
            outputs=[idx, hidden_df],
        ).then(
            show_chats,
            inputs=chat_inputs,
            outputs=chat_outputs,
        )

    idx.change(
        show_chats,
        inputs=chat_inputs,
        outputs=chat_outputs,
    )

    # idx is None until something sets it; treat that as position 0 instead
    # of failing on None arithmetic.
    prev_btn.click(lambda x: max(0, (x or 0) - 1), inputs=idx, outputs=idx)
    next_btn.click(lambda x: (x or 0) + 1, inputs=idx, outputs=idx)


if __name__ == "__main__":
    demo.launch(debug=True)