# asdf / app.py
# nbroad's picture
# Update app.py
# 70e1f4f verified
from pathlib import Path
import gradio as gr
# import polars as pl
import pandas as pd
import torch
import json
from gradio import ChatMessage
import os
import matplotlib.pyplot as plt
# True when the SPACE_AUTHOR_NAME environment variable is set to a non-empty
# value (presumably provided by the Hugging Face Spaces runtime -- confirm).
IN_SPACE = bool(os.environ.get("SPACE_AUTHOR_NAME", False))

# Out-of-fold prediction files, one per experiment. The experiment id is later
# parsed out of each file name, so the naming patterns matter.
files = [
    "./lmsys-ex38-model_oof_df.parquet",
    "./lmsys-ex41-model_oof_df.parquet",
    "./lmsys-ex43-model_oof_df.parquet",
    "./lmsys-exp-llm-049-weight_preds.parquet",
    "./lmsys-exp-llm-053-weight_preds.parquet",
    "./lmsys-exp-llm-063-weight_preds.parquet",
    "./lmsys-exp-llm-065-weight_preds.parquet",
    "./lmsys-exp-llm-073-weight_preds.parquet",
    "./lmsys-exp-llm-078-weight_preds.parquet",
    "./lmsys-exp-llm-081-weight_preds.parquet",
    "./lmsys-exp-llm-085-weight_preds.parquet",
    "./lmsys-oof-exp2_preds.parquet",
    "./lmsys-oof-exp29_preds.parquet",
]
train_filepath = "./train.parquet"

if not IN_SPACE:
    # Outside a Space, read everything from the repo-relative data directory.
    files = [x.replace("./", "../../data/oofs/") for x in files]
    train_filepath = "../../data/train.parquet"

    # Imported lazily so python-dotenv is only required on this code path.
    from dotenv import load_dotenv

    # Must run before the os.getenv() call below so the token can come from .env.
    loaded = load_dotenv("../../.env")
    print("Loaded .env file:", loaded)

# NOTE(review): the token lookup is assumed to live at module level (the
# download below needs HF_TOKEN on every code path) -- confirm the nesting.
HF_TOKEN = os.getenv("HF_READ_OOFS_TOKEN")
if not HF_TOKEN:
    print("be sure to set HF_READ_OOFS_TOKEN in .env file")

# Only the first file is probed; if it is missing the whole dataset repo is
# downloaded into the current directory.
# NOTE(review): outside a Space `files` point at ../../data/oofs/ while the
# download target is "./" -- verify this is intended.
if not Path(files[0]).exists():
    from huggingface_hub import snapshot_download, login

    login(token=HF_TOKEN)
    snapshot_download("nbroad/lmsys-cahpp-oofs", repo_type="dataset", local_dir="./", local_dir_use_symlinks=False)
# Map every prediction file -- keyed both by the path in `files` and by its
# bare file name (what the UI dropdown shows) -- to its experiment id.
exps = {}
for f in files:
    # Order matters: "lmsys-ex" is also a prefix of "lmsys-exp-llm-", so the
    # more specific pattern has to be tested first.
    if "lmsys-exp-llm-" in f:
        exp = f.split("lmsys-exp-llm-")[1].split("-")[0]
    elif "lmsys-ex" in f:
        exp = f.split("lmsys-ex")[1].split("-")[0]
    elif "lmsys-oof-exp" in f:
        exp = f.split("lmsys-oof-exp")[1].split("_")[0]
    else:
        # Fail loudly instead of silently reusing the previous file's id.
        raise ValueError(f"Cannot derive an experiment id from file name: {f!r}")
    exps[f] = exp
    exps[f.split("/")[-1]] = exp
def _rename_pred_columns(frame, exp):
    """Return *frame* with its three prediction columns renamed to ``*_prob_{exp}``."""
    targets = [
        f"winner_model_a_prob_{exp}",
        f"winner_model_b_prob_{exp}",
        f"winner_tie_prob_{exp}",
    ]
    # The prediction files use one of three column-naming schemes.
    if "0" in frame.columns:
        sources = ["0", "1", "2"]
    elif "winner_tie_prob" not in frame.columns:
        sources = ["winner_model_a", "winner_model_b", "winner_tie"]
    else:
        sources = ["winner_model_a_prob", "winner_model_b_prob", "winner_tie_prob"]
    return frame.rename(columns=dict(zip(sources, targets)))


def make_df():
    """Load all prediction files, join them with the training data and score them.

    Reads every parquet file listed in the module-level ``files`` plus
    ``train_filepath``. For each experiment the three class scores are turned
    into probabilities when they look like logits, and a per-row loss
    (negative log of the probability given to the true class) is stored in
    ``loss_{exp}``. Text-length columns and cross-experiment averages are added.

    Returns:
        A tuple ``(joined, id2texts)``. ``joined`` is the merged DataFrame
        without the ``prompt`` / ``response_a`` / ``response_b`` columns;
        ``id2texts`` maps each ``id`` to its ``(prompt, response_a, response_b)``.
    """
    data = {}
    for path in files:
        exp = exps[path]
        frame = _rename_pred_columns(pd.read_parquet(path), exp)
        pred_cols = [
            f"winner_model_a_prob_{exp}",
            f"winner_model_b_prob_{exp}",
            f"winner_tie_prob_{exp}",
        ]
        # Reset the index after sorting: pd.concat(axis=1) aligns on index
        # labels, so without this the sort by id would not line the rows up.
        data[path] = (
            frame.sort_values("id")[["id"] + pred_cols].reset_index(drop=True)
        )

    id_col = data[files[0]]["id"]
    joined = pd.concat([x.drop("id", axis=1) for x in data.values()], axis=1)
    joined["id"] = id_col

    tdf = pd.read_parquet(train_filepath)
    joined = joined.merge(tdf, on="id", how="left")

    joined["winner"] = ""
    joined.loc[joined["winner_model_a"] == 1, "winner"] = "A"
    joined.loc[joined["winner_model_b"] == 1, "winner"] = "B"
    joined.loc[joined["winner_tie"] == 1, "winner"] = "Tie"

    # Index of the true class per row; identical for every experiment, so it
    # is computed once instead of inside the loop.
    gt_idxs = joined[
        ["winner_model_a", "winner_model_b", "winner_tie"]
    ].values.argsort()[:, -1]
    row_idxs = torch.arange(len(joined))

    # `exps` holds each experiment under two keys (path and bare file name);
    # de-duplicate so every experiment is scored only once.
    for exp in dict.fromkeys(exps.values()):
        pred_cols = [
            f"winner_model_a_prob_{exp}",
            f"winner_model_b_prob_{exp}",
            f"winner_tie_prob_{exp}",
        ]
        raw_scores = joined[pred_cols].values
        scores = torch.tensor(raw_scores)
        # Heuristic: rows summing to well over 1 mean the file stores logits.
        if raw_scores.sum(axis=-1).max() > 1.1:
            scores = scores.softmax(-1)
        joined[pred_cols] = scores.numpy()

        true_class_prob = scores[row_idxs, gt_idxs]
        # BCE against a target of 1 is -log(p); ones_like keeps the target
        # dtype equal to the prediction dtype (float32 or float64).
        joined[f"loss_{exp}"] = torch.nn.functional.binary_cross_entropy(
            true_class_prob, torch.ones_like(true_class_prob), reduction="none"
        ).numpy()

    joined["prompt_length"] = [len(x) for x in joined["prompt"]]
    joined["response_a_length"] = [len(x) for x in joined["response_a"]]
    joined["response_b_length"] = [len(x) for x in joined["response_b"]]
    joined["total_length"] = (
        joined["prompt_length"]
        + joined["response_a_length"]
        + joined["response_b_length"]
    )

    # Averages across experiments (computed before "avg_loss" exists, so it
    # is not averaged into itself).
    loss_cols = [x for x in joined.columns if "loss" in x]
    joined["avg_loss"] = joined[loss_cols].mean(axis=1)
    for target, marker in (
        ("avg_winner_model_a", "winner_model_a_prob"),
        ("avg_winner_model_b", "winner_model_b_prob"),
        ("avg_winner_tie", "winner_tie_prob"),
    ):
        joined[target] = joined[[x for x in joined.columns if marker in x]].mean(
            axis=1
        )

    # Down-cast the per-experiment probabilities and all losses (now including
    # "avg_loss") to float16.
    prob_cols = [x for x in joined.columns if "prob" in x]
    loss_cols = [x for x in joined.columns if "loss" in x]
    joined[prob_cols + loss_cols] = joined[prob_cols + loss_cols].astype("float16")

    # Keep the long texts out of the frame; they are looked up by id on demand.
    id2texts = {
        i: (p, a, b)
        for i, p, a, b in zip(
            joined["id"], joined["prompt"], joined["response_a"], joined["response_b"]
        )
    }
    joined = joined.drop(columns=["prompt", "response_a", "response_b"])
    return joined, id2texts
# Built once at import time; filter_df reads MAIN_DF and show_chats reads id2texts.
MAIN_DF, id2texts = make_df()
def filter_df(lower_limit, upper_limit, file, all_check):
    """Select the rows of ``MAIN_DF`` whose loss lies strictly between two limits.

    The loss column is ``avg_loss`` when *all_check* is set or no *file* is
    chosen, otherwise the ``loss_{exp}`` column of the chosen file.

    Returns:
        ``(0, frame)``: the index to show first and the matching rows sorted by
        descending loss with a fresh 0..n-1 index.
    """
    use_average = all_check or file is None or file == ""
    loss_col = "avg_loss" if use_average else f"loss_{exps[file]}"

    losses = MAIN_DF[loss_col]
    in_range = (losses > lower_limit) & (losses < upper_limit)

    ordered = MAIN_DF[in_range].sort_values(loss_col, ascending=False)
    return 0, ordered.reset_index(drop=True)
def make_chat(prompt, response, side, label):
    """Build the chat transcript for one side of a comparison.

    Args:
        prompt: JSON-encoded list of user turns.
        response: JSON-encoded list of assistant turns; entries may be null.
        side: Which side this transcript belongs to (compared against *label*).
        label: The winning side; ``2`` or ``"Tie"`` marks a tie.

    Returns:
        A list of ``ChatMessage`` objects alternating user / assistant, each
        prefixed with a winner / tie / loser header line. Turns are paired with
        ``zip``, so extra turns on the longer list are dropped.
    """
    prompts = json.loads(prompt)
    responses = json.loads(response)

    if side == label:
        header = "✅ Winner ✅"
    elif label == 2 or label == "Tie":
        header = "🟨 Tie 🟨"
    else:
        header = "❌ Loser ❌"

    chat = []
    for p, r in zip(prompts, responses):
        # Null turns would break the string concatenation; show them as empty.
        if p is None:
            p = ""
        if r is None:
            r = ""
        chat.append(ChatMessage(role="user", content=header + "\n" + p))
        chat.append(ChatMessage(role="assistant", content=header + "\n" + r))
    return chat
def show_chats(idx, df, file, all_check):
    """Return everything needed to display row *idx* of the filtered frame.

    Args:
        idx: Row position to show; clamped into ``[0, len(df) - 1]``.
        df: The filtered frame produced by ``filter_df``.
        file: File name selected in the dropdown (a key of ``exps``), or empty.
        all_check: When true, averaged scores / loss are shown instead of the
            ones from *file*.

    Returns:
        A 7-tuple ``(chat_a, chat_b, label, score_a, score_b, score_tie, loss)``.
        All seven entries are ``None`` when there is nothing to show (no index
        or an empty frame), so the arity always matches the wired outputs.
    """
    if idx is None or df is None or df.shape[0] == 0:
        return (None,) * 7

    # The number widget may deliver a float; iloc needs an int.
    idx = min(max(int(idx), 0), df.shape[0] - 1)

    row = df.iloc[idx]
    label = row["winner"]
    p, a, b = id2texts[row["id"]]

    chat_a = make_chat(p, a, "A", label)
    chat_b = make_chat(p, b, "B", label)

    if all_check or file is None or file == "":
        score_cols = ["avg_winner_model_a", "avg_winner_model_b", "avg_winner_tie"]
        loss_col = "avg_loss"
    else:
        exp = exps[file]
        score_cols = [
            f"winner_model_a_prob_{exp}",
            f"winner_model_b_prob_{exp}",
            f"winner_tie_prob_{exp}",
        ]
        loss_col = f"loss_{exp}"

    scores = row[score_cols].to_list()
    loss = row[loss_col]
    return chat_a, chat_b, label, *scores, loss
def show_split(text):
    """Create one row per character of *text*: a textbox plus a "Clear" button.

    For empty input a "No Input Provided" heading is created instead. The
    function only instantiates Gradio components and returns nothing
    (presumably meant to run inside a Blocks / render context -- confirm).
    """
    if len(text) == 0:
        gr.Markdown("## No Input Provided")
        return

    for letter in text:
        with gr.Row():
            box = gr.Textbox(letter)
            clear_btn = gr.Button("Clear")
            # Clicking replaces the textbox content with an empty string.
            clear_btn.click(lambda: gr.Textbox(value=""), None, box)
def update_plot(df, file, all_check):
    """Draw a 50-bin histogram of the selected loss column.

    Args:
        df: Frame holding the loss columns (e.g. the filtered frame).
        file: File name selected in the dropdown (a key of ``exps``), or empty.
        all_check: When true, ``avg_loss`` is plotted instead of the loss of
            *file*.

    Returns:
        A new matplotlib ``Figure`` containing the histogram.
    """
    if all_check or file is None or file == "":
        loss_col = "avg_loss"
    else:
        loss_col = f"loss_{exps[file]}"

    # Use a dedicated figure so repeated calls do not pile histograms onto
    # pyplot's implicit current figure.
    fig, ax = plt.subplots()
    ax.hist(df[loss_col], bins=50)
    ax.set_xlabel(loss_col)
    ax.set_ylabel("samples")
    # Unregister from pyplot so figures do not accumulate across callbacks;
    # the Figure object itself stays usable for rendering.
    plt.close(fig)
    return fig
# UI layout and event wiring for the OOF viewer.
with gr.Blocks() as demo:
    gr.Markdown(
        """
# OOF Visualization
This is a demo for visualizing the out-of-fold predictions of a model.
It currently shows the predictions for the outputs of [this notebook](https://www.kaggle.com/code/kcotton21/lmsys-preds/notebook).
"""
    )
    with gr.Row():
        with gr.Column():
            file = gr.Dropdown(label="File", choices=[x.split("/")[-1] for x in files])
        with gr.Column():
            all_check = gr.Checkbox(label="Use average loss of all files", value=True)
    with gr.Row():
        lower_limit = gr.Slider(
            label="Show samples with loss > this value", minimum=0, maximum=5, value=1
        )
        upper_limit = gr.Slider(
            label="Show samples with loss < this value", minimum=0, maximum=5, value=5
        )
    # id_ = gr.Number(label="ID")
    idx = gr.Number(visible=True)
    # Invisible holder for the currently filtered rows.
    hidden_df = gr.Dataframe(visible=False)
    with gr.Row():
        correct_label = gr.Textbox(label="Correct Label", interactive=False)
        score_a = gr.Textbox(label="Model A Score", interactive=False)
        score_b = gr.Textbox(label="Model B Score", interactive=False)
        score_tie = gr.Textbox(label="Tie Score", interactive=False)
        loss = gr.Textbox(label="Loss", interactive=False)
    with gr.Row():
        with gr.Column():
            prev_btn = gr.Button(value="Previous")
        with gr.Column():
            next_btn = gr.Button(value="Next")
    with gr.Row():
        with gr.Column():
            chat_a = gr.Chatbot(label="Model A", type="messages", height=1000)
        with gr.Column():
            chat_b = gr.Chatbot(label="Model B", type="messages", height=1000)
    # with gr.Row():
    # plot = gr.Plot()

    filter_inputs = [lower_limit, upper_limit, file, all_check]
    chat_inputs = [idx, hidden_df, file, all_check]
    chat_outputs = [chat_a, chat_b, correct_label, score_a, score_b, score_tie, loss]

    def _show_filtered(idx_value, frame, file_value, use_average):
        """Refresh the view after a re-filter; blank it when nothing matched."""
        if idx_value is None or frame is None or len(frame) == 0:
            return (None,) * len(chat_outputs)
        return show_chats(idx_value, frame, file_value, use_average)

    # Re-filter whenever ANY of the filter inputs changes (the file choice and
    # the "average" checkbox select the loss column, so they matter too), then
    # refresh the view: filter_df resets idx to 0, which fires no idx.change
    # event when idx already was 0.
    for control in (lower_limit, upper_limit, file, all_check):
        control.change(
            filter_df,
            inputs=filter_inputs,
            outputs=[idx, hidden_df],
        ).then(_show_filtered, inputs=chat_inputs, outputs=chat_outputs)

    # hidden_df.change(update_plot, [hidden_df, file, all_check], plot)
    idx.change(
        show_chats,
        inputs=chat_inputs,
        outputs=chat_outputs,
    )
    # An emptied number box arrives as None; treat it as 0.
    prev_btn.click(lambda x: max(0, (x or 0) - 1), inputs=idx, outputs=idx)
    next_btn.click(lambda x: (x or 0) + 1, inputs=idx, outputs=idx)

if __name__ == "__main__":
    demo.launch(debug=True)