# cmpatino, commit b7b95ef: Include ROC-AUC table and improve 2D plots
import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
from datasets import load_dataset
import histos
# Human-readable class names for the integer codes stored in the "target" column.
_TARGET_NAMES = {0: "spin-OFF", 1: "spin-ON"}

# The dataset is loaded once, when the module is imported, and kept as a pandas
# DataFrame that the plotting callback and the ROC-AUC table both read from.
dataset = load_dataset("cmpatino/optimal_observables", "train")
dataset_df = dataset["train"].to_pandas()
dataset_df["target"] = dataset_df["target"].map(_TARGET_NAMES)
def get_roc_auc_scores(pos_samples, neg_samples):
    """Return the ROC AUC obtained when the raw sample values are used as scores.

    The group with the larger mean is labelled 1 and the other group 0, so a
    feature whose values tend to be lower for ``pos_samples`` is scored in the
    direction in which it separates the two groups.

    Args:
        pos_samples: Values of one feature for the first group.
        neg_samples: Values of the same feature for the second group.

    Returns:
        The value returned by ``sklearn.metrics.roc_auc_score``.
    """
    scores = np.concatenate([pos_samples, neg_samples], axis=0)

    # Decide which group plays the role of the positive class.
    pos_is_higher = pos_samples.mean() >= neg_samples.mean()
    if pos_is_higher:
        label_parts = [np.ones_like(pos_samples), np.zeros_like(neg_samples)]
    else:
        label_parts = [np.zeros_like(pos_samples), np.ones_like(neg_samples)]

    labels = np.concatenate(label_parts, axis=0)
    return metrics.roc_auc_score(labels, scores)
def get_plot(features, n_bins):
    """Build the figure for the currently selected feature(s).

    * One feature: the spin-ON and spin-OFF distributions are drawn with
      ``histos.ratio_hist`` (spin-OFF is the reference) and the ROC AUC of the
      feature is shown in the title.
    * Two features: one 2D histogram per class, side by side, both using the
      same bin range so the panels are directly comparable.

    Args:
        features: Selected column name(s) of ``dataset_df``. A bare string is
            treated as a single selection and ``None`` as an empty selection.
        n_bins: Number of bins forwarded to the histogram routines.

    Returns:
        The figure to display, or ``None`` when the number of selected
        features is neither 1 nor 2.
    """
    # Normalise the selection so the length checks below count features, not
    # the characters of a single feature name.
    if features is None:
        features = []
    elif isinstance(features, str):
        features = [features]
    else:
        features = list(features)

    if len(features) not in (1, 2):
        return None

    # dataset_df is only read here, so no defensive copy of the frame is needed.
    is_spin_on = dataset_df["target"] == "spin-ON"
    is_spin_off = dataset_df["target"] == "spin-OFF"

    if len(features) == 1:
        feature = features[0]
        pos_samples = dataset_df.loc[is_spin_on, feature]
        neg_samples = dataset_df.loc[is_spin_off, feature]
        roc_auc_score = get_roc_auc_scores(pos_samples, neg_samples)
        labels = ["spin-ON", "spin-OFF"]
        # ratio_hist supplies the figure itself; creating another one with
        # plt.subplots() here would leave an unused figure registered in
        # pyplot on every call.
        return histos.ratio_hist(
            processes_q=[pos_samples, neg_samples],
            hist_labels=labels,
            reference_label=labels[1],
            n_bins=n_bins,
            hist_range=None,
            title=f"{feature} (ROC AUC: {roc_auc_score:.3f})",
        )

    x_feature, y_feature = features
    pos_samples = dataset_df.loc[is_spin_on, features]
    neg_samples = dataset_df.loc[is_spin_off, features]

    # Common (min, max) per axis over both classes, so both panels are binned
    # on the same range.
    x_lims = (
        min(pos_samples[x_feature].min(), neg_samples[x_feature].min()),
        max(pos_samples[x_feature].max(), neg_samples[x_feature].max()),
    )
    y_lims = (
        min(pos_samples[y_feature].min(), neg_samples[y_feature].min()),
        max(pos_samples[y_feature].max(), neg_samples[y_feature].max()),
    )
    ranges = (x_lims, y_lims)

    fig, ax = plt.subplots(ncols=2, figsize=(12, 6))
    sns.histplot(
        pos_samples,
        x=x_feature,
        y=y_feature,
        bins=n_bins,
        ax=ax[0],
        color="C0",
        binrange=ranges,
    )
    sns.histplot(
        neg_samples,
        x=x_feature,
        y=y_feature,
        bins=n_bins,
        ax=ax[1],
        color="C1",
        binrange=ranges,
    )
    ax[0].set_title("spin-ON")
    ax[1].set_title("spin-OFF")
    return fig
# Gradio UI: a "Plots" tab driven by get_plot and a static "ROC-AUC Table" tab.
# NOTE(review): the nesting below is reconstructed from the statement order;
# confirm that the plot belongs under the column and the event wiring inside
# the Blocks context.
with gr.Blocks() as demo:
    with gr.Tab("Plots"):
        with gr.Column():
            with gr.Row():
                features = gr.Dropdown(
                    label="Feature",
                    choices=dataset_df.columns.to_list(),
                    value="m_tt",
                    multiselect=True,
                )
                n_bins = gr.Slider(
                    label="Number of Bins for Histogram",
                    minimum=10,
                    maximum=100,
                    step=10,
                    value=10,
                )
            feature_plot = gr.Plot(label="Feature's Plot")

    with gr.Tab("ROC-AUC Table"):
        # Split the frame by class once; every feature reuses the same split.
        spin_on_df = dataset_df[dataset_df["target"] == "spin-ON"]
        spin_off_df = dataset_df[dataset_df["target"] == "spin-OFF"]

        # One [feature, score] row per column, skipping the label and the
        # weight column.
        roc_auc_values = [
            [column, get_roc_auc_scores(spin_on_df[column], spin_off_df[column])]
            for column in dataset_df.columns.to_list()
            if column not in ("target", "reco_weight")
        ]
        roc_auc_table = gr.Dataframe(
            label="ROC-AUC Table",
            headers=["Feature", "ROC-AUC"],
            value=roc_auc_values,
        )

    # Redraw the plot when either control changes and once on page load.
    for register in (features.change, n_bins.change, demo.load):
        register(
            get_plot,
            [features, n_bins],
            feature_plot,
            queue=False,
        )


if __name__ == "__main__":
    demo.launch()