|
import gradio as gr |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
|
|
from sklearn import metrics |
|
from datasets import load_dataset |
|
|
|
import histos |
|
|
|
# Human-readable names for the integer class codes stored in the "target" column.
_TARGET_NAMES = {0: "spin-OFF", 1: "spin-ON"}

# NOTE(review): the second positional argument of `load_dataset` is the
# configuration name, not the split -- confirm that "train" is intended here.
dataset = load_dataset("cmpatino/optimal_observables", "train")

# Work on a pandas copy of the "train" split; the rest of the module filters
# rows by the string labels written below.
dataset_df = dataset["train"].to_pandas()
dataset_df["target"] = dataset_df["target"].map(_TARGET_NAMES)
|
|
|
|
|
def get_roc_auc_scores(pos_samples, neg_samples):
    """Return the ROC AUC obtained when a feature's raw values are the score.

    The group with the larger mean is labelled as the positive class (ties go
    to ``pos_samples``), so a feature that separates the two groups "the wrong
    way round" is scored with the labels swapped.

    Args:
        pos_samples: Feature values of the first group; must provide
            ``.mean()`` and be accepted by ``np.concatenate``.
        neg_samples: Feature values of the second group, same requirements.

    Returns:
        The value returned by ``sklearn.metrics.roc_auc_score``.
    """
    scores = np.concatenate([pos_samples, neg_samples], axis=0)

    # Decide which group plays the role of class 1.
    if pos_samples.mean() >= neg_samples.mean():
        label_parts = [np.ones_like(pos_samples), np.zeros_like(neg_samples)]
    else:
        label_parts = [np.zeros_like(pos_samples), np.ones_like(neg_samples)]

    truth = np.concatenate(label_parts, axis=0)
    return metrics.roc_auc_score(truth, scores)
|
|
|
|
|
def get_plot(features, n_bins):
    """Build the figure for the currently selected feature(s).

    One feature yields the result of ``histos.ratio_hist`` comparing the
    spin-ON and spin-OFF values, titled with the feature's ROC AUC. Two
    features yield a two-panel figure of 2D histograms (spin-ON on the left,
    spin-OFF on the right) binned over the same ranges.

    Args:
        features: Column name(s) of ``dataset_df`` to plot. Normally a list;
            a bare string is treated as a single selection.
        n_bins: Number of histogram bins, forwarded to the plotting calls.

    Returns:
        The figure to display, or ``None`` when nothing is selected or more
        than two features are selected.
    """
    if features is None:
        return None
    if isinstance(features, str):
        # Otherwise a lone name would be measured by its character count.
        features = [features]
    if len(features) not in (1, 2):
        return None

    # Filtering produces new objects and nothing below mutates the data, so
    # the module-level frame is used directly instead of copying it per call.
    spin_on_rows = dataset_df[dataset_df["target"] == "spin-ON"]
    spin_off_rows = dataset_df[dataset_df["target"] == "spin-OFF"]

    if len(features) == 1:
        name = features[0]
        pos_samples = spin_on_rows[name]
        neg_samples = spin_off_rows[name]
        roc_auc_score = get_roc_auc_scores(pos_samples, neg_samples)
        labels = ["spin-ON", "spin-OFF"]

        # `histos.ratio_hist` is not visible from this file, so a fresh current
        # figure is still put in place while it runs, as before. Previously
        # that figure was simply overwritten and stayed open forever.
        scratch_fig, _ = plt.subplots()
        fig = None
        try:
            fig = histos.ratio_hist(
                processes_q=[pos_samples, neg_samples],
                hist_labels=labels,
                reference_label=labels[1],
                n_bins=n_bins,
                hist_range=None,
                title=f"{name} (ROC AUC: {roc_auc_score:.3f})",
            )
        finally:
            if fig is not scratch_fig:
                # Not the figure being returned: close it so figures do not
                # accumulate in pyplot on every UI update.
                plt.close(scratch_fig)
        return fig

    columns = list(features)
    pos_samples = spin_on_rows[columns]
    neg_samples = spin_off_rows[columns]

    def shared_range(column):
        # Common (min, max) over both classes so the two panels are comparable.
        return (
            min(pos_samples[column].min(), neg_samples[column].min()),
            max(pos_samples[column].max(), neg_samples[column].max()),
        )

    ranges = (shared_range(columns[0]), shared_range(columns[1]))

    fig, ax = plt.subplots(ncols=2, figsize=(12, 6))
    panels = (
        (pos_samples, ax[0], "C0", "spin-ON"),
        (neg_samples, ax[1], "C1", "spin-OFF"),
    )
    for samples, axis, color, panel_title in panels:
        sns.histplot(
            samples,
            x=columns[0],
            y=columns[1],
            bins=n_bins,
            ax=axis,
            color=color,
            binrange=ranges,
        )
        axis.set_title(panel_title)
    return fig
|
|
|
|
|
# Two-tab UI: an interactive plot of one or two selected features, and a
# static table with the ROC AUC of every feature.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been stripped -- confirm that the plot belongs under the
# Column (below the Row of controls) rather than inside the Row.
with gr.Blocks() as demo:
    with gr.Tab("Plots"):
        with gr.Column():
            with gr.Row():
                features = gr.Dropdown(
                    label="Feature",
                    choices=dataset_df.columns.to_list(),
                    value="m_tt",
                    multiselect=True,
                )
                n_bins = gr.Slider(
                    label="Number of Bins for Histogram",
                    minimum=10,
                    maximum=100,
                    step=10,
                    value=10,
                )

            feature_plot = gr.Plot(label="Feature's Plot")

    with gr.Tab("ROC-AUC Table"):
        # The table is computed once, while the UI is being built.
        roc_auc_values = []
        for feature in dataset_df.columns.to_list():
            # The class label and the "reco_weight" column are not scored.
            if feature not in ["target", "reco_weight"]:
                pos_samples = dataset_df[dataset_df["target"] == "spin-ON"][feature]
                neg_samples = dataset_df[dataset_df["target"] == "spin-OFF"][feature]
                roc_auc_score = get_roc_auc_scores(pos_samples, neg_samples)
                roc_auc_values.append([feature, roc_auc_score])

        roc_auc_table = gr.Dataframe(
            value=roc_auc_values,
            headers=["Feature", "ROC-AUC"],
            label="ROC-AUC Table",
        )

    # Redraw the plot when either control changes and once on page load.
    for register in (features.change, n_bins.change, demo.load):
        register(
            get_plot,
            [features, n_bins],
            feature_plot,
            queue=False,
        )


if __name__ == "__main__":
    demo.launch()
|
|