Setup first version
Browse files- .gitignore +2 -0
- app.py +84 -0
- histos.py +194 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
.vscode
|
2 |
+
__pycache__
|
app.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import matplotlib.pyplot as plt
|
3 |
+
import seaborn as sns
|
4 |
+
|
5 |
+
from datasets import load_dataset
|
6 |
+
|
7 |
+
import histos
|
8 |
+
|
9 |
+
|
10 |
+
# Display names for the integer-encoded "target" column. Values missing from
# this mapping become NaN after ``Series.map``.
_TARGET_NAMES = {0: "spin-OFF", 1: "spin-ON"}

# Load the dataset once at import time and keep its "train" split as a pandas
# DataFrame; the plotting callback reads from this module-level frame.
# NOTE(review): the second positional argument of ``load_dataset`` is normally
# the configuration name rather than the split -- confirm "train" is intended.
dataset = load_dataset("cmpatino/optimal_observables", "train")
dataset_df = dataset["train"].to_pandas()
dataset_df["target"] = dataset_df["target"].map(_TARGET_NAMES)
|
13 |
+
|
14 |
+
|
15 |
+
def get_plot(features, n_bins):
    """Build the figure for the currently selected feature(s).

    With one selected feature, the spin-ON and spin-OFF distributions are
    drawn as overlaid histograms with a ratio pad (``histos.ratio_hist``,
    spin-OFF being the reference). With two selected features, a bivariate
    seaborn ``displot`` coloured by ``target`` is returned instead.

    :param features: Selected column name(s) of ``dataset_df``. A bare string
        is treated as a one-element selection.
    :param n_bins: Number of histogram bins (coerced to ``int``).
    :return: The matplotlib figure to display, or ``None`` when the selection
        does not contain exactly one or two features.
    """
    if features is None:
        return None
    if isinstance(features, str):
        # A lone string would otherwise be measured by its character count.
        features = [features]
    n_bins = int(n_bins)

    if len(features) == 1:
        feature = features[0]
        # Read straight from the shared frame: nothing here mutates it, so no
        # per-call copy (and no throwaway ``plt.subplots()`` figure) is needed.
        values = [
            dataset_df[dataset_df["target"] == "spin-ON"][feature],
            dataset_df[dataset_df["target"] == "spin-OFF"][feature],
        ]
        labels = ["spin-ON", "spin-OFF"]
        return histos.ratio_hist(
            processes_q=values,
            hist_labels=labels,
            reference_label=labels[1],
            n_bins=n_bins,
            hist_range=None,
            title=feature,
        )

    if len(features) == 2:
        grid = sns.displot(
            dataset_df,
            x=features[0],
            y=features[1],
            hue="target",
            bins=n_bins,
            height=8,
            aspect=1,
        )
        return grid.fig

    # Zero or more than two features: nothing to draw.
    return None
|
43 |
+
|
44 |
+
|
45 |
+
# Page layout: a row holding the feature selector and the bin slider, with the
# plot underneath.
with gr.Blocks() as demo:
    with gr.Column():
        with gr.Row():
            features = gr.Dropdown(
                label="Feature",
                choices=dataset_df.columns.to_list(),
                value="m_tt",
                multiselect=True,
            )
            n_bins = gr.Slider(
                label="Number of Bins for Histogram",
                minimum=10,
                maximum=100,
                step=10,
                value=10,
            )

        feature_plot = gr.Plot(label="Feature's Plot")

    # The same callback redraws the plot when the selection changes, when the
    # bin count changes, and once when the page loads.
    for register in (features.change, n_bins.change, demo.load):
        register(
            get_plot,
            [features, n_bins],
            feature_plot,
            queue=False,
        )


if __name__ == "__main__":
    demo.launch()
|
histos.py
ADDED
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List, Tuple
|
2 |
+
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
import numpy as np
|
5 |
+
from matplotlib.figure import Figure
|
6 |
+
|
7 |
+
|
8 |
+
def hist_n_particles(q: List[int], label: str) -> Figure:
    """Generate a bar histogram of per-event particle counts.

    Counts are binned into 15 unit-width bins covering ``[0, 15]``. Trailing
    empty bins are trimmed so the x-axis stops at the highest populated
    count; if no bin is populated, all 15 (empty) bins are drawn.

    :param q: Count per event.
    :type q: List[int]
    :param label: Plot title.
    :type label: str
    :return: Figure with the histogram.
    :rtype: Figure
    """

    fig, ax = plt.subplots(nrows=1, figsize=(15, 8))

    counts, edges = np.histogram(q, bins=15, range=(0, 15))
    # ``np.histogram`` returns one more edge than counts; each bar is placed
    # at the left edge of its bin.
    left_edges = edges[:-1]

    # Index of the highest populated bin, or None when every bin is empty.
    last_filled = max(
        (idx for idx, count in enumerate(counts) if count > 0), default=None
    )
    if last_filled is not None:
        # Slice end is exclusive, so +1 keeps the last populated bin itself.
        counts = counts[: last_filled + 1]
        left_edges = left_edges[: last_filled + 1]

    ax.bar(left_edges, counts, width=1, alpha=0.6)
    ax.set_title(label, fontsize=20, y=1.04)
    ax.set_xticks(left_edges)

    return fig
|
35 |
+
|
36 |
+
|
37 |
+
def hist_var(q: List[float], ax: plt.Axes, **kwargs) -> plt.Axes:
    """Draw a step histogram with hatched sqrt(N) bars around each bin count.

    :param q: Values to histogram.
    :type q: List[float]
    :param ax: Axes on which the histogram is drawn.
    :type ax: plt.Axes
    :param kwargs: Extra keyword arguments forwarded to ``ax.hist``.
    :return: The axes that was passed in, with the histogram added.
    :rtype: plt.Axes
    """

    counts, bin_edges, _ = ax.hist(
        q, alpha=0.6, histtype="step", align="left", linewidth=4, **kwargs
    )
    sqrt_counts = np.sqrt(counts)
    width = bin_edges[1] - bin_edges[0]

    # One bar reaching sqrt(N) above each count and one reaching sqrt(N)
    # below it, both starting from the count itself.
    for band_height in (sqrt_counts, -sqrt_counts):
        ax.bar(
            x=bin_edges[:-1],
            bottom=counts,
            height=band_height,
            width=width,
            alpha=0.0,
            color="w",
            hatch="/",
        )
    return ax
|
73 |
+
|
74 |
+
|
75 |
+
def ratio_hist(
    processes_q: List[List[float]],
    hist_labels: List[str],
    reference_label: str,
    n_bins: int,
    hist_range: Tuple[int, int],
    title: str,
    figsize=(15, 8),
) -> Figure:
    """Generate histograms with ratio pads.

    The top pad overlays one step histogram per process, each with hatched
    sqrt(N) bars above and below the bin counts. Every process other than the
    reference gets its own pad underneath showing its bin-by-bin ratio to the
    reference, together with a band around 1.0 whose half-height is the ratio
    times the linear sum of both fractional sqrt(N) errors.

    :param processes_q: Quantity for each event and process
    :type processes_q: List[List[float]]
    :param hist_labels: Labels for each process
    :type hist_labels: List[str]
    :param reference_label: Label of process taken as the denominator of ratios
    :type reference_label: str
    :param n_bins: Number of bins for histograms
    :type n_bins: int
    :param hist_range: Range for histogram bins; forwarded unchanged to
        ``Axes.hist`` (so ``None`` is accepted)
    :type hist_range: Tuple[int]
    :param title: Plot title
    :type title: str
    :param figsize: Figure size for output plot
    :type figsize: Tuple[int]
    :return: Figure with histogram and histogram ratio.
    :rtype: Figure
    :raises KeyError: If ``reference_label`` is not one of the histogrammed
        labels.
    """

    # squeeze=False always yields a 2D array of axes, so indexing works even
    # when only one process is given (no ratio pads).
    fig, axes = plt.subplots(
        nrows=len(processes_q),
        ncols=1,
        gridspec_kw={"height_ratios": [3] + [1] * (len(processes_q) - 1)},
        sharex=True,
        figsize=figsize,
        squeeze=False,
    )
    ax = axes[:, 0]
    main_ax = ax[0]

    p_bins = {}
    p_edges = {}
    p_errors = {}
    legend_handles = []
    legend_labels = []
    edges = None
    for p, label in zip(processes_q, hist_labels):
        # The first process defines the bin edges; later processes reuse them
        # so that all histograms share identical binning.
        bins = n_bins if edges is None else edges
        counts, edges, step_patches = main_ax.hist(
            x=p,
            bins=bins,
            range=hist_range,
            fill=False,
            label=label,
            align="left",
            histtype="step",
            linewidth=4,
        )
        p_bins[label] = counts
        p_edges[label] = edges
        p_errors[label] = np.sqrt(counts)
        legend_handles.append(step_patches[0])
        legend_labels.append(label)

    bin_width = edges[1] - edges[0]

    uncertainty_handle = None
    for label in hist_labels:
        upper = main_ax.bar(
            x=p_edges[label][:-1],
            bottom=p_bins[label],
            height=p_errors[label],
            width=bin_width,
            alpha=0.0,
            color="w",
            hatch="/",
        )
        main_ax.bar(
            x=p_edges[label][:-1],
            bottom=p_bins[label],
            height=-p_errors[label],
            width=bin_width,
            alpha=0.0,
            color="w",
            hatch="/",
        )
        # All uncertainty bars share one style; any rectangle can represent
        # them in the legend.
        uncertainty_handle = upper.patches[0]

    main_ax.set_ylabel("Events", fontsize=15)
    main_ax.set_title(title, fontsize=20)
    # Pass explicit handles rather than positional placeholder labels, so the
    # legend does not depend on artist creation order or on how underscore
    # labels are treated.
    if uncertainty_handle is not None:
        legend_handles.append(uncertainty_handle)
        legend_labels.append("Stat. Uncertainty")
    main_ax.legend(legend_handles, legend_labels)

    plot_idx = 1
    ref_bins = p_bins[reference_label]
    ref_edges = p_edges[reference_label]
    # Empty bins give 0/0 or x/0; the resulting NaN/inf values are kept as-is
    # and only the NumPy warnings are silenced.
    with np.errstate(divide="ignore", invalid="ignore"):
        ref_frac_error = p_errors[reference_label] / ref_bins
    for label in hist_labels:
        if label == reference_label:
            continue
        with np.errstate(divide="ignore", invalid="ignore"):
            ratios = p_bins[label] / ref_bins
            # Fractional errors of numerator and reference are added linearly.
            error_ratio = ratios * (
                ref_frac_error + p_errors[label] / p_bins[label]
            )

        ratio_ax = ax[plot_idx]
        # Band drawn symmetrically around a ratio of 1.0.
        for band_height in (error_ratio, -error_ratio):
            ratio_ax.bar(
                bottom=1.0,
                height=band_height,
                x=ref_edges[:-1],
                width=bin_width,
                alpha=0.3,
                color="blue",
            )
        ratio_ax.scatter(ref_edges[:-1], ratios, marker="o", color="black")
        ratio_ax.set_ylabel(f"{label}/{reference_label}")
        plot_idx += 1

    return fig
|