lauracabayol committed on
Commit
94ab0b0
·
1 Parent(s): 1ef970d

notebook to generate tables

Browse files
Files changed (1) hide show
  1. temps/utils.py +55 -0
temps/utils.py CHANGED
@@ -166,3 +166,58 @@ def compute_kernel(x, y, kernel_type='rbf', kernel_mul=2.0, kernel_num=5):
166
 
167
  return kernel_matrix
168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  return kernel_matrix
168
 
169
+
170
def select_cut(df,
               completenss_lim=None,
               nmad_lim=None,
               outliers_lim=None,
               return_df=False):
    """Pick a ``zflag`` threshold that meets a single quality requirement.

    Candidate thresholds are the 0%, 10%, ..., 90% quantiles of ``df.zflag``.
    For each candidate, the objects with ``zflag`` strictly above it are kept
    and the following statistics are tabulated:

    * ``Nobj``: number of objects kept,
    * ``completeness``: percentage of the input sample kept,
    * ``nmad``: ``nmad(zwerr)`` of the kept objects,
    * ``eta``: percentage of kept objects with ``|zwerr| > 0.15``.

    The selected cut is the first (lowest) threshold whose statistic is less
    than or equal to the requested limit.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``zflag`` and ``zwerr``.
    completenss_lim : float, optional
        Upper limit on ``completeness`` (in percent).
    nmad_lim : float, optional
        Upper limit on ``nmad``.
    outliers_lim : float, optional
        Upper limit on ``eta`` (in percent).
    return_df : bool, optional
        If true, also return the rows of ``df`` passing the selected cut.

    Returns
    -------
    tuple
        ``(flagcut, dfcuts)``, or ``(df_cut, flagcut, dfcuts)`` when
        ``return_df`` is true. ``dfcuts`` is the table of all candidate cuts.

    Raises
    ------
    ValueError
        If none or more than one limit is given, or if ``df`` is empty.
    IndexError
        If no candidate cut satisfies the requested limit.
    """
    # Map each statistic column to (requested limit, label used in the log line).
    criteria = {
        'completeness': (completenss_lim, 'completeness'),
        'nmad': (nmad_lim, 'nmad'),
        'eta': (outliers_lim, 'outliers'),
    }
    requested = [
        (column, limit, label)
        for column, (limit, label) in criteria.items()
        if limit is not None
    ]
    if not requested:
        raise ValueError("Select at least one cut")
    if len(requested) > 1:
        raise ValueError("Select only one cut at a time")
    column, limit, label = requested[0]

    n_total = len(df)
    if n_total == 0:
        # Without this guard the completeness ratio divides by zero.
        raise ValueError("Cannot select a cut on an empty DataFrame")

    bin_edges = stats.mstats.mquantiles(df.zflag, np.arange(0, 1.01, 0.1))

    flagcut, nobj, cmptnss, scatter, eta = [], [], [], [], []
    # The last edge (100% quantile) is never used as a threshold.
    for edge_min in bin_edges[:-1]:
        df_bin = df[df.zflag > edge_min]
        n_bin = len(df_bin)
        if n_bin == 0:
            # Ties in zflag can leave nothing above an edge; such a cut has
            # no defined statistics, so it is not a candidate.
            continue

        flagcut.append(edge_min)
        nobj.append(n_bin)
        cmptnss.append(np.round(n_bin / n_total, 2) * 100)
        scatter.append(nmad(df_bin.zwerr))
        eta.append(len(df_bin[np.abs(df_bin.zwerr) > 0.15]) / n_bin * 100)

    dfcuts = pd.DataFrame(
        {
            'flagcut': np.round(flagcut, 5),
            'Nobj': np.round(nobj, 1),
            'completeness': np.round(cmptnss, 1),
            'nmad': np.round(scatter, 3),
            'eta': np.round(eta, 2),
        },
        dtype=float,
    )

    print(f'Selecting cut based on {label}')
    candidates = dfcuts[dfcuts[column] <= limit]
    if candidates.empty:
        # Same exception type as the bare ``.iloc[0]`` would raise, but with
        # a message that says what actually went wrong.
        raise IndexError(f"No cut satisfies {column} <= {limit}")
    selected_cut = candidates.iloc[0]

    print(f"This cut provides completeness of {selected_cut['completeness']}, nmad={selected_cut['nmad']} and eta={selected_cut['eta']}")

    if return_df:
        df_cut = df[df.zflag > selected_cut['flagcut']]
        return df_cut, selected_cut['flagcut'], dfcuts
    return selected_cut['flagcut'], dfcuts
222
+
223
+