Spaces:
Runtime error
Runtime error
# --- | |
# jupyter: | |
# jupytext: | |
# text_representation: | |
# extension: .py | |
# format_name: light | |
# format_version: '1.5' | |
# jupytext_version: 1.16.2 | |
# kernelspec: | |
# display_name: temps | |
# language: python | |
# name: temps | |
# --- | |
# # QUALITY CUTS | |
# %load_ext autoreload | |
# %autoreload 2 | |
import pandas as pd | |
import numpy as np | |
import os | |
import torch | |
from scipy import stats | |
from pathlib import Path | |
#matplotlib settings | |
from matplotlib import rcParams | |
import matplotlib.pyplot as plt | |
rcParams["mathtext.fontset"] = "stix" | |
rcParams["font.family"] = "STIXGeneral" | |
from temps.archive import Archive | |
from temps.utils import nmad, caluclate_eta | |
from temps.temps_arch import EncoderPhotometry, MeasureZ | |
from temps.temps import TempsModule | |
# ### LOAD DATA (ONLY SPECZ)

# Directory containing the photometric catalogues, and directory holding the
# trained model weights.
parent_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
modules_dir = Path('../data/models/')

# Flag values passed to the archive as `flags_kept`; their meaning is defined
# by the catalogue / Archive class, not in this script.
flags_kept = [
    1., 1.1, 1.4, 1.5,
    2, 2.1, 2.4, 2.5,
    3., 3.1, 3.4, 3.5,
    4.,
    9., 9.1, 9.3, 9.4, 9.5,
    11.1, 11.5,
    12.1, 12.5,
    13., 13.1, 13.5,
    14,
]
photoz_archive = Archive(
    path=parent_dir,
    only_zspec=True,
    flags_kept=flags_kept,
)

# Unpack the four arrays returned for the test sample.
(
    f_test_specz,
    ferr_test_specz,
    specz_test,
    VIS_mag_test,
) = photoz_archive.get_testing_data()
# ### LOAD TRAINED MODELS AND EVALUATE PDF OF RANDOM EXAMPLES

# For each trained model ('z', 'L15', 'DA') load the two saved state dicts,
# evaluate the test photometry and keep:
#   * dfs[lab] -- DataFrame with columns z, odds, ztarget, zwerr (NaN rows dropped)
#   * pzs[i]   -- the p(z) array returned by get_pz for the i-th label
# The p(z) arrays are collected in a list and stacked afterwards, so the
# script no longer depends on a hard-coded (3, 11016, 1000) buffer and keeps
# working when the test sample changes size.
dfs = {}
pz_per_model = []
cpu_device = torch.device('cpu')
for lab in ['z', 'L15', 'DA']:
    nn_features = EncoderPhotometry()
    nn_features.load_state_dict(
        torch.load(modules_dir / f'modelF_{lab}.pt', map_location=cpu_device)
    )
    nn_z = MeasureZ(num_gauss=6)
    nn_z.load_state_dict(
        torch.load(modules_dir / f'modelZ_{lab}.pt', map_location=cpu_device)
    )
    temps_module = TempsModule(nn_features, nn_z)

    z, pz, odds = temps_module.get_pz(
        input_data=torch.Tensor(f_test_specz),
        return_pz=True,
    )
    # Same float64 conversion the previous in-place assignment into a
    # np.zeros buffer performed.
    pz_per_model.append(np.asarray(pz, dtype=float))

    # One row per test object: point estimate, odds and target redshift.
    df = pd.DataFrame(
        np.c_[z, odds, specz_test],
        columns=['z', 'odds', 'ztarget'],
    )
    # Redshift residual scaled by (1 + ztarget).
    df['zwerr'] = (df.z - df.ztarget) / (1 + df.ztarget)
    # Drop rows containing NaN; the surviving rows keep their original labels,
    # which are the row positions in the corresponding p(z) array.
    df = df.dropna()
    dfs[lab] = df

# Shape: (number of models, number of test objects, p(z) grid length).
pzs = np.stack(pz_per_model)
# ### STATS

# +
# Half-widths (in redshift) of the window around the point estimate over
# which p(z) is summed. A coarser grid tried earlier was
# [0, 0.01, 0.03, 0.05, 0.07, 0.1, 0.13, 0.15].
odds_test = np.arange(0, 0.15, 0.01)
df = dfs['DA'].copy()
# NOTE(review): assumes get_pz samples p(z) on this grid -- confirm against TempsModule.
zgrid = np.linspace(0, 5, 1000)
# dfs['DA'] went through dropna() but kept its original row labels, which are
# the row positions in pzs[2]. Select the matching p(z) rows so that pz and df
# stay aligned row by row even when some objects were dropped.
pz = pzs[2][df.index.to_numpy()]
# -

z_point = df.z.values

# Grid index closest to each point estimate and index of each p(z) maximum.
# Neither is used further down in this file; both are computed once here
# instead of on every loop iteration.
diff_matrix = np.abs(z_point[:, None] - zgrid[None, :])
idx_peak = np.argmax(pz, 1)
idx = np.argmin(diff_matrix, 1)

# odds_cat[i, j]: sum of p(z) for object j between the grid points closest to
# z - odds_test[i] and z + odds_test[i] (both ends included).
odds_cat = np.zeros(shape=(len(odds_test), len(df)))
for ii, odds_ in enumerate(odds_test):
    diff_matrix_upper = np.abs((z_point + odds_)[:, None] - zgrid[None, :])
    diff_matrix_lower = np.abs((z_point - odds_)[:, None] - zgrid[None, :])
    idx_upper = np.argmin(diff_matrix_upper, 1)
    idx_lower = np.argmin(diff_matrix_lower, 1)

    odds = [
        pz[jj, lower:(upper + 1)].sum()
        for jj, (lower, upper) in enumerate(zip(idx_lower, idx_upper))
    ]
    odds_cat[ii] = np.array(odds)

# Reuse df's index so the column-wise concat pairs every object with its own
# windowed sums instead of aligning against a fresh 0..N-1 index.
odds_df = pd.DataFrame(
    odds_cat.T,
    columns=[f'odds_{x}' for x in odds_test],
    index=df.index,
)
df = pd.concat([df, odds_df], axis=1)
# ## statistics on ODDS

# +
# For every quantile level c, keep the objects whose `odds` exceeds the
# c-quantile and record the statistics of that subsample.
# NOTE(review): `complenteness` is not defined anywhere in the visible part of
# this script -- it has to be provided before this cell runs; confirm its values.
scatter_odds, eta_odds, xlab_odds, oddsmean = [], [], [], []
for c in complenteness:
    percentile_cutoff = df['odds'].quantile(c)
    df_bin = df[df.odds > percentile_cutoff]

    # Stored as (1 - c) * 100 and later used as the 'completeness' column.
    xlab_odds.append((1 - c) * 100)
    oddsmean.append(np.mean(df_bin.odds))
    scatter_odds.append(nmad(df_bin.zwerr))
    eta_odds.append(caluclate_eta(df_bin))

    # At the level that rounds to 0.3, repeat the same cut for every
    # windowed-odds column. `scatters_odds` / `etas_odds` only exist if some
    # level satisfies this condition.
    if np.round(c, 1) == 0.3:
        percentiles_cutoff = [
            df[f'odds_{col}'].quantile(c) for col in odds_test
        ]
        selections = [
            df[df[f'odds_{col}'] > cut]
            for col, cut in zip(odds_test, percentiles_cutoff)
        ]
        scatters_odds = [nmad(selected.zwerr) for selected in selections]
        etas_odds = [caluclate_eta(selected) for selected in selections]
# -

# One row per quantile level.
df_completeness = pd.DataFrame(
    np.c_[xlab_odds, scatter_odds, eta_odds],
    columns=['completeness', 'sigma_odds', 'eta_odds'],
)
# ## PLOTS

# +
def _plot_nmad_eta(x, nmad_values, eta_values, xlabel, xticks, legend_anchor):
    """Draw NMAD and eta against ``x`` on a figure with two y-axes.

    The two figures of this notebook differ only in their data, x label,
    x ticks and legend position, so they share this helper.

    Parameters
    ----------
    x : array-like
        Common abscissa of both curves.
    nmad_values : array-like
        Values drawn on the left axis (crimson, solid line).
    eta_values : array-like
        Values drawn on the right axis (navy, dashed line).
    xlabel : str
        Label of the x axis.
    xticks : array-like
        Tick positions of the x axis.
    legend_anchor : sequence of float
        Forwarded to ``fig.legend`` as ``bbox_to_anchor``.

    Returns
    -------
    tuple
        ``(fig, ax_left, ax_right)``.
    """
    fig, ax_left = plt.subplots(figsize=(7, 5))

    # Left y-axis: NMAD.
    left_color = 'crimson'
    ax_left.plot(
        x,
        nmad_values,
        marker='.',
        color=left_color,
        label=r'NMAD',
        ls='-',
        alpha=0.5,
    )
    ax_left.set_xlabel(xlabel, fontsize=16)
    ax_left.set_ylabel(r'NMAD [$\Delta z$]', color=left_color, fontsize=16)
    ax_left.tick_params(axis='x', labelsize=14)
    ax_left.tick_params(axis='y', which='major', labelsize=14, width=2.5,
                        length=3, labelcolor=left_color)
    ax_left.set_xticks(xticks)

    # Right y-axis sharing the same x-axis: eta.
    ax_right = ax_left.twinx()
    right_color = 'navy'
    ax_right.plot(
        x,
        eta_values,
        marker='.',
        color=right_color,
        label=r'$\eta$ [%]',
        ls='--',
        alpha=0.5,
    )
    ax_right.set_ylabel(r'$\eta$ [%]', color=right_color, fontsize=16)

    # Same scientific-notation setting on both y-axes so they can be compared.
    ax_left.yaxis.get_major_formatter().set_powerlimits((0, 0))
    ax_right.yaxis.get_major_formatter().set_powerlimits((0, 0))

    ax_right.tick_params(axis='x', labelsize=14)
    ax_right.tick_params(axis='y', which='major', labelsize=14, width=2.5,
                         length=3, labelcolor=right_color)

    # Layout first, then one legend collecting the labelled lines of both axes.
    fig.tight_layout()
    fig.legend(bbox_to_anchor=legend_anchor, fontsize=14)
    return fig, ax_left, ax_right
# -

# +
# NMAD and eta as a function of the completeness column.
fig, ax1, ax2 = _plot_nmad_eta(
    df_completeness.completeness,
    df_completeness.sigma_odds,
    df_completeness.eta_odds,
    xlabel='Completeness',
    xticks=np.arange(5, 101, 10),
    legend_anchor=[-0.18, 0.75, 0.5, 0.2],
)
#plt.savefig('Flag_nmad_eta_sigma_comparison.pdf', bbox_inches='tight')
plt.show()

# +
# NMAD and eta as a function of the ODDS integration half-width.
fig, ax1, ax2 = _plot_nmad_eta(
    odds_test,
    scatters_odds,
    etas_odds,
    xlabel=r'$\delta z$ (ODDS)',
    xticks=np.arange(0, 0.16, 0.02),
    legend_anchor=[0.10, 0.75, 0.5, 0.2],
)
#plt.savefig('ODDS_study.pdf', bbox_inches='tight')
plt.show()
# -