# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: light
#       format_version: '1.5'
#       jupytext_version: 1.16.2
#   kernelspec:
#     display_name: temps
#     language: python
#     name: temps
# ---

# # QUALITY CUTS
#
# Studies how cuts on the ODDS quality parameter change the NMAD and eta
# statistics of the photometric redshifts, both as a function of the retained
# sample fraction and as a function of the ODDS integration half-width.

# %load_ext autoreload
# %autoreload 2

import pandas as pd
import numpy as np
import os
import torch
from scipy import stats
from pathlib import Path

# matplotlib settings
from matplotlib import rcParams
import matplotlib.pyplot as plt
rcParams["mathtext.fontset"] = "stix"
rcParams["font.family"] = "STIXGeneral"

from temps.archive import Archive
from temps.utils import nmad, caluclate_eta
from temps.temps_arch import EncoderPhotometry, MeasureZ
from temps.temps import TempsModule

# ### LOAD DATA (ONLY SPECZ)

# Define here the directory containing the photometric catalogues.
parent_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
modules_dir = Path('../data/models/')

photoz_archive = Archive(
    path=parent_dir,
    only_zspec=True,
    flags_kept=[
        1., 1.1, 1.4, 1.5,
        2, 2.1, 2.4, 2.5,
        3., 3.1, 3.4, 3.5,
        4.,
        9., 9.1, 9.3, 9.4, 9.5,
        11.1, 11.5,
        12.1, 12.5,
        13., 13.1, 13.5,
        14,
    ],
)
f_test_specz, ferr_test_specz, specz_test, VIS_mag_test = photoz_archive.get_testing_data()

# ### LOAD TRAINED MODELS AND EVALUATE PDF OF RANDOM EXAMPLES

# +
# Labels of the trained models; each one has a `modelF_<label>.pt` and a
# `modelZ_<label>.pt` file in `modules_dir`.
MODEL_LABELS = ['z', 'L15', 'DA']

# One DataFrame of point estimates per model label.
dfs = {}
# p(z) arrays collected per model, stacked into a single array after the loop
# so that no sample size or grid length has to be hard-coded up front.
pz_per_model = []

for lab in MODEL_LABELS:
    nn_features = EncoderPhotometry()
    nn_features.load_state_dict(
        torch.load(modules_dir / f'modelF_{lab}.pt', map_location=torch.device('cpu'))
    )
    nn_z = MeasureZ(num_gauss=6)
    nn_z.load_state_dict(
        torch.load(modules_dir / f'modelZ_{lab}.pt', map_location=torch.device('cpu'))
    )

    temps_module = TempsModule(nn_features, nn_z)

    z, pz, odds = temps_module.get_pz(input_data=torch.Tensor(f_test_specz), return_pz=True)
    pz_per_model.append(np.asarray(pz, dtype=float))

    # Point estimate, ODDS and spectroscopic target for every test object.
    df = pd.DataFrame(np.c_[z, odds, specz_test], columns=['z', 'odds', 'ztarget'])

    # Redshift error scaled by (1 + ztarget).
    df['zwerr'] = (df.z - df.ztarget) / (1 + df.ztarget)

    # Drop rows with NaN values. The surviving index labels are the original
    # row positions; they are used further down to select the matching p(z) rows.
    df = df.dropna()

    dfs[lab] = df

# Shape: (number of models, number of test objects, number of p(z) grid points).
pzs = np.stack(pz_per_model)
# -

# ### STATS

# +
# Half-widths (in redshift) of the window around the point estimate over which
# p(z) is summed to build alternative ODDS definitions.
# Coarser alternative: [0, 0.01, 0.03, 0.05, 0.07, 0.1, 0.13, 0.15]
odds_test = np.arange(0, 0.15, 0.01)

df = dfs['DA'].copy()
zgrid = np.linspace(0, 5, 1000)

# Keep only the p(z) rows of objects that survived `dropna`, then reset the
# index so `df`, `pz` and the frame concatenated below all share positions
# 0..N-1. Without this, a single dropped row misaligns every later step.
kept_rows = df.index.to_numpy()
pz = pzs[MODEL_LABELS.index('DA')][kept_rows]
df = df.reset_index(drop=True)

# The window indices computed on `zgrid` are applied directly to the columns of
# `pz`, so both must have the same number of grid points.
# NOTE(review): this only checks the length; confirm that `get_pz` really
# evaluates p(z) on linspace(0, 5, 1000).
if pz.shape[1] != len(zgrid):
    raise ValueError(
        f'p(z) has {pz.shape[1]} grid points but zgrid has {len(zgrid)}'
    )
# -

# +
def nearest_grid_index(values, grid):
    """Return, for each entry of `values`, the index of the closest `grid` point.

    Parameters
    ----------
    values : 1-D array-like of float
    grid : 1-D numpy array of float

    Returns
    -------
    numpy array of int with the same length as `values`. Ties resolve to the
    lowest index (behaviour of `np.argmin`).
    """
    distances = np.abs(np.asarray(values)[:, None] - grid[None, :])
    return np.argmin(distances, axis=1)


z_point = df.z.values
odds_cat = np.zeros(shape=(len(odds_test), len(df)))
for ii, odds_ in enumerate(odds_test):
    idx_lower = nearest_grid_index(z_point - odds_, zgrid)
    idx_upper = nearest_grid_index(z_point + odds_, zgrid)
    # Sum p(z) over the grid points inside [z - odds_, z + odds_], both ends included.
    odds_cat[ii] = [
        pz_row[lo:hi + 1].sum()
        for pz_row, lo, hi in zip(pz, idx_lower, idx_upper)
    ]

# One column per half-width; the same names are reused below to read them back.
odds_columns = [f'odds_{x}' for x in odds_test]
odds_df = pd.DataFrame(odds_cat.T, columns=odds_columns)
df = pd.concat([df, odds_df], axis=1)
# -

# ## statistics on ODDS

# +
# Quantiles of the ODDS distribution used as rejection thresholds: objects with
# ODDS at or below the c-quantile are discarded, keeping a fraction (1 - c).
# NOTE(review): the original cell iterated over an undefined name
# (`complenteness`), which raises NameError. The grid below is an assumed
# replacement -- confirm the intended values.
completeness_cuts = np.arange(0, 1, 0.1)

# Quantile at which the alternative ODDS definitions are compared. The original
# ran this comparison inside the loop, only when a threshold rounded to 0.3.
ODDS_WIDTH_STUDY_QUANTILE = 0.3

scatter_odds, eta_odds, xlab_odds, oddsmean = [], [], [], []
for c in completeness_cuts:
    percentile_cutoff = df['odds'].quantile(c)
    df_bin = df[df.odds > percentile_cutoff]

    xlab_odds.append((1 - c) * 100)  # retained fraction, in percent
    oddsmean.append(np.mean(df_bin.odds))
    scatter_odds.append(nmad(df_bin.zwerr))
    eta_odds.append(caluclate_eta(df_bin))

# Same statistics at a fixed quantile cut, once per alternative ODDS half-width.
scatters_odds, etas_odds = [], []
for col in odds_columns:
    width_cutoff = df[col].quantile(ODDS_WIDTH_STUDY_QUANTILE)
    df_selected = df[df[col] > width_cutoff]
    scatters_odds.append(nmad(df_selected.zwerr))
    etas_odds.append(caluclate_eta(df_selected))
# -

df_completeness = pd.DataFrame(
    np.c_[xlab_odds, scatter_odds, eta_odds],
    columns=['completeness', 'sigma_odds', 'eta_odds'],
)

# ## PLOTS

# +
def plot_nmad_and_eta(x, nmad_values, eta_values, xlabel, xticks, legend_anchor):
    """Plot NMAD (left y-axis) and eta (right y-axis) against a shared x-axis.

    Parameters
    ----------
    x : array-like
        Values on the shared x-axis.
    nmad_values : array-like
        Drawn in crimson on the left axis.
    eta_values : array-like
        Drawn in navy on the right axis.
    xlabel : str
        Label of the x-axis.
    xticks : array-like
        Tick positions on the x-axis.
    legend_anchor : list of float
        `bbox_to_anchor` passed to the figure legend.

    Returns
    -------
    matplotlib.figure.Figure
        The figure, so the caller can save or show it.
    """
    fig, ax_nmad = plt.subplots(figsize=(7, 5))

    # NMAD curve on the left y-axis.
    nmad_color = 'crimson'
    ax_nmad.plot(x, nmad_values, marker='.', color=nmad_color,
                 label=r'NMAD', ls='-', alpha=0.5)
    ax_nmad.set_xlabel(xlabel, fontsize=16)
    ax_nmad.set_ylabel(r'NMAD [$\Delta z$]', color=nmad_color, fontsize=16)
    ax_nmad.tick_params(axis='x', labelsize=14)
    ax_nmad.tick_params(axis='y', which='major', labelsize=14,
                        width=2.5, length=3, labelcolor=nmad_color)
    ax_nmad.set_xticks(xticks)

    # Eta curve on a second y-axis sharing the same x-axis.
    ax_eta = ax_nmad.twinx()
    eta_color = 'navy'
    ax_eta.plot(x, eta_values, marker='.', color=eta_color,
                label=r'$\eta$ [%]', ls='--', alpha=0.5)
    ax_eta.set_ylabel(r'$\eta$ [%]', color=eta_color, fontsize=16)

    # Use scientific notation on both y-axes so the two scales are comparable.
    ax_nmad.yaxis.get_major_formatter().set_powerlimits((0, 0))
    ax_eta.yaxis.get_major_formatter().set_powerlimits((0, 0))

    ax_eta.tick_params(axis='x', labelsize=14)
    ax_eta.tick_params(axis='y', which='major', labelsize=14,
                       width=2.5, length=3, labelcolor=eta_color)

    fig.tight_layout()
    fig.legend(bbox_to_anchor=legend_anchor, fontsize=14)
    return fig
# -

# +
# NMAD and eta as a function of the retained sample fraction.
fig = plot_nmad_and_eta(
    df_completeness.completeness,
    df_completeness.sigma_odds,
    df_completeness.eta_odds,
    xlabel='Completeness',
    xticks=np.arange(5, 101, 10),
    legend_anchor=[-0.18, 0.75, 0.5, 0.2],
)
#plt.savefig('Flag_nmad_eta_sigma_comparison.pdf', bbox_inches='tight')
plt.show()
# -

# +
# NMAD and eta as a function of the ODDS integration half-width.
fig = plot_nmad_and_eta(
    odds_test,
    scatters_odds,
    etas_odds,
    xlabel=r'$\delta z$ (ODDS)',
    xticks=np.arange(0, 0.16, 0.02),
    legend_anchor=[0.10, 0.75, 0.5, 0.2],
)
#plt.savefig('ODDS_study.pdf', bbox_inches='tight')
plt.show()
# -