Spaces:
Runtime error
Runtime error
# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: light
#       format_version: '1.5'
#       jupytext_version: 1.16.2
#   kernelspec:
#     display_name: temps
#     language: python
#     name: temps
# ---
# + | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from astropy.io import fits | |
import os | |
from astropy.table import Table | |
from temps.utils import nmad | |
from scipy import stats | |
from pathlib import Path | |
# - | |
# Directory containing the photometric-redshift catalogues to compare.
parent_dir = '/data/astro/scratch/lcabayol/EUCLID/DAz/DC2_results_to_share/'
# +
# FITS catalogues to process, one per method.
fits_files = [
    'GDE_RF_full.fits',
    'GDE_PHOSPHOROS_V2_full.fits',
    'OIL_LEPHARE_full.fits',
    'JDV_DNF_A_full.fits',
    'JSP_FRANKENZ_full.fits',
    'MBR_METAPHOR_full.fits',
    'GDE_ADABOOST_full.fits',
    'CSC_GPZ_best_full.fits',
    'SFO_CPZ_full.fits',
    'AAL_NNPZ_V3_full.fits',
]
# Name given to each catalogue's REDSHIFT column once merged
# (same order as fits_files).
redshift_columns = [
    'REDSHIFT_RF',
    'REDSHIFT_PHOSPHOROS',
    'REDSHIFT_LEPHARE',
    'REDSHIFT_DNF',
    'REDSHIFT_FRANKENZ',
    'REDSHIFT_METAPHOR',
    'REDSHIFT_ADABOOST',
    'REDSHIFT_GPZ',
    'REDSHIFT_CPZ',
    'REDSHIFT_NNPZ',
]
# Accumulator for the outer merge of all catalogues.
merged_df = pd.DataFrame()
for fits_file, redshift_col in zip(fits_files, redshift_columns):
    print(fits_file)
    # Convert to pandas inside the `with` block so the data are read before
    # the file handle is closed (the original never closed it).
    with fits.open(os.path.join(parent_dir, fits_file)) as hdu_list:
        df = Table(hdu_list[1].data).to_pandas()
    # Rows with REDSHIFT exactly 0 are discarded.
    df = df[df.REDSHIFT != 0]
    df = df[['ID', 'VIS', 'SPECZ', 'REDSHIFT']].rename(columns={'REDSHIFT': redshift_col})
    # The first non-empty catalogue seeds the merge; later ones are
    # outer-joined on the shared columns.
    if merged_df.empty:
        merged_df = df
    else:
        merged_df = pd.merge(merged_df, df, on=['ID', 'VIS', 'SPECZ'], how='outer')
# - | |
# ## OPEN DATA | |
# + | |
# Load the validation catalogue and copy a few of its columns into merged_df.
modules_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
filename_valid = 'euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
# Context manager so the FITS handle is released once the table is in memory.
with fits.open(modules_dir / filename_valid) as hdu_list:
    cat_full = Table(hdu_list[1].data).to_pandas()
cat_full = cat_full[['ID', 'z_spec_S15', 'reliable_S15', 'mu_class_L07']]
# NOTE(review): these assignments align on the DataFrame index (row label),
# not on ID. This presumably relies on merged_df and cat_full sharing the
# same row order -- confirm, since the outer merge may reorder rows.
for target_col, source_col in (
    ('reliable_S15', 'reliable_S15'),
    ('z_spec_S15', 'z_spec_S15'),
    ('mu_class_L07', 'mu_class_L07'),
    ('ID_catfull', 'ID'),
):
    merged_df[target_col] = cat_full[source_col]
# - | |
# Select rows with z_spec_S15 > 0, SPECZ > 0, reliable_S15 == 1,
# mu_class_L07 == 1 and VIS different from +inf.
specz_mask = (
    (merged_df.z_spec_S15 > 0)
    & (merged_df.SPECZ > 0)
    & (merged_df.reliable_S15 == 1)
    & (merged_df.mu_class_L07 == 1)
    & (merged_df.VIS != np.inf)
)
merged_df_specz = merged_df[specz_mask]
# ## ONLY SPECZ SAMPLE
# Per-method NMAD of the normalised residual and fraction of |residual| > 0.15.
scatter, outliers = [], []
for method in redshift_columns:
    print(method)
    # A list is passed to `subset` so this also works on pandas < 1.5,
    # where a bare string label is not accepted.
    df_method = merged_df_specz.dropna(subset=[method])
    zerr = (df_method.SPECZ - df_method[method]) / (1 + df_method.SPECZ)
    # zerr has one entry per row of df_method, so a single ratio serves both
    # the printout and the stored value.
    outlier_fraction = len(zerr[np.abs(zerr) > 0.15]) / len(zerr)
    print(outlier_fraction)
    scatter.append(nmad(zerr))
    outliers.append(outlier_fraction)
# + | |
# Legend labels, in the same order as redshift_columns.
labs = [
    'RF',
    'PHOSPHOROS',
    'LEPHARE',
    'DNF',
    'FRANKENZ',
    'METAPHOR',
    'ADABOOST',
    'GPZ',
    'CPZ',
    'NNPZ',
]
# One colour per label, sampled from the 'tab20' colormap.
cmap = plt.get_cmap('tab20')
n_labels = len(labs)
colors = [cmap(idx / n_labels) for idx in range(n_labels)]
# NMAD against outlier fraction, one marker per method.
plt.figure(figsize=(10, 6))
for lab, color, outlier_frac, sigma in zip(labs, colors, outliers, scatter):
    plt.scatter(outlier_frac * 100, sigma, color=color, label=lab, marker='^')
plt.legend(fontsize=12)
plt.ylabel(r'NMAD $[\Delta z]$', fontsize=14)
plt.xlabel('Outlier fraction [%]', fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim(5, 35)
plt.ylim(0, 0.14)
plt.show()
# - | |
# ### ADD TEMPS PREDICTIONS | |
import torch | |
from temps.archive import Archive | |
from temps.temps_arch import EncoderPhotometry, MeasureZ | |
from temps.temps import TempsModule | |
# + | |
# Load the validation photometry and keep only objects of the spec-z sample.
data_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
filename_valid = 'euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
# Context manager so the FITS handle is released once the table is in memory.
with fits.open(data_dir / filename_valid) as hdu_list:
    cat_phot = Table(hdu_list[1].data).to_pandas()
# -
# Match on the catalogue ID stored in merged_df_specz.ID_catfull.
cat_phot = cat_phot[cat_phot.ID.isin(merged_df_specz.ID_catfull)]
# + | |
# Turn the selected photometry into the colours fed to the networks.
photoz_archive = Archive(
    path='/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5',
    only_zspec=True,
)
f, ferr = photoz_archive._extract_fluxes(catalogue=cat_phot)
col, colerr = photoz_archive._to_colors(f, ferr)
ID = cat_phot.ID
# +
# Load the 'DA' weights on CPU and predict a redshift for every object.
modules_dir = Path('/nfs/pic.es/user/l/lcabayol/EUCLID/TEMPS/data/models')
cpu_device = torch.device('cpu')
nn_features = EncoderPhotometry()
nn_features.load_state_dict(
    torch.load(modules_dir / 'modelF_DA.pt', map_location=cpu_device)
)
nn_z = MeasureZ(num_gauss=6)
nn_z.load_state_dict(
    torch.load(modules_dir / 'modelZ_DA.pt', map_location=cpu_device)
)
temps_module = TempsModule(nn_features, nn_z)
z, pz, odds = temps_module.get_pz(input_data=torch.Tensor(col), return_pz=True)
# Two-column table (ID, TEMPS) without rows containing NaN.
df = pd.DataFrame(np.c_[ID, z], columns=['ID', 'TEMPS']).dropna()
# - | |
# Attach the TEMPS predictions to the spec-z sample via the catalogue ID.
merged_df_specz = merged_df_specz.merge(df, left_on='ID_catfull', right_on='ID')
# Methods to evaluate: the previous ones plus TEMPS.
# NOTE(review): re-running this cell appends 'TEMPS' again.
redshift_columns = redshift_columns + ['TEMPS']
# Per-method NMAD of the normalised residual and fraction of |residual| > 0.15.
scatter, outliers = [], []
for method in redshift_columns:
    print(method)
    # A list is passed to `subset` so this also works on pandas < 1.5,
    # where a bare string label is not accepted.
    df_method = merged_df_specz.dropna(subset=[method])
    zerr = (df_method.SPECZ - df_method[method]) / (1 + df_method.SPECZ)
    # zerr has one entry per row of df_method, so a single ratio serves both
    # the printout and the stored value.
    outlier_fraction = len(zerr[np.abs(zerr) > 0.15]) / len(zerr)
    print(outlier_fraction)
    scatter.append(nmad(zerr))
    outliers.append(outlier_fraction)
# + | |
# Legend labels, in the same order as redshift_columns (TEMPS last).
labs = [
    'RF',
    'PHOSPHOROS',
    'LEPHARE',
    'DNF',
    'FRANKENZ',
    'METAPHOR',
    'ADABOOST',
    'GPZ',
    'CPZ',
    'NNPZ',
    'TEMPS',
]
# One colour per label, sampled from the 'tab20' colormap.
cmap = plt.get_cmap('tab20')
n_labels = len(labs)
colors = [cmap(idx / n_labels) for idx in range(n_labels)]
# NMAD against outlier fraction, one marker per method.
plt.figure(figsize=(10, 6))
for lab, color, outlier_frac, sigma in zip(labs, colors, outliers, scatter):
    plt.scatter(outlier_frac * 100, sigma, color=color, label=lab, marker='^')
plt.legend(fontsize=12)
plt.ylabel(r'NMAD $[\Delta z]$', fontsize=14)
plt.xlabel('Outlier fraction [%]', fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim(5, 35)
plt.ylim(0, 0.14)
plt.show()
# - | |
# ## ANOTHER SELECTION | |
# + | |
# FITS catalogues to process, one per method.
fits_files = [
    'GDE_RF_full.fits',
    'GDE_PHOSPHOROS_V2_full.fits',
    'OIL_LEPHARE_full.fits',
    'JDV_DNF_A_full.fits',
    'JSP_FRANKENZ_full.fits',
    'MBR_METAPHOR_full.fits',
    'GDE_ADABOOST_full.fits',
    'CSC_GPZ_best_full.fits',
    'SFO_CPZ_full.fits',
    'AAL_NNPZ_V3_full.fits',
]
# Name given to each catalogue's REDSHIFT column once merged
# (same order as fits_files).
redshift_columns = [
    'REDSHIFT_RF',
    'REDSHIFT_PHOSPHOROS',
    'REDSHIFT_LEPHARE',
    'REDSHIFT_DNF',
    'REDSHIFT_FRANKENZ',
    'REDSHIFT_METAPHOR',
    'REDSHIFT_ADABOOST',
    'REDSHIFT_GPZ',
    'REDSHIFT_CPZ',
    'REDSHIFT_NNPZ',
]
# Name given to each catalogue's USE column once merged (same order).
use_columns = [
    'USE_RF',
    'USE_PHOSPHOROS',
    'USE_LEPHARE',
    'USE_DNF',
    'USE_FRANKENZ',
    'USE_METAPHOR',
    'USE_ADABOOST',
    'USE_GPZ',
    'USE_CPZ',
    'USE_NNPZ',
]
# Accumulator for the outer merge of all catalogues.
merged_df = pd.DataFrame()
for fits_file, redshift_col, use_col in zip(fits_files, redshift_columns, use_columns):
    print(fits_file)
    # Convert to pandas inside the `with` block so the data are read before
    # the file handle is closed (the original never closed it).
    with fits.open(os.path.join(parent_dir, fits_file)) as hdu_list:
        df = Table(hdu_list[1].data).to_pandas()
    # Rows with REDSHIFT exactly 0 are discarded.
    df = df[df.REDSHIFT != 0]
    df = df[['ID', 'VIS', 'SPECZ', 'REDSHIFT', 'L15PHZ', 'USE']].rename(
        columns={'REDSHIFT': redshift_col, 'USE': use_col}
    )
    # The first non-empty catalogue seeds the merge; later ones are
    # outer-joined on the shared columns.
    if merged_df.empty:
        merged_df = df
    else:
        merged_df = pd.merge(merged_df, df, on=['ID', 'VIS', 'SPECZ', 'L15PHZ'], how='outer')
# - | |
# Reference redshift: SPECZ where it is positive, L15PHZ otherwise.
has_specz = merged_df['SPECZ'] > 0
merged_df['comp_z'] = np.where(has_specz, merged_df['SPECZ'], merged_df['L15PHZ'])
#merged_df = merged_df[(merged_df.comp_z>0)&(merged_df.comp_z<4)&(merged_df.VIS>23.5)]
# Keep 0 < comp_z < 4 and VIS < 25.
z_in_range = (merged_df.comp_z > 0) & (merged_df.comp_z < 4)
vis_cut = merged_df.VIS < 25
merged_df = merged_df[z_in_range & vis_cut]
# + | |
# Load the validation catalogue to attach its ID column to merged_df.
modules_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
filename_valid = 'euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
# Context manager so the FITS handle is released once the table is in memory.
with fits.open(modules_dir / filename_valid) as hdu_list:
    cat_full = Table(hdu_list[1].data).to_pandas()
# NOTE(review): this assignment aligns on the DataFrame index (row label),
# not on ID. This presumably relies on merged_df and cat_full sharing the
# same row order -- confirm, since the outer merge may reorder rows.
merged_df['ID_catfull'] = cat_full.ID
# + | |
# Load the validation photometry and keep only objects present in merged_df.
data_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
filename_valid = 'euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
# Context manager so the FITS handle is released once the table is in memory.
with fits.open(data_dir / filename_valid) as hdu_list:
    cat_phot = Table(hdu_list[1].data).to_pandas()
# -
# Match on the catalogue ID stored in merged_df.ID_catfull.
cat_phot = cat_phot[cat_phot.ID.isin(merged_df.ID_catfull)]
# +
# Turn the selected photometry into the colours fed to the networks.
photoz_archive = Archive(path='/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5',
                         only_zspec=False)
f, ferr = photoz_archive._extract_fluxes(catalogue=cat_phot)
col, colerr = photoz_archive._to_colors(f, ferr)
ID = cat_phot.ID
# + | |
modules_dir = Path('/nfs/pic.es/user/l/lcabayol/EUCLID/TEMPS/data/models')


def _load_temps_module(models_dir, tag):
    """Build a TempsModule from the ``modelF_<tag>.pt`` / ``modelZ_<tag>.pt`` weights.

    Both state dicts are loaded with ``map_location`` set to the CPU.
    Returns the tuple ``(encoder, z_network, temps_module)``.
    """
    cpu_device = torch.device('cpu')
    encoder = EncoderPhotometry()
    encoder.load_state_dict(torch.load(models_dir / f'modelF_{tag}.pt', map_location=cpu_device))
    z_network = MeasureZ(num_gauss=6)
    z_network.load_state_dict(torch.load(models_dir / f'modelZ_{tag}.pt', map_location=cpu_device))
    return encoder, z_network, TempsModule(encoder, z_network)


# The same colours are run through three sets of weights: 'DA', 'z' and 'L15'.
# `pz` is overwritten by each call; only the last one (L15) survives.
nn_features, nn_z, temps_module = _load_temps_module(modules_dir, 'DA')
z, pz, odds = temps_module.get_pz(input_data=torch.Tensor(col),
                                  return_pz=True)
nn_features, nn_z, temps_module = _load_temps_module(modules_dir, 'z')
znoda, pz, odds_noda = temps_module.get_pz(input_data=torch.Tensor(col),
                                           return_pz=True)
nn_features, nn_z, temps_module = _load_temps_module(modules_dir, 'L15')
z_L15, pz, odds_L15 = temps_module.get_pz(input_data=torch.Tensor(col),
                                          return_pz=True)
# One row per object: ID followed by (redshift, odds) for each set of weights;
# rows containing NaN are dropped.
df = pd.DataFrame(np.c_[ID, z, odds, znoda, odds_noda, z_L15, odds_L15],
                  columns=['ID', 'TEMPS', 'flag_TEMPS', 'TEMPS_noda', 'flag_TEMPSnoda', 'TEMPS_L15', 'flag_L15'])
df = df.dropna()
# + | |
# Quantile of each flag distribution used as the selection threshold.
# With 0.3, rows whose flag is >= the 0.3 quantile get USE_* = 1 (roughly the
# top 70%); the earlier comments wrongly described this as the median.
percent = 0.3
for flag_col, use_col in (
    ('flag_TEMPS', 'USE_TEMPS'),
    ('flag_TEMPSnoda', 'USE_TEMPS_noda'),
    ('flag_L15', 'USE_TEMPS_L15'),
):
    threshold = df[flag_col].quantile(percent)
    # 1 where the flag reaches the threshold, 0 otherwise. No zero
    # pre-initialisation is needed because the column is fully overwritten.
    df[use_col] = np.where(df[flag_col] >= threshold, 1, 0)
# - | |
# Attach the TEMPS predictions and flags via the catalogue ID.
merged_df_temps = pd.merge(merged_df, df, left_on='ID_catfull', right_on='ID')
# Redshift columns to evaluate: the ten external methods, then the three
# TEMPS variants.
redshift_columns = [
    'REDSHIFT_RF',
    'REDSHIFT_PHOSPHOROS',
    'REDSHIFT_LEPHARE',
    'REDSHIFT_DNF',
    'REDSHIFT_FRANKENZ',
    'REDSHIFT_METAPHOR',
    'REDSHIFT_ADABOOST',
    'REDSHIFT_GPZ',
    'REDSHIFT_CPZ',
    'REDSHIFT_NNPZ',
]
redshift_columns = [*redshift_columns, 'TEMPS', 'TEMPS_noda', 'TEMPS_L15']
# Matching USE columns, in the same order.
# NOTE(review): re-running this cell appends the TEMPS entries again.
use_columns = [*use_columns, 'USE_TEMPS', 'USE_TEMPS_noda', 'USE_TEMPS_L15']
# Keep VIS < 25.
merged_df_temps = merged_df_temps[merged_df_temps['VIS'] < 25]
# Per-method metrics on objects with 0.2 < z_method < 2.6 and VIS < 24.5,
# restricted to rows whose USE column equals 1. `size` stores the fraction of
# the redshift/VIS-selected rows that survive the USE cut.
scatter, outliers, size = [], [], []
for method, use in zip(redshift_columns, use_columns):
    print(method)
    #df_method = merged_df_temps.dropna(subset=method)
    z_method = merged_df_temps[method]
    df_method = merged_df_temps[(z_method > 0.2) & (z_method < 2.6)]
    df_method = df_method[df_method.VIS < 24.5]
    norm_size = len(df_method)
    df_method = df_method[df_method[use] == 1]
    zerr = (df_method.comp_z - df_method[method]) / (1 + df_method.comp_z)
    # Each metric is computed once and reused for storage and printing.
    sigma = nmad(zerr)
    outlier_fraction = len(zerr[np.abs(zerr) > 0.15]) / len(df_method)
    kept_fraction = len(df_method) / norm_size
    scatter.append(sigma)
    outliers.append(outlier_fraction)
    size.append(kept_fraction)
    print(sigma, outlier_fraction, kept_fraction)
# Per-method metrics on the faint sample, 23.5 < VIS < 25. The selection does
# not depend on the method, so it is built once outside the loop.
scatter_faint, outliers_faint, size_faint = [], [], []
faint_sample = merged_df_temps[(merged_df_temps['VIS'] > 23.5) & (merged_df_temps['VIS'] < 25)]
# `use` is only needed by the commented-out USE selection below.
for method, use in zip(redshift_columns, use_columns):
    print(method)
    #df_method = merged_df_temps.dropna(subset=method)
    df_method = faint_sample
    #df_method = df_method[df_method.loc[:, use]==1]
    #df_method = merged_df_temps[(merged_df_temps.loc[:,'VIS']>23.5)&(merged_df_temps.loc[:,'VIS']<24.5)]
    zerr = (df_method.comp_z - df_method[method]) / (1 + df_method.comp_z)
    # Each metric is computed once and reused for storage and printing.
    sigma = nmad(zerr)
    outlier_fraction = len(zerr[np.abs(zerr) > 0.15]) / len(df_method)
    scatter_faint.append(sigma)
    outliers_faint.append(outlier_fraction)
    size_faint.append(len(df_method))
    print(sigma, outlier_fraction, len(df_method))
# + | |
import matplotlib.pyplot as plt
import numpy as np
from pastamarkers import markers
# Legend labels, in the same order as redshift_columns.
labs = [
    'RF', 'PHOSPHOROS', 'LEPHARE', 'DNF', 'FRANKENZ', 'METAPHOR',
    'ADABOOST', 'GPZ', 'CPZ', 'NNPZ', 'TEMPS', 'TEMPS - no DA', 'TEMPS - L15'
]
# One marker per label: pasta shapes for the external methods, standard
# matplotlib markers for the three TEMPS variants.
markers_pasta = [
    markers.penne, markers.conchiglie, markers.tortellini, markers.creste,
    markers.spaghetti, markers.ravioli, markers.tagliatelle, markers.mezzelune,
    markers.puntine, markers.stelline, 's', 'o', '^',
]
labs_faint = [f"{lab}_faint" for lab in labs]  # Labels for the faint data
# One colour per label, sampled from the 'tab20' colormap.
cmap = plt.get_cmap('tab20')
n_labels = len(labs)
colors = [cmap(idx / n_labels) for idx in range(n_labels)]
# Two panels stacked vertically, each with its own x axis.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 12), sharex=False)
# Top panel. These two labels are drawn at NaN so they appear in the legend
# only; all other points are annotated with the kept fraction in percent.
legend_only = ('TEMPS - no DA', 'TEMPS - L15')
# (dx, dy) offset of the annotation relative to its marker; two labels use
# their own values.
default_offset = (-0.5, 0.001)
text_offsets = {'CPZ': (-0.2, 0.001), 'ADABOOST': (-0.5, -0.004)}
for lab, color, marker, outlier_frac, sigma, kept in zip(
        labs, colors, markers_pasta, outliers, scatter, size):
    if lab in legend_only:
        ax1.scatter(np.nan, np.nan, color=color, label=lab, marker=marker, s=300)
        continue
    ax1.scatter(outlier_frac * 100, sigma, color=color, label=lab, marker=marker, s=300)
    dx, dy = text_offsets.get(lab, default_offset)
    ax1.text(outlier_frac * 100 + dx, sigma + dy, f'{int(np.around(kept * 100))}',
             fontsize=12, verticalalignment='bottom')
ax1.set_ylabel(r'NMAD $[\Delta z]$', fontsize=24)
ax1.legend(fontsize=14)
ax1.tick_params(axis='both', which='major', labelsize=20)
# Bottom panel: faint-sample metrics for every method, no annotations.
for lab, color, marker, outlier_frac, sigma in zip(
        labs, colors, markers_pasta, outliers_faint, scatter_faint):
    ax2.scatter(outlier_frac * 100, sigma, color=color, label=lab, marker=marker, s=300)
ax2.set_ylabel(r'NMAD $[\Delta z]$', fontsize=24)
ax2.set_xlabel('Outlier fraction [%]', fontsize=24)
ax2.tick_params(axis='both', which='major', labelsize=20)
plt.tight_layout()
#plt.savefig('Comparison_paper.pdf', bbox_inches='tight')
plt.show()
# - | |
# NOTE(review): cat_val and cat_all are not defined in the visible part of
# this file, and the merged_df built above has no RA/DEC columns -- confirm
# whether this cell is a leftover before running it.
coord_cols = ['RA', 'DEC']
cat_val_coords = cat_val[coord_cols]
cat_all_subset = cat_all[['RA', 'DEC', 'z_spec_S15', 'photo_z_L15', 'reliable_S15', 'mu_class_L07']]
# Inner merge (pandas default) on the sky coordinates.
cat_val_z = cat_val_coords.merge(cat_all_subset, on=coord_cols)
merged_df = merged_df.merge(cat_val_z, on=coord_cols)