# NOTE: "Spaces: Runtime error" -- appears to be page-scrape residue, not part of the script.
# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: light
#       format_version: '1.5'
#       jupytext_version: 1.16.2
#   kernelspec:
#     display_name: temps
#     language: python
#     name: temps
# ---
# + | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from astropy.io import fits | |
import os | |
from astropy.table import Table | |
from temps.utils import nmad | |
from scipy import stats | |
from pathlib import Path | |
# - | |
# Directory containing the photometric-redshift result catalogues.
parent_dir = "/data/astro/scratch/lcabayol/EUCLID/DAz/DC2_results_to_share/"
# +
# FITS result files to process, one per photo-z method.
fits_files = [
    "GDE_RF_full.fits",
    "GDE_PHOSPHOROS_V2_full.fits",
    "OIL_LEPHARE_full.fits",
    "JDV_DNF_A_full.fits",
    "JSP_FRANKENZ_full.fits",
    "MBR_METAPHOR_full.fits",
    "GDE_ADABOOST_full.fits",
    "CSC_GPZ_best_full.fits",
    "SFO_CPZ_full.fits",
    "AAL_NNPZ_V3_full.fits",
]
# Name given to each file's REDSHIFT column in the merged table
# (same order as `fits_files`).
redshift_columns = [
    "REDSHIFT_RF",
    "REDSHIFT_PHOSPHOROS",
    "REDSHIFT_LEPHARE",
    "REDSHIFT_DNF",
    "REDSHIFT_FRANKENZ",
    "REDSHIFT_METAPHOR",
    "REDSHIFT_ADABOOST",
    "REDSHIFT_GPZ",
    "REDSHIFT_CPZ",
    "REDSHIFT_NNPZ",
]

# Build one wide table holding a redshift column per method.
merged_df = pd.DataFrame()
for fits_file, redshift_col in zip(fits_files, redshift_columns):
    print(fits_file)
    # Convert to pandas while the file is open, then release the handle:
    # the original left one open FITS file behind per iteration.
    with fits.open(os.path.join(parent_dir, fits_file)) as hdu_list:
        df = Table(hdu_list[1].data).to_pandas()
    # Drop rows whose REDSHIFT is exactly 0 (presumably "no estimate"
    # placeholders -- confirm with the catalogue authors).
    df = df[df.REDSHIFT != 0]
    df = df[["ID", "VIS", "SPECZ", "REDSHIFT"]].rename(
        columns={"REDSHIFT": redshift_col}
    )
    # The first catalogue seeds the table; later ones are outer-joined on the
    # shared identification columns so no object is lost.
    if merged_df.empty:
        merged_df = df
    else:
        merged_df = pd.merge(merged_df, df, on=["ID", "VIS", "SPECZ"], how="outer")
# - | |
# ## OPEN DATA | |
# + | |
modules_dir = Path(
    "/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5"
)
filename_valid = "euclid_cosmos_DC2_S1_v2.1_valid_matched.fits"
# Read the validation catalogue and close the file right away (the original
# never closed the handle).
with fits.open(modules_dir / filename_valid) as hdu_list:
    cat_full = Table(hdu_list[1].data).to_pandas()
cat_full = cat_full[["ID", "z_spec_S15", "reliable_S15", "mu_class_L07"]]
# NOTE(review): these assignments align on the DataFrame index, not on an ID
# key, so they assume merged_df and cat_full list the same objects in the same
# row order. The outer merges that built merged_df may have reordered rows --
# confirm, or switch to an explicit key-based merge.
merged_df["reliable_S15"] = cat_full.reliable_S15
merged_df["z_spec_S15"] = cat_full.z_spec_S15
merged_df["mu_class_L07"] = cat_full.mu_class_L07
merged_df["ID_catfull"] = cat_full.ID
# - | |
# Spectroscopic sample: positive redshift in both spec-z columns, flagged
# reliable_S15 == 1 and mu_class_L07 == 1, and VIS not equal to +inf.
specz_selection = (
    (merged_df.z_spec_S15 > 0)
    & (merged_df.SPECZ > 0)
    & (merged_df.reliable_S15 == 1)
    & (merged_df.mu_class_L07 == 1)
    & (merged_df.VIS != np.inf)
)
merged_df_specz = merged_df[specz_selection]
# ## ONLY SPECZ SAMPLE | |
# Per-method NMAD scatter and outlier fraction on the spec-z sample.
scatter, outliers = [], []
for method in redshift_columns:
    print(method)
    # Only objects for which this method provides a redshift.
    df_method = merged_df_specz.dropna(subset=[method])
    # Normalised residual (z_spec - z_phot) / (1 + z_spec).
    zerr = (df_method.SPECZ - df_method[method]) / (1 + df_method.SPECZ)
    # Fraction with |residual| > 0.15, computed once. An empty selection
    # gives NaN instead of raising ZeroDivisionError.
    outlier_fraction = float((np.abs(zerr) > 0.15).mean())
    print(outlier_fraction)
    scatter.append(nmad(zerr))
    outliers.append(outlier_fraction)
# + | |
# Legend labels, in the same order as `redshift_columns`.
labs = [
    "RF",
    "PHOSPHOROS",
    "LEPHARE",
    "DNF",
    "FRANKENZ",
    "METAPHOR",
    "ADABOOST",
    "GPZ",
    "CPZ",
    "NNPZ",
]
# One colour per method, sampled from the tab20 colormap.
cmap = plt.get_cmap("tab20")
n_methods = len(labs)
colors = [cmap(idx / n_methods) for idx in range(n_methods)]

# NMAD against outlier fraction (in percent), one marker per method.
plt.figure(figsize=(10, 6))
for i, label in enumerate(labs):
    plt.scatter(
        outliers[i] * 100, scatter[i], color=colors[i], label=label, marker="^"
    )

plt.legend(fontsize=12)
plt.ylabel(r"NMAD $[\Delta z]$", fontsize=14)
plt.xlabel("Outlier fraction [%]", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim(5, 35)
plt.ylim(0, 0.14)
plt.show()
# - | |
# ### ADD TEMPS PREDICTIONS | |
import torch | |
from temps.archive import Archive | |
from temps.temps_arch import EncoderPhotometry, MeasureZ | |
from temps.temps import TempsModule | |
# + | |
data_dir = Path("/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5")
filename_valid = "euclid_cosmos_DC2_S1_v2.1_valid_matched.fits"
# Read the photometric catalogue and close the file right away (the original
# never closed the handle).
with fits.open(data_dir / filename_valid) as hdu_list:
    cat_phot = Table(hdu_list[1].data).to_pandas()
# -
# Keep only the objects that are part of the spec-z comparison sample.
cat_phot = cat_phot[cat_phot.ID.isin(merged_df_specz.ID_catfull)]
# +
photoz_archive = Archive(
    path="/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5",
    only_zspec=True,
)
# Fluxes (with errors) are turned into colours, which are the network input
# further down.
f, ferr = photoz_archive._extract_fluxes(catalogue=cat_phot)
col, colerr = photoz_archive._to_colors(f, ferr)
ID = cat_phot.ID
# + | |
modules_dir = Path("/nfs/pic.es/user/l/lcabayol/EUCLID/TEMPS/data/models")
cpu_device = torch.device("cpu")

# Feature extractor, restored from the "DA" weights.
nn_features = EncoderPhotometry()
feature_weights = torch.load(modules_dir / "modelF_DA.pt", map_location=cpu_device)
nn_features.load_state_dict(feature_weights)

# Redshift estimator built with num_gauss=6, restored from the matching weights.
nn_z = MeasureZ(num_gauss=6)
redshift_weights = torch.load(modules_dir / "modelZ_DA.pt", map_location=cpu_device)
nn_z.load_state_dict(redshift_weights)

temps_module = TempsModule(nn_features, nn_z)
z, pz, odds = temps_module.get_pz(input_data=torch.Tensor(col), return_pz=True)

# Table of object ID and TEMPS redshift; rows containing NaN are discarded.
df = pd.DataFrame(np.c_[ID, z], columns=["ID", "TEMPS"]).dropna()
# - | |
# Attach the TEMPS predictions to the spec-z sample via the catalogue ID.
merged_df_specz = merged_df_specz.merge(df, left_on="ID_catfull", right_on="ID")
# TEMPS is evaluated alongside the other methods.
redshift_columns = redshift_columns + ["TEMPS"]

# Per-method NMAD scatter and outlier fraction, now including TEMPS.
scatter, outliers = [], []
for method in redshift_columns:
    print(method)
    # Only objects for which this method provides a redshift.
    df_method = merged_df_specz.dropna(subset=[method])
    # Normalised residual (z_spec - z_phot) / (1 + z_spec).
    zerr = (df_method.SPECZ - df_method[method]) / (1 + df_method.SPECZ)
    # Fraction with |residual| > 0.15, computed once. An empty selection
    # gives NaN instead of raising ZeroDivisionError.
    outlier_fraction = float((np.abs(zerr) > 0.15).mean())
    print(outlier_fraction)
    scatter.append(nmad(zerr))
    outliers.append(outlier_fraction)
# + | |
# Legend labels, in the same order as `redshift_columns` (TEMPS last).
labs = [
    "RF",
    "PHOSPHOROS",
    "LEPHARE",
    "DNF",
    "FRANKENZ",
    "METAPHOR",
    "ADABOOST",
    "GPZ",
    "CPZ",
    "NNPZ",
    "TEMPS",
]
# One colour per method, sampled from the tab20 colormap.
cmap = plt.get_cmap("tab20")
n_methods = len(labs)
colors = [cmap(idx / n_methods) for idx in range(n_methods)]

# NMAD against outlier fraction (in percent), one marker per method.
plt.figure(figsize=(10, 6))
for i, label in enumerate(labs):
    plt.scatter(
        outliers[i] * 100, scatter[i], color=colors[i], label=label, marker="^"
    )

plt.legend(fontsize=12)
plt.ylabel(r"NMAD $[\Delta z]$", fontsize=14)
plt.xlabel("Outlier fraction [%]", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlim(5, 35)
plt.ylim(0, 0.14)
plt.show()
# - | |
# ## ANOTHER SELECTION | |
# + | |
# FITS result files to process, one per photo-z method.
fits_files = [
    "GDE_RF_full.fits",
    "GDE_PHOSPHOROS_V2_full.fits",
    "OIL_LEPHARE_full.fits",
    "JDV_DNF_A_full.fits",
    "JSP_FRANKENZ_full.fits",
    "MBR_METAPHOR_full.fits",
    "GDE_ADABOOST_full.fits",
    "CSC_GPZ_best_full.fits",
    "SFO_CPZ_full.fits",
    "AAL_NNPZ_V3_full.fits",
]
# Name given to each file's REDSHIFT column in the merged table
# (same order as `fits_files`).
redshift_columns = [
    "REDSHIFT_RF",
    "REDSHIFT_PHOSPHOROS",
    "REDSHIFT_LEPHARE",
    "REDSHIFT_DNF",
    "REDSHIFT_FRANKENZ",
    "REDSHIFT_METAPHOR",
    "REDSHIFT_ADABOOST",
    "REDSHIFT_GPZ",
    "REDSHIFT_CPZ",
    "REDSHIFT_NNPZ",
]
# Name given to each file's USE column in the merged table (same order).
use_columns = [
    "USE_RF",
    "USE_PHOSPHOROS",
    "USE_LEPHARE",
    "USE_DNF",
    "USE_FRANKENZ",
    "USE_METAPHOR",
    "USE_ADABOOST",
    "USE_GPZ",
    "USE_CPZ",
    "USE_NNPZ",
]

# Build one wide table holding a redshift column and a USE column per method.
merged_df = pd.DataFrame()
for fits_file, redshift_col, use_col in zip(fits_files, redshift_columns, use_columns):
    print(fits_file)
    # Convert to pandas while the file is open, then release the handle:
    # the original left one open FITS file behind per iteration.
    with fits.open(os.path.join(parent_dir, fits_file)) as hdu_list:
        df = Table(hdu_list[1].data).to_pandas()
    # Drop rows whose REDSHIFT is exactly 0 (presumably "no estimate"
    # placeholders -- confirm with the catalogue authors).
    df = df[df.REDSHIFT != 0]
    df = df[["ID", "VIS", "SPECZ", "REDSHIFT", "L15PHZ", "USE"]].rename(
        columns={"REDSHIFT": redshift_col, "USE": use_col}
    )
    # The first catalogue seeds the table; later ones are outer-joined on the
    # shared identification columns so no object is lost.
    if merged_df.empty:
        merged_df = df
    else:
        merged_df = pd.merge(
            merged_df, df, on=["ID", "VIS", "SPECZ", "L15PHZ"], how="outer"
        )
# - | |
# Reference redshift: SPECZ where it is positive, otherwise L15PHZ.
merged_df["comp_z"] = np.where(
    merged_df["SPECZ"] > 0, merged_df["SPECZ"], merged_df["L15PHZ"]
)
# Alternative (faint-only) cut kept for reference:
# merged_df = merged_df[(merged_df.comp_z>0)&(merged_df.comp_z<4)&(merged_df.VIS>23.5)]
# Take an explicit copy: columns are added to this filtered frame further
# down, which on a bare boolean-indexed slice triggers pandas'
# SettingWithCopyWarning.
merged_df = merged_df[
    (merged_df.comp_z > 0) & (merged_df.comp_z < 4) & (merged_df.VIS < 25)
].copy()
# + | |
modules_dir = Path(
    "/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5"
)
filename_valid = "euclid_cosmos_DC2_S1_v2.1_valid_matched.fits"
# cat_full and cat_phot both come from this one file. The original opened it
# twice and never closed either handle; read it once and close it.
with fits.open(modules_dir / filename_valid) as hdu_list:
    cat_full = Table(hdu_list[1].data).to_pandas()
# NOTE(review): this assignment aligns on the DataFrame index, not on an ID
# key, so it assumes merged_df rows still correspond to cat_full rows by index
# label after the merges and filtering above -- confirm.
merged_df["ID_catfull"] = cat_full.ID
# +
data_dir = Path("/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5")
# -
# Photometry restricted to the objects present in the merged table (boolean
# indexing returns a new frame, so cat_full itself is left untouched).
cat_phot = cat_full[cat_full.ID.isin(merged_df.ID_catfull)]
# +
photoz_archive = Archive(
    path="/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5",
    only_zspec=False,
)
# Fluxes (with errors) are turned into colours, which are the network input
# further down.
f, ferr = photoz_archive._extract_fluxes(catalogue=cat_phot)
col, colerr = photoz_archive._to_colors(f, ferr)
ID = cat_phot.ID
# + | |
modules_dir = Path("/nfs/pic.es/user/l/lcabayol/EUCLID/TEMPS/data/models")


def _load_temps_module(models_dir, tag):
    """Build a TempsModule from the modelF_<tag>.pt / modelZ_<tag>.pt weights.

    Both state dicts are mapped onto the CPU. `tag` selects the model variant
    ("DA", "z" or "L15" in this script).
    """
    cpu_device = torch.device("cpu")
    encoder = EncoderPhotometry()
    encoder.load_state_dict(
        torch.load(models_dir / f"modelF_{tag}.pt", map_location=cpu_device)
    )
    estimator = MeasureZ(num_gauss=6)
    estimator.load_state_dict(
        torch.load(models_dir / f"modelZ_{tag}.pt", map_location=cpu_device)
    )
    return TempsModule(encoder, estimator)


# Run the three model variants on the same colours. `pz` is overwritten by
# each call, so only the last variant's value survives, as in the original.
temps_module = _load_temps_module(modules_dir, "DA")
z, pz, odds = temps_module.get_pz(input_data=torch.Tensor(col), return_pz=True)

temps_module = _load_temps_module(modules_dir, "z")
znoda, pz, odds_noda = temps_module.get_pz(input_data=torch.Tensor(col), return_pz=True)

temps_module = _load_temps_module(modules_dir, "L15")
z_L15, pz, odds_L15 = temps_module.get_pz(input_data=torch.Tensor(col), return_pz=True)

# One row per object: ID plus the redshift and odds value of each variant.
df = pd.DataFrame(
    np.c_[ID, z, odds, znoda, odds_noda, z_L15, odds_L15],
    columns=[
        "ID",
        "TEMPS",
        "flag_TEMPS",
        "TEMPS_noda",
        "flag_TEMPSnoda",
        "TEMPS_L15",
        "flag_L15",
    ],
)
df = df.dropna()
# + | |
# Quantile of the flag column used as the acceptance threshold. With 0.3 the
# threshold is the 30th percentile, so roughly the top 70% of objects are
# kept (the original comments said "50th percentile / top 50%").
percent = 0.3
# For each TEMPS variant, set USE_* to 1 where the flag is at or above the
# threshold and to 0 otherwise.
for flag_col, use_col in [
    ("flag_TEMPS", "USE_TEMPS"),
    ("flag_TEMPSnoda", "USE_TEMPS_noda"),
    ("flag_L15", "USE_TEMPS_L15"),
]:
    threshold = df[flag_col].quantile(percent)
    df[use_col] = np.where(df[flag_col] >= threshold, 1, 0)
# - | |
# Join the TEMPS predictions onto the method table via the catalogue ID.
merged_df_temps = pd.merge(merged_df, df, left_on="ID_catfull", right_on="ID")

# Redshift columns to evaluate: the ten external methods followed by the
# three TEMPS variants.
redshift_columns = [
    "REDSHIFT_RF",
    "REDSHIFT_PHOSPHOROS",
    "REDSHIFT_LEPHARE",
    "REDSHIFT_DNF",
    "REDSHIFT_FRANKENZ",
    "REDSHIFT_METAPHOR",
    "REDSHIFT_ADABOOST",
    "REDSHIFT_GPZ",
    "REDSHIFT_CPZ",
    "REDSHIFT_NNPZ",
    "TEMPS",
    "TEMPS_noda",
    "TEMPS_L15",
]
# Matching USE columns, in the same order.
use_columns = [*use_columns, "USE_TEMPS", "USE_TEMPS_noda", "USE_TEMPS_L15"]

# Magnitude cut on VIS.
bright_enough = merged_df_temps.VIS < 25
merged_df_temps = merged_df_temps[bright_enough]
# Per-method metrics on the USE == 1 subsample:
#   scatter  - nmad of the normalised residuals
#   outliers - fraction with |residual| > 0.15
#   size     - fraction of the pre-selection sample kept by the USE flag
scatter, outliers, size = [], [], []
for method, use in zip(redshift_columns, use_columns):
    print(method)
    # Objects whose estimate lies in (0.2, 2.6) and with VIS < 24.5.
    in_range = (merged_df_temps[method] > 0.2) & (merged_df_temps[method] < 2.6)
    df_method = merged_df_temps[in_range]
    df_method = df_method[df_method.VIS < 24.5]
    # Sample size before the USE selection, used to normalise `size`.
    norm_size = len(df_method)
    df_method = df_method[df_method[use] == 1]
    zerr = (df_method.comp_z - df_method[method]) / (1 + df_method.comp_z)
    # Compute each metric once and reuse it for both storage and printing.
    method_scatter = nmad(zerr)
    method_outliers = len(zerr[np.abs(zerr) > 0.15]) / len(df_method)
    method_size = len(df_method) / norm_size
    scatter.append(method_scatter)
    outliers.append(method_outliers)
    size.append(method_size)
    print(method_scatter, method_outliers, method_size)
# Metrics on the faint sample (23.5 < VIS < 25), without any USE selection.
scatter_faint, outliers_faint, size_faint = [], [], []
# The selection does not depend on the method, so build it once outside the
# loop.
df_method = merged_df_temps[
    (merged_df_temps.loc[:, "VIS"] > 23.5) & (merged_df_temps.loc[:, "VIS"] < 25)
]
n_faint = len(df_method)
for method in redshift_columns:
    print(method)
    zerr = (df_method.comp_z - df_method[method]) / (1 + df_method.comp_z)
    # Compute each metric once and reuse it for both storage and printing.
    method_scatter = nmad(zerr)
    method_outliers = len(zerr[np.abs(zerr) > 0.15]) / n_faint
    scatter_faint.append(method_scatter)
    outliers_faint.append(method_outliers)
    size_faint.append(n_faint)
    print(method_scatter, method_outliers, n_faint)
# + | |
import matplotlib.pyplot as plt
import numpy as np
from pastamarkers import markers

# Legend labels, in the same order as `redshift_columns`.
labs = [
    "RF",
    "PHOSPHOROS",
    "LEPHARE",
    "DNF",
    "FRANKENZ",
    "METAPHOR",
    "ADABOOST",
    "GPZ",
    "CPZ",
    "NNPZ",
    "TEMPS",
    "TEMPS - no DA",
    "TEMPS - L15",
]
# One marker per method: pasta shapes for the external methods, standard
# matplotlib markers for the TEMPS variants.
markers_pasta = [
    markers.penne,
    markers.conchiglie,
    markers.tortellini,
    markers.creste,
    markers.spaghetti,
    markers.ravioli,
    markers.tagliatelle,
    markers.mezzelune,
    markers.puntine,
    markers.stelline,
    "s",
    "o",
    "^",
]
labs_faint = [f"{lab}_faint" for lab in labs]  # Labels for the faint data
# One colour per method, sampled from the tab20 colormap.
cmap = plt.get_cmap("tab20")
n_methods = len(labs)
colors = [cmap(idx / n_methods) for idx in range(n_methods)]

# Two vertically stacked panels with independent x axes.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 12), sharex=False)

# Top panel. These two variants get a legend entry but no visible point
# (they are drawn at NaN coordinates).
legend_only = ("TEMPS - no DA", "TEMPS - L15")
# (dx, dy) offset of the text annotation relative to the marker. CPZ and
# ADABOOST use their own offsets; every other method uses the default.
annotation_offsets = {"CPZ": (-0.2, 0.001), "ADABOOST": (-0.5, -0.004)}
default_offset = (-0.5, 0.001)
for i, lab in enumerate(labs):
    point_style = dict(color=colors[i], label=lab, marker=markers_pasta[i], s=300)
    if lab in legend_only:
        ax1.scatter(np.nan, np.nan, **point_style)
        continue
    x_val = outliers[i] * 100
    y_val = scatter[i]
    ax1.scatter(x_val, y_val, **point_style)
    dx, dy = annotation_offsets.get(lab, default_offset)
    # Annotate the marker with size[i] as a rounded percentage.
    ax1.text(
        x_val + dx,
        y_val + dy,
        f"{int(np.around(size[i] * 100))}",
        fontsize=12,
        verticalalignment="bottom",
    )

ax1.set_ylabel(r"NMAD $[\Delta z]$", fontsize=24)
ax1.legend(fontsize=14)
ax1.tick_params(axis="both", which="major", labelsize=20)

# Bottom panel: faint-sample metrics, every method plotted.
for i, lab in enumerate(labs):
    ax2.scatter(
        outliers_faint[i] * 100,
        scatter_faint[i],
        color=colors[i],
        label=lab,
        marker=markers_pasta[i],
        s=300,
    )

ax2.set_ylabel(r"NMAD $[\Delta z]$", fontsize=24)
ax2.set_xlabel("Outlier fraction [%]", fontsize=24)
ax2.tick_params(axis="both", which="major", labelsize=20)

plt.tight_layout()
# plt.savefig('Comparison_paper.pdf', bbox_inches='tight')
plt.show()
# - | |
# NOTE(review): `cat_val` and `cat_all` are not defined anywhere in the visible
# part of this script, and the visible code never gives `merged_df` RA/DEC
# columns. This looks like a leftover cell -- confirm before running.
reference_columns = [
    "RA",
    "DEC",
    "z_spec_S15",
    "photo_z_L15",
    "reliable_S15",
    "mu_class_L07",
]
# Attach the reference redshift columns to the validation positions, then join
# the result onto the merged table by sky position.
cat_val_z = cat_val[["RA", "DEC"]].merge(cat_all[reference_columns], on=["RA", "DEC"])
merged_df = merged_df.merge(cat_val_z, on=["RA", "DEC"])