lauracabayol committed
Commit 57fa8fc · 1 Parent(s): 692f707

clear code and notebooks
notebooks/Feature_space.py DELETED
@@ -1,494 +0,0 @@
- # ---
- # jupyter:
- #   jupytext:
- #     text_representation:
- #       extension: .py
- #       format_name: light
- #       format_version: '1.5'
- #     jupytext_version: 1.14.5
- #   kernelspec:
- #     display_name: insight
- #     language: python
- #     name: insight
- # ---
-
- # # DOMAIN ADAPTATION INTUITION
-
- # %load_ext autoreload
- # %autoreload 2
-
- import pandas as pd
- import numpy as np
- import os
- from astropy.io import fits
- from astropy.table import Table
- import torch
-
- # matplotlib settings
- from matplotlib import rcParams
- import matplotlib.pyplot as plt
- rcParams["mathtext.fontset"] = "stix"
- rcParams["font.family"] = "STIXGeneral"
-
- # +
- # insight modules
- import sys
- sys.path.append('../temps')
-
- from archive import archive
- from utils import nmad
- from temps_arch import EncoderPhotometry, MeasureZ
- from temps import Temps_module
- from plots import plot_nz
- # -
-
- # ## LOAD DATA
-
- # define here the directory containing the photometric catalogues
- parent_dir = '/data/astro/scratch2/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5'
- modules_dir = '../data/models/'
-
- # +
- filename_valid = 'euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
-
- hdu_list = fits.open(os.path.join(parent_dir, filename_valid))
- cat = Table(hdu_list[1].data).to_pandas()
- cat = cat[cat['FLAG_PHOT'] == 0]
- cat = cat[cat['mu_class_L07'] == 1]
-
- cat['SNR_VIS'] = cat.FLUX_VIS / cat.FLUXERR_VIS
- #cat = cat[cat.SNR_VIS>10]
- # -
-
- ztarget = [cat['z_spec_S15'].values[ii] if cat['z_spec_S15'].values[ii] > 0 else cat['photo_z_L15'].values[ii] for ii in range(len(cat))]
- specz_or_photo = [0 if cat['z_spec_S15'].values[ii] > 0 else 1 for ii in range(len(cat))]
- ID = cat['ID']
- VISmag = cat['MAG_VIS']
- zsflag = cat['reliable_S15']
- cat['ztarget'] = ztarget
- cat['specz_or_photo'] = specz_or_photo
-
- # ### EXTRACT PHOTOMETRY
-
- photoz_archive = archive(path=parent_dir, only_zspec=False)
- f, ferr = photoz_archive._extract_fluxes(catalogue=cat)
- col, colerr = photoz_archive._to_colors(f, ferr)
-
- # ### MEASURE FEATURES
-
- features_all = np.zeros((3, len(cat), 10))
- for il, lab in enumerate(['z', 'L15', 'DA']):
-
-     nn_features = EncoderPhotometry()
-     nn_features.load_state_dict(torch.load(os.path.join(modules_dir, f'modelF_{lab}.pt')))
-
-     features = nn_features(torch.Tensor(col))
-     features = features.detach().cpu().numpy()
-
-     features_all[il] = features
-
-
- # ### TRAIN AUTOENCODER TO REDUCE TO 2 DIMENSIONS
-
- import torch
- from torch import nn
- class Autoencoder(nn.Module):
-     def __init__(self, input_dim, latent_dim):
-         super(Autoencoder, self).__init__()
-         # Encoder layers
-         self.encoder = nn.Sequential(
-             nn.Linear(input_dim, 100),
-             nn.ReLU(),
-             nn.Linear(100, 50),
-             nn.ReLU(),
-             nn.Linear(50, latent_dim)
-         )
-         # Decoder layers
-         self.decoder = nn.Sequential(
-             nn.Linear(latent_dim, 50),
-             nn.ReLU(),
-             nn.Linear(50, 100),
-             nn.ReLU(),
-             nn.Linear(100, input_dim),
-         )
-
-     def forward(self, x):
-         x = self.encoder(x)
-         y = self.decoder(x)
-         return y, x
-
-
- # +
- from torch.utils.data import DataLoader, TensorDataset
-
- ds = TensorDataset(torch.Tensor(features_all[0]))
- train_loader = DataLoader(ds, batch_size=100, shuffle=True, drop_last=False)
- # -
-
- import torch.optim as optim
- autoencoder = Autoencoder(input_dim=10,
-                           latent_dim=2)
- criterion = nn.L1Loss()
- optimizer = optim.Adam(autoencoder.parameters(), lr=0.0001)
-
- # +
- # Define the number of epochs
- num_epochs = 100
- for epoch in range(num_epochs):
-     running_loss = 0.0
-     for data in train_loader:
-         # Forward pass
-         outputs, f1 = autoencoder(data[0])
-
-         loss_autoencoder = criterion(outputs, data[0])
-         optimizer.zero_grad()
-
-         # Backward pass
-         loss_autoencoder.backward()
-
-         # Update the weights
-         optimizer.step()
-
-         # Accumulate the loss
-         running_loss += loss_autoencoder.item()
-
-     # Print the average loss for the epoch
-     print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, running_loss / len(train_loader)))
-
- print('Training finished')
-
- # -
-
- # #### EVALUATE AUTOENCODER
-
- # + [markdown] jupyter={"source_hidden": true}
- # cat.to_csv('features_cat.csv', header=True, sep=',')
- # -
-
- indexes_specz = cat[(cat.specz_or_photo == 0) & (cat.reliable_S15 > 0)].reset_index().index
-
- features_all_reduced = np.zeros(shape=(3, len(cat), 2))
- for i in range(3):
-     _, features = autoencoder(torch.Tensor(features_all[i]))
-     features_all_reduced[i] = features.detach().cpu().numpy()
-
- # ### Plot the features
-
- start = 0
- end = len(cat)
- all_values = set(range(start, end))
- values_not_in_indexes_specz = all_values - set(indexes_specz)
- indexes_nospecz = sorted(values_not_in_indexes_specz)
-
- # +
- import seaborn as sns
-
- # Create subplots with three panels
- fig, axs = plt.subplots(1, 3, figsize=(15, 5))
-
- # Set style for all subplots
- sns.set_style("white")
-
- # First subplot
- sns.kdeplot(x=features_all_reduced[0, indexes_nospecz, 0],
-             y=features_all_reduced[0, indexes_nospecz, 1],
-             clip=(-150, 150),
-             ax=axs[0],
-             color='salmon')
- sns.kdeplot(x=features_all_reduced[0, indexes_specz, 0],
-             y=features_all_reduced[0, indexes_specz, 1],
-             clip=(-150, 150),
-             ax=axs[0],
-             color='lightskyblue')
-
- axs[0].set_xlim(-150, 150)
- axs[0].set_ylim(-150, 150)
- axs[0].set_title(r'Trained on $z_{\rm s}$')
-
- # Second subplot
- sns.kdeplot(x=features_all_reduced[1, indexes_nospecz, 0],
-             y=features_all_reduced[1, indexes_nospecz, 1],
-             clip=(-50, 50),
-             ax=axs[1],
-             color='salmon')
- sns.kdeplot(x=features_all_reduced[1, indexes_specz, 0],
-             y=features_all_reduced[1, indexes_specz, 1],
-             clip=(-50, 50),
-             ax=axs[1],
-             color='lightskyblue')
- axs[1].set_xlim(-50, 50)
- axs[1].set_ylim(-50, 50)
- axs[1].set_title('Trained on L15')
-
- # Third subplot; deduplicate the points of both samples before the KDE
- features_all_reduced_nospecz = pd.DataFrame(features_all_reduced[2, indexes_nospecz, :]).drop_duplicates().values
- features_all_reduced_specz = pd.DataFrame(features_all_reduced[2, indexes_specz, :]).drop_duplicates().values
- sns.kdeplot(x=features_all_reduced_nospecz[:, 0],
-             y=features_all_reduced_nospecz[:, 1],
-             clip=(-1, 5),
-             ax=axs[2],
-             color='salmon',
-             label='Wide-field sample')
- sns.kdeplot(x=features_all_reduced_specz[:, 0],
-             y=features_all_reduced_specz[:, 1],
-             clip=(-1, 5),
-             ax=axs[2],
-             color='lightskyblue',
-             label=r'$z_{\rm s}$ sample')
- axs[2].set_xlim(-2, 5)
- axs[2].set_ylim(-2, 5)
- axs[2].set_title('TEMPS')
-
- axs[0].set_xlabel('Feature 1')
- axs[1].set_xlabel('Feature 1')
- axs[2].set_xlabel('Feature 1')
- axs[0].set_ylabel('Feature 2')
-
- # Create custom legend with desired colors
- legend_labels = ['Wide-field sample', r'$z_{\rm s}$ sample']
- legend_handles = [plt.Line2D([0], [0], color='salmon', lw=2),
-                   plt.Line2D([0], [0], color='lightskyblue', lw=2)]
- axs[2].legend(legend_handles, legend_labels, loc='upper right', fontsize=16)
- # Adjust layout
- plt.tight_layout()
-
- plt.savefig('Contourplot.pdf', bbox_inches='tight')
- plt.show()
- # -
-
-
- np.savetxt('features.txt', features_all_reduced.reshape(-1, 2))
-
-
- # +
- from scipy.stats import gaussian_kde
-
- photoz_archive = archive(path=parent_dir, only_zspec=False)
-
- fig, ax = plt.subplots(ncols=3, figsize=(15, 4), sharex=True, sharey=True)
- colors = ['navy', 'goldenrod']
- titles = [r'Training: $z_s$', r'Training: L15', r'Training: $z_s$ + DA']
- x_min, x_max = -5, 5
- y_min, y_max = -5, 5
- x_grid, y_grid = np.meshgrid(np.linspace(x_min, x_max, 10), np.linspace(y_min, y_max, 10))
- for il, lab in enumerate(['z', 'L15', 'DA']):
-
-     nn_features = EncoderPhotometry()
-     nn_features.load_state_dict(torch.load(os.path.join(modules_dir, f'modelF_{lab}.pt')))
-
-     for it, target_type in enumerate(['L15', 'zs']):
-         if target_type == 'zs':
-             cat_sub = photoz_archive._select_only_zspec(cat)
-             cat_sub = photoz_archive._clean_zspec_sample(cat_sub)
-         elif target_type == 'L15':
-             cat_sub = photoz_archive._exclude_only_zspec(cat)
-         else:
-             assert False
-
-         cat_sub = photoz_archive._clean_photometry(cat_sub)
-         print(cat_sub.shape)
-
-         f, ferr = photoz_archive._extract_fluxes(cat_sub)
-         col, colerr = photoz_archive._to_colors(f, ferr)
-
-         features = nn_features(torch.Tensor(col))
-         features = features.detach().cpu().numpy()
-
-         #xy = np.vstack([features[:1000,0], features[:1000,1]])
-         #zd = gaussian_kde(xy)(xy)
-         #ax[il].scatter(features[:1000,0], features[:1000,1],c=zd, s=3)
-
-         xy = np.vstack([features[:, 0], features[:, 1]])
-         density_estimation = gaussian_kde(xy)
-
-         # Define grid for plotting density lines
-         xy_grid = np.vstack([x_grid.ravel(), y_grid.ravel()])
-         density_grid = density_estimation(xy_grid).reshape(x_grid.shape)
-
-         # Plot contour lines representing density
-         ax[il].contour(x_grid, y_grid, density_grid, colors=colors[it])
-
-     ax[il].set_title(titles[il])
-     ax[il].set_xlim(-5, 5)
-     ax[il].set_ylim(-5, 5)
-
- ax[0].set_ylabel('Feature 1', fontsize=14)
- #plt.ylabel('Feature 2', fontsize=14)
-
- #assert False
- # -
-
- H
-
- H
-
- xedges
-
- yedges
-
- # +
- import matplotlib.colors as colors
- from matplotlib import path
- import numpy as np
- from matplotlib import pyplot as plt
- try:
-     from astropy.convolution import Gaussian2DKernel, convolve
-     astro_smooth = True
- except ImportError:
-     astro_smooth = False
-
- np.random.seed(123)
- #t = np.linspace(-5,1.2,1000)
- x = features[:1000, 0]
- y = features[:1000, 1]
-
- H, xedges, yedges = np.histogram2d(x, y, bins=(10, 10))
- xmesh, ymesh = np.meshgrid(xedges[:-1], yedges[:-1])
-
- # Smooth the contours (if astropy is installed)
- if astro_smooth:
-     kernel = Gaussian2DKernel(x_stddev=1.)
-     H = convolve(H, kernel)
-
- fig, ax = plt.subplots(1, figsize=(7, 6))
- clevels = ax.contour(xmesh, ymesh, H.T, linewidths=.9, cmap='winter')  # ,zorder=90)
- ax.scatter(x, y, s=3)
- #ax.set_xlim(-20,5)
- #ax.set_ylim(-20,5)
-
- # Identify points within contours
- #p = clevels.collections[0].get_paths()
- #inside = np.full_like(x,False,dtype=bool)
- #for level in p:
- #    inside |= level.contains_points(zip(*(x,y)))
-
- #ax.plot(x[~inside],y[~inside],'kx')
- #plt.show(block=False)
- # -
-
- density_grid
-
- features.shape, zd.shape
-
- # + jupyter={"outputs_hidden": true}
- xy = np.vstack([features[:, 0], features[:, 1]])
- zd = gaussian_kde(xy)(xy)
- plt.scatter(features[:, 0], features[:, 1], c=zd)
-
- # +
- # Make the base corner plot
- import corner
- figure = corner.corner(features[:, :2], quantiles=[0.16, 0.84], show_titles=False, color='crimson')
- corner.corner(samples2, fig=figure)  # NOTE: samples2 (second sample to overlay) is not defined in this notebook
- ndim = 2
- # Extract the axes
- axes = np.array(figure.axes).reshape((ndim, ndim))
-
- for a in axes[np.triu_indices(ndim)]:
-     a.remove()
-
- # +
- import numpy as np
- import matplotlib.pyplot as plt
- from scipy.stats import gaussian_kde
-
- # Assuming 'features' is your data array with shape (n_samples, 2)
-
- # Calculate the density estimate
- xy = np.vstack([features[:, 0], features[:, 1]])
- density_estimation = gaussian_kde(xy)
-
- # Define grid for plotting density lines
- xy_grid = np.vstack([x_grid.ravel(), y_grid.ravel()])
- density_grid = density_estimation(xy_grid).reshape(x_grid.shape)
-
- # Plot contour lines representing density
- plt.contour(x_grid, y_grid, density_grid, colors='black')
-
- # Optionally, add a scatter plot on top of the density lines
- #plt.scatter(features[:,0], features[:,1], color='blue', alpha=0.5)
-
- # Set labels and title
- plt.xlabel('Feature 1')
- plt.ylabel('Feature 2')
- plt.title('Density Lines Plot')
-
- # Show plot
- plt.show()
- # -
-
- # NOTE: Arinyo_preds, Arinyo_coeffs_central and test_snap are not defined in this notebook
- corner_plot = corner.corner(Arinyo_preds,
-                             labels=[r'$b$', r'$\beta$', '$q_1$', '$k_{vav}$', '$a_v$', '$b_v$', '$k_p$', '$q_2$'],
-                             truths=Arinyo_coeffs_central[test_snap],
-                             truth_color='crimson')
-
- import corner
- figure = corner.corner(features[:, :ndim], quantiles=[0.16, 0.5, 0.84], show_titles=False)
- axes = np.array(figure.axes).reshape((ndim, ndim))
- for a in axes[np.triu_indices(ndim)]:
-     a.remove()
-
- # +
- # My data
- from scipy import stats
- x = features[:, 0]
- y = features[:, 1]
-
- # Perform the kernel density estimate
- k = stats.gaussian_kde(np.vstack([x, y]))
- xi, yi = np.mgrid[-5:5, -5:5]
- zi = k(np.vstack([xi.flatten(), yi.flatten()]))
-
- fig = plt.figure()
- ax = fig.gca()
-
- CS = ax.contour(xi, yi, zi.reshape(xi.shape), colors='crimson')
-
- ax.set_xlim(-5, 5)
- ax.set_ylim(-5, 5)
-
- plt.show()
- # -
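The deleted notebook above builds intuition for the domain adaptation (DA) step, and `temps/temps.py` (diffed at the bottom of this page) imports `maximum_mean_discrepancy` and `compute_kernel` from `utils`, which this commit does not show. For background, here is a minimal sketch of an RBF-kernel maximum mean discrepancy between a source and a target feature batch; the function names mirror the imports, but the bandwidth handling and signatures are assumptions, not the repository's code.

```python
import torch

def compute_kernel(x, y, sigma=1.0):
    # Pairwise RBF kernel between two batches of feature vectors.
    # Sketch only: the bandwidth handling in temps/utils.py may differ.
    d2 = torch.cdist(x, y).pow(2)
    return torch.exp(-d2 / (2 * sigma ** 2))

def maximum_mean_discrepancy(source, target, sigma=1.0):
    # MMD^2 = E[k(s,s')] + E[k(t,t')] - 2 E[k(s,t)]
    k_ss = compute_kernel(source, source, sigma).mean()
    k_tt = compute_kernel(target, target, sigma).mean()
    k_st = compute_kernel(source, target, sigma).mean()
    return k_ss + k_tt - 2 * k_st
```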
notebooks/Fig2_NMAD.py DELETED
@@ -1,170 +0,0 @@
- # ---
- # jupyter:
- #   jupytext:
- #     formats: ipynb,py:percent
- #     text_representation:
- #       extension: .py
- #       format_name: percent
- #       format_version: '1.3'
- #     jupytext_version: 1.14.5
- #   kernelspec:
- #     display_name: insight
- #     language: python
- #     name: insight
- # ---
-
- # %% [markdown]
- # # FIGURE 2 IN THE PAPER
-
- # %% [markdown]
- # ## METRICS FOR THE DIFFERENT METHODS ON THE WIDE-FIELD SAMPLE
-
- # %% [markdown]
- # ### LOAD PYTHON MODULES
-
- # %%
- # %load_ext autoreload
- # %autoreload 2
-
- # %%
- import pandas as pd
- import numpy as np
- import os
- from astropy.io import fits
- from astropy.table import Table
- import torch
-
- # %%
- # matplotlib settings
- from matplotlib import rcParams
- import matplotlib.pyplot as plt
- rcParams["mathtext.fontset"] = "stix"
- rcParams["font.family"] = "STIXGeneral"
-
- # %%
- # insight modules
- import sys
- sys.path.append('../temps')
-
- from archive import archive
- from utils import nmad
- from temps_arch import EncoderPhotometry, MeasureZ
- from temps import Temps_module
- from plots import plot_photoz
-
- # %%
- eval_methods = True
-
- # %% [markdown]
- # ### LOAD DATA
-
- # %%
- # define here the directory containing the photometric catalogues
- parent_dir = '/data/astro/scratch2/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5'
- modules_dir = '../data/models/'
-
- # %%
- # load catalogue and apply cuts
-
- filename_valid = 'euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
-
- hdu_list = fits.open(os.path.join(parent_dir, filename_valid))
- cat = Table(hdu_list[1].data).to_pandas()
- cat = cat[cat['FLAG_PHOT'] == 0]
- cat = cat[cat['mu_class_L07'] == 1]
- cat = cat[(cat['z_spec_S15'] > 0) | (cat['photo_z_L15'] > 0)]
- cat = cat[cat['MAG_VIS'] < 25]
-
- # %%
- ztarget = [cat['z_spec_S15'].values[ii] if cat['z_spec_S15'].values[ii] > 0 else cat['photo_z_L15'].values[ii] for ii in range(len(cat))]
- specz_or_photo = [0 if cat['z_spec_S15'].values[ii] > 0 else 1 for ii in range(len(cat))]
- ID = cat['ID']
- VISmag = cat['MAG_VIS']
- zsflag = cat['reliable_S15']
-
- # %%
- photoz_archive = archive(path=parent_dir, only_zspec=False)
- f, ferr = photoz_archive._extract_fluxes(catalogue=cat)
- col, colerr = photoz_archive._to_colors(f, ferr)
-
- # %% [markdown]
- # ### EVALUATE USING TRAINED MODELS
-
- # %%
- if eval_methods:
-     dfs = {}
-     for il, lab in enumerate(['z', 'L15', 'DA']):
-
-         nn_features = EncoderPhotometry()
-         nn_features.load_state_dict(torch.load(os.path.join(modules_dir, f'modelF_{lab}.pt')))
-         nn_z = MeasureZ(num_gauss=6)
-         nn_z.load_state_dict(torch.load(os.path.join(modules_dir, f'modelZ_{lab}.pt')))
-
-         temps = Temps_module(nn_features, nn_z)
-
-         z, zerr, zmode, pz, flag, odds = temps.get_pz(input_data=torch.Tensor(col),
-                                                       return_pz=True)
-         # Create a DataFrame with the desired columns
-         df = pd.DataFrame(np.c_[ID, VISmag, z, zmode, flag, ztarget, zsflag, zerr, specz_or_photo],
-                           columns=['ID', 'VISmag', 'z', 'zmode', 'zflag', 'ztarget', 'zsflag', 'zuncert', 'S15_L15_flag'])
-
-         # Scaled redshift error, then drop any rows with NaN values
-         df['zwerr'] = (df.zmode - df.ztarget) / (1 + df.ztarget)
-         df = df.dropna()
-
-         # Assign the DataFrame to a key in the dictionary
-         dfs[lab] = df
-
- # %%
- dfs['z']['zwerr'] = (dfs['z'].z - dfs['z'].ztarget) / (1 + dfs['z'].ztarget)
- dfs['L15']['zwerr'] = (dfs['L15'].z - dfs['L15'].ztarget) / (1 + dfs['L15'].ztarget)
- dfs['DA']['zwerr'] = (dfs['DA'].z - dfs['DA'].ztarget) / (1 + dfs['DA'].ztarget)
-
- # %% [markdown]
- # ### LOAD CATALOGUES FROM PREVIOUS TRAINING
-
- # %%
- if not eval_methods:
-     dfs = {}
-     dfs['z'] = pd.read_csv(os.path.join(parent_dir, 'predictions_specztraining.csv'), header=0)
-     dfs['L15'] = pd.read_csv(os.path.join(parent_dir, 'predictions_speczL15training.csv'), header=0)
-     dfs['DA'] = pd.read_csv(os.path.join(parent_dir, 'predictions_speczDAtraining.csv'), header=0)
-
- # %% [markdown]
- # ### MAKE PLOT
-
- # %%
- df_list = [dfs['z'], dfs['L15'], dfs['DA']]  # plot_photoz expects a list of DataFrames
- plot_photoz(df_list,
-             nbins=8,
-             xvariable='VISmag',
-             metric='nmad',
-             type_bin='bin',
-             label_list=['zs', 'zs+L15', r'TEMPS'],
-             save=False,
-             samp='L15'
-             )
-
- # %%
- plot_photoz(df_list,
-             nbins=8,
-             xvariable='VISmag',
-             metric='outliers',
-             type_bin='bin',
-             label_list=['zs', 'zs+L15', r'TEMPS'],
-             save=False,
-             samp='L15'
-             )
-
- # %%
-
- # %%
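`nmad` is imported from `utils` throughout these notebooks, but its definition is not part of this diff. A plausible sketch, assuming the standard photo-z convention (1.4826 times the median absolute deviation, applied here to the scaled residuals `zwerr`):

```python
import numpy as np

def nmad(data):
    # Normalized median absolute deviation; the 1.4826 factor makes it
    # match the standard deviation for Gaussian residuals.
    # Sketch only: the signature of utils.nmad in this repo may differ.
    data = np.asarray(data)
    return 1.4826 * np.median(np.abs(data - np.median(data)))
```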
notebooks/Fig3_PIT_CRPS.py DELETED
@@ -1,120 +0,0 @@
- # ---
- # jupyter:
- #   jupytext:
- #     formats: ipynb,py:percent
- #     text_representation:
- #       extension: .py
- #       format_name: percent
- #       format_version: '1.3'
- #     jupytext_version: 1.14.5
- #   kernelspec:
- #     display_name: insight
- #     language: python
- #     name: insight
- # ---
-
- # %% [markdown]
- # # FIGURE 3 IN THE PAPER
-
- # %% [markdown]
- # ## PIT AND CRPS FOR THE THREE METHODS
-
- # %% [markdown]
- # ### LOAD PYTHON MODULES
-
- # %%
- # %load_ext autoreload
- # %autoreload 2
-
- # %%
- import pandas as pd
- import numpy as np
- import os
- from astropy.io import fits
- from astropy.table import Table
- import torch
-
- # %%
- # matplotlib settings
- from matplotlib import rcParams
- import matplotlib.pyplot as plt
- rcParams["mathtext.fontset"] = "stix"
- rcParams["font.family"] = "STIXGeneral"
-
- # %%
- # insight modules
- import sys
- sys.path.append('../temps')
- from archive import archive
- from utils import nmad
- from plots import plot_PIT, plot_crps
- from temps_arch import EncoderPhotometry, MeasureZ
- from temps import Temps_module
-
- # %% [markdown]
- # ### LOAD DATA
-
- # %%
- # define here the directories containing the photometric catalogues and the trained models
- parent_dir = '/data/astro/scratch2/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5'
- modules_dir = '../data/models/'
-
- photoz_archive = archive(path=parent_dir,
-                          only_zspec=False,
-                          flags_kept=[1., 1.1, 1.4, 1.5, 2, 2.1, 2.4, 2.5, 3., 3.1, 3.4, 3.5, 4., 9., 9.1, 9.3, 9.4, 9.5, 11.1, 11.5, 12.1, 12.5, 13., 13.1, 13.5, 14],
-                          target_test='L15')
- f_test, ferr_test, specz_test, VIS_mag_test = photoz_archive.get_testing_data()
-
- # %% [markdown]
- # ## CREATE PIT; CRPS; SPECTROSCOPIC SAMPLE
-
- # %% [markdown]
- # This loads pre-trained models (for the sake of time). You can learn how to train the models in the Tutorial notebook.
-
- # %%
- # Initialize empty dictionaries to store the PIT and CRPS values
- crps_dict = {}
- pit_dict = {}
- for il, lab in enumerate(['z', 'L15', 'DA']):
-
-     nn_features = EncoderPhotometry()
-     nn_features.load_state_dict(torch.load(os.path.join(modules_dir, f'modelF_{lab}.pt')))
-     nn_z = MeasureZ(num_gauss=6)
-     nn_z.load_state_dict(torch.load(os.path.join(modules_dir, f'modelZ_{lab}.pt')))
-
-     temps = Temps_module(nn_features, nn_z)
-
-     pit_list = temps.pit(input_data=torch.Tensor(f_test), target_data=torch.Tensor(specz_test))
-     crps_list = temps.crps(input_data=torch.Tensor(f_test), target_data=specz_test)
-
-     # Assign the lists to a key in the dictionaries
-     crps_dict[lab] = crps_list
-     pit_dict[lab] = pit_list
-
- # %%
- plot_PIT(pit_dict['z'],
-          pit_dict['L15'],
-          pit_dict['DA'],
-          labels=[r'$z_{\rm s}$', 'L15', 'TEMPS'],
-          sample='L15',
-          save=True)
-
- # %%
- plot_crps(crps_dict['z'],
-           crps_dict['L15'],
-           crps_dict['DA'],
-           labels=[r'$z_{\rm s}$', 'L15', 'TEMPS'],
-           sample='L15',
-           save=True)
-
- # %%
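`Temps_module.pit` and `.crps` are called above but not shown in this commit. For intuition, a sketch of both metrics computed from a redshift PDF sampled on a grid (the grid, names, and signature are assumptions): the PIT is the CDF evaluated at the true redshift, and the CRPS integrates the squared difference between the CDF and a step function at the true redshift.

```python
import numpy as np

def pit_and_crps(pz, z_grid, z_true):
    """PIT and CRPS for one galaxy, given p(z) sampled on z_grid.
    Sketch only; TEMPS computes these internally from its mixture model."""
    pz = pz / np.trapz(pz, z_grid)                # normalize the PDF
    cdf = np.cumsum(pz * np.gradient(z_grid))     # approximate CDF
    pit = np.interp(z_true, z_grid, cdf)          # CDF at the true redshift
    step = (z_grid >= z_true).astype(float)       # Heaviside at z_true
    crps = np.trapz((cdf - step) ** 2, z_grid)
    return pit, crps

# Example on a Gaussian toy PDF:
z_grid = np.linspace(0, 5, 1000)
pz = np.exp(-0.5 * ((z_grid - 1.0) / 0.05) ** 2)
print(pit_and_crps(pz, z_grid, z_true=1.02))
```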
notebooks/Fig4_pz_examples.py DELETED
@@ -1,128 +0,0 @@
- # ---
- # jupyter:
- #   jupytext:
- #     formats: ipynb,py:percent
- #     text_representation:
- #       extension: .py
- #       format_name: percent
- #       format_version: '1.3'
- #     jupytext_version: 1.14.5
- #   kernelspec:
- #     display_name: insight
- #     language: python
- #     name: insight
- # ---
-
- # %% [markdown]
- # # FIGURE 4 IN THE PAPER
-
- # %% [markdown]
- # ## IMPACT OF TEMPS ON CONCRETE P(Z) EXAMPLES
-
- # %% [markdown]
- # ### LOAD PYTHON MODULES
-
- # %%
- # %load_ext autoreload
- # %autoreload 2
-
- # %%
- import pandas as pd
- import numpy as np
- import os
- from astropy.io import fits
- from astropy.table import Table
- import torch
-
- # %%
- # matplotlib settings
- from matplotlib import rcParams
- import matplotlib.pyplot as plt
- rcParams["mathtext.fontset"] = "stix"
- rcParams["font.family"] = "STIXGeneral"
-
- # %%
- # insight modules
- import sys
- sys.path.append('../temps')
- from archive import archive
- from utils import nmad
- from temps_arch import EncoderPhotometry, MeasureZ
- from temps import Temps_module
-
- # %% [markdown]
- # ### LOAD DATA
-
- # %%
- # define here the directory containing the photometric catalogues
- parent_dir = '/data/astro/scratch2/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5'
- modules_dir = '../data/models/'
-
- # %%
- filename_valid = 'euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
-
- hdu_list = fits.open(os.path.join(parent_dir, filename_valid))
- cat = Table(hdu_list[1].data).to_pandas()
- cat = cat[cat['FLAG_PHOT'] == 0]
- cat = cat[cat['mu_class_L07'] == 1]
- cat = cat[(cat['z_spec_S15'] > 0) | (cat['photo_z_L15'] > 0)]
- cat = cat[cat['MAG_VIS'] < 25]
-
- # %%
- ztarget = [cat['z_spec_S15'].values[ii] if cat['z_spec_S15'].values[ii] > 0 else cat['photo_z_L15'].values[ii] for ii in range(len(cat))]
- specz_or_photo = [0 if cat['z_spec_S15'].values[ii] > 0 else 1 for ii in range(len(cat))]
- ID = cat['ID']
- VISmag = cat['MAG_VIS']
- zsflag = cat['reliable_S15']
-
- # %%
- photoz_archive = archive(path=parent_dir, only_zspec=False)
- f, ferr = photoz_archive._extract_fluxes(catalogue=cat)
- col, colerr = photoz_archive._to_colors(f, ferr)
-
- # %% [markdown]
- # ### LOAD TRAINED MODELS AND EVALUATE PDF OF RANDOM EXAMPLES
-
- # %% [markdown]
- # The notebook 'Tutorial_temps' gives an example of how to train and save models.
-
- # %%
- # Initialize an empty dictionary to store the p(z) of one random galaxy per model
- ii = np.random.randint(0, len(col), 1)
- pz_dict = {}
- for il, lab in enumerate(['z', 'L15', 'DA']):
-
-     nn_features = EncoderPhotometry()
-     nn_features.load_state_dict(torch.load(os.path.join(modules_dir, f'modelF_{lab}.pt')))
-     nn_z = MeasureZ(num_gauss=6)
-     nn_z.load_state_dict(torch.load(os.path.join(modules_dir, f'modelZ_{lab}.pt')))
-
-     temps = Temps_module(nn_features, nn_z)
-
-     z, zerr, pz, flag, _ = temps.get_pz(input_data=torch.Tensor(col[ii]), return_pz=True)
-
-     # Assign the PDF to a key in the dictionary
-     pz_dict[lab] = pz
-
- # %%
- cmap = plt.get_cmap('Dark2')
-
- plt.plot(np.linspace(0, 5, 1000), pz_dict['z'][0], label='z', color=cmap(0), ls='--')
- plt.plot(np.linspace(0, 5, 1000), pz_dict['L15'][0], label='L15', color=cmap(1), ls=':')
- plt.plot(np.linspace(0, 5, 1000), pz_dict['DA'][0], label='TEMPS', color=cmap(2), ls='-')
- plt.axvline(x=np.array(ztarget)[ii][0], ls='-.', color='black')
- #plt.xlim(0,2)
- plt.legend()
-
- plt.xlabel(r'$z$', fontsize=14)
- plt.ylabel('Probability', fontsize=14)
- #plt.savefig(f'pz_{ii[0]}.pdf', bbox_inches='tight')
-
- # %%
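For readers reproducing this figure without the trained models: given a p(z) on the 0 to 5 grid used by the plotting cell above, the point estimates that other notebooks call `z` (mean) and `zmode` can be sketched as below. This is a plausible reading of the outputs of `get_pz`, not the repository's exact code.

```python
import numpy as np

z_grid = np.linspace(0, 5, 1000)  # grid used by the plotting cell above

def point_estimates(pz):
    # Posterior mean and mode of a gridded redshift PDF (sketch).
    pz = pz / np.trapz(pz, z_grid)
    z_mean = np.trapz(z_grid * pz, z_grid)
    z_mode = z_grid[np.argmax(pz)]
    return z_mean, z_mode
```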
notebooks/Fig6_qualitycut.py DELETED
@@ -1,164 +0,0 @@
- # ---
- # jupyter:
- #   jupytext:
- #     text_representation:
- #       extension: .py
- #       format_name: light
- #       format_version: '1.5'
- #     jupytext_version: 1.14.5
- #   kernelspec:
- #     display_name: insight
- #     language: python
- #     name: insight
- # ---
-
- # # FIGURE 6 IN THE PAPER
-
- # ## QUALITY CUTS
-
- # %load_ext autoreload
- # %autoreload 2
-
- import pandas as pd
- import numpy as np
- import os
- import torch
- from scipy import stats
-
- # matplotlib settings
- from matplotlib import rcParams
- import matplotlib.pyplot as plt
- rcParams["mathtext.fontset"] = "stix"
- rcParams["font.family"] = "STIXGeneral"
-
- # insight modules
- import sys
- sys.path.append('../temps')
- #from insight_arch import EncoderPhotometry, MeasureZ
- #from insight import Insight_module
- from archive import archive
- from utils import nmad
- from temps_arch import EncoderPhotometry, MeasureZ
- from temps import Temps_module
-
- # ### LOAD DATA (ONLY SPECZ)
-
- # define here the directory containing the photometric catalogues
- parent_dir = '/data/astro/scratch2/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5'
- modules_dir = '../data/models/'
-
- photoz_archive = archive(path=parent_dir, only_zspec=True, flags_kept=[1., 1.1, 1.4, 1.5, 2, 2.1, 2.4, 2.5, 3., 3.1, 3.4, 3.5, 4., 9., 9.1, 9.3, 9.4, 9.5, 11.1, 11.5, 12.1, 12.5, 13., 13.1, 13.5, 14])
- f_test_specz, ferr_test_specz, specz_test, VIS_mag_test = photoz_archive.get_testing_data()
-
- # ### LOAD TRAINED MODELS AND EVALUATE PDF OF RANDOM EXAMPLES
-
- # +
- # Initialize an empty dictionary to store DataFrames
- dfs = {}
-
- for il, lab in enumerate(['z', 'L15', 'DA']):
-
-     nn_features = EncoderPhotometry()
-     nn_features.load_state_dict(torch.load(os.path.join(modules_dir, f'modelF_{lab}.pt')))
-     nn_z = MeasureZ(num_gauss=6)
-     nn_z.load_state_dict(torch.load(os.path.join(modules_dir, f'modelZ_{lab}.pt')))
-
-     temps = Temps_module(nn_features, nn_z)
-
-     z, zerr, pz, flag, odds = temps.get_pz(input_data=torch.Tensor(f_test_specz),
-                                            return_pz=True)
-
-     # Create a DataFrame with the desired columns
-     df = pd.DataFrame(np.c_[z, flag, odds, specz_test],
-                       columns=['z', 'zflag', 'odds', 'ztarget'])
-
-     # Calculate additional columns or operations if needed
-     df['zwerr'] = (df.z - df.ztarget) / (1 + df.ztarget)
-
-     # Drop any rows with NaN values
-     df = df.dropna()
-
-     # Assign the DataFrame to a key in the dictionary
-     dfs[lab] = df
- # -
-
- # ### STATISTICS BASED ON OUR QUALITY CUT
-
- # +
- bin_edges = stats.mstats.mquantiles(df.zflag, np.arange(0, 1.01, 0.05))
- scatter, eta, xlab, xmag, xzs, flagmean = [], [], [], [], [], []
-
- for k in range(len(bin_edges) - 1):
-     edge_min = bin_edges[k]
-     edge_max = bin_edges[k + 1]
-
-     df_bin = df[(df.zflag > edge_min)]
-
-     xlab.append(np.round(len(df_bin) / len(df), 2) * 100)
-     xzs.append(0.5 * (df_bin.ztarget.min() + df_bin.ztarget.max()))
-     flagmean.append(np.mean(df_bin.zflag))
-     scatter.append(nmad(df_bin.zwerr))
-     eta.append(len(df_bin[np.abs(df_bin.zwerr) > 0.15]) / len(df) * 100)
- # -
-
- # ### STATISTICS BASED ON ODDS
-
- # +
- bin_edges = stats.mstats.mquantiles(df.odds, np.arange(0, 1.01, 0.05))
- scatter_odds, eta_odds, xlab_odds, oddsmean = [], [], [], []
-
- for k in range(len(bin_edges) - 1):
-     edge_min = bin_edges[k]
-     edge_max = bin_edges[k + 1]
-
-     df_bin = df[(df.odds > edge_min)]
-
-     xlab_odds.append(np.round(len(df_bin) / len(df), 2) * 100)
-     oddsmean.append(np.mean(df_bin.zflag))
-     scatter_odds.append(nmad(df_bin.zwerr))
-     eta_odds.append(len(df_bin[np.abs(df_bin.zwerr) > 0.15]) / len(df) * 100)
- # -
-
- # ### PLOTS
-
- # +
- plt.plot(xlab_odds, scatter_odds, marker='.', color='crimson', label=r'$\theta(\Delta z)$', ls='--', alpha=0.5)
- plt.plot(xlab, scatter, marker='.', color='navy', label=r'$\xi = \theta(\Delta z)$')
-
- plt.ylabel(r'NMAD [$\Delta z\ /\ (1 + z_{\rm s})$]', fontsize=16)
- plt.xlabel('Completeness', fontsize=16)
-
- plt.yticks(fontsize=12)
- plt.xticks(np.arange(5, 101, 10), fontsize=12)
- plt.legend(fontsize=14)
-
- plt.savefig('Flag_nmad_zspec.pdf', bbox_inches='tight')
- plt.show()
- # -
-
- # +
- plt.plot(xlab_odds, eta_odds, marker='.', color='crimson', label=r'$\theta(\Delta z)$', ls='--', alpha=0.5)
- plt.plot(xlab, eta, marker='.', color='navy', label=r'$\xi = \theta(\Delta z)$')
-
- plt.yticks(fontsize=12)
- plt.xticks(np.arange(5, 101, 10), fontsize=12)
- plt.ylabel(r'$\eta$ [%]', fontsize=16)
- plt.xlabel('Completeness', fontsize=16)
- plt.legend()
-
- plt.savefig('Flag_eta_zspec.pdf', bbox_inches='tight')
-
- plt.show()
- # -
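The quality-cut notebook ranks galaxies both by `zflag` and by `odds`, two quantities returned by `get_pz` but not defined anywhere in this diff. One common definition of photo-z odds, shown here as an assumption rather than the repository's formula, is the fraction of p(z) within a window of ±0.05(1 + z_mode) around the mode:

```python
import numpy as np

def odds(pz, z_grid, window=0.05):
    # Fraction of the PDF within +/- window*(1+z_mode) of the mode (sketch).
    pz = pz / np.trapz(pz, z_grid)
    z_mode = z_grid[np.argmax(pz)]
    half_width = window * (1 + z_mode)
    mask = np.abs(z_grid - z_mode) < half_width
    return np.trapz(pz[mask], z_grid[mask])
```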
notebooks/Fig7_colourspace.py DELETED
@@ -1,261 +0,0 @@
- # ---
- # jupyter:
- #   jupytext:
- #     text_representation:
- #       extension: .py
- #       format_name: light
- #       format_version: '1.5'
- #     jupytext_version: 1.14.5
- #   kernelspec:
- #     display_name: insight
- #     language: python
- #     name: insight
- # ---
-
- # # FIGURE COLOURSPACE IN THE PAPER
-
- # %load_ext autoreload
- # %autoreload 2
-
- import pandas as pd
- import numpy as np
- import os
- from astropy.io import fits
- from astropy.table import Table
- import torch
-
- # matplotlib settings
- from matplotlib import rcParams
- import matplotlib.pyplot as plt
- rcParams["mathtext.fontset"] = "stix"
- rcParams["font.family"] = "STIXGeneral"
-
- # +
- # insight modules
- import sys
- sys.path.append('../temps')
-
- from archive import archive
- from utils import nmad
- from temps_arch import EncoderPhotometry, MeasureZ
- from temps import Temps_module
- from plots import plot_nz
- # -
-
- def estimate_som_map(df, plot_arg='z', nx=40, ny=40):
-     """
-     Estimate a Self-Organizing Map (SOM) visualization from a DataFrame.
-
-     Parameters:
-     - df (pandas.DataFrame): Input DataFrame containing data for SOM estimation.
-     - plot_arg (str, optional): Column name to be used for plotting. Default is 'z'.
-     - nx (int, optional): Number of cells along the X-axis. Default is 40.
-     - ny (int, optional): Number of cells along the Y-axis. Default is 40.
-
-     Returns:
-     - som_data (numpy.ndarray): Estimated SOM visualization data.
-     """
-     x_cells = np.arange(0, nx)
-     y_cells = np.arange(0, ny)
-     index_cell = np.arange(nx * ny)
-     cells = np.array(np.meshgrid(x_cells, y_cells)).T.reshape(-1, 2)
-     cells = pd.DataFrame(np.c_[cells[:, 0], cells[:, 1], index_cell], columns=['x_cell', 'y_cell', 'cell'])
-
-     if plot_arg == 'count':
-         som_vis = df.groupby('cell')['z'].count().reset_index().rename(columns={'z': 'plot_som'})
-     else:
-         som_vis = df.groupby('cell')[f'{plot_arg}'].mean().reset_index().rename(columns={f'{plot_arg}': 'plot_som'})
-
-     som_data = som_vis.merge(cells, on='cell')
-     som_data = som_data.pivot(index='x_cell', columns='y_cell', values='plot_som')
-
-     return som_data
-
-
- def plot_som_map(som_data, plot_arg='z', vmin=0, vmax=1):
-     """
-     Plot the Self-Organizing Map (SOM) data.
-
-     Parameters:
-     - som_data (numpy.ndarray): The SOM data to be visualized.
-     - plot_arg (str, optional): The column name to be plotted. Default is 'z'.
-     - vmin (float, optional): Minimum value for color scaling. Default is 0.
-     - vmax (float, optional): Maximum value for color scaling. Default is 1.
-
-     Returns:
-     None
-     """
-     plt.imshow(som_data, vmin=vmin, vmax=vmax, cmap='viridis')
-     plt.colorbar(label=f'{plot_arg}')
-     plt.xlabel(r'$x$ [pixel]', fontsize=14)
-     plt.ylabel(r'$y$ [pixel]', fontsize=14)
-     plt.show()
-
-
- # ### LOAD DATA
-
- # define here the directories containing the photometric catalogues and the trained models
- parent_dir = '/data/astro/scratch2/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5'
- modules_dir = '../data/models/'
-
- # +
- filename_valid = 'euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
-
- hdu_list = fits.open(os.path.join(parent_dir, filename_valid))
- cat = Table(hdu_list[1].data).to_pandas()
- cat = cat[cat['FLAG_PHOT'] == 0]
- cat = cat[cat['mu_class_L07'] == 1]
- cat = cat[(cat['z_spec_S15'] > 0) | (cat['photo_z_L15'] > 0)]
- cat = cat[cat['MAG_VIS'] < 25]
- # -
-
- ztarget = [cat['z_spec_S15'].values[ii] if cat['z_spec_S15'].values[ii] > 0 else cat['photo_z_L15'].values[ii] for ii in range(len(cat))]
- specz_or_photo = [0 if cat['z_spec_S15'].values[ii] > 0 else 1 for ii in range(len(cat))]
- ID = cat['ID']
- VISmag = cat['MAG_VIS']
- zsflag = cat['reliable_S15']
-
- photoz_archive = archive(path=parent_dir, only_zspec=False)
- f, ferr = photoz_archive._extract_fluxes(catalogue=cat)
- col, colerr = photoz_archive._to_colors(f, ferr)
-
- # ### LOAD TRAINED MODELS AND EVALUATE PDFs AND REDSHIFT
-
- # +
- dfs = {}
-
- for il, lab in enumerate(['z', 'L15', 'DA']):
-
-     nn_features = EncoderPhotometry()
-     nn_features.load_state_dict(torch.load(os.path.join(modules_dir, f'modelF_{lab}.pt')))
-     nn_z = MeasureZ(num_gauss=6)
-     nn_z.load_state_dict(torch.load(os.path.join(modules_dir, f'modelZ_{lab}.pt')))
-
-     temps = Temps_module(nn_features, nn_z)
-
-     z, zerr, pz, flag, odds = temps.get_pz(input_data=torch.Tensor(col),
-                                            return_pz=True)
-     # Create a DataFrame with the desired columns
-     df = pd.DataFrame(np.c_[ID, VISmag, z, flag, ztarget, zsflag, zerr, specz_or_photo],
-                       columns=['ID', 'VISmag', 'z', 'zflag', 'ztarget', 'zsflag', 'zuncert', 'S15_L15_flag'])
-
-     # Scaled redshift error, then drop any rows with NaN values
-     df['zwerr'] = (df.z - df.ztarget) / (1 + df.ztarget)
-     df = df.dropna()
-
-     # Assign the DataFrame to a key in the dictionary
-     dfs[lab] = df
- # -
-
- df_z = dfs['z']
- df_z_DA = dfs['DA']
-
- # ##### LOAD SOM TRAINED ON THE TRAINING DATA
-
- df_som = pd.read_csv(os.path.join(parent_dir, 'som_dataframe.csv'), header=0, sep=',')
- df_z = df_z.merge(df_som, on='ID')
- df_z_DA = df_z_DA.merge(df_som, on='ID')
-
- # ##### APPLY CUTS FOR DIFFERENT SAMPLES
-
- df_zspec = df_z[(df_z.S15_L15_flag == 0) & (df_z.zsflag == 1)]
- df_l15 = df_z[(df_z.ztarget > 0)]
- df_l15_DA = df_z_DA[(df_z_DA.ztarget > 0)]
-
- df_l15_euclid = df_z[(df_z.VISmag < 24.5) & (df_z.z > 0.2) & (df_z.z < 2.6)]
- df_l15_euclid_cut = df_l15_euclid[df_l15_euclid.zflag > 0.033]
-
- df_l15_euclid_da = df_z_DA[(df_z_DA.VISmag < 24.5) & (df_z_DA.z > 0.2) & (df_z_DA.z < 2.6)]
- df_l15_euclid_cut_da = df_l15_euclid_da[df_l15_euclid_da.zflag > 0.018]
-
- # ## MAKE SOM PLOT
-
- from mpl_toolkits.axes_grid1 import make_axes_locatable
-
- # +
- fig, axs = plt.subplots(6, 4, figsize=(13, 15), sharex=True, sharey=True, gridspec_kw={'hspace': 0.05, 'wspace': 0.06})
-
- # Top row: spectroscopic sample
- columns = ['ztarget', 'z', 'zwerr', 'count']
- titles = [r'$z_{true}$', r'$z$', r'$z_{\rm error}$', 'Counts']
- limits = [[0, 4], [0, 4], [-0.5, 0.5], [0, 50]]
- for ii in range(4):
-     som_data = estimate_som_map(df_zspec, plot_arg=columns[ii], nx=40, ny=40)
-     im = axs[0, ii].imshow(som_data, vmin=limits[ii][0], vmax=limits[ii][1], cmap='viridis')
-     axs[0, ii].set_title(f'{titles[ii]}', fontsize=18)
-
-     if ii == 0:
-         axs[0, 0].set_ylabel(r'$y$', fontsize=14)
-     elif ii == 1:
-         cbar_ax = fig.add_axes([0.49, 0.11, 0.01, 0.77])
-         fig.colorbar(im, cax=cbar_ax)
-     elif ii == 2:
-         cbar_ax = fig.add_axes([0.685, 0.11, 0.01, 0.77])
-         fig.colorbar(im, cax=cbar_ax)
-     elif ii == 3:
-         cbar_ax = fig.add_axes([0.885, 0.11, 0.01, 0.77])
-         fig.colorbar(im, cax=cbar_ax)
-
- # Second row: L15 sample
- for jj in range(4):
-     som_data = estimate_som_map(df_l15, plot_arg=columns[jj], nx=40, ny=40)
-     im = axs[1, jj].imshow(som_data, vmin=limits[jj][0], vmax=limits[jj][1], cmap='viridis')
-
- # Third row: L15 sample + DA
- for kk in range(4):
-     som_data = estimate_som_map(df_l15_DA, plot_arg=columns[kk], nx=40, ny=40)
-     im = axs[2, kk].imshow(som_data, vmin=limits[kk][0], vmax=limits[kk][1], cmap='viridis')
-
- # Fourth row: Euclid sample + DA
- for rr in range(4):
-     som_data = estimate_som_map(df_l15_euclid_da, plot_arg=columns[rr], nx=40, ny=40)
-     im = axs[3, rr].imshow(som_data, vmin=limits[rr][0], vmax=limits[rr][1], cmap='viridis')
-
- # Fifth row: Euclid sample + quality cut
- for ll in range(4):
-     som_data = estimate_som_map(df_l15_euclid_cut, plot_arg=columns[ll], nx=40, ny=40)
-     im = axs[4, ll].imshow(som_data, vmin=limits[ll][0], vmax=limits[ll][1], cmap='viridis')
-     axs[4, ll].set_xlabel(r'$x$', fontsize=14)
-
- # Sixth row: Euclid sample + DA + quality cut
- for ll in range(4):
-     som_data = estimate_som_map(df_l15_euclid_cut_da, plot_arg=columns[ll], nx=40, ny=40)
-     im = axs[5, ll].imshow(som_data, vmin=limits[ll][0], vmax=limits[ll][1], cmap='viridis')
-     axs[5, ll].set_xlabel(r'$x$', fontsize=14)
-
- axs[0, 0].set_ylabel(r'$y$', fontsize=14)
- axs[1, 0].set_ylabel(r'$y$', fontsize=14)
- axs[2, 0].set_ylabel(r'$y$', fontsize=14)
- axs[3, 0].set_ylabel(r'$y$', fontsize=14)
- axs[4, 0].set_ylabel(r'$y$', fontsize=14)
- axs[5, 0].set_ylabel(r'$y$', fontsize=14)
-
- fig.text(0.09, 0.815, r'$z_{\rm s}$ sample', va='center', rotation='vertical', fontsize=16)
- fig.text(0.09, 0.69, r'L15 sample', va='center', rotation='vertical', fontsize=16)
- fig.text(0.09, 0.56, r'L15 sample + DA', va='center', rotation='vertical', fontsize=14)
- fig.text(0.09, 0.44, r'$Euclid$ sample + DA', va='center', rotation='vertical', fontsize=14)
- fig.text(0.09, 0.3, r'$Euclid$ sample + QC', va='center', rotation='vertical', fontsize=14)
- fig.text(0.09, 0.17, r'$Euclid$ sample + DA + QC', va='center', rotation='vertical', fontsize=13)
-
- plt.savefig('SOM_colourspace.pdf', format='pdf', bbox_inches='tight', dpi=300)
- # -
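The `cell` column consumed by `estimate_som_map` comes from a precomputed `som_dataframe.csv`; the SOM training itself is not part of this commit. A minimal sketch of how such cell assignments could be produced (plain NumPy, square grid, Gaussian neighborhood; all names and hyperparameters here are hypothetical):

```python
import numpy as np

def train_som(data, nx=40, ny=40, n_iter=10_000, lr0=0.5, sigma0=10.0, seed=0):
    """Train a small self-organizing map and return its weight grid (sketch)."""
    rng = np.random.default_rng(seed)
    n_features = data.shape[1]
    weights = rng.normal(size=(nx * ny, n_features))
    # Grid coordinates of each cell, used for the neighborhood function.
    coords = np.stack(np.unravel_index(np.arange(nx * ny), (nx, ny)), axis=1).astype(float)
    for t in range(n_iter):
        lr = lr0 * (1 - t / n_iter)                      # decaying learning rate
        sigma = sigma0 * (1 - t / n_iter) + 1.0          # shrinking neighborhood
        x = data[rng.integers(len(data))]
        bmu = np.argmin(((weights - x) ** 2).sum(axis=1))  # best-matching unit
        d2 = ((coords - coords[bmu]) ** 2).sum(axis=1)     # grid distance to BMU
        h = np.exp(-d2 / (2 * sigma ** 2))                 # Gaussian neighborhood
        weights += lr * h[:, None] * (x - weights)
    return weights

def assign_cells(data, weights):
    """Index of the best-matching cell for each object (the 'cell' column)."""
    d2 = ((data[:, None, :] - weights[None, :, :]) ** 2).sum(axis=-1)
    return d2.argmin(axis=1)
```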
notebooks/Table_metrics.py DELETED
@@ -1,148 +0,0 @@
- # ---
- # jupyter:
- #   jupytext:
- #     text_representation:
- #       extension: .py
- #       format_name: light
- #       format_version: '1.5'
- #     jupytext_version: 1.14.5
- #   kernelspec:
- #     display_name: insight
- #     language: python
- #     name: insight
- # ---
-
- # # TABLE METRICS
-
- # %load_ext autoreload
- # %autoreload 2
-
- import pandas as pd
- import numpy as np
- import os
- import torch
- from scipy import stats
- from astropy.io import fits
- from astropy.table import Table
-
- # matplotlib settings
- from matplotlib import rcParams
- import matplotlib.pyplot as plt
- rcParams["mathtext.fontset"] = "stix"
- rcParams["font.family"] = "STIXGeneral"
-
- # insight modules
- import sys
- sys.path.append('../temps')
- from archive import archive
- from utils import nmad, select_cut
- from temps_arch import EncoderPhotometry, MeasureZ
- from temps import Temps_module
-
- # ## LOAD DATA
-
- # define here the directory containing the photometric catalogues
- parent_dir = '/data/astro/scratch2/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5'
- modules_dir = '../data/models/'
-
- # +
- filename_valid = 'euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
-
- hdu_list = fits.open(os.path.join(parent_dir, filename_valid))
- cat = Table(hdu_list[1].data).to_pandas()
- cat = cat[cat['FLAG_PHOT'] == 0]
- cat = cat[cat['mu_class_L07'] == 1]
-
- cat['SNR_VIS'] = cat.FLUX_VIS / cat.FLUXERR_VIS
- # -
-
- cat = cat[cat.SNR_VIS > 10]
-
- ztarget = [cat['z_spec_S15'].values[ii] if cat['z_spec_S15'].values[ii] > 0 else cat['photo_z_L15'].values[ii] for ii in range(len(cat))]
- specz_or_photo = [0 if cat['z_spec_S15'].values[ii] > 0 else 1 for ii in range(len(cat))]
- ID = cat['ID']
- VISmag = cat['MAG_VIS']
- zsflag = cat['reliable_S15']
-
- cat['ztarget'] = ztarget
- cat['specz_or_photo'] = specz_or_photo
-
- cat = cat[cat.ztarget > 0]
-
- # ### EXTRACT PHOTOMETRY
-
- photoz_archive = archive(path=parent_dir, only_zspec=False)
- f, ferr = photoz_archive._extract_fluxes(catalogue=cat)
- col, colerr = photoz_archive._to_colors(f, ferr)
-
- # ### MEASURE CATALOGUE
-
- # +
- # Evaluate the domain-adapted (DA) model on the full catalogue
- lab = 'DA'
- nn_features = EncoderPhotometry()
- nn_features.load_state_dict(torch.load(os.path.join(modules_dir, f'modelF_{lab}.pt')))
- nn_z = MeasureZ(num_gauss=6)
- nn_z.load_state_dict(torch.load(os.path.join(modules_dir, f'modelZ_{lab}.pt')))
-
- temps = Temps_module(nn_features, nn_z)
-
- z, zerr, pz, flag, odds = temps.get_pz(input_data=torch.Tensor(col),
-                                        return_pz=True)
-
- # Create a DataFrame with the desired columns
- df = pd.DataFrame(np.c_[z, flag, odds, cat.ztarget, cat.reliable_S15, cat.specz_or_photo],
-                   columns=['z', 'zflag', 'odds', 'ztarget', 'reliable_S15', 'specz_or_photo'])
-
- # Scaled redshift error, then drop any rows with NaN values
- df['zwerr'] = (df.z - df.ztarget) / (1 + df.ztarget)
- df = df.dropna()
- # -
-
- # ### SPECZ SAMPLE
-
- df_specz = df[(df.reliable_S15 == 1) & (df.specz_or_photo == 0)]
-
- # +
- df_selected, cut, dfcuts = select_cut(df_specz,
-                                       completenss_lim=None,
-                                       nmad_lim=0.055,
-                                       outliers_lim=None,
-                                       return_df=True)
- # -
-
- print(dfcuts.to_latex(float_format="%.3f",
-                       columns=['Nobj', 'completeness', 'nmad', 'eta'],
-                       index=False
-                       ))
-
- # ### EUCLID SAMPLE
-
- df_euclid = df[(df.z > 0.2) & (df.z < 2.6)]
-
- # +
- df_selected, cut, dfcuts = select_cut(df_euclid,
-                                       completenss_lim=None,
-                                       nmad_lim=0.055,
-                                       outliers_lim=None,
-                                       return_df=True)
- # -
-
- print(dfcuts.to_latex(float_format="%.3f",
-                       columns=['Nobj', 'completeness', 'nmad', 'eta'],
-                       index=False
-                       ))
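`select_cut` (imported from `utils` above) is not shown in this diff. From its call site it appears to scan quality-flag thresholds until the retained sample satisfies a target completeness, NMAD, or outlier rate. The sketch below of that behavior, including the `completenss_lim` spelling taken from the call site, is an assumption, not the repository's code:

```python
import numpy as np
import pandas as pd

def select_cut(df, completenss_lim=None, nmad_lim=None, outliers_lim=None, return_df=False):
    # Sketch: evaluate progressively stricter cuts on the quality flag 'zflag'
    # and stop at the first one satisfying the requested limit.
    rows, chosen = [], None
    for q in np.arange(0.0, 1.0, 0.05):
        cut = df.zflag.quantile(q)
        d = df[df.zflag > cut]
        nmad_val = 1.4826 * np.median(np.abs(d.zwerr - np.median(d.zwerr)))
        eta = 100 * np.mean(np.abs(d.zwerr) > 0.15)          # outlier rate [%]
        completeness = 100 * len(d) / len(df)                # retained fraction [%]
        rows.append(dict(cut=cut, Nobj=len(d), completeness=completeness,
                         nmad=nmad_val, eta=eta))
        if chosen is None and (
            (nmad_lim is not None and nmad_val <= nmad_lim)
            or (outliers_lim is not None and eta <= outliers_lim)
            or (completenss_lim is not None and completeness <= completenss_lim)
        ):
            chosen = cut
    dfcuts = pd.DataFrame(rows)
    df_selected = df[df.zflag > chosen] if chosen is not None else df
    return (df_selected, chosen, dfcuts) if return_df else (chosen, dfcuts)
```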
temps/archive.py CHANGED
@@ -1,18 +1,18 @@
  import numpy as np
  import pandas as pd
  from astropy.io import fits
- import os
  from astropy.table import Table
  from scipy.spatial import KDTree
+ from matplotlib import pyplot as plt
+ from matplotlib import rcParams
+ from pathlib import Path
+ from loguru import logger

- import matplotlib.pyplot as plt
- from matplotlib import rcParams
  rcParams["mathtext.fontset"] = "stix"
  rcParams["font.family"] = "STIXGeneral"

- class archive():
+ class Archive:
      def __init__(self, path,
                   aperture=2,
                   drop_stars=True,
@@ -21,31 +21,42 @@ class archive():
                   extinction_corr=True,
                   only_zspec=True,
                   all_apertures=False,
-                  target_test='specz', flags_kept=[3,3.1,3.4,3.5,4]):
+                  target_test='specz', flags_kept=[3, 3.1, 3.4, 3.5, 4]):
+
+         logger.info("Starting archive")
          self.aperture = aperture
-         self.all_apertures=all_apertures
-         self.flags_kept=flags_kept
+         self.all_apertures = all_apertures
+         self.flags_kept = flags_kept

-         filename_calib='euclid_cosmos_DC2_S1_v2.1_calib_clean.fits'
-         filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
+         filename_calib = 'euclid_cosmos_DC2_S1_v2.1_calib_clean.fits'
+         filename_valid = 'euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'

-         hdu_list = fits.open(os.path.join(path,filename_calib))
-         cat = Table(hdu_list[1].data).to_pandas()
-         cat = cat[(cat['z_spec_S15'] > 0) | (cat['photo_z_L15'] > 0)]
-
-         hdu_list = fits.open(os.path.join(path,filename_valid))
-         cat_test = Table(hdu_list[1].data).to_pandas()
+         # Use Path for file handling
+         path_calib = Path(path) / filename_calib
+         path_valid = Path(path) / filename_valid
+
+         # Open the calibration FITS file
+         with fits.open(path_calib) as hdu_list:
+             cat = Table(hdu_list[1].data).to_pandas()
+             cat = cat[(cat['z_spec_S15'] > 0) | (cat['photo_z_L15'] > 0)]
+
+         # Open the validation FITS file
+         with fits.open(path_valid) as hdu_list:
+             cat_test = Table(hdu_list[1].data).to_pandas()
+
+         # Store the catalogs for later use
+         self.cat = cat
+         self.cat_test = cat_test

          if drop_stars==True:
+             logger.info("dropping stars...")
              cat = cat[cat.mu_class_L07==1]
              cat_test = cat_test[cat_test.mu_class_L07==1]

          if clean_photometry==True:
+             logger.info("cleaning photometry...")
              cat = self._clean_photometry(cat)
              cat_test = self._clean_photometry(cat_test)

@@ -216,9 +227,11 @@ class archive():

          if only_zspec:
+             logger.info("Selecting only galaxies with spectroscopic redshift")
              catalogue = self._select_only_zspec(catalogue, cat_flag='Calib')
              catalogue = self._clean_zspec_sample(catalogue, flags_kept=self.flags_kept)
          else:
+             logger.info("Selecting galaxies with spectroscopic redshift and high-precision photo-z")
              catalogue = self._take_zspec_and_photoz(catalogue, cat_flag='Calib')

@@ -233,9 +246,11 @@ class archive():

          if extinction_corr==True:
+             logger.info("Correcting MW extinction")
              f = self._correct_extinction(catalogue,f)

          if convert_colors==True:
+             logger.info("Converting to colors")
              col, colerr = self._to_colors(f, ferr)
              col_DA, colerr_DA = self._to_colors(f_DA, ferr_DA)
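A hypothetical usage of the renamed `Archive` class after this change. The data path is the site-specific directory hard-coded in the notebooks, and the import path assumes the `temps` package layout implied by `from temps.utils import nmad` in the plots.py diff below:

```python
from temps.archive import Archive  # assumed import path

archive = Archive(
    path='/data/astro/scratch2/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5',
    only_zspec=True,
    flags_kept=[3, 3.1, 3.4, 3.5, 4],
)
# loguru now reports each preprocessing step:
# "Starting archive", "dropping stars...", "Correcting MW extinction", ...
```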
temps/plots.py CHANGED
@@ -1,7 +1,7 @@
1
  import numpy as np
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
- from utils import nmad
5
 
6
  import numpy as np
7
  import matplotlib.pyplot as plt
@@ -181,68 +181,7 @@ def plot_PIT(pit_list_1, pit_list_2 = None, pit_list_3=None, sample='specz', lab
181
  # Show the plot
182
  plt.show()
183
 
184
-
185
- import numpy as np
186
- import matplotlib.pyplot as plt
187
- from scipy import stats
188
-
189
- def plot_photoz(df_list, nbins, xvariable, metric, type_bin='bin',label_list=None, samp='zs', save=False):
190
- #plot properties
191
- plt.rcParams['font.family'] = 'serif'
192
- plt.rcParams['font.size'] = 12
193
-
194
-
195
-
196
-
197
- bin_edges = stats.mstats.mquantiles(df_list[0][xvariable].values, np.linspace(0.05, 1, nbins))
198
- print(bin_edges)
199
- cmap = plt.get_cmap('Dark2') # Choose a colormap for coloring lines
200
- plt.figure(figsize=(6, 5))
201
- ls = ['--',':','-']
202
-
203
- for i, df in enumerate(df_list):
204
- ydata, xlab = [], []
205
-
206
- for k in range(len(bin_edges)-1):
207
- edge_min = bin_edges[k]
208
- edge_max = bin_edges[k+1]
209
-
210
- mean_mag = (edge_max + edge_min) / 2
211
-
212
- if type_bin == 'bin':
213
- df_plot = df[(df[xvariable] > edge_min) & (df[xvariable] < edge_max)]
214
- elif type_bin == 'cum':
215
- df_plot = df[(df[xvariable] < edge_max)]
216
- else:
217
- raise ValueError("Only type_bin=='bin' for binned and 'cum' for cumulative are supported")
218
-
219
- xlab.append(mean_mag)
220
- if metric == 'sig68':
221
- ydata.append(sigma68(df_plot.zwerr))
222
- elif metric == 'bias':
223
- ydata.append(np.mean(df_plot.zwerr))
224
- elif metric == 'nmad':
225
- ydata.append(nmad(df_plot.zwerr))
226
- elif metric == 'outliers':
227
- ydata.append(len(df_plot[np.abs(df_plot.zwerr) > 0.15]) / len(df_plot)*100)
228
-
229
- print(ydata)
230
- color = cmap(i) # Get a different color for each dataframe
231
- plt.plot(xlab, ydata,marker='.', lw=1, label=f'{label_list[i]}', color=color, ls=ls[i])
232
-
233
- if xvariable == 'VISmag':
234
- xvariable_lab = 'VIS'
235
-
236
-
237
 
238
- plt.ylabel(f'{metric} $[\\Delta z]$', fontsize=18)
239
- plt.xlabel(f'{xvariable_lab}', fontsize=16)
240
- plt.grid(False)
241
- plt.legend()
242
-
243
- if save==True:
244
- plt.savefig(f'{metric}_{xvariable}_{samp}.pdf', dpi=300, bbox_inches='tight')
245
- plt.show()
246
 
247
 
248
  def plot_nz(df_list,
@@ -336,3 +275,43 @@ def plot_crps(crps_list_1, crps_list_2 = None, crps_list_3=None, labels=None, s
336
  # Show the plot
337
  plt.show()
338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import numpy as np
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
+ from temps.utils import nmad
5
 
6
  import numpy as np
7
  import matplotlib.pyplot as plt
 
181
  # Show the plot
182
  plt.show()
183
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
 
 
 
 
 
 
 
 
185
 
186
 
187
  def plot_nz(df_list,
 
275
  # Show the plot
276
  plt.show()
277
 
+
+
+ def plot_nz(df, bins=np.arange(0,5,0.2)):
+     kwargs = dict(bins=bins, alpha=0.5)
+     plt.hist(df.zs.values, color='grey', ls='-', **kwargs)
+     counts, _ = np.histogram(df.z.values, bins=bins)
+
+     plt.plot((bins[:-1]+bins[1:])*0.5, counts, color='purple')
+
+     #plt.legend(fontsize=14)
+     plt.xlabel(r'Redshift', fontsize=14)
+     plt.ylabel(r'Counts', fontsize=14)
+     plt.yscale('log')
+
+     plt.show()
+
+     return
+
+
+ def plot_scatter(df, sample='specz', save=True):
+     # Calculate the point density
+     xy = np.vstack([df.zs.values, df.z.values])
+     zd = gaussian_kde(xy)(xy)
+
+     fig, ax = plt.subplots()
+     plt.scatter(df.zs.values, df.z.values, c=zd, s=1)
+     plt.xlim(0, 5)
+     plt.ylim(0, 5)
+
+     plt.xlabel(r'$z_{\rm s}$', fontsize=14)
+     plt.ylabel('$z$', fontsize=14)
+
+     plt.xticks(fontsize=12)
+     plt.yticks(fontsize=12)
+
+     if save == True:
+         plt.savefig(f'{sample}_scatter.pdf', dpi=300, bbox_inches='tight')
+
+     plt.show()
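A quick way to exercise the two helpers added above, using a hypothetical toy catalogue with the `zs` (spectroscopic) and `z` (predicted) columns they expect; note `plot_scatter` relies on `gaussian_kde` being imported in this module:

import numpy as np
import pandas as pd
from temps.plots import plot_nz, plot_scatter

rng = np.random.default_rng(0)
zs = rng.uniform(0, 3, 5000)                 # toy "true" redshifts
df = pd.DataFrame({"zs": zs, "z": zs + rng.normal(0, 0.05 * (1 + zs))})

plot_nz(df)                                  # spec-z histogram vs. predicted counts
plot_scatter(df, sample="toy", save=False)   # KDE-coloured zs vs. z scatter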
temps/temps.py CHANGED
@@ -1,249 +1,267 @@
  import torch
- from torch.utils.data import DataLoader, dataset, TensorDataset
  from torch import nn, optim
  from torch.optim import lr_scheduler
- import numpy as np
- import pandas as pd
- from astropy.io import fits
- import os
- from astropy.table import Table
- from scipy.spatial import KDTree
- from scipy.special import erf
  from scipy.stats import norm
- import sys
-
- sys.path.append('/.')
- from utils import maximum_mean_discrepancy, compute_kernel
-
- class Temps_module():
-     """ Define class"""
-
-     def __init__(self, modelF, modelZ, batch_size=100, rejection_param=1, da=True, verbose=False):
-         self.modelZ = modelZ
-         self.modelF = modelF
-         self.da = da
-         self.verbose = verbose
-         self.ngaussians = modelZ.ngaussians
-
-         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-         self.batch_size = batch_size
-         self.rejection_parameter = rejection_param
-
-     def _get_dataloaders(self, input_data, target_data, input_data_DA, val_fraction=0.1):
          input_data = torch.Tensor(input_data)
          target_data = torch.Tensor(target_data)
-         if input_data_DA is not None:
-             input_data_DA = torch.Tensor(input_data_DA)
-         else:
-             input_data_DA = input_data.clone()
-
-         dataset = TensorDataset(input_data, input_data_DA, target_data)
-         trainig_dataset, val_dataset = torch.utils.data.random_split(dataset, [int(len(dataset)*(1-val_fraction)), int(len(dataset)*val_fraction)+1])
-         loader_train = DataLoader(trainig_dataset, batch_size=self.batch_size, shuffle=True)
-         loader_val = DataLoader(val_dataset, batch_size=64, shuffle=True)

          return loader_train, loader_val

-     def _loss_function(self, mean, std, logmix, true):
-         log_prob = logmix - 0.5*(mean - true[:,None]).pow(2) / std.pow(2) - torch.log(std)
-         log_prob = torch.logsumexp(log_prob, 1)
          loss = -log_prob.mean()
-         return loss
-
-     def _loss_function_DA(self, f1, f2):
-         kl_loss = nn.KLDivLoss(reduction="batchmean", log_target=True)
          loss = kl_loss(f1, f2)
-         loss = torch.log(loss)
-         #print('f1',f1)
-         #print('f2',f2)
-
-         return loss

-     def _to_numpy(self, x):
          return x.detach().cpu().numpy()

-     def train(self, input_data,
-               input_data_DA,
-               target_data,
-               nepochs=10,
-               step_size=100,
-               val_fraction=0.1,
-               lr=1e-3,
-               weight_decay=0):
-         self.modelZ = self.modelZ.train()
-         self.modelF = self.modelF.train()
-
-         loader_train, loader_val = self._get_dataloaders(input_data, target_data, input_data_DA, val_fraction=0.1)
-         optimizerZ = optim.Adam(self.modelZ.parameters(), lr=lr, weight_decay=weight_decay)
-         optimizerF = optim.Adam(self.modelF.parameters(), lr=lr, weight_decay=weight_decay)
-
-         schedulerZ = torch.optim.lr_scheduler.StepLR(optimizerZ, step_size=step_size, gamma=0.1)
-         schedulerF = torch.optim.lr_scheduler.StepLR(optimizerF, step_size=step_size, gamma=0.1)
-
-         self.modelZ = self.modelZ.to(self.device)
-         self.modelF = self.modelF.to(self.device)

-         self.loss_train, self.loss_validation = [], []
-
-         for epoch in range(nepochs):
-             for input_data, input_data_da, target_data in loader_train:
-                 _loss_train, _loss_validation = [], []

-                 input_data = input_data.to(self.device)
-                 target_data = target_data.to(self.device)
-
                  if self.da:
                      input_data_da = input_data_da.to(self.device)

-                 optimizerF.zero_grad()
-                 optimizerZ.zero_grad()

-                 features = self.modelF(input_data)
-                 if self.da:
-                     features_DA = self.modelF(input_data_da)

-                 mu, logsig, logmix_coeff = self.modelZ(features)
-                 logsig = torch.clamp(logsig, -6, 2)
                  sig = torch.exp(logsig)

-                 lossZ = self._loss_function(mu, sig, logmix_coeff, target_data)

-                 if self.da:
-                     lossDA = maximum_mean_discrepancy(features, features_DA, kernel_type='rbf')
-                     lossDA = lossDA.sum()
-                     loss = lossZ + 1e3*lossDA
-                 else:
-                     loss = lossZ
-
-                 _loss_train.append(lossZ.item())
-
-                 loss.backward()
-                 optimizerF.step()
-                 optimizerZ.step()
-
-             schedulerF.step()
-             schedulerZ.step()
-
-             self.loss_train.append(np.mean(_loss_train))

-             for input_data, _, target_data in loader_val:

                  input_data = input_data.to(self.device)
                  target_data = target_data.to(self.device)

-                 features = self.modelF(input_data)
-                 mu, logsig, logmix_coeff = self.modelZ(features)
-                 logsig = torch.clamp(logsig, -6, 2)
                  sig = torch.exp(logsig)

                  loss_val = self._loss_function(mu, sig, logmix_coeff, target_data)
                  _loss_validation.append(loss_val.item())

-             self.loss_validation.append(np.mean(_loss_validation))
-
-             if self.verbose:
-                 print(f'training_loss:{loss}', f'testing_loss:{loss_val}')

      def get_features(self, input_data):
-         self.modelF = self.modelF.eval()
-         self.modelF = self.modelF.to(self.device)
-
          input_data = input_data.to(self.device)
-
-         features = self.modelF(input_data)
-
-         return features.detach().cpu().numpy()

-     def get_pz(self, input_data, return_pz=True, return_flag=True, retrun_odds=False):
-         self.modelZ = self.modelZ.eval()
-         self.modelZ = self.modelZ.to(self.device)
-         self.modelF = self.modelF.eval()
-         self.modelF = self.modelF.to(self.device)

          input_data = input_data.to(self.device)
-
-         features = self.modelF(input_data)
-         mu, logsig, logmix_coeff = self.modelZ(features)
-         logsig = torch.clamp(logsig, -6, 2)
          sig = torch.exp(logsig)

          mix_coeff = torch.exp(logmix_coeff)

-         z = (mix_coeff * mu).sum(1)
-         zerr = torch.sqrt((mix_coeff * sig**2).sum(1) + (mix_coeff * (mu - mu.mean(1)[:,None])**2).sum(1))
-
-         mu, mix_coeff, sig = mu.detach().cpu().numpy(), mix_coeff.detach().cpu().numpy(), sig.detach().cpu().numpy()
-
-         if return_pz==True:
-             zgrid = np.linspace(0, 5, 1000)
-             pdf_mixture = np.zeros(shape=(len(input_data), len(zgrid)))
-             for ii in range(len(input_data)):
-                 for i in range(self.ngaussians):
-                     pdf_mixture[ii] += mix_coeff[ii,i] * norm.pdf(zgrid, mu[ii,i], sig[ii,i])
-             if return_flag==True:
-                 #narrow peak
-                 pdf_mixture = pdf_mixture / pdf_mixture.sum(1)[:,None]
-                 diff_matrix = np.abs(self._to_numpy(z)[:,None] - zgrid[None,:])
-                 #odds
-                 idx_peak = np.argmax(pdf_mixture,1)
-                 zpeak = zgrid[idx_peak]
-                 diff_matrix_upper = np.abs((zpeak+0.05)[:,None] - zgrid[None,:])
-                 diff_matrix_lower = np.abs((zpeak-0.05)[:,None] - zgrid[None,:])
-
-                 idx = np.argmin(diff_matrix,1)
-                 idx_upper = np.argmin(diff_matrix_upper,1)
-                 idx_lower = np.argmin(diff_matrix_lower,1)
-
-                 p_z_x = np.zeros(shape=(len(z)))
-                 odds = np.zeros(shape=(len(z)))
-
-                 for ii in range(len(z)):
-                     p_z_x[ii] = pdf_mixture[ii,idx[ii]]
-                     odds[ii] = pdf_mixture[ii,:idx_upper[ii]].sum() - pdf_mixture[ii,:idx_lower[ii]].sum()
-
-                 return self._to_numpy(z), self._to_numpy(zerr), pdf_mixture, p_z_x, odds
-             else:
-                 return self._to_numpy(z), self._to_numpy(zerr), pdf_mixture
          else:
-             return self._to_numpy(z), self._to_numpy(zerr)
-
-     def pit(self, input_data, target_data):

          pit_list = []

-         self.modelF = self.modelF.eval()
-         self.modelF = self.modelF.to(self.device)
-         self.modelZ = self.modelZ.eval()
-         self.modelZ = self.modelZ.to(self.device)

          input_data = input_data.to(self.device)

-         features = self.modelF(input_data)
-         mu, logsig, logmix_coeff = self.modelZ(features)

          logsig = torch.clamp(logsig,-6,2)
          sig = torch.exp(logsig)
@@ -259,7 +277,8 @@ class Temps_module():

          return pit_list

-     def crps(self, input_data, target_data):

          def measure_crps(cdf, t):
              zgrid = np.linspace(0,4,1000)
@@ -273,16 +292,16 @@ class Temps_module():

          crps_list = []

-         self.modelF = self.modelF.eval()
-         self.modelF = self.modelF.to(self.device)
-         self.modelZ = self.modelZ.eval()
-         self.modelZ = self.modelZ.to(self.device)

          input_data = input_data.to(self.device)

-         features = self.modelF(input_data)
-         mu, logsig, logmix_coeff = self.modelZ(features)
          logsig = torch.clamp(logsig,-6,2)
          sig = torch.exp(logsig)
@@ -294,21 +313,19 @@ class Temps_module():
          z = (mix_coeff * mu).sum(1)

          x = np.linspace(0, 4, 1000)
-         pdf_mixture = np.zeros(shape=(len(target_data), len(x)))
          for ii in range(len(input_data)):
              for i in range(6):
-                 pdf_mixture[ii] += mix_coeff[ii,i] * norm.pdf(x, mu[ii,i], sig[ii,i])

-         pdf_mixture = pdf_mixture / pdf_mixture.sum(1)[:,None]

-         cdf_mixture = np.cumsum(pdf_mixture,1)

-         crps_value = measure_crps(cdf_mixture, target_data)

          return crps_value
 
+ import numpy as np
+ import pandas as pd
  import torch
  from torch import nn, optim
+ from torch.utils.data import DataLoader, TensorDataset
  from torch.optim import lr_scheduler
  from scipy.stats import norm
+ from loguru import logger
+ from tqdm import tqdm  # progress bars for the train/validation loops
+
+ # Local imports
+ from temps.utils import maximum_mean_discrepancy
+
+
+ class TempsModule:
+     """Wrapper around the TEMPS feature encoder and redshift-mixture models,
+     handling their joint training and inference."""
+
+     def __init__(
+         self,
+         model_f,
+         model_z,
+         batch_size=100,
+         rejection_param=1,
+         da=True,
+         verbose=False,
+     ):
+         self.model_z = model_z
+         self.model_f = model_f
+         self.da = da
+         self.verbose = verbose
+         self.ngaussians = model_z.ngaussians
+
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         self.batch_size = batch_size
+         self.rejection_parameter = rejection_param
+
+     def _get_dataloaders(
+         self, input_data, target_data, input_data_da=None, val_fraction=0.1
+     ):
+         """Create training and validation dataloaders."""
          input_data = torch.Tensor(input_data)
          target_data = torch.Tensor(target_data)
+         input_data_da = (
+             torch.Tensor(input_data_da)
+             if input_data_da is not None
+             else input_data.clone()
+         )
+
+         dataset = TensorDataset(input_data, input_data_da, target_data)
+         # split sizes must sum to len(dataset); deriving the train size from
+         # the validation size avoids an off-by-one from truncating twice
+         n_val = int(len(dataset) * val_fraction)
+         train_dataset, val_dataset = torch.utils.data.random_split(
+             dataset, [len(dataset) - n_val, n_val]
+         )
+         loader_train = DataLoader(
+             train_dataset, batch_size=self.batch_size, shuffle=True
+         )
+         loader_val = DataLoader(val_dataset, batch_size=64, shuffle=True)

          return loader_train, loader_val

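A minimal sketch of driving the loader construction above; the 10-column input is a placeholder that must match the encoder's input width, and `_get_dataloaders` is called directly only for illustration:

import numpy as np
from temps.temps import TempsModule
from temps.temps_arch import EncoderPhotometry, MeasureZ

temps = TempsModule(EncoderPhotometry(), MeasureZ(num_gauss=6))
col = np.random.rand(1000, 10).astype("float32")    # toy colours
ztarget = np.random.rand(1000).astype("float32")    # toy redshifts
loader_train, loader_val = temps._get_dataloaders(col, ztarget, None, val_fraction=0.1)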
+     def _loss_function(self, mean, std, logmix, true):
+         """Negative log-likelihood of the target under the predicted
+         Gaussian mixture (log-sum-exp over components)."""
+         log_prob = (
+             logmix - 0.5 * (mean - true[:, None]).pow(2) / std.pow(2) - torch.log(std)
+         )
+         log_prob = torch.logsumexp(log_prob, dim=1)
          loss = -log_prob.mean()
+         return loss
+
+     def _loss_function_da(self, f1, f2):
+         """Compute the KL-divergence loss for domain adaptation."""
+         kl_loss = nn.KLDivLoss(reduction="batchmean", log_target=True)
          loss = kl_loss(f1, f2)
+         return torch.log(loss)

+     def _to_numpy(self, x):
+         """Convert a tensor to a NumPy array."""
          return x.detach().cpu().numpy()

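The mixture loss above is the negative log-likelihood of the target under a Gaussian mixture, evaluated with a log-sum-exp over components. A small numpy cross-check of the same expression (illustration only; the constant from 1/sqrt(2*pi) is dropped in the training loss, which does not affect optimisation):

import numpy as np
from scipy.special import logsumexp
from scipy.stats import norm

mu = np.array([[0.4, 1.2]]); sig = np.array([[0.1, 0.3]])
logmix = np.log(np.array([[0.7, 0.3]]))
ztrue = np.array([0.5])

# per-component log density, up to the constant -0.5*log(2*pi)
log_prob = logmix - 0.5 * (mu - ztrue[:, None]) ** 2 / sig**2 - np.log(sig)
nll = -logsumexp(log_prob, axis=1).mean()

# same quantity via scipy, constant included
p = (np.exp(logmix) * norm.pdf(ztrue[:, None], mu, sig)).sum(1)
assert np.isclose(-np.log(p).mean(), nll + 0.5 * np.log(2 * np.pi))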
+     def train(
+         self,
+         input_data,
+         input_data_da,
+         target_data,
+         nepochs=10,
+         step_size=100,
+         val_fraction=0.1,
+         lr=1e-3,
+         weight_decay=0,
+     ):
+         """Train the models using the provided data."""
+         self.model_z.train()
+         self.model_f.train()
+
+         loader_train, loader_val = self._get_dataloaders(
+             input_data, target_data, input_data_da, val_fraction
+         )
+         optimizer_z = optim.Adam(
+             self.model_z.parameters(), lr=lr, weight_decay=weight_decay
+         )
+         optimizer_f = optim.Adam(
+             self.model_f.parameters(), lr=lr, weight_decay=weight_decay
+         )
+
+         scheduler_z = lr_scheduler.StepLR(optimizer_z, step_size=step_size, gamma=0.1)
+         scheduler_f = lr_scheduler.StepLR(optimizer_f, step_size=step_size, gamma=0.1)
+
+         self.model_z.to(self.device)
+         self.model_f.to(self.device)
+
+         loss_train, loss_validation = [], []

+         for epoch in range(nepochs):
+             _loss_train, _loss_validation = [], []
+             logger.info(f"Epoch {epoch + 1}/{nepochs} starting...")
+             for input_data, input_data_da, target_data in tqdm(
+                 loader_train, desc="Training", unit="batch"
+             ):
+                 input_data, target_data = input_data.to(self.device), target_data.to(
+                     self.device
+                 )
                  if self.da:
                      input_data_da = input_data_da.to(self.device)

+                 optimizer_f.zero_grad()
+                 optimizer_z.zero_grad()

+                 features = self.model_f(input_data)
+                 features_da = self.model_f(input_data_da) if self.da else None

+                 mu, logsig, logmix_coeff = self.model_z(features)
+                 logsig = torch.clamp(logsig, -6, 2)
                  sig = torch.exp(logsig)

+                 loss_z = self._loss_function(mu, sig, logmix_coeff, target_data)
+                 loss = loss_z + (
+                     1e3
+                     * maximum_mean_discrepancy(
+                         features, features_da, kernel_type="rbf"
+                     ).sum()
+                     if self.da
+                     else 0
+                 )
+
+                 _loss_train.append(loss_z.item())
+                 loss.backward()
+                 optimizer_f.step()
+                 optimizer_z.step()

+             scheduler_f.step()
+             scheduler_z.step()

+             loss_train.append(np.mean(_loss_train))
+             _loss_validation = self._validate(loader_val, target_data)
+
+             logger.info(
+                 f"Epoch {epoch + 1}: training loss {np.mean(_loss_train):.4f}, "
+                 f"validation loss {np.mean(_loss_validation):.4f}"
+             )

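End to end, the training entry point above is driven roughly as follows; a sketch with toy arrays standing in for the labelled (spec-z) and unlabelled (target-domain) photometry, and a placeholder input width:

import numpy as np
from temps.temps import TempsModule
from temps.temps_arch import EncoderPhotometry, MeasureZ

temps = TempsModule(EncoderPhotometry(), MeasureZ(), batch_size=100, da=True)
col = np.random.rand(2000, 10).astype("float32")       # labelled colours
col_da = np.random.rand(2000, 10).astype("float32")    # unlabelled target-domain colours
ztarget = np.random.rand(2000).astype("float32")

temps.train(col, col_da, ztarget, nepochs=5, lr=1e-3)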
+     def _validate(self, loader_val, target_data):
+         """Validate the models on the validation dataset."""
+         self.model_z.eval()
+         self.model_f.eval()
+         _loss_validation = []
+
+         with torch.no_grad():
+             for input_data, _, target_data in tqdm(
+                 loader_val, desc="Validating", unit="batch"
+             ):
                  input_data = input_data.to(self.device)
                  target_data = target_data.to(self.device)

+                 features = self.model_f(input_data)
+                 mu, logsig, logmix_coeff = self.model_z(features)
+                 logsig = torch.clamp(logsig, -6, 2)
                  sig = torch.exp(logsig)

                  loss_val = self._loss_function(mu, sig, logmix_coeff, target_data)
                  _loss_validation.append(loss_val.item())

+         return _loss_validation

      def get_features(self, input_data):
+         """Get features from the feature-encoder model."""
+         self.model_f.eval()
          input_data = input_data.to(self.device)
+         features = self.model_f(input_data)
+         return self._to_numpy(features)

+     def get_pz(self, input_data, return_pz=True, return_flag=True, return_odds=False):
+         """Get the predicted z values and their uncertainties."""
+         logger.info("Predicting photo-z for the input galaxies...")
+         self.model_z.eval()
+         self.model_f.eval()

          input_data = input_data.to(self.device)
+         features = self.model_f(input_data)
+         mu, logsig, logmix_coeff = self.model_z(features)
+         logsig = torch.clamp(logsig, -6, 2)
          sig = torch.exp(logsig)

          mix_coeff = torch.exp(logmix_coeff)
+         z = (mix_coeff * mu).sum(dim=1)
+         zerr = torch.sqrt(
+             (mix_coeff * sig**2).sum(dim=1)
+             + (mix_coeff * (mu - mu.mean(dim=1, keepdim=True)) ** 2).sum(dim=1)
+         )

+         mu, mix_coeff, sig = map(self._to_numpy, (mu, mix_coeff, sig))

+         if return_pz:
+             logger.info("Returning p(z)")
+             return self._calculate_pdf(z, mu, sig, mix_coeff, return_flag)
          else:
+             return self._to_numpy(z), self._to_numpy(zerr)
+
+     def _calculate_pdf(self, z, mu, sig, mix_coeff, return_flag):
+         """Calculate the probability density function on a redshift grid."""
+         zgrid = np.linspace(0, 5, 1000)
+         pz = np.zeros((len(z), len(zgrid)))
+
+         for ii in range(len(z)):
+             for i in range(self.ngaussians):
+                 pz[ii] += mix_coeff[ii, i] * norm.pdf(zgrid, mu[ii, i], sig[ii, i])
+
+         if return_flag:
+             logger.info("Calculating and returning ODDS")
+             pz /= pz.sum(axis=1, keepdims=True)
+             return self._calculate_odds(z, pz, zgrid)
+         return self._to_numpy(z), pz
+
+     def _calculate_odds(self, z, pz, zgrid):
+         """Calculate the ODDS quality flag from the normalised PDFs."""
+         logger.info('Calculating ODDS values')
+         idx_peak = np.argmax(pz, axis=1)
+         zpeak = zgrid[idx_peak]
+         idx_upper = np.argmin(np.abs((zpeak + 0.05)[:, None] - zgrid[None, :]), axis=1)
+         idx_lower = np.argmin(np.abs((zpeak - 0.05)[:, None] - zgrid[None, :]), axis=1)
+
+         odds = []
+         for jj in range(len(pz)):
+             odds.append(pz[jj, idx_lower[jj]:(idx_upper[jj] + 1)].sum())
+
+         odds = np.array(odds)
+         return self._to_numpy(z), pz, odds
+
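In words, `_calculate_odds` integrates each normalised p(z) in a ±0.05 window around its peak, so sharply peaked PDFs score close to 1. The same quantity in a few lines of standalone numpy, on two synthetic PDFs:

import numpy as np

zgrid = np.linspace(0, 5, 1000)
pz = np.exp(-0.5 * ((zgrid[None, :] - np.array([[1.0], [2.5]])) / 0.1) ** 2)
pz /= pz.sum(axis=1, keepdims=True)      # normalised PDFs, shape (ngal, ngrid)

idx_peak = np.argmax(pz, axis=1)
zpeak = zgrid[idx_peak]
lo = np.argmin(np.abs((zpeak - 0.05)[:, None] - zgrid[None, :]), axis=1)
hi = np.argmin(np.abs((zpeak + 0.05)[:, None] - zgrid[None, :]), axis=1)
odds = np.array([pz[j, lo[j]:hi[j] + 1].sum() for j in range(len(pz))])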
+     def calculate_pit(self, input_data, target_data):
+         logger.info('Calculating PIT values')

          pit_list = []

+         self.model_f = self.model_f.eval()
+         self.model_f = self.model_f.to(self.device)
+         self.model_z = self.model_z.eval()
+         self.model_z = self.model_z.to(self.device)

          input_data = input_data.to(self.device)

+         features = self.model_f(input_data)
+         mu, logsig, logmix_coeff = self.model_z(features)

          logsig = torch.clamp(logsig,-6,2)
          sig = torch.exp(logsig)

          return pit_list

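PIT values are the CDF of each p(z) evaluated at the true redshift; a calibrated model yields a flat PIT histogram on [0, 1]. A self-contained numpy sketch of that definition (the method above computes it from the mixture parameters instead):

import numpy as np

rng = np.random.default_rng(1)
zgrid = np.linspace(0, 4, 1000)
ztrue = rng.uniform(0.5, 3.5, 100)
centres = ztrue + rng.normal(0, 0.2, ztrue.size)    # toy predicted-PDF centres
pdf = np.exp(-0.5 * ((zgrid[None, :] - centres[:, None]) / 0.2) ** 2)
cdf = np.cumsum(pdf / pdf.sum(axis=1, keepdims=True), axis=1)

pit = np.array([np.interp(zt, zgrid, c) for zt, c in zip(ztrue, cdf)])
# a histogram of `pit` should be approximately uniform for calibrated PDFs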
+     def calculate_crps(self, input_data, target_data):
+         logger.info('Calculating CRPS values')

          def measure_crps(cdf, t):
              zgrid = np.linspace(0,4,1000)

          crps_list = []

+         self.model_f = self.model_f.eval()
+         self.model_f = self.model_f.to(self.device)
+         self.model_z = self.model_z.eval()
+         self.model_z = self.model_z.to(self.device)

          input_data = input_data.to(self.device)

+         features = self.model_f(input_data)
+         mu, logsig, logmix_coeff = self.model_z(features)
          logsig = torch.clamp(logsig,-6,2)
          sig = torch.exp(logsig)

          z = (mix_coeff * mu).sum(1)

          x = np.linspace(0, 4, 1000)
+         pz = np.zeros(shape=(len(target_data), len(x)))
          for ii in range(len(input_data)):
              for i in range(6):
+                 pz[ii] += mix_coeff[ii,i] * norm.pdf(x, mu[ii,i], sig[ii,i])

+         pz = pz / pz.sum(1)[:,None]

+         cdf_z = np.cumsum(pz,1)

+         crps_value = measure_crps(cdf_z, target_data)

          return crps_value

temps/temps_arch.py CHANGED
@@ -20,52 +20,46 @@ class EncoderPhotometry(nn.Module):
          nn.Linear(50, 20),
          nn.Dropout(dropout_prob),
          nn.ReLU(),
-         nn.Linear(20, 10)
+         nn.Linear(20, 10),
      )

      def forward(self, x):
          f = self.features(x)
          f = F.log_softmax(f, dim=1)
          return f


  class MeasureZ(nn.Module):
      def __init__(self, num_gauss=10, dropout_prob=0):
          super(MeasureZ, self).__init__()

          self.ngaussians = num_gauss

          self.measure_mu = nn.Sequential(
              nn.Linear(10, 20),
              nn.Dropout(dropout_prob),
              nn.ReLU(),
-             nn.Linear(20, num_gauss)
+             nn.Linear(20, num_gauss),
          )

          self.measure_coeffs = nn.Sequential(
              nn.Linear(10, 20),
              nn.Dropout(dropout_prob),
              nn.ReLU(),
-             nn.Linear(20, num_gauss)
+             nn.Linear(20, num_gauss),
          )

          self.measure_sigma = nn.Sequential(
              nn.Linear(10, 20),
              nn.Dropout(dropout_prob),
              nn.ReLU(),
-             nn.Linear(20, num_gauss)
+             nn.Linear(20, num_gauss),
          )

      def forward(self, f):
          mu = self.measure_mu(f)
          sigma = self.measure_sigma(f)
          logmix_coeff = self.measure_coeffs(f)

          logmix_coeff = logmix_coeff - torch.logsumexp(logmix_coeff, 1)[:, None]

          return mu, sigma, logmix_coeff
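Wiring the two modules above together; a sketch in which the batch size and the number of input colours are placeholders (the encoder's first layers, and hence its input width, are not shown in this hunk):

import torch
from temps.temps_arch import EncoderPhotometry, MeasureZ

encoder = EncoderPhotometry()
head = MeasureZ(num_gauss=6)

n_colors = 10                         # placeholder: must match the encoder's input width
col = torch.rand(32, n_colors)        # hypothetical batch of colours
f = encoder(col)                      # 10-d log-softmax features
mu, logsig, logmix = head(f)          # mixture means, log-widths, log-weights
print(mu.shape, logsig.shape, logmix.shape)   # each (32, 6)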
temps/utils.py CHANGED
@@ -3,113 +3,22 @@ import pandas as pd
  import matplotlib.pyplot as plt
  from scipy import stats
  import torch
- from scipy.stats import gaussian_kde

- def nmad(data):
-     return 1.4826 * np.median(np.abs(data - np.median(data)))
-
- def sigma68(data): return 0.5*(pd.Series(data).quantile(q = 0.84) - pd.Series(data).quantile(q = 0.16))
-
- def plot_photoz(df_list, nbins, xvariable, metric, type_bin='bin',label_list=None, samp='zs', save=False):
-     #plot properties
-     plt.rcParams['font.family'] = 'serif'
-     plt.rcParams['font.size'] = 12
-
-     bin_edges = stats.mstats.mquantiles(df_list[0][xvariable].values, np.linspace(0.05, 1, nbins))
-     print(bin_edges)
-     cmap = plt.get_cmap('Dark2')  # Choose a colormap for coloring lines
-     plt.figure(figsize=(6, 5))
-
-     for i, df in enumerate(df_list):
-         ydata, xlab = [], []
-
-         for k in range(len(bin_edges)-1):
-             edge_min = bin_edges[k]
-             edge_max = bin_edges[k+1]
-
-             mean_mag = (edge_max + edge_min) / 2
-
-             if type_bin == 'bin':
-                 df_plot = df[(df[xvariable] > edge_min) & (df[xvariable] < edge_max)]
-             elif type_bin == 'cum':
-                 df_plot = df[(df[xvariable] < edge_max)]
-             else:
-                 raise ValueError("Only type_bin=='bin' for binned and 'cum' for cumulative are supported")
-
-             xlab.append(mean_mag)
-             if metric == 'sig68':
-                 ydata.append(sigma68(df_plot.zwerr))
-             elif metric == 'bias':
-                 ydata.append(np.mean(df_plot.zwerr))
-             elif metric == 'nmad':
-                 ydata.append(nmad(df_plot.zwerr))
-             elif metric == 'outliers':
-                 ydata.append(len(df_plot[np.abs(df_plot.zwerr) > 0.15]) / len(df_plot)*100)
-
-         print(ydata)
-         color = cmap(i)  # Get a different color for each dataframe
-         plt.plot(xlab, ydata, ls='-', marker='.', lw=1, label=f'{label_list[i]}', color=color)
-
-     if xvariable == 'VISmag':
-         xvariable_lab = 'VIS'
-
-     plt.ylabel(f'{metric} $[\\Delta z]$', fontsize=18)
-     plt.xlabel(f'{xvariable_lab}', fontsize=16)
-     plt.grid(False)
-     plt.legend()
-
-     if save==True:
-         plt.savefig(f'{metric}_{xvariable}_{samp}.pdf', dpi=300, bbox_inches='tight')
-     plt.show()
-
-
- def plot_nz(df, bins=np.arange(0,5,0.2)):
-     kwargs=dict( bins=bins,alpha=0.5)
-     plt.hist(df.zs.values, color='grey', ls='-' ,**kwargs)
-     counts, _, =np.histogram(df.z.values, bins=bins)
-
-     plt.plot((bins[:-1]+bins[1:])*0.5,counts, color ='purple')
-
-     #plt.legend(fontsize=14)
-     plt.xlabel(r'Redshift', fontsize=14)
-     plt.ylabel(r'Counts', fontsize=14)
-     plt.yscale('log')
-
-     plt.show()
-
-     return
-
-
- def plot_scatter(df, sample='specz', save=True):
-     # Calculate the point density
-     xy = np.vstack([df.zs.values,df.z.values])
-     zd = gaussian_kde(xy)(xy)
-
-     fig, ax = plt.subplots()
-     plt.scatter(df.zs.values, df.z.values,c=zd, s=1)
-     plt.xlim(0,5)
-     plt.ylim(0,5)
-
-     plt.xlabel(r'$z_{\rm s}$', fontsize = 14)
-     plt.ylabel('$z$', fontsize = 14)
-
-     plt.xticks(fontsize = 12)
-     plt.yticks(fontsize = 12)
-
-     if save==True:
-         plt.savefig(f'{sample}_scatter.pdf', dpi = 300, bbox_inches='tight')
-
-     plt.show()
-
-
- def maximum_mean_discrepancy(x, y, kernel_type='rbf', kernel_mul=2.0, kernel_num=5):
      """
      Compute the Maximum Mean Discrepancy (MMD) between two sets of samples.

@@ -130,7 +39,8 @@ def maximum_mean_discrepancy(x, y, kernel_type='rbf', kernel_mul=2.0, kernel_num
      mmd_loss = torch.mean(x_kernel) + torch.mean(y_kernel) - 2 * torch.mean(xy_kernel)
      return mmd_loss

- def compute_kernel(x, y, kernel_type='rbf', kernel_mul=2.0, kernel_num=5):
      """
      Compute the kernel matrix based on the chosen kernel type.

@@ -151,73 +61,77 @@ def compute_kernel(x, y, kernel_type='rbf', kernel_mul=2.0, kernel_num=5):
      x = x.unsqueeze(1).expand(x_size, y_size, dim)
      y = y.unsqueeze(0).expand(x_size, y_size, dim)

-     kernel_input = (x - y).pow(2).mean(2)  # Pairwise squared Euclidean distances

-     if kernel_type == 'linear':
          kernel_matrix = kernel_input
-     elif kernel_type == 'poly':
          kernel_matrix = (1 + kernel_input / kernel_mul).pow(kernel_num)
-     elif kernel_type == 'rbf':
          kernel_matrix = torch.exp(-kernel_input / (2 * kernel_mul**2))
-     elif kernel_type == 'sigmoid':
          kernel_matrix = torch.tanh(kernel_mul * kernel_input)
      else:
-         raise ValueError("Invalid kernel type. Supported types are 'linear', 'poly', 'rbf', and 'sigmoid'.")

      return kernel_matrix


- def select_cut(df,
-                completenss_lim=None,
-                nmad_lim=None,
-                outliers_lim=None,
-                return_df=False):
-
-     if (completenss_lim is None)&(nmad_lim is None)&(outliers_lim is None):
-         raise(ValueError("Select at least one cut"))
      elif sum(c is not None for c in [completenss_lim, nmad_lim, outliers_lim]) > 1:
          raise ValueError("Select only one cut at a time")
-
      else:
-         bin_edges = stats.mstats.mquantiles(df.zflag, np.arange(0,1.01,0.1))
-         scatter, eta, cmptnss, nobj = [],[],[], []

-         for k in range(len(bin_edges)-1):
              edge_min = bin_edges[k]
-             edge_max = bin_edges[k+1]

-             df_bin = df[(df.zflag > edge_min)]

-             cmptnss.append(np.round(len(df_bin)/len(df),2)*100)
              scatter.append(nmad(df_bin.zwerr))
-             eta.append(len(df_bin[np.abs(df_bin.zwerr)>0.15])/len(df_bin)*100)
              nobj.append(len(df_bin))
-
-         dfcuts = pd.DataFrame(data=np.c_[np.round(bin_edges[:-1],5), np.round(nobj,1), np.round(cmptnss,1), np.round(scatter,3), np.round(eta,2)], columns=['flagcut', 'Nobj','completeness', 'nmad', 'eta'])
-
          if completenss_lim is not None:
-             print('Selecting cut based on completeness')
-             selected_cut = dfcuts[dfcuts['completeness'] <= completenss_lim].iloc[0]
-
          elif nmad_lim is not None:
-             print('Selecting cut based on nmad')
-             selected_cut = dfcuts[dfcuts['nmad'] <= nmad_lim].iloc[0]

          elif outliers_lim is not None:
-             print('Selecting cut based on outliers')
-             selected_cut = dfcuts[dfcuts['eta'] <= outliers_lim].iloc[0]

-         print(f"This cut provides completeness of {selected_cut['completeness']}, nmad={selected_cut['nmad']} and eta={selected_cut['eta']}")
-
-         df_cut = df[(df.zflag > selected_cut['flagcut'])]
-         if return_df==True:
-             return df_cut, selected_cut['flagcut'], dfcuts
      else:
-         return selected_cut['flagcut'], dfcuts
 
  import matplotlib.pyplot as plt
  from scipy import stats
  import torch
+ from loguru import logger


+ def calculate_eta(df):
+     return len(df[np.abs(df.zwerr) > 0.15]) / len(df) * 100


+ def nmad(data):
+     return 1.4826 * np.median(np.abs(data - np.median(data)))


+ def sigma68(data):
+     return 0.5 * (pd.Series(data).quantile(q=0.84) - pd.Series(data).quantile(q=0.16))


+ def maximum_mean_discrepancy(x, y, kernel_type="rbf", kernel_mul=2.0, kernel_num=5):
      """
      Compute the Maximum Mean Discrepancy (MMD) between two sets of samples.

      mmd_loss = torch.mean(x_kernel) + torch.mean(y_kernel) - 2 * torch.mean(xy_kernel)
      return mmd_loss
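A quick sanity check of the helper above: two samples from the same distribution should give an MMD near zero, while a shifted sample gives a clearly larger value:

import torch
from temps.utils import maximum_mean_discrepancy

torch.manual_seed(0)
x = torch.randn(256, 10)
y_same = torch.randn(256, 10)
y_shift = torch.randn(256, 10) + 2.0

print(maximum_mean_discrepancy(x, y_same))     # ~0
print(maximum_mean_discrepancy(x, y_shift))    # noticeably larger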
 
+
+ def compute_kernel(x, y, kernel_type="rbf", kernel_mul=2.0, kernel_num=5):
      """
      Compute the kernel matrix based on the chosen kernel type.

      x = x.unsqueeze(1).expand(x_size, y_size, dim)
      y = y.unsqueeze(0).expand(x_size, y_size, dim)

+     kernel_input = (x - y).pow(2).mean(2)

+     if kernel_type == "linear":
          kernel_matrix = kernel_input
+     elif kernel_type == "poly":
          kernel_matrix = (1 + kernel_input / kernel_mul).pow(kernel_num)
+     elif kernel_type == "rbf":
          kernel_matrix = torch.exp(-kernel_input / (2 * kernel_mul**2))
+     elif kernel_type == "sigmoid":
          kernel_matrix = torch.tanh(kernel_mul * kernel_input)
      else:
+         raise ValueError(
+             "Invalid kernel type. Supported types are 'linear', 'poly', 'rbf', and 'sigmoid'."
+         )

      return kernel_matrix


+ df, completenss_lim=None, nmad_lim=None, outliers_lim=None, return_df=False
84
+ ):
85
+
86
+ if (completenss_lim is None) & (nmad_lim is None) & (outliers_lim is None):
87
+ raise (ValueError("Select at least one cut"))
 
 
 
88
  elif sum(c is not None for c in [completenss_lim, nmad_lim, outliers_lim]) > 1:
89
  raise ValueError("Select only one cut at a time")
90
+
91
  else:
92
+ bin_edges = stats.mstats.mquantiles(df.odds, np.arange(0, 1.01, 0.1))
93
+ scatter, eta, cmptnss, nobj = [], [], [], []
94
 
95
+ for k in range(len(bin_edges) - 1):
96
  edge_min = bin_edges[k]
97
+ edge_max = bin_edges[k + 1]
98
 
99
+ df_bin = df[(df.odds > edge_min)]
 
100
 
101
+ cmptnss.append(np.round(len(df_bin) / len(df), 2) * 100)
102
  scatter.append(nmad(df_bin.zwerr))
103
+ eta.append(len(df_bin[np.abs(df_bin.zwerr) > 0.15]) / len(df_bin) * 100)
104
  nobj.append(len(df_bin))
105
+
106
+ dfcuts = pd.DataFrame(
107
+ data=np.c_[
108
+ np.round(bin_edges[:-1], 5),
109
+ np.round(nobj, 1),
110
+ np.round(cmptnss, 1),
111
+ np.round(scatter, 3),
112
+ np.round(eta, 2),
113
+ ],
114
+ columns=["flagcut", "Nobj", "completeness", "nmad", "eta"],
115
+ )
116
+
117
  if completenss_lim is not None:
118
+ logger.info("Selecting cut based on completeness")
119
+ selected_cut = dfcuts[dfcuts["completeness"] <= completenss_lim].iloc[0]
120
+
 
121
  elif nmad_lim is not None:
122
+ logger.info("Selecting cut based on nmad")
123
+ selected_cut = dfcuts[dfcuts["nmad"] <= nmad_lim].iloc[0]
124
 
 
125
  elif outliers_lim is not None:
126
+ logger.info("Selecting cut based on outliers")
127
+ selected_cut = dfcuts[dfcuts["eta"] <= outliers_lim].iloc[0]
128
 
129
+ logger.info(
130
+ f"This cut provides completeness of {selected_cut['completeness']}, nmad={selected_cut['nmad']} and eta={selected_cut['eta']}"
131
+ )
132
 
133
+ df_cut = df[(df.odds > selected_cut["flagcut"])]
134
+ if return_df == True:
135
+ return df_cut, selected_cut["flagcut"], dfcuts
 
 
136
  else:
137
+ return selected_cut["flagcut"], dfcuts
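The reworked `select_cut` expects a dataframe with `odds` and `zwerr` columns and returns the ODDS threshold that meets the requested quality target; a hedged usage sketch on toy data:

import numpy as np
import pandas as pd
from temps.utils import select_cut

rng = np.random.default_rng(2)
df = pd.DataFrame({
    "odds": rng.uniform(0, 1, 10000),        # toy ODDS values
    "zwerr": rng.normal(0, 0.05, 10000),     # toy weighted redshift errors
})

flagcut, dfcuts = select_cut(df, completenss_lim=70)   # keep objects with df.odds > flagcut
df_sel = df[df.odds > flagcut]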