Spaces:

lauracabayol
/

TEMPS

Runtime error

App Files Files Community

lauracabayol committed on Jul 27, 2023

Commit

c212435

0 Parent(s):

Archive code and network training

Browse files

Files changed (16) hide show

insight/.ipynb_checkpoints/archive-checkpoint.py +211 -0
insight/.ipynb_checkpoints/insight-checkpoint.py +166 -0
insight/.ipynb_checkpoints/insight_arch-checkpoint.py +81 -0
insight/.ipynb_checkpoints/utils-checkpoint.py +51 -0
insight/__pycache__/archive.cpython-310.pyc +0 -0
insight/__pycache__/archive.cpython-39.pyc +0 -0
insight/__pycache__/insight.cpython-310.pyc +0 -0
insight/__pycache__/insight.cpython-39.pyc +0 -0
insight/__pycache__/insight_arch.cpython-310.pyc +0 -0
insight/__pycache__/insight_arch.cpython-39.pyc +0 -0
insight/__pycache__/utils.cpython-310.pyc +0 -0
insight/__pycache__/utils.cpython-39.pyc +0 -0
insight/archive.py +211 -0
insight/insight.py +166 -0
insight/insight_arch.py +81 -0
insight/utils.py +51 -0

insight/.ipynb_checkpoints/archive-checkpoint.py ADDED Viewed

	@@ -0,0 +1,211 @@

+import numpy as np
+import pandas as pd
+from astropy.io import fits
+import os
+from astropy.table import Table
+from scipy.spatial import KDTree
+import matplotlib.pyplot as plt
+from matplotlib import rcParams
+# Module-level side effect: switch matplotlib's math-text and regular fonts to STIX for every figure.
+rcParams["mathtext.fontset"] = "stix"
+rcParams["font.family"] = "STIXGeneral"
+class archive():
+    """Read the calibration/validation catalogues and expose training and testing arrays.
+    On construction the two FITS catalogues are loaded from ``path``, optionally cleaned,
+    and converted to flux (or adjacent-band flux-ratio) arrays that are stored on the
+    instance and returned by ``get_training_data`` / ``get_testing_data``.
+    """
+    # Band order used for every flux, flux-error and extinction-correction column.
+    _BANDS = ('G','R','I','Z','Y','J','H')
+    def __init__(self, path, aperture=2, drop_stars=True, clean_photometry=True, convert_colors=True, extinction_corr=True, only_zspec=True, reliable_zspec=True):
+        """Load the catalogues found in ``path`` and prepare the training/testing samples.
+        Args:
+            path: directory containing the calibration FITS, validation FITS and gold CSV files.
+            aperture: suffix of the ``FLUX_<band>_<aperture>`` columns to read.
+            drop_stars: keep only calibration rows with ``mu_class_L07 == 1``.
+            clean_photometry: keep only rows with ``FLAG_PHOT == 0`` (both catalogues).
+            convert_colors: store adjacent-band flux ratios instead of fluxes.
+            extinction_corr: multiply fluxes by the ``EB_V_corr_FLUX_<band>`` columns.
+            only_zspec: keep only rows with ``z_spec_S15 > 0``.
+            reliable_zspec: reliability cut for the training sample, see ``_clean_zspec_sample``.
+                The testing sample always uses the ``'Total'`` cut.
+        """
+        self.aperture = aperture
+        # Maps inclusive (low, high) ranges of the Q_f_S15 quality flag to a loss weight.
+        self.weight_dict={(-99,0.99):0,
+             (1,1.99):0.5,
+             (2,2.99):0.75,
+             (3,4):1,
+             (9,9.99):0.25,
+             (10,10.99):0,
+             (11,11.99):0.5,
+             (12,12.99):0.75,
+             (13,14):1,
+             (14.01,40):0
+            }
+        filename_calib='euclid_cosmos_DC2_S1_v2.1_calib_clean.fits'
+        filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
+        filename_gold='Export_Gold_2023_07_03.csv'
+        cat = self._read_fits_table(os.path.join(path,filename_calib))
+        cat_test = self._read_fits_table(os.path.join(path,filename_valid))
+        # TODO(review): the gold sample is read but not used yet; matching it onto the
+        # validation catalogue via self._match_gold_sample(cat_test, gold_sample) is disabled.
+        gold_sample = pd.read_csv(os.path.join(path,filename_gold))
+        if drop_stars:
+            cat = cat[cat.mu_class_L07==1]
+        if clean_photometry:
+            cat = self._clean_photometry(cat)
+            cat_test = self._clean_photometry(cat_test)
+        # Work on an independent frame so adding the weight column does not write into a slice.
+        cat = cat.copy()
+        self._get_loss_weights(cat)
+        # Rows with zero (or unmapped, i.e. NaN) weight are dropped from the training sample.
+        cat = cat[cat.w_Q_f_S15>0]
+        self._set_training_data(cat, only_zspec=only_zspec, reliable_zspec=reliable_zspec, extinction_corr=extinction_corr, convert_colors=convert_colors)
+        self._set_testing_data(cat_test, only_zspec=only_zspec, reliable_zspec='Total', extinction_corr=extinction_corr, convert_colors=convert_colors)
+    @staticmethod
+    def _read_fits_table(filepath):
+        """Return the first extension of a FITS file as a DataFrame, closing the file afterwards."""
+        with fits.open(filepath) as hdu_list:
+            return Table(hdu_list[1].data).to_pandas()
+    def _extract_fluxes(self,catalogue):
+        """Return (fluxes, flux errors) as 2-D arrays with one column per band in ``_BANDS`` order."""
+        columns_f = [f'FLUX_{x}_{self.aperture}' for x in self._BANDS]
+        columns_ferr = [f'FLUXERR_{x}_{self.aperture}' for x in self._BANDS]
+        f = catalogue[columns_f].values
+        ferr = catalogue[columns_ferr].values
+        return f, ferr
+    def _to_colors(self, flux, fluxerr):
+        """Convert fluxes to adjacent-band flux ratios.
+        Returns:
+            (color, color_err): ``color[:, i] = flux[:, i] / flux[:, i+1]`` and its first-order
+            propagated uncertainty. ``color_err`` is the *variance* (no square root is taken),
+            as in the original implementation.
+        """
+        numerator, denominator = flux[:,:-1], flux[:,1:]
+        numerator_err, denominator_err = fluxerr[:,:-1], fluxerr[:,1:]
+        color = numerator / denominator
+        # var(a/b) ~ var(a)/b^2 + a^2/b^4 * var(b); the second term needs the denominator's error.
+        color_err = numerator_err**2 / denominator**2 + numerator**2 / denominator**4 * denominator_err**2
+        return color,color_err
+    def _clean_photometry(self,catalogue):
+        """Drop every object with FLAG_PHOT != 0."""
+        return catalogue[catalogue['FLAG_PHOT']==0]
+    def _correct_extinction(self,catalogue, f):
+        """Multiply the fluxes by the per-band ``EB_V_corr_FLUX_<band>`` catalogue columns."""
+        ext_correction_cols =  [f'EB_V_corr_FLUX_{x}' for x in self._BANDS]
+        ext_correction = catalogue[ext_correction_cols].values
+        return f * ext_correction
+    def _take_only_zspec(self,catalogue,cat_flag=None):
+        """Keep only rows with ``z_spec_S15 > 0`` when ``cat_flag`` is 'Calib' or 'Valid'.
+        Any other ``cat_flag`` returns the catalogue unchanged.
+        """
+        if cat_flag in ('Calib', 'Valid'):
+            catalogue = catalogue[catalogue.z_spec_S15>0]
+        return catalogue
+    def _clean_zspec_sample(self,catalogue ,kind=None):
+        """Apply a reliability cut to the spectroscopic sample.
+        Args:
+            kind: ``None``/``False`` for no cut, ``'Total'``/``True`` to keep ``reliable_S15 > 0``,
+                ``'Partial'`` to keep ``w_Q_f_S15 > 0.5``.
+        Raises:
+            ValueError: for any other ``kind`` (previously this silently returned ``None``).
+        """
+        # NOTE(review): the constructor default is reliable_zspec=True, which used to fall
+        # through every branch and return None. Mapping True to the 'Total' cut is an
+        # assumption about the intended meaning -- confirm.
+        if kind is None or kind is False:
+            return catalogue
+        if kind is True or kind=='Total':
+            return catalogue[catalogue['reliable_S15']>0]
+        if kind=='Partial':
+            return catalogue[(catalogue['w_Q_f_S15']>0.5)]
+        raise ValueError(f"Unsupported kind={kind!r}; use None, 'Total' or 'Partial'")
+    def _map_weight(self,Qz):
+        """Return the weight of the ``weight_dict`` range containing ``Qz`` (None if no range matches)."""
+        for (low, high), value in self.weight_dict.items():
+            if low <= Qz <= high:
+                return value
+        return None
+    def _get_loss_weights(self,catalogue):
+        """Add the ``w_Q_f_S15`` weight column to ``catalogue`` in place, derived from ``Q_f_S15``."""
+        catalogue['w_Q_f_S15'] = catalogue['Q_f_S15'].apply(self._map_weight)
+    def _match_gold_sample(self,catalogue_valid, catalogue_gold, max_distance_arcsec=2):
+        """Attach the nearest gold-sample redshift to each validation object.
+        Adds a ``z_spec_gold`` column to ``catalogue_valid`` (in place) holding ``FINAL_SPEC_Z``
+        of the nearest gold object, or -99 when that object is farther than
+        ``max_distance_arcsec``. Distances are plain Euclidean distances in (RA, Dec) degrees.
+        """
+        max_distance_deg = max_distance_arcsec / 3600.0
+        gold_sample_radec = np.c_[catalogue_gold.RIGHT_ASCENSION,catalogue_gold.DECLINATION]
+        valid_sample_radec = np.c_[catalogue_valid['RA'],catalogue_valid['DEC']]
+        kdtree = KDTree(gold_sample_radec)
+        distances, indices = kdtree.query(valid_sample_radec, k=1)
+        specz_match_gold = catalogue_gold.FINAL_SPEC_Z.values[indices]
+        catalogue_valid['z_spec_gold'] = np.where(distances < max_distance_deg, specz_match_gold, -99)
+        return catalogue_valid
+    def _select_zspec_sample(self, catalogue, cat_flag, reliable_zspec):
+        """Apply the spec-z selection followed by the reliability cut."""
+        catalogue = self._take_only_zspec(catalogue, cat_flag=cat_flag)
+        return self._clean_zspec_sample(catalogue, kind=reliable_zspec)
+    def _build_photometry(self, catalogue, extinction_corr, convert_colors):
+        """Return (photometry, uncertainty) arrays: flux ratios if ``convert_colors`` else fluxes."""
+        f, ferr = self._extract_fluxes(catalogue)
+        if extinction_corr:
+            f = self._correct_extinction(catalogue,f)
+        if convert_colors:
+            return self._to_colors(f, ferr)
+        return f, ferr
+    def _set_training_data(self,catalogue, only_zspec=True, reliable_zspec=True, extinction_corr=True, convert_colors=True):
+        """Store the training catalogue, photometry, spec-z targets and loss weights on the instance."""
+        if only_zspec:
+            catalogue = self._select_zspec_sample(catalogue, 'Calib', reliable_zspec)
+        self.cat_train=catalogue
+        self.phot_train, self.photerr_train = self._build_photometry(catalogue, extinction_corr, convert_colors)
+        self.target_z_train = catalogue['z_spec_S15'].values
+        self.target_qz_train = catalogue['w_Q_f_S15'].values
+    def _set_testing_data(self,catalogue, only_zspec=True, reliable_zspec=True, extinction_corr=True, convert_colors=True):
+        """Store the testing catalogue, photometry and spec-z targets on the instance."""
+        if only_zspec:
+            catalogue = self._select_zspec_sample(catalogue, 'Valid', reliable_zspec)
+        self.cat_test=catalogue
+        self.phot_test, self.photerr_test = self._build_photometry(catalogue, extinction_corr, convert_colors)
+        self.target_z_test = catalogue['z_spec_S15'].values
+    def get_training_data(self):
+        """Return (photometry, photometry uncertainty, spec-z, loss weight) of the training sample."""
+        return self.phot_train, self.photerr_train, self.target_z_train, self.target_qz_train
+    def get_testing_data(self):
+        """Return (photometry, photometry uncertainty, spec-z) of the testing sample."""
+        return self.phot_test, self.photerr_test, self.target_z_test
+    def get_VIS_mag(self, catalogue):
+        """Return the ``MAG_VIS`` column of ``catalogue`` as an (N, 1) array."""
+        return catalogue[['MAG_VIS']].values
+    def plot_zdistribution(self, plot_test=False, bins=50):
+        """Plot the spec-z histogram of the training sample and, optionally, of the testing sample."""
+        plt.hist(self.target_z_train, bins = bins, histtype='step', color='navy', label=r'Training sample')
+        if plot_test:
+            plt.hist(self.target_z_test, bins = bins, histtype='step', color='goldenrod', label=r'Test sample',ls='--')
+        plt.xticks(fontsize=12)
+        plt.yticks(fontsize=12)
+        plt.xlabel(r'Redshift', fontsize=14)
+        plt.ylabel('Counts', fontsize=14)
+        plt.legend()
+        plt.show()

insight/.ipynb_checkpoints/insight-checkpoint.py ADDED Viewed

	@@ -0,0 +1,166 @@

+import torch
+from torch.utils.data import DataLoader, dataset, TensorDataset
+from torch import nn, optim
+from torch.optim import lr_scheduler
+import numpy as np
+import pandas as pd
+from astropy.io import fits
+import os
+from astropy.table import Table
+from scipy.spatial import KDTree
+class Insight_module():
+    """Train a mixture-density photo-z network and evaluate its predictions.
+    The wrapped ``model`` must return ``(mu, logsig, logmix_coeff)`` when called on a batch.
+    """
+    def __init__(self, model):
+        """Store the model and pick CUDA when available, CPU otherwise."""
+        self.model=model
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    def _get_dataloaders(self, input_data, target_data, target_weights, val_fraction=0.1):
+        """Split the data at random into training/validation loaders (batch size 64, shuffled).
+        The validation set holds roughly ``val_fraction`` of the samples; the two split sizes
+        always add up to the dataset length, which ``random_split`` requires.
+        """
+        input_data = torch.Tensor(input_data)
+        target_data = torch.Tensor(target_data)
+        target_weights = torch.Tensor(target_weights)
+        full_dataset = TensorDataset(input_data, target_data, target_weights)
+        n_total = len(full_dataset)
+        n_train = int(n_total*(1-val_fraction))
+        # Derive the validation size from the remainder instead of rounding it separately.
+        n_val = n_total - n_train
+        training_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [n_train, n_val])
+        loader_train = DataLoader(training_dataset, batch_size=64, shuffle = True)
+        loader_val = DataLoader(val_dataset, batch_size=64, shuffle = True)
+        return loader_train, loader_val
+    def _loss_function(self,mean, std, logmix, true, target_weights):
+        """Return the mean negative log-likelihood of ``true`` under the Gaussian mixture.
+        The constant ``-0.5*log(2*pi)`` term is omitted. ``target_weights`` is accepted but
+        currently not applied to the loss.
+        """
+        log_prob =   logmix - 0.5*(mean - true[:,None]).pow(2) / std.pow(2) - torch.log(std)
+        log_prob = torch.logsumexp(log_prob, 1)
+        loss = -log_prob.mean()
+        return loss
+    def _to_numpy(self,x):
+        """Detach a tensor from the graph and return it as a NumPy array on the CPU."""
+        return x.detach().cpu().numpy()
+    def _batch_loss(self, batch_input, batch_target, batch_weights):
+        """Move one batch to the device, run the model and return the loss tensor."""
+        batch_input = batch_input.to(self.device)
+        batch_target = batch_target.to(self.device)
+        batch_weights = batch_weights.to(self.device)
+        mu, logsig, logmix_coeff = self.model(batch_input)
+        # Clamp log-sigma so exp() stays in a numerically safe range.
+        sig = torch.exp(torch.clamp(logsig,-6,2))
+        return self._loss_function(mu, sig, logmix_coeff, batch_target, batch_weights)
+    def train(self,input_data, target_data, target_weights,  nepochs=10, val_fraction=0.1, lr=1e-3 ):
+        """Train the model with Adam and record per-epoch losses.
+        After the call ``self.loss_train`` and ``self.loss_validation`` hold, for every epoch,
+        the loss of the *last* training batch and of the *last* validation batch
+        (NaN when the corresponding loader is empty).
+        """
+        loader_train, loader_val = self._get_dataloaders(input_data, target_data, target_weights, val_fraction=val_fraction)
+        self.model = self.model.to(self.device)
+        optimizer = optim.Adam(self.model.parameters(), lr=lr, weight_decay=1e-4)
+        loss_train, loss_validation = [],[]
+        for epoch in range(nepochs):
+            self.model.train()
+            last_train_loss = float('nan')
+            for batch in loader_train:
+                optimizer.zero_grad()
+                loss = self._batch_loss(*batch)
+                loss.backward()
+                optimizer.step()
+                last_train_loss = loss.item()
+            loss_train.append(last_train_loss)
+            # Validation: no dropout and no autograd bookkeeping.
+            self.model.eval()
+            last_val_loss = float('nan')
+            with torch.no_grad():
+                for batch in loader_val:
+                    last_val_loss = self._batch_loss(*batch).item()
+            loss_validation.append(last_val_loss)
+            print(f'training_loss:{last_train_loss}',f'testing_loss:{last_val_loss}')
+        # Leave the model in training mode, as before.
+        self.model.train()
+        self.loss_train=loss_train
+        self.loss_validation=loss_validation
+    def get_photoz(self,input_data, target_data):
+        """Return (z, zerr) NumPy arrays for ``input_data``.
+        ``z`` is the mixture mean; ``zerr`` is the square root of the weighted component
+        variances plus the weighted squared distance of each component mean to ``target_data``.
+        Tensors and array-likes are both accepted.
+        """
+        self.model = self.model.eval()
+        self.model = self.model.to(self.device)
+        input_data = torch.as_tensor(input_data, dtype=torch.float32).to(self.device)
+        target_data = torch.as_tensor(target_data, dtype=torch.float32).to(self.device)
+        # A single forward pass covers the whole batch; no per-object loop is needed.
+        with torch.no_grad():
+            mu, logsig, logmix_coeff = self.model(input_data)
+            sig = torch.exp(torch.clamp(logsig,-6,2))
+            mix_coeff = torch.exp(logmix_coeff)
+            z = (mix_coeff * mu).sum(1)
+            # NOTE(review): the spread is measured around the true redshift (target_data),
+            # not around the predicted mean z -- confirm this is intended.
+            zerr = torch.sqrt( (mix_coeff * sig**2).sum(1) + (mix_coeff * (mu - target_data[:,None])**2).sum(1))
+        return self._to_numpy(z),self._to_numpy(zerr)
+    def plot_photoz(self, df, nbins,xvariable,metric, type_bin='bin'):
+        """Plot a photo-z metric of ``df.zwerr`` in quantile bins of ``df[xvariable]``.
+        Args:
+            df: DataFrame with the column named by ``xvariable`` and a ``zwerr`` column.
+            nbins: number of quantile edges between the 10th and 100th percentile.
+            xvariable: column used for binning.
+            metric: one of 'sig68', 'bias', 'nmad', 'outliers' (|zwerr| > 0.15 fraction).
+            type_bin: 'bin' for differential bins, 'cum' for cumulative (x < upper edge).
+        Raises:
+            ValueError: for an unsupported ``type_bin`` or ``metric``.
+        """
+        # Imported here because this module does not import them at file level.
+        import matplotlib.pyplot as plt
+        from scipy import stats
+        def _sigma68(dz):
+            series = pd.Series(dz)
+            return 0.5*(series.quantile(q = 0.84) - series.quantile(q = 0.16))
+        def _nmad(dz):
+            return 1.4826 * np.median(np.abs(dz - np.median(dz)))
+        def _outlier_fraction(dz):
+            return np.mean(np.abs(dz) > 0.15) if len(dz) else np.nan
+        metric_functions = {'sig68': _sigma68, 'bias': np.mean, 'nmad': _nmad, 'outliers': _outlier_fraction}
+        if type_bin not in ('bin', 'cum'):
+            raise ValueError("Only type_bin=='bin' for binned and 'cum' for cumulative are supported")
+        if metric not in metric_functions:
+            raise ValueError(f"Unsupported metric {metric!r}; choose from {sorted(metric_functions)}")
+        xvalues = df[xvariable]
+        bin_edges = stats.mstats.mquantiles(xvalues.values, np.linspace(0.1,1,nbins))
+        ydata,xlab = [],[]
+        for edge_min, edge_max in zip(bin_edges[:-1], bin_edges[1:]):
+            selection = xvalues < edge_max
+            if type_bin=='bin':
+                selection &= xvalues > edge_min
+            df_plot = df[selection]
+            xlab.append((edge_max + edge_min) / 2)
+            ydata.append(metric_functions[metric](df_plot.zwerr))
+        plt.plot(xlab,ydata, ls = '-', marker = '.', color = 'navy',lw = 1, label = '')
+        plt.ylabel(rf'{metric}$[\Delta z]$', fontsize = 18)
+        plt.xlabel(f'{xvariable}', fontsize = 16)
+        plt.xticks(fontsize = 14)
+        plt.yticks(fontsize = 14)
+        plt.grid(False)
+        plt.show()

insight/.ipynb_checkpoints/insight_arch-checkpoint.py ADDED Viewed

	@@ -0,0 +1,81 @@

+from torch import nn, optim
+import torch
+class Photoz_network(nn.Module):
+    """Mixture-density network: a shared trunk followed by three heads of ``num_gauss`` outputs.
+    The trunk maps 6 input features to 100 hidden features; the heads produce the component
+    means, the log mixing coefficients and a third per-component output that the training code
+    in this commit clamps and exponentiates to obtain the component widths.
+    """
+    def __init__(self, num_gauss=10, dropout_prob=0):
+        super().__init__()
+        # Creation order (features, mu, coeffs, sigma) is kept so seeded initialisation is unchanged.
+        self.features = self._stack((6, 10, 30, 50, 70, 100), dropout_prob)
+        head_widths = (100, 80, 70, 60, 50, num_gauss)
+        self.measure_mu = self._stack(head_widths, dropout_prob)
+        self.measure_coeffs = self._stack(head_widths, dropout_prob)
+        self.measure_sigma = self._stack(head_widths, dropout_prob)
+    @staticmethod
+    def _stack(widths, dropout_prob):
+        """Build Linear -> Dropout -> ReLU for every hidden width, ending with a bare Linear."""
+        layers = []
+        for n_in, n_out in zip(widths[:-2], widths[1:-1]):
+            layers += [nn.Linear(n_in, n_out), nn.Dropout(dropout_prob), nn.ReLU()]
+        layers.append(nn.Linear(widths[-2], widths[-1]))
+        return nn.Sequential(*layers)
+    def forward(self, x):
+        """Return ``(mu, sigma_head_output, logmix_coeff)`` for a batch ``x``.
+        ``logmix_coeff`` is normalised so that its log-sum-exp over components is zero.
+        """
+        hidden = self.features(x)
+        mu = self.measure_mu(hidden)
+        sigma_out = self.measure_sigma(hidden)
+        raw_coeff = self.measure_coeffs(hidden)
+        logmix_coeff = raw_coeff - torch.logsumexp(raw_coeff, 1)[:,None]
+        return mu, sigma_out, logmix_coeff

insight/.ipynb_checkpoints/utils-checkpoint.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from scipy import stats
+def nmad(data):
+    """Return the normalised median absolute deviation of ``data`` (1.4826 x MAD)."""
+    centre = np.median(data)
+    absolute_deviation = np.abs(data - centre)
+    return 1.4826 * np.median(absolute_deviation)
+def sigma68(data):
+    """Return half the distance between the 16th and 84th percentiles of ``data``."""
+    series = pd.Series(data)
+    upper = series.quantile(q = 0.84)
+    lower = series.quantile(q = 0.16)
+    return 0.5*(upper - lower)
+def plot_photoz(df, nbins,xvariable,metric, type_bin='bin'):
+    """Plot a photo-z metric of ``df.zwerr`` in quantile bins of ``df[xvariable]``.
+    Args:
+        df: DataFrame with the column named by ``xvariable`` and a ``zwerr`` column.
+        nbins: number of quantile edges between the 10th and 100th percentile.
+        xvariable: column used for binning.
+        metric: one of 'sig68', 'bias', 'nmad', 'outliers' (|zwerr| > 0.15 fraction).
+        type_bin: 'bin' for differential bins, 'cum' for cumulative (x < upper edge).
+    Raises:
+        ValueError: for an unsupported ``type_bin`` or ``metric``.
+    """
+    def _outlier_fraction(dz):
+        # An empty bin has no defined outlier fraction; avoid dividing by zero.
+        return np.mean(np.abs(dz) > 0.15) if len(dz) else np.nan
+    metric_functions = {'sig68': sigma68, 'bias': np.mean, 'nmad': nmad, 'outliers': _outlier_fraction}
+    if type_bin not in ('bin', 'cum'):
+        raise ValueError("Only type_bin=='bin' for binned and 'cum' for cumulative are supported")
+    if metric not in metric_functions:
+        raise ValueError(f"Unsupported metric {metric!r}; choose from {sorted(metric_functions)}")
+    # Bin on the requested column of the frame that was passed in.
+    xvalues = df[xvariable]
+    bin_edges = stats.mstats.mquantiles(xvalues.values, np.linspace(0.1,1,nbins))
+    ydata,xlab = [],[]
+    for edge_min, edge_max in zip(bin_edges[:-1], bin_edges[1:]):
+        selection = xvalues < edge_max
+        if type_bin=='bin':
+            selection &= xvalues > edge_min
+        df_plot = df[selection]
+        xlab.append((edge_max + edge_min) / 2)
+        ydata.append(metric_functions[metric](df_plot.zwerr))
+    plt.plot(xlab,ydata, ls = '-', marker = '.', color = 'navy',lw = 1, label = '')
+    plt.ylabel(rf'{metric}$[\Delta z]$', fontsize = 18)
+    plt.xlabel(f'{xvariable}', fontsize = 16)
+    plt.xticks(fontsize = 14)
+    plt.yticks(fontsize = 14)
+    plt.grid(False)
+    plt.show()

insight/__pycache__/archive.cpython-310.pyc ADDED Viewed

Binary file (6.92 kB). View file

insight/__pycache__/archive.cpython-39.pyc ADDED Viewed

Binary file (6.2 kB). View file

insight/__pycache__/insight.cpython-310.pyc ADDED Viewed

Binary file (4.48 kB). View file

insight/__pycache__/insight.cpython-39.pyc ADDED Viewed

Binary file (4.4 kB). View file

insight/__pycache__/insight_arch.cpython-310.pyc ADDED Viewed

Binary file (1.63 kB). View file

insight/__pycache__/insight_arch.cpython-39.pyc ADDED Viewed

Binary file (1.58 kB). View file

insight/__pycache__/utils.cpython-310.pyc ADDED Viewed

Binary file (1.71 kB). View file

insight/__pycache__/utils.cpython-39.pyc ADDED Viewed

Binary file (1.67 kB). View file

insight/archive.py ADDED Viewed

	@@ -0,0 +1,211 @@

+import numpy as np
+import pandas as pd
+from astropy.io import fits
+import os
+from astropy.table import Table
+from scipy.spatial import KDTree
+import matplotlib.pyplot as plt
+from matplotlib import rcParams
+# Module-level side effect: switch matplotlib's math-text and regular fonts to STIX for every figure.
+rcParams["mathtext.fontset"] = "stix"
+rcParams["font.family"] = "STIXGeneral"
+class archive():
+    """Read the calibration/validation catalogues and expose training and testing arrays.
+    On construction the two FITS catalogues are loaded from ``path``, optionally cleaned,
+    and converted to flux (or adjacent-band flux-ratio) arrays that are stored on the
+    instance and returned by ``get_training_data`` / ``get_testing_data``.
+    """
+    # Band order used for every flux, flux-error and extinction-correction column.
+    _BANDS = ('G','R','I','Z','Y','J','H')
+    def __init__(self, path, aperture=2, drop_stars=True, clean_photometry=True, convert_colors=True, extinction_corr=True, only_zspec=True, reliable_zspec=True):
+        """Load the catalogues found in ``path`` and prepare the training/testing samples.
+        Args:
+            path: directory containing the calibration FITS, validation FITS and gold CSV files.
+            aperture: suffix of the ``FLUX_<band>_<aperture>`` columns to read.
+            drop_stars: keep only calibration rows with ``mu_class_L07 == 1``.
+            clean_photometry: keep only rows with ``FLAG_PHOT == 0`` (both catalogues).
+            convert_colors: store adjacent-band flux ratios instead of fluxes.
+            extinction_corr: multiply fluxes by the ``EB_V_corr_FLUX_<band>`` columns.
+            only_zspec: keep only rows with ``z_spec_S15 > 0``.
+            reliable_zspec: reliability cut for the training sample, see ``_clean_zspec_sample``.
+                The testing sample always uses the ``'Total'`` cut.
+        """
+        self.aperture = aperture
+        # Maps inclusive (low, high) ranges of the Q_f_S15 quality flag to a loss weight.
+        self.weight_dict={(-99,0.99):0,
+             (1,1.99):0.5,
+             (2,2.99):0.75,
+             (3,4):1,
+             (9,9.99):0.25,
+             (10,10.99):0,
+             (11,11.99):0.5,
+             (12,12.99):0.75,
+             (13,14):1,
+             (14.01,40):0
+            }
+        filename_calib='euclid_cosmos_DC2_S1_v2.1_calib_clean.fits'
+        filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
+        filename_gold='Export_Gold_2023_07_03.csv'
+        cat = self._read_fits_table(os.path.join(path,filename_calib))
+        cat_test = self._read_fits_table(os.path.join(path,filename_valid))
+        # TODO(review): the gold sample is read but not used yet; matching it onto the
+        # validation catalogue via self._match_gold_sample(cat_test, gold_sample) is disabled.
+        gold_sample = pd.read_csv(os.path.join(path,filename_gold))
+        if drop_stars:
+            cat = cat[cat.mu_class_L07==1]
+        if clean_photometry:
+            cat = self._clean_photometry(cat)
+            cat_test = self._clean_photometry(cat_test)
+        # Work on an independent frame so adding the weight column does not write into a slice.
+        cat = cat.copy()
+        self._get_loss_weights(cat)
+        # Rows with zero (or unmapped, i.e. NaN) weight are dropped from the training sample.
+        cat = cat[cat.w_Q_f_S15>0]
+        self._set_training_data(cat, only_zspec=only_zspec, reliable_zspec=reliable_zspec, extinction_corr=extinction_corr, convert_colors=convert_colors)
+        self._set_testing_data(cat_test, only_zspec=only_zspec, reliable_zspec='Total', extinction_corr=extinction_corr, convert_colors=convert_colors)
+    @staticmethod
+    def _read_fits_table(filepath):
+        """Return the first extension of a FITS file as a DataFrame, closing the file afterwards."""
+        with fits.open(filepath) as hdu_list:
+            return Table(hdu_list[1].data).to_pandas()
+    def _extract_fluxes(self,catalogue):
+        """Return (fluxes, flux errors) as 2-D arrays with one column per band in ``_BANDS`` order."""
+        columns_f = [f'FLUX_{x}_{self.aperture}' for x in self._BANDS]
+        columns_ferr = [f'FLUXERR_{x}_{self.aperture}' for x in self._BANDS]
+        f = catalogue[columns_f].values
+        ferr = catalogue[columns_ferr].values
+        return f, ferr
+    def _to_colors(self, flux, fluxerr):
+        """Convert fluxes to adjacent-band flux ratios.
+        Returns:
+            (color, color_err): ``color[:, i] = flux[:, i] / flux[:, i+1]`` and its first-order
+            propagated uncertainty. ``color_err`` is the *variance* (no square root is taken),
+            as in the original implementation.
+        """
+        numerator, denominator = flux[:,:-1], flux[:,1:]
+        numerator_err, denominator_err = fluxerr[:,:-1], fluxerr[:,1:]
+        color = numerator / denominator
+        # var(a/b) ~ var(a)/b^2 + a^2/b^4 * var(b); the second term needs the denominator's error.
+        color_err = numerator_err**2 / denominator**2 + numerator**2 / denominator**4 * denominator_err**2
+        return color,color_err
+    def _clean_photometry(self,catalogue):
+        """Drop every object with FLAG_PHOT != 0."""
+        return catalogue[catalogue['FLAG_PHOT']==0]
+    def _correct_extinction(self,catalogue, f):
+        """Multiply the fluxes by the per-band ``EB_V_corr_FLUX_<band>`` catalogue columns."""
+        ext_correction_cols =  [f'EB_V_corr_FLUX_{x}' for x in self._BANDS]
+        ext_correction = catalogue[ext_correction_cols].values
+        return f * ext_correction
+    def _take_only_zspec(self,catalogue,cat_flag=None):
+        """Keep only rows with ``z_spec_S15 > 0`` when ``cat_flag`` is 'Calib' or 'Valid'.
+        Any other ``cat_flag`` returns the catalogue unchanged.
+        """
+        if cat_flag in ('Calib', 'Valid'):
+            catalogue = catalogue[catalogue.z_spec_S15>0]
+        return catalogue
+    def _clean_zspec_sample(self,catalogue ,kind=None):
+        """Apply a reliability cut to the spectroscopic sample.
+        Args:
+            kind: ``None``/``False`` for no cut, ``'Total'``/``True`` to keep ``reliable_S15 > 0``,
+                ``'Partial'`` to keep ``w_Q_f_S15 > 0.5``.
+        Raises:
+            ValueError: for any other ``kind`` (previously this silently returned ``None``).
+        """
+        # NOTE(review): the constructor default is reliable_zspec=True, which used to fall
+        # through every branch and return None. Mapping True to the 'Total' cut is an
+        # assumption about the intended meaning -- confirm.
+        if kind is None or kind is False:
+            return catalogue
+        if kind is True or kind=='Total':
+            return catalogue[catalogue['reliable_S15']>0]
+        if kind=='Partial':
+            return catalogue[(catalogue['w_Q_f_S15']>0.5)]
+        raise ValueError(f"Unsupported kind={kind!r}; use None, 'Total' or 'Partial'")
+    def _map_weight(self,Qz):
+        """Return the weight of the ``weight_dict`` range containing ``Qz`` (None if no range matches)."""
+        for (low, high), value in self.weight_dict.items():
+            if low <= Qz <= high:
+                return value
+        return None
+    def _get_loss_weights(self,catalogue):
+        """Add the ``w_Q_f_S15`` weight column to ``catalogue`` in place, derived from ``Q_f_S15``."""
+        catalogue['w_Q_f_S15'] = catalogue['Q_f_S15'].apply(self._map_weight)
+    def _match_gold_sample(self,catalogue_valid, catalogue_gold, max_distance_arcsec=2):
+        """Attach the nearest gold-sample redshift to each validation object.
+        Adds a ``z_spec_gold`` column to ``catalogue_valid`` (in place) holding ``FINAL_SPEC_Z``
+        of the nearest gold object, or -99 when that object is farther than
+        ``max_distance_arcsec``. Distances are plain Euclidean distances in (RA, Dec) degrees.
+        """
+        max_distance_deg = max_distance_arcsec / 3600.0
+        gold_sample_radec = np.c_[catalogue_gold.RIGHT_ASCENSION,catalogue_gold.DECLINATION]
+        valid_sample_radec = np.c_[catalogue_valid['RA'],catalogue_valid['DEC']]
+        kdtree = KDTree(gold_sample_radec)
+        distances, indices = kdtree.query(valid_sample_radec, k=1)
+        specz_match_gold = catalogue_gold.FINAL_SPEC_Z.values[indices]
+        catalogue_valid['z_spec_gold'] = np.where(distances < max_distance_deg, specz_match_gold, -99)
+        return catalogue_valid
+    def _select_zspec_sample(self, catalogue, cat_flag, reliable_zspec):
+        """Apply the spec-z selection followed by the reliability cut."""
+        catalogue = self._take_only_zspec(catalogue, cat_flag=cat_flag)
+        return self._clean_zspec_sample(catalogue, kind=reliable_zspec)
+    def _build_photometry(self, catalogue, extinction_corr, convert_colors):
+        """Return (photometry, uncertainty) arrays: flux ratios if ``convert_colors`` else fluxes."""
+        f, ferr = self._extract_fluxes(catalogue)
+        if extinction_corr:
+            f = self._correct_extinction(catalogue,f)
+        if convert_colors:
+            return self._to_colors(f, ferr)
+        return f, ferr
+    def _set_training_data(self,catalogue, only_zspec=True, reliable_zspec=True, extinction_corr=True, convert_colors=True):
+        """Store the training catalogue, photometry, spec-z targets and loss weights on the instance."""
+        if only_zspec:
+            catalogue = self._select_zspec_sample(catalogue, 'Calib', reliable_zspec)
+        self.cat_train=catalogue
+        self.phot_train, self.photerr_train = self._build_photometry(catalogue, extinction_corr, convert_colors)
+        self.target_z_train = catalogue['z_spec_S15'].values
+        self.target_qz_train = catalogue['w_Q_f_S15'].values
+    def _set_testing_data(self,catalogue, only_zspec=True, reliable_zspec=True, extinction_corr=True, convert_colors=True):
+        """Store the testing catalogue, photometry and spec-z targets on the instance."""
+        if only_zspec:
+            catalogue = self._select_zspec_sample(catalogue, 'Valid', reliable_zspec)
+        self.cat_test=catalogue
+        self.phot_test, self.photerr_test = self._build_photometry(catalogue, extinction_corr, convert_colors)
+        self.target_z_test = catalogue['z_spec_S15'].values
+    def get_training_data(self):
+        """Return (photometry, photometry uncertainty, spec-z, loss weight) of the training sample."""
+        return self.phot_train, self.photerr_train, self.target_z_train, self.target_qz_train
+    def get_testing_data(self):
+        """Return (photometry, photometry uncertainty, spec-z) of the testing sample."""
+        return self.phot_test, self.photerr_test, self.target_z_test
+    def get_VIS_mag(self, catalogue):
+        """Return the ``MAG_VIS`` column of ``catalogue`` as an (N, 1) array."""
+        return catalogue[['MAG_VIS']].values
+    def plot_zdistribution(self, plot_test=False, bins=50):
+        """Plot the spec-z histogram of the training sample and, optionally, of the testing sample."""
+        plt.hist(self.target_z_train, bins = bins, histtype='step', color='navy', label=r'Training sample')
+        if plot_test:
+            plt.hist(self.target_z_test, bins = bins, histtype='step', color='goldenrod', label=r'Test sample',ls='--')
+        plt.xticks(fontsize=12)
+        plt.yticks(fontsize=12)
+        plt.xlabel(r'Redshift', fontsize=14)
+        plt.ylabel('Counts', fontsize=14)
+        plt.legend()
+        plt.show()

insight/insight.py ADDED Viewed

	@@ -0,0 +1,166 @@

+import torch
+from torch.utils.data import DataLoader, dataset, TensorDataset
+from torch import nn, optim
+from torch.optim import lr_scheduler
+import numpy as np
+import pandas as pd
+from astropy.io import fits
+import os
+from astropy.table import Table
+from scipy.spatial import KDTree
+class Insight_module():
+    """Train a mixture-density photo-z network and evaluate its predictions.
+    The wrapped ``model`` must return ``(mu, logsig, logmix_coeff)`` when called on a batch.
+    """
+    def __init__(self, model):
+        """Store the model and pick CUDA when available, CPU otherwise."""
+        self.model=model
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    def _get_dataloaders(self, input_data, target_data, target_weights, val_fraction=0.1):
+        """Split the data at random into training/validation loaders (batch size 64, shuffled).
+        The validation set holds roughly ``val_fraction`` of the samples; the two split sizes
+        always add up to the dataset length, which ``random_split`` requires.
+        """
+        input_data = torch.Tensor(input_data)
+        target_data = torch.Tensor(target_data)
+        target_weights = torch.Tensor(target_weights)
+        full_dataset = TensorDataset(input_data, target_data, target_weights)
+        n_total = len(full_dataset)
+        n_train = int(n_total*(1-val_fraction))
+        # Derive the validation size from the remainder instead of rounding it separately.
+        n_val = n_total - n_train
+        training_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [n_train, n_val])
+        loader_train = DataLoader(training_dataset, batch_size=64, shuffle = True)
+        loader_val = DataLoader(val_dataset, batch_size=64, shuffle = True)
+        return loader_train, loader_val
+    def _loss_function(self,mean, std, logmix, true, target_weights):
+        """Return the mean negative log-likelihood of ``true`` under the Gaussian mixture.
+        The constant ``-0.5*log(2*pi)`` term is omitted. ``target_weights`` is accepted but
+        currently not applied to the loss.
+        """
+        log_prob =   logmix - 0.5*(mean - true[:,None]).pow(2) / std.pow(2) - torch.log(std)
+        log_prob = torch.logsumexp(log_prob, 1)
+        loss = -log_prob.mean()
+        return loss
+    def _to_numpy(self,x):
+        """Detach a tensor from the graph and return it as a NumPy array on the CPU."""
+        return x.detach().cpu().numpy()
+    def _batch_loss(self, batch_input, batch_target, batch_weights):
+        """Move one batch to the device, run the model and return the loss tensor."""
+        batch_input = batch_input.to(self.device)
+        batch_target = batch_target.to(self.device)
+        batch_weights = batch_weights.to(self.device)
+        mu, logsig, logmix_coeff = self.model(batch_input)
+        # Clamp log-sigma so exp() stays in a numerically safe range.
+        sig = torch.exp(torch.clamp(logsig,-6,2))
+        return self._loss_function(mu, sig, logmix_coeff, batch_target, batch_weights)
+    def train(self,input_data, target_data, target_weights,  nepochs=10, val_fraction=0.1, lr=1e-3 ):
+        """Train the model with Adam and record per-epoch losses.
+        After the call ``self.loss_train`` and ``self.loss_validation`` hold, for every epoch,
+        the loss of the *last* training batch and of the *last* validation batch
+        (NaN when the corresponding loader is empty).
+        """
+        loader_train, loader_val = self._get_dataloaders(input_data, target_data, target_weights, val_fraction=val_fraction)
+        self.model = self.model.to(self.device)
+        optimizer = optim.Adam(self.model.parameters(), lr=lr, weight_decay=1e-4)
+        loss_train, loss_validation = [],[]
+        for epoch in range(nepochs):
+            self.model.train()
+            last_train_loss = float('nan')
+            for batch in loader_train:
+                optimizer.zero_grad()
+                loss = self._batch_loss(*batch)
+                loss.backward()
+                optimizer.step()
+                last_train_loss = loss.item()
+            loss_train.append(last_train_loss)
+            # Validation: no dropout and no autograd bookkeeping.
+            self.model.eval()
+            last_val_loss = float('nan')
+            with torch.no_grad():
+                for batch in loader_val:
+                    last_val_loss = self._batch_loss(*batch).item()
+            loss_validation.append(last_val_loss)
+            print(f'training_loss:{last_train_loss}',f'testing_loss:{last_val_loss}')
+        # Leave the model in training mode, as before.
+        self.model.train()
+        self.loss_train=loss_train
+        self.loss_validation=loss_validation
+    def get_photoz(self,input_data, target_data):
+        """Return (z, zerr) NumPy arrays for ``input_data``.
+        ``z`` is the mixture mean; ``zerr`` is the square root of the weighted component
+        variances plus the weighted squared distance of each component mean to ``target_data``.
+        Tensors and array-likes are both accepted.
+        """
+        self.model = self.model.eval()
+        self.model = self.model.to(self.device)
+        input_data = torch.as_tensor(input_data, dtype=torch.float32).to(self.device)
+        target_data = torch.as_tensor(target_data, dtype=torch.float32).to(self.device)
+        # A single forward pass covers the whole batch; no per-object loop is needed.
+        with torch.no_grad():
+            mu, logsig, logmix_coeff = self.model(input_data)
+            sig = torch.exp(torch.clamp(logsig,-6,2))
+            mix_coeff = torch.exp(logmix_coeff)
+            z = (mix_coeff * mu).sum(1)
+            # NOTE(review): the spread is measured around the true redshift (target_data),
+            # not around the predicted mean z -- confirm this is intended.
+            zerr = torch.sqrt( (mix_coeff * sig**2).sum(1) + (mix_coeff * (mu - target_data[:,None])**2).sum(1))
+        return self._to_numpy(z),self._to_numpy(zerr)
+    def plot_photoz(self, df, nbins,xvariable,metric, type_bin='bin'):
+        """Plot a photo-z metric of ``df.zwerr`` in quantile bins of ``df[xvariable]``.
+        Args:
+            df: DataFrame with the column named by ``xvariable`` and a ``zwerr`` column.
+            nbins: number of quantile edges between the 10th and 100th percentile.
+            xvariable: column used for binning.
+            metric: one of 'sig68', 'bias', 'nmad', 'outliers' (|zwerr| > 0.15 fraction).
+            type_bin: 'bin' for differential bins, 'cum' for cumulative (x < upper edge).
+        Raises:
+            ValueError: for an unsupported ``type_bin`` or ``metric``.
+        """
+        # Imported here because this module does not import them at file level.
+        import matplotlib.pyplot as plt
+        from scipy import stats
+        def _sigma68(dz):
+            series = pd.Series(dz)
+            return 0.5*(series.quantile(q = 0.84) - series.quantile(q = 0.16))
+        def _nmad(dz):
+            return 1.4826 * np.median(np.abs(dz - np.median(dz)))
+        def _outlier_fraction(dz):
+            return np.mean(np.abs(dz) > 0.15) if len(dz) else np.nan
+        metric_functions = {'sig68': _sigma68, 'bias': np.mean, 'nmad': _nmad, 'outliers': _outlier_fraction}
+        if type_bin not in ('bin', 'cum'):
+            raise ValueError("Only type_bin=='bin' for binned and 'cum' for cumulative are supported")
+        if metric not in metric_functions:
+            raise ValueError(f"Unsupported metric {metric!r}; choose from {sorted(metric_functions)}")
+        xvalues = df[xvariable]
+        bin_edges = stats.mstats.mquantiles(xvalues.values, np.linspace(0.1,1,nbins))
+        ydata,xlab = [],[]
+        for edge_min, edge_max in zip(bin_edges[:-1], bin_edges[1:]):
+            selection = xvalues < edge_max
+            if type_bin=='bin':
+                selection &= xvalues > edge_min
+            df_plot = df[selection]
+            xlab.append((edge_max + edge_min) / 2)
+            ydata.append(metric_functions[metric](df_plot.zwerr))
+        plt.plot(xlab,ydata, ls = '-', marker = '.', color = 'navy',lw = 1, label = '')
+        plt.ylabel(rf'{metric}$[\Delta z]$', fontsize = 18)
+        plt.xlabel(f'{xvariable}', fontsize = 16)
+        plt.xticks(fontsize = 14)
+        plt.yticks(fontsize = 14)
+        plt.grid(False)
+        plt.show()

insight/insight_arch.py ADDED Viewed

	@@ -0,0 +1,81 @@

+from torch import nn, optim
+import torch
+class Photoz_network(nn.Module):
+    """Mixture-density style network with a shared trunk and three heads.
+
+    ``features`` maps the 6 input features to a 100-dimensional vector.
+    Three heads of identical shape then produce ``num_gauss`` values each:
+    ``measure_mu``, ``measure_sigma`` and ``measure_coeffs``.
+    """
+
+    def __init__(self, num_gauss=10, dropout_prob=0):
+        """Build the trunk and the three heads.
+
+        Parameters
+        ----------
+        num_gauss : int
+            Number of outputs of every head (one per mixture component).
+        dropout_prob : float
+            Dropout probability applied after every hidden linear layer.
+        """
+        super().__init__()
+        # Creation order (features, mu, coeffs, sigma) and the layer order
+        # inside each stack are fixed so that state_dict keys and seeded
+        # parameter initialisation do not change.
+        self.features = self._mlp((6, 10, 30, 50, 70, 100), dropout_prob)
+        head_sizes = (100, 80, 70, 60, 50, num_gauss)
+        self.measure_mu = self._mlp(head_sizes, dropout_prob)
+        self.measure_coeffs = self._mlp(head_sizes, dropout_prob)
+        self.measure_sigma = self._mlp(head_sizes, dropout_prob)
+
+    @staticmethod
+    def _mlp(sizes, dropout_prob):
+        """Return Linear -> Dropout -> ReLU per hidden layer, ending in a bare Linear."""
+        layers = []
+        for n_in, n_out in zip(sizes[:-2], sizes[1:-1]):
+            layers += [nn.Linear(n_in, n_out), nn.Dropout(dropout_prob), nn.ReLU()]
+        layers.append(nn.Linear(sizes[-2], sizes[-1]))
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        """Return ``(mu, sigma, logmix_coeff)`` for the input ``x``.
+
+        ``x`` must have 6 features in its last dimension; the normalisation
+        below runs over dim 1, so a 2-D ``(batch, 6)`` input is presumably
+        expected -- confirm with the caller.
+
+        ``sigma`` is the raw output of ``measure_sigma`` with no positivity
+        constraint applied here.
+        NOTE(review): a caller appears to exponentiate it, i.e. to treat it
+        as log-sigma -- confirm.
+
+        ``logmix_coeff`` is normalised so that ``exp(logmix_coeff)`` sums to 1
+        along dim 1.
+        """
+        f = self.features(x)
+        mu = self.measure_mu(f)
+        sigma = self.measure_sigma(f)
+        logmix_coeff = self.measure_coeffs(f)
+        logmix_coeff = logmix_coeff - torch.logsumexp(logmix_coeff, dim=1, keepdim=True)
+        return mu, sigma, logmix_coeff

insight/utils.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from scipy import stats
+def nmad(data):
+    """Return 1.4826 times the median absolute deviation of ``data`` about its median."""
+    centre = np.median(data)
+    abs_dev = np.abs(data - centre)
+    return 1.4826 * np.median(abs_dev)
+def sigma68(data):
+    """Return half the distance between the 0.16 and 0.84 quantiles of ``data``."""
+    series = pd.Series(data)
+    upper = series.quantile(q = 0.84)
+    lower = series.quantile(q = 0.16)
+    return 0.5*(upper - lower)
+def plot_photoz(df, nbins,xvariable,metric, type_bin='bin'):
+    """Plot a summary metric of ``df.zwerr`` in quantile bins of ``df[xvariable]``.
+
+    Bin edges are the quantiles of ``df[xvariable]`` evaluated at
+    ``np.linspace(0.1, 1, nbins)``. Each pair of consecutive edges yields one
+    plotted point, placed at the midpoint of the two edges.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Must contain the column named by ``xvariable`` and a ``zwerr`` column
+        (presumably the redshift residual -- confirm with caller).
+    nbins : int
+        Number of quantile edges, i.e. ``nbins - 1`` plotted points.
+    xvariable : str
+        Column used for the binning and as the x-axis label.
+    metric : {'sig68', 'bias', 'nmad', 'outliers'}
+        Statistic computed per bin; 'outliers' is the fraction of rows with
+        ``|zwerr| > 0.15``.
+    type_bin : {'bin', 'cum'}
+        'bin' keeps rows strictly between the two edges, 'cum' keeps every row
+        strictly below the upper edge.
+
+    Raises
+    ------
+    ValueError
+        If ``type_bin`` or ``metric`` is not one of the supported values.
+    """
+    if type_bin not in ('bin', 'cum'):
+        raise ValueError("Only type_bin=='bin' for binned and 'cum' for cumulative are supported")
+    if metric not in ('sig68', 'bias', 'nmad', 'outliers'):
+        # Fail with a clear message instead of a length mismatch inside plt.plot.
+        raise ValueError(f"Unsupported metric '{metric}': use 'sig68', 'bias', 'nmad' or 'outliers'")
+    # Select rows on the same column the bin edges are computed from.
+    xvalues = df[xvariable]
+    bin_edges = stats.mstats.mquantiles(xvalues.values, np.linspace(0.1,1,nbins))
+    ydata,xlab = [],[]
+    for edge_min, edge_max in zip(bin_edges[:-1], bin_edges[1:]):
+        if type_bin=='bin':
+            df_plot = df[(xvalues > edge_min) & (xvalues < edge_max)]
+        else:
+            df_plot = df[xvalues < edge_max]
+        xlab.append((edge_max + edge_min) / 2)
+        if metric=='sig68':
+            ydata.append(sigma68(df_plot.zwerr))
+        elif metric=='bias':
+            ydata.append(np.mean(df_plot.zwerr))
+        elif metric=='nmad':
+            ydata.append(nmad(df_plot.zwerr))
+        elif len(df_plot) == 0:
+            # Repeated edge values can leave a bin empty; avoid dividing by zero.
+            ydata.append(np.nan)
+        else:
+            ydata.append(len(df_plot[np.abs(df_plot.zwerr)>0.15])/len(df_plot))
+    plt.plot(xlab,ydata, ls = '-', marker = '.', color = 'navy',lw = 1, label = '')
+    # Raw f-string: '\D' is not a valid escape sequence in a normal string literal.
+    plt.ylabel(rf'{metric}$[\Delta z]$', fontsize = 18)
+    plt.xlabel(f'{xvariable}', fontsize = 16)
+    plt.xticks(fontsize = 14)
+    plt.yticks(fontsize = 14)
+    plt.grid(False)
+    plt.show()