Spaces:

lauracabayol
/

TEMPS

Runtime error

TEMPS / temps /plots.py

Laura Cabayol Garcia

AT for TEMPS

21a7d1b 5 months ago

10.8 kB

	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	from temps.utils import nmad, sigma68
	from scipy import stats
	from typing import List, Optional, Dict


	def plot_photoz(
	    df_list: List[pd.DataFrame],
	    nbins: int,
	    xvariable: str,
	    metric: str,
	    type_bin: str = "bin",
	    label_list: Optional[List[str]] = None,
	    samp: str = "zs",
	    save: bool = False,
	) -> None:
	    """
	    Plot a binned photo-z metric for several dataframes and their ratios to the first one.

	    The top panel shows the metric per bin for every dataframe. The bottom panel
	    shows the metric of each further dataframe divided by that of ``df_list[0]``.

	    Parameters:
	    - df_list (List[pd.DataFrame]): Dataframes to compare. Each one is read through
	      its `xvariable` column and its `zwerr` column. Bin edges are derived from
	      ``df_list[0]`` only and reused for all dataframes.
	    - nbins (int): Number of quantile levels (from 0.05 to 1) used as bin edges,
	      which yields ``nbins - 1`` bins.
	    - xvariable (str): Column to bin on. 'VISmag' is labelled "VIS" on the x-axis;
	      anything else is labelled as spectroscopic redshift.
	    - metric (str): One of 'sig68', 'bias', 'nmad', 'outliers'. 'bias' is the mean of
	      `zwerr`; 'outliers' is the percentage of rows with ``|zwerr| > 0.15``;
	      'sig68' and 'nmad' are delegated to `temps.utils`.
	    - type_bin (str, optional): 'bin' selects rows strictly inside each bin; any other
	      value selects all rows below the upper bin edge (cumulative). Default is 'bin'.
	    - label_list (Optional[List[str]], optional): One legend label per dataframe.
	      If None, curves are drawn unlabelled and no legend is shown. Default is None.
	    - samp (str, optional): Sample tag used in the output file name. Default is 'zs'.
	    - save (bool, optional): If True, save the figure as
	      ``{metric}_{xvariable}_{samp}.pdf``. Default is False.

	    Returns:
	    None

	    Raises:
	    - ValueError: If `metric` is not supported, or `label_list` has fewer entries
	      than `df_list`.
	    """
	    outlier_threshold = 0.15
	    metric_funcs = {
	        "sig68": sigma68,
	        "bias": np.mean,
	        "nmad": nmad,
	        "outliers": lambda err: int((np.abs(err) > outlier_threshold).sum())
	        / len(err)
	        * 100,
	    }
	    if metric not in metric_funcs:
	        raise ValueError(
	            f"Unknown metric {metric!r}; expected one of {sorted(metric_funcs)}"
	        )
	    if label_list is not None and len(label_list) < len(df_list):
	        raise ValueError("label_list must provide one label per dataframe")
	    compute_metric = metric_funcs[metric]

	    # Plot properties (note: this changes matplotlib's global rcParams)
	    plt.rcParams["font.family"] = "serif"
	    plt.rcParams["font.size"] = 12

	    # Set x-axis label based on variable
	    xvariable_lab = "VIS" if xvariable == "VISmag" else r"$z_{\rm s}$"

	    # Quantile-based bin edges, computed once from the first dataframe
	    bin_edges = stats.mstats.mquantiles(
	        df_list[0][xvariable].values, np.linspace(0.05, 1, nbins)
	    )
	    bin_pairs = list(zip(bin_edges[:-1], bin_edges[1:]))
	    # The x position of each point is the midpoint of its bin
	    bin_centers = [(edge_max + edge_min) / 2 for edge_min, edge_max in bin_pairs]

	    cmap = plt.get_cmap("Dark2")
	    line_styles = ["--", ":", "-"]
	    pretty_labels = {
	        "zs": r"$z_{\rm s}$",
	        "zs+L15": r"$z_{\rm s}$+L15",
	        "TEMPS": "TEMPS",
	    }

	    # Create subplots
	    fig, (ax1, ax2) = plt.subplots(
	        2, 1, figsize=(8, 8), gridspec_kw={"height_ratios": [3, 1]}
	    )

	    # Loop through dataframes and calculate the metric in every bin
	    metric_by_df: List[List[float]] = []
	    for i, df in enumerate(df_list):
	        label = label_list[i] if label_list is not None else None
	        label_lab = pretty_labels.get(label, label)

	        ydata = []
	        for edge_min, edge_max in bin_pairs:
	            if type_bin == "bin":
	                selection = (df[xvariable] > edge_min) & (df[xvariable] < edge_max)
	            else:
	                selection = df[xvariable] < edge_max
	            errors = df[selection].zwerr
	            # An empty bin has no defined metric; NaN leaves a gap in the curve
	            # instead of raising (e.g. division by zero for 'outliers').
	            ydata.append(compute_metric(errors) if len(errors) else np.nan)

	        metric_by_df.append(ydata)
	        ax1.plot(
	            bin_centers,
	            ydata,
	            marker=".",
	            lw=1,
	            label=label_lab,
	            color=cmap(i),
	            # Cycle so that more than three dataframes do not raise IndexError
	            ls=line_styles[i % len(line_styles)],
	        )

	    ax1.set_ylabel(rf"{metric} $[\Delta z]$", fontsize=18)
	    ax1.grid(False)
	    if label_list is not None:
	        ax1.legend()

	    # Plot ratios of every further dataframe with respect to the first one
	    baseline = np.array(metric_by_df[0])
	    for i in range(1, len(metric_by_df)):
	        ax2.plot(
	            bin_centers,
	            np.array(metric_by_df[i]) / baseline,
	            marker=".",
	            color=cmap(i),
	        )
	    # NOTE(review): the subscript "z" below may be a typo for "s" -- confirm.
	    ax2.set_ylabel(r"Method $X$ / $z_{\rm z}$", fontsize=14)
	    ax2.set_xlabel(f"{xvariable_lab}", fontsize=16)
	    ax2.grid(True)

	    if save:
	        plt.savefig(f"{metric}_{xvariable}_{samp}.pdf", dpi=300, bbox_inches="tight")
	    plt.show()


	def plot_pz(m: int, pz: np.ndarray, specz: np.ndarray) -> None:
	    """
	    Plot one redshift PDF together with the matching spectroscopic redshift.

	    Parameters:
	    - m (int): Index selecting the entry to plot; used for both `pz` and `specz`.
	    - pz (np.ndarray): Collection of PDFs; ``pz[m]`` is the curve that is drawn.
	    - specz (np.ndarray): Spectroscopic redshifts; ``specz[m]`` is marked with a
	      vertical dashed line.

	    Returns:
	    None
	    """
	    pdf = pz[m]
	    # The redshift grid is hard-coded to span 0 to 4; its resolution follows the
	    # PDF so that curves with a length other than 1000 can still be drawn.
	    # NOTE(review): confirm the PDFs are really sampled on [0, 4].
	    z_grid = np.linspace(0, 4, len(pdf))

	    _, ax = plt.subplots(figsize=(8, 6))
	    ax.plot(z_grid, pdf, label="PDF", color="navy")
	    ax.axvline(specz[m], color="black", linestyle="--", label=r"$z_{\rm s}$")
	    ax.set_xlabel(r"$z$", fontsize=18)
	    ax.set_ylabel("Probability Density", fontsize=16)
	    ax.legend(fontsize=18)
	    plt.show()


	def plot_zdistribution(archive, plot_test: bool = False, bins: int = 50) -> None:
	    """
	    Draw a step histogram of the redshifts returned by the archive.

	    Parameters:
	    - archive: Object exposing ``get_training_data()``, which must return three
	      values; the third one is histogrammed.
	    - plot_test (bool, optional): If True, overlay a second dashed histogram
	      labelled "Test sample". Default is False.
	    - bins (int, optional): Number of histogram bins. Default is 50.

	    Returns:
	    None
	    """
	    step_hist = {"bins": bins, "histtype": "step"}

	    _first, _second, train_z = archive.get_training_data()
	    plt.hist(train_z, color="navy", label=r"Training sample", **step_hist)

	    if plot_test:
	        # NOTE(review): this overlay also reads get_training_data(), so it shows
	        # the same values as the training histogram. It presumably should use
	        # the archive's test-data accessor -- confirm against the archive API.
	        _first, _second, test_z = archive.get_training_data()
	        plt.hist(
	            test_z,
	            color="goldenrod",
	            label=r"Test sample",
	            linestyle="--",
	            **step_hist,
	        )

	    # Axis cosmetics
	    for set_ticks in (plt.xticks, plt.yticks):
	        set_ticks(fontsize=12)
	    plt.xlabel(r"Redshift", fontsize=14)
	    plt.ylabel("Counts", fontsize=14)
	    plt.legend()
	    plt.show()


	def plot_som_map(
	    som_data: np.ndarray, plot_arg: str = "z", vmin: float = 0, vmax: float = 1
	) -> None:
	    """
	    Show Self-Organizing Map (SOM) data as a colour image with a colorbar.

	    Parameters:
	    - som_data (numpy.ndarray): Array handed to ``plt.imshow``.
	    - plot_arg (str, optional): Text used as the colorbar label. Default is 'z'.
	    - vmin (float, optional): Lower limit of the colour scale. Default is 0.
	    - vmax (float, optional): Upper limit of the colour scale. Default is 1.

	    Returns:
	    None
	    """
	    color_scale = {"vmin": vmin, "vmax": vmax, "cmap": "viridis"}
	    plt.imshow(som_data, **color_scale)
	    plt.colorbar(label=format(plot_arg))

	    # Both axes are in pixel units and share the same font size
	    axis_labels = (
	        (plt.xlabel, r"$x$ [pixel]"),
	        (plt.ylabel, r"$y$ [pixel]"),
	    )
	    for set_label, text in axis_labels:
	        set_label(text, fontsize=14)

	    plt.show()


	def plot_PIT(
	    pit_list_1: List[float],
	    pit_list_2: Optional[List[float]] = None,
	    pit_list_3: Optional[List[float]] = None,
	    sample: str = "specz",
	    labels: Optional[List[str]] = None,
	    save: bool = True,
	) -> None:
	    """
	    Plot normalised histograms of Probability Integral Transform (PIT) values.

	    Parameters:
	    - pit_list_1 (List[float]): First list of PIT values (always plotted).
	    - pit_list_2 (Optional[List[float]], optional): Second list of PIT values. Default is None.
	    - pit_list_3 (Optional[List[float]], optional): Third list of PIT values. Default is None.
	    - sample (str, optional): Sample tag used in the output file name. Default is 'specz'.
	    - labels (Optional[List[str]], optional): Legend labels, indexed by the position
	      of the PIT list (0, 1, 2). If None, histograms are unlabelled and no legend
	      is drawn. Default is None.
	    - save (bool, optional): If True, save the figure as ``PIT_{sample}.pdf``.
	      Default is True.

	    Returns:
	    None

	    Raises:
	    - ValueError: If `labels` is given but has no entry for a plotted PIT list.
	    """
	    # Keep the original slot index so colours and labels stay tied to the
	    # argument position even when an intermediate list is omitted.
	    series = [(0, pit_list_1)]
	    series += [
	        (slot, values)
	        for slot, values in ((1, pit_list_2), (2, pit_list_3))
	        if values is not None
	    ]
	    if labels is not None and any(slot >= len(labels) for slot, _ in series):
	        raise ValueError("labels must provide an entry for every plotted PIT list")

	    # Note: this changes matplotlib's global rcParams
	    plt.rcParams["font.family"] = "serif"
	    plt.rcParams["font.size"] = 12
	    fig, ax = plt.subplots(figsize=(8, 6))
	    hist_kwargs = dict(bins=30, histtype="step", density=True, range=(0, 1))
	    cmap = plt.get_cmap("Dark2")

	    # One step histogram per provided list
	    for slot, values in series:
	        ax.hist(
	            values,
	            color=cmap(slot),
	            linestyle="--",
	            label=labels[slot] if labels is not None else None,
	            **hist_kwargs,
	        )

	    ax.set_xlabel("PIT values", fontsize=14)
	    ax.set_ylabel("Normalized Counts", fontsize=14)
	    if labels is not None:
	        ax.legend(fontsize=12)

	    if save:
	        plt.savefig(f"PIT_{sample}.pdf", dpi=300, bbox_inches="tight")
	    plt.show()


	def plot_outlier_ratio(
	    outliers: np.ndarray, num_samp: int = 100, plot_mean: bool = True
	) -> None:
	    """
	    Plot the outlier ratio as a function of the number of samples.

	    Parameters:
	    - outliers (np.ndarray): Outlier ratio data; only the first `num_samp` entries
	      are drawn.
	    - num_samp (int, optional): Maximum number of entries to plot. If `outliers`
	      is shorter, all of its entries are plotted. Default is 100.
	    - plot_mean (bool, optional): If True, draw a horizontal line at the mean of
	      the whole `outliers` array (not only the plotted part). Default is True.

	    Returns:
	    None
	    """
	    shown = outliers[:num_samp]
	    # Size the x-axis from what is actually plotted so that an input shorter
	    # than num_samp does not cause an x/y length mismatch.
	    sample_counts = np.arange(1, len(shown) + 1)

	    plt.figure(figsize=(10, 6))
	    plt.plot(sample_counts, shown, label="Outlier Ratio")

	    if plot_mean:
	        plt.axhline(
	            np.mean(outliers), color="red", linestyle="--", label="Mean Outlier Ratio"
	        )

	    plt.xlabel("Number of Samples", fontsize=14)
	    plt.ylabel("Outlier Ratio", fontsize=14)
	    plt.legend()
	    plt.grid()
	    plt.show()


	def plot_crps(
	    crps_list_1: List[float],
	    crps_list_2: Optional[List[float]] = None,
	    crps_list_3: Optional[List[float]] = None,
	    labels: Optional[List[str]] = None,
	    sample: str = "specz",
	    save: bool = True,
	) -> None:
	    """
	    Plot normalised histograms of CRPS scores and annotate their NaN-ignoring means.

	    Parameters:
	    - crps_list_1 (List[float]): First list of CRPS scores (always plotted).
	    - crps_list_2 (Optional[List[float]], optional): Second list of CRPS scores. Default is None.
	    - crps_list_3 (Optional[List[float]], optional): Third list of CRPS scores. Default is None.
	    - labels (Optional[List[str]], optional): Labels indexed by the position of the
	      CRPS list (0, 1, 2); used for the histograms and the mean annotations.
	      If None, the annotations omit the label. Default is None.
	    - sample (str, optional): Sample tag used in the output file name. Default is 'specz'.
	    - save (bool, optional): If True, save the figure as ``{sample}_CRPS.pdf``.
	      Default is True.

	    Returns:
	    None

	    Raises:
	    - ValueError: If `labels` is given but has no entry for a plotted CRPS list.
	    """
	    # Keep the original slot index so colour, line style, label and annotation
	    # position stay tied to the argument position.
	    series = [(0, crps_list_1)]
	    series += [
	        (slot, scores)
	        for slot, scores in ((1, crps_list_2), (2, crps_list_3))
	        if scores is not None
	    ]
	    if labels is not None and any(slot >= len(labels) for slot, _ in series):
	        raise ValueError("labels must provide an entry for every plotted CRPS list")

	    # Plot properties (note: this changes matplotlib's global rcParams)
	    plt.rcParams["font.family"] = "serif"
	    plt.rcParams["font.size"] = 12
	    fig, ax = plt.subplots(figsize=(8, 6))
	    cmap = plt.get_cmap("Dark2")

	    hist_kwargs = dict(bins=50, histtype="step", density=True, range=(0, 1))
	    line_styles = ("--", ":", "-")

	    # One step histogram per provided list
	    for slot, scores in series:
	        ax.hist(
	            scores,
	            color=cmap(slot),
	            ls=line_styles[slot],
	            label=labels[slot] if labels is not None else None,
	            **hist_kwargs,
	        )

	    # Axis labels
	    ax.set_xlabel("CRPS Scores", fontsize=18)
	    ax.set_ylabel("Frequency", fontsize=18)

	    # Add grid lines
	    ax.grid(True, linestyle="--", alpha=0.7)

	    # Only the low-score region is shown, although bins cover (0, 1)
	    ax.set_xlim(0, 0.5)

	    # Make ticks larger
	    ax.tick_params(axis="both", which="major", labelsize=14)

	    # Annotate the mean CRPS of every plotted list in the upper-right area,
	    # stacked vertically by slot and coloured like the matching histogram.
	    annotation_heights = (0.9, 0.85, 0.8)
	    for slot, scores in series:
	        mean_crps = round(np.nanmean(scores), 2)
	        if labels is not None:
	            text = f"Mean CRPS {labels[slot]}: {mean_crps}"
	        else:
	            text = f"Mean CRPS: {mean_crps}"
	        ax.annotate(
	            text,
	            xy=(0.57, annotation_heights[slot]),
	            xycoords="axes fraction",
	            fontsize=14,
	            color=cmap(slot),
	        )

	    if save:
	        plt.savefig(f"{sample}_CRPS.pdf", bbox_inches="tight")

	    # Show the plot
	    plt.show()