Spaces:

lauracabayol
/

TEMPS

Runtime error

File size: 10,832 Bytes

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from temps.utils import nmad, sigma68
from scipy import stats
from typing import List, Optional, Dict


# Metrics understood by plot_photoz / _photoz_metric_value.
_PHOTOZ_METRICS = ("sig68", "bias", "nmad", "outliers")
# Line styles cycled over the input dataframes (wraps around after three).
_PHOTOZ_LINESTYLES = ("--", ":", "-")


def _photoz_metric_value(df_bin: pd.DataFrame, metric: str) -> float:
    """Evaluate a single photo-z metric on the ``zwerr`` column of one bin."""
    if metric == "sig68":
        return sigma68(df_bin.zwerr)
    if metric == "bias":
        return np.mean(df_bin.zwerr)
    if metric == "nmad":
        return nmad(df_bin.zwerr)
    if metric == "outliers":
        n_objects = len(df_bin)
        if n_objects == 0:
            # An empty bin has no defined outlier fraction; NaN leaves a gap
            # in the curve instead of raising ZeroDivisionError.
            return float("nan")
        return len(df_bin[np.abs(df_bin.zwerr) > 0.15]) / n_objects * 100
    raise ValueError(
        f"Unknown metric {metric!r}; expected one of {_PHOTOZ_METRICS}."
    )


def plot_photoz(
    df_list: List[pd.DataFrame],
    nbins: int,
    xvariable: str,
    metric: str,
    type_bin: str = "bin",
    label_list: Optional[List[str]] = None,
    samp: str = "zs",
    save: bool = False,
) -> None:
    """
    Plot a photo-z metric versus a binned variable for several dataframes.

    The upper panel shows the metric per bin for every dataframe; the lower
    panel shows each dataframe's curve divided by the curve of the first
    dataframe in df_list.

    Parameters:
    - df_list (List[pd.DataFrame]): Dataframes to compare. Each must contain the
      column named by xvariable and a 'zwerr' column. The first one defines the
      bin edges and is the reference of the ratio panel.
    - nbins (int): Number of quantile edges computed from the first dataframe
      (this yields nbins - 1 bins).
    - xvariable (str): Column used for binning and for the x-axis.
    - metric (str): Metric to plot: 'sig68', 'bias', 'nmad' or 'outliers'
      (percentage of rows with |zwerr| > 0.15).
    - type_bin (str, optional): 'bin' selects rows strictly inside each bin; any
      other value selects all rows below the upper edge (cumulative). Default is 'bin'.
    - label_list (Optional[List[str]], optional): One legend label per dataframe.
      If None, curves are unlabeled and no legend is drawn. Default is None.
    - samp (str, optional): Sample tag used in the output file name. Default is 'zs'.
    - save (bool, optional): If True, save the figure as
      '{metric}_{xvariable}_{samp}.pdf'. Default is False.

    Returns:
    None

    Raises:
    ValueError: If df_list is empty or metric is not a supported name.
    """
    if not df_list:
        raise ValueError("df_list must contain at least one dataframe.")
    if metric not in _PHOTOZ_METRICS:
        # Fail early; otherwise the curves would be empty and matplotlib would
        # complain about mismatched x/y lengths.
        raise ValueError(
            f"Unknown metric {metric!r}; expected one of {_PHOTOZ_METRICS}."
        )

    # Plot properties
    plt.rcParams["font.family"] = "serif"
    plt.rcParams["font.size"] = 12

    # Set x-axis label based on variable
    xvariable_lab = "VIS" if xvariable == "VISmag" else r"$z_{\rm s}$"

    # Quantile-based bin edges taken from the first dataframe
    bin_edges = stats.mstats.mquantiles(
        df_list[0][xvariable].values, np.linspace(0.05, 1, nbins)
    )
    bins = list(zip(bin_edges[:-1], bin_edges[1:]))
    # The bin centres are identical for every dataframe, so compute them once.
    bin_centres = [(edge_max + edge_min) / 2 for edge_min, edge_max in bins]
    cmap = plt.get_cmap("Dark2")

    # Create subplots
    fig, (ax1, ax2) = plt.subplots(
        2, 1, figsize=(8, 8), gridspec_kw={"height_ratios": [3, 1]}
    )
    label_map = {
        "zs": r"$z_{\rm s}$",
        "zs+L15": r"$z_{\rm s}$+L15",
        "TEMPS": "TEMPS",
    }
    curves: List[List[float]] = []

    # Loop through dataframes and calculate metrics
    for i, df in enumerate(df_list):
        label = label_list[i] if label_list is not None else None
        label_lab = label_map.get(label, label)

        ydata = []
        for edge_min, edge_max in bins:
            if type_bin == "bin":
                df_plot = df[(df[xvariable] > edge_min) & (df[xvariable] < edge_max)]
            else:
                df_plot = df[(df[xvariable] < edge_max)]
            ydata.append(_photoz_metric_value(df_plot, metric))

        curves.append(ydata)
        ax1.plot(
            bin_centres,
            ydata,
            marker=".",
            lw=1,
            label=label_lab,
            color=cmap(i),
            # Cycle so that more than three dataframes do not raise IndexError.
            ls=_PHOTOZ_LINESTYLES[i % len(_PHOTOZ_LINESTYLES)],
        )

    # Raw f-string: "\D" is not a valid escape sequence in a normal string.
    ax1.set_ylabel(rf"{metric} $[\Delta z]$", fontsize=18)
    ax1.grid(False)
    if label_list is not None:
        ax1.legend()

    # Plot ratios of every non-reference curve to the first curve
    reference = np.array(curves[0])
    for i in range(1, len(curves)):
        ax2.plot(
            bin_centres,
            np.array(curves[i]) / reference,
            marker=".",
            color=cmap(i),
        )
    # NOTE(review): the subscript below reads "z" while the x-axis label uses
    # z_s -- confirm whether this is intentional before changing the string.
    ax2.set_ylabel(r"Method $X$ / $z_{\rm z}$", fontsize=14)
    ax2.set_xlabel(f"{xvariable_lab}", fontsize=16)
    ax2.grid(True)

    if save:
        plt.savefig(f"{metric}_{xvariable}_{samp}.pdf", dpi=300, bbox_inches="tight")
    plt.show()


def plot_pz(
    m: int,
    pz: np.ndarray,
    specz: np.ndarray,
    z_grid: Optional[np.ndarray] = None,
) -> None:
    """
    Plot the redshift PDF of one object together with its spectroscopic redshift.

    Parameters:
    - m (int): Index of the object to plot; selects pz[m] and specz[m].
    - pz (np.ndarray): PDFs, indexed by object along the first axis. pz[m] must
      have the same length as the redshift grid.
    - specz (np.ndarray): Spectroscopic redshifts indexed by object. A scalar is
      also accepted and is used directly.
    - z_grid (Optional[np.ndarray], optional): Redshift values at which pz[m] is
      evaluated. Default is None, which uses 1000 points evenly spaced in [0, 4].

    Returns:
    None
    """
    if z_grid is None:
        z_grid = np.linspace(0, 4, 1000)
    # Accept both a per-object array and a plain scalar redshift.
    z_spec = specz if np.ndim(specz) == 0 else specz[m]

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(z_grid, pz[m], label="PDF", color="navy")
    ax.axvline(z_spec, color="black", linestyle="--", label=r"$z_{\rm s}$")
    ax.set_xlabel(r"$z$", fontsize=18)
    ax.set_ylabel("Probability Density", fontsize=16)
    ax.legend(fontsize=18)
    plt.show()


def plot_zdistribution(archive, plot_test: bool = False, bins: int = 50) -> None:
    """
    Plot the redshift histogram of the training sample and, optionally, the test sample.

    Parameters:
    - archive: Data archive object. Its get_training_data() must return a
      3-tuple whose last element holds the redshifts to histogram.
    - plot_test (bool, optional): If True, overlay the test sample distribution.
      Default is False.
    - bins (int, optional): Number of histogram bins. Default is 50.

    Returns:
    None
    """
    _, _, specz = archive.get_training_data()
    plt.hist(specz, bins=bins, histtype="step", color="navy", label=r"Training sample")

    if plot_test:
        # The original fetched the training data a second time here, so the
        # "Test sample" histogram was a copy of the training one.
        # NOTE(review): assumes the archive exposes get_testing_data() with the
        # same 3-tuple layout as get_training_data() -- confirm against the
        # archive class.
        _, _, specz_test = archive.get_testing_data()
        plt.hist(
            specz_test,
            bins=bins,
            histtype="step",
            color="goldenrod",
            label=r"Test sample",
            linestyle="--",
        )

    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel(r"Redshift", fontsize=14)
    plt.ylabel("Counts", fontsize=14)
    plt.legend()
    plt.show()


def plot_som_map(
    som_data: np.ndarray, plot_arg: str = "z", vmin: float = 0, vmax: float = 1
) -> None:
    """
    Display a Self-Organizing Map (SOM) array as a colour image.

    Parameters:
    - som_data (numpy.ndarray): Array drawn as an image with the 'viridis' colormap.
    - plot_arg (str, optional): Text used as the colorbar label. Default is 'z'.
    - vmin (float, optional): Lower limit of the colour scale. Default is 0.
    - vmax (float, optional): Upper limit of the colour scale. Default is 1.

    Returns:
    None
    """
    colour_limits = {"vmin": vmin, "vmax": vmax}
    image = plt.imshow(som_data, cmap="viridis", **colour_limits)
    # Attach the colorbar to the image that was just drawn.
    plt.colorbar(image, label=f"{plot_arg}")

    axis_labels = (
        (plt.xlabel, r"$x$ [pixel]"),
        (plt.ylabel, r"$y$ [pixel]"),
    )
    for set_label, text in axis_labels:
        set_label(text, fontsize=14)

    plt.show()


def plot_PIT(
    pit_list_1: List[float],
    pit_list_2: Optional[List[float]] = None,
    pit_list_3: Optional[List[float]] = None,
    sample: str = "specz",
    labels: Optional[List[str]] = None,
    save: bool = True,
) -> None:
    """
    Plot normalized histograms of Probability Integral Transform (PIT) values.

    Up to three PIT lists are drawn as dashed step histograms (30 bins over
    [0, 1]) on the same axes, each with its own colour.

    Parameters:
    - pit_list_1 (List[float]): First list of PIT values.
    - pit_list_2 (Optional[List[float]], optional): Second list of PIT values. Default is None.
    - pit_list_3 (Optional[List[float]], optional): Third list of PIT values. Default is None.
    - sample (str, optional): Sample tag used in the output file name. Default is 'specz'.
    - labels (Optional[List[str]], optional): Legend labels; labels[i] belongs to
      pit_list_{i+1}. If None, histograms are unlabeled and no legend is drawn.
      Default is None.
    - save (bool, optional): If True, save the figure as 'PIT_{sample}.pdf'. Default is True.

    Returns:
    None
    """
    plt.rcParams["font.family"] = "serif"
    plt.rcParams["font.size"] = 12
    _, ax = plt.subplots(figsize=(8, 6))
    hist_kwargs = dict(bins=30, histtype="step", density=True, range=(0, 1))
    cmap = plt.get_cmap("Dark2")

    # The position in this tuple fixes both the colour and the label index,
    # so pit_list_3 keeps colour/label 2 even when pit_list_2 is omitted.
    for idx, pit_values in enumerate((pit_list_1, pit_list_2, pit_list_3)):
        if pit_values is None:
            continue
        # labels defaults to None; indexing it unconditionally raised TypeError.
        label = labels[idx] if labels is not None else None
        ax.hist(pit_values, color=cmap(idx), linestyle="--", label=label, **hist_kwargs)

    ax.set_xlabel("PIT values", fontsize=14)
    ax.set_ylabel("Normalized Counts", fontsize=14)
    if labels is not None:
        ax.legend(fontsize=12)

    if save:
        plt.savefig(f"PIT_{sample}.pdf", dpi=300, bbox_inches="tight")
    plt.show()


def plot_outlier_ratio(
    outliers: np.ndarray, num_samp: int = 100, plot_mean: bool = True
) -> None:
    """
    Plot the outlier ratio as a function of the number of samples.

    Parameters:
    - outliers (np.ndarray): Outlier ratio data; entry k is plotted at x = k + 1.
    - num_samp (int, optional): Maximum number of leading entries to plot. If
      outliers holds fewer entries, all of them are plotted. Default is 100.
    - plot_mean (bool, optional): If True, draw a horizontal line at the mean of
      the full outliers array (not only the plotted entries). Default is True.

    Returns:
    None
    """
    outliers = np.asarray(outliers)
    shown = outliers[:num_samp]
    # Size the x axis from the data actually available; a fixed num_samp-long
    # axis mismatches the y data when outliers is shorter than num_samp.
    sample_counts = np.arange(1, len(shown) + 1)

    plt.figure(figsize=(10, 6))
    plt.plot(sample_counts, shown, label="Outlier Ratio")

    if plot_mean:
        plt.axhline(
            np.mean(outliers), color="red", linestyle="--", label="Mean Outlier Ratio"
        )

    plt.xlabel("Number of Samples", fontsize=14)
    plt.ylabel("Outlier Ratio", fontsize=14)
    plt.legend()
    plt.grid()
    plt.show()


def plot_crps(
    crps_list_1: List[float],
    crps_list_2: Optional[List[float]] = None,
    crps_list_3: Optional[List[float]] = None,
    labels: Optional[List[str]] = None,
    sample: str = "specz",
    save: bool = True,
) -> None:
    """
    Plot normalized histograms of CRPS scores and annotate their means.

    Up to three CRPS lists are drawn as step histograms (50 bins over [0, 1],
    x-axis limited to [0, 0.5]). For every list that is provided, its NaN-aware
    mean, rounded to two decimals, is written on the axes in the matching colour.

    Parameters:
    - crps_list_1 (List[float]): First list of CRPS scores.
    - crps_list_2 (Optional[List[float]], optional): Second list of CRPS scores. Default is None.
    - crps_list_3 (Optional[List[float]], optional): Third list of CRPS scores. Default is None.
    - labels (Optional[List[str]], optional): Names of the lists; labels[i] belongs
      to crps_list_{i+1}. If None, the annotations carry no name. Default is None.
    - sample (str, optional): Sample tag used in the output file name. Default is 'specz'.
    - save (bool, optional): If True, save the figure as '{sample}_CRPS.pdf'. Default is True.

    Returns:
    None
    """
    # plot properties
    plt.rcParams["font.family"] = "serif"
    plt.rcParams["font.size"] = 12
    _, ax = plt.subplots(figsize=(8, 6))
    cmap = plt.get_cmap("Dark2")

    hist_kwargs = dict(bins=50, histtype="step", density=True, range=(0, 1))
    # Per-slot styling; the slot index also selects the colour and the label.
    linestyles = ("--", ":", "-")
    annotation_heights = (0.9, 0.85, 0.8)

    for idx, crps_values in enumerate((crps_list_1, crps_list_2, crps_list_3)):
        # Skip omitted lists entirely: previously their mean and label were
        # still evaluated, which raised when the optional lists were None.
        if crps_values is None:
            continue
        name = labels[idx] if labels is not None else None

        ax.hist(
            crps_values,
            color=cmap(idx),
            ls=linestyles[idx],
            label=name,
            **hist_kwargs,
        )

        mean_crps = round(np.nanmean(crps_values), 2)
        if name is not None:
            text = f"Mean CRPS {name}: {mean_crps}"
        else:
            text = f"Mean CRPS: {mean_crps}"
        ax.annotate(
            text,
            xy=(0.57, annotation_heights[idx]),
            xycoords="axes fraction",
            fontsize=14,
            color=cmap(idx),
        )

    # Axis labels
    ax.set_xlabel("CRPS Scores", fontsize=18)
    ax.set_ylabel("Frequency", fontsize=18)

    # Add grid lines
    ax.grid(True, linestyle="--", alpha=0.7)

    # Customize the x-axis
    ax.set_xlim(0, 0.5)

    # Make ticks larger
    ax.tick_params(axis="both", which="major", labelsize=14)

    if save:
        plt.savefig(f"{sample}_CRPS.pdf", bbox_inches="tight")

    # Show the plot
    plt.show()