Source code for gpmpcontrib.plot.visualization

import itertools
import numpy as np
import matplotlib.pyplot as plt
import gpmp as gp

# In Matplotlib, interactive mode is a feature that allows for
# real-time updates to plots. When interactive mode is on, Matplotlib
# will automatically update the plot window after every plotting
# command without needing to call plt.show() explicitly.
# To turn interactive mode on globally, uncomment the line below.

# plt.ion()

# Module-level flag forwarded as `isinteractive` to gp.plot.Figure by
# plot_1d (set to True if interactive plotting is desired).
interactive = False



[docs]
def plot_1d(
    xt, zt, xi, zi, zpm, zpv, zpsim=None, xnew=None, title=None, xlim=None
):
    """
    Visualize the results of the predictions and the dataset.

    Parameters:
    xt (ndarray): Test points
    zt (ndarray or None): True values at test points (not drawn when None)
    xi (ndarray): Input data points
    zi (ndarray): Output values at input data points
    zpm (ndarray): Posterior mean values
    zpv (ndarray): Posterior variances
    zpsim (ndarray, optional): Conditional sample paths, one path per column
    xnew (ndarray, optional): New data point being added; drawn as a
        vertical gray line spanning the current y-limits
    title (str, optional): Title for the plot. When omitted and `xnew` is
        given, the title "New Evaluation" is used.
    xlim (sequence of two floats, optional): x-axis limits handed to the
        figure's `show` method. Defaults to [-1.0, 1.0], which is the range
        this function always used before the parameter existed.
    """
    if xlim is None:
        xlim = [-1.0, 1.0]

    fig = gp.plot.Figure(isinteractive=interactive)

    # Plot zt if it is provided
    if zt is not None:
        fig.plot(xt, zt, "k", linewidth=1, linestyle=(0, (5, 5)), label="truth")

    # Plot conditional sample paths only if zpsim is provided.  The first
    # path is plotted on its own so that the legend gets a single entry.
    if zpsim is not None:
        fig.plot(xt, zpsim[:, 0], "k", linewidth=0.5, label="conditional sample paths")
        # Nothing left to draw when only one path was supplied.
        if zpsim.shape[1] > 1:
            fig.plot(xt, zpsim[:, 1:], "k", linewidth=0.5)

    # Plot data points
    fig.plotdata(xi, zi)

    # Plot GP mean and variance
    fig.plotgp(xt, zpm, zpv, colorscheme="simple")

    # Plot new evaluation point if provided
    if xnew is not None:
        fig.plot(np.repeat(xnew, 2), fig.ylim(), color="tab:gray", linewidth=3)
        if title is None:
            title = "New Evaluation"

    # Add title if one was provided or chosen above
    if title is not None:
        fig.title(title)

    # Set labels and show plot
    fig.xylabels("$x$", "$z$")
    fig.show(grid=True, xlim=list(xlim), legend=True, legend_fontsize=9)




[docs]
def show_truth_vs_prediction(zt, zpm):
    """
    Visualize the predictions vs truth.

    One scatter panel is drawn per output (column of `zt`), with the true
    values on the x-axis, the predicted values on the y-axis, and a dashed
    identity line spanning the range of the true values.

    Parameters:
    zt (ndarray): True values, shape (n_samples, n_outputs). A 1D array is
        treated as a single output.
    zpm (ndarray): Predicted values, same layout as `zt`.
    """
    # Accept 1D inputs by viewing them as a single output column.
    if zt.ndim == 1:
        zt = zt.reshape(-1, 1)
    if zpm.ndim == 1:
        zpm = zpm.reshape(-1, 1)

    num_outputs = zt.shape[1]
    # squeeze=False keeps `axs` two-dimensional even for a single output,
    # so no special case is needed when indexing (same as show_loo_errors).
    fig, axs = plt.subplots(
        1, num_outputs, figsize=(6 * num_outputs, 5), squeeze=False
    )

    for i in range(num_outputs):
        ax = axs[0, i]
        truth = zt[:, i]
        ax.scatter(truth, zpm[:, i])

        # Identity line: perfect predictions would fall on it.
        lo, hi = truth.min(), truth.max()
        ax.plot([lo, hi], [lo, hi], "k--")

        ax.set_xlabel("True Values")
        ax.set_ylabel("Predicted Values")
        ax.set_title(f"Output {i+1}")

    plt.tight_layout()
    plt.show()




[docs]
def show_loo_errors(zi, zloom, zloov):
    """
    Visualize leave-one-out errors for each output dimension.

    For every column of `zi`, the LOO predictions `zloom` are plotted
    against the values in `zi` with error bars of half-width
    1.96 * sqrt(zloov), together with a dashed identity line.

    Parameters:
    zi (ndarray): Observed values, indexed as zi[:, i] per output
    zloom (ndarray): LOO predicted values, same layout as `zi`
    zloov (ndarray): LOO prediction variances, same layout as `zi`
    """
    n_out = zi.shape[1]
    # squeeze=False: `panels` is always 2D, with a single row of axes
    fig, panels = plt.subplots(
        1, n_out, figsize=(6 * n_out, 5), squeeze=False
    )

    for k, panel in enumerate(panels[0]):
        half_width = 1.96 * np.sqrt(zloov[:, k])
        panel.errorbar(
            zi[:, k], zloom[:, k], yerr=half_width, fmt="ko", ls="None"
        )
        panel.set_xlabel("True Values")
        panel.set_ylabel("LOO Predicted")
        panel.set_title(f"Output {k + 1} - LOO predictions with 95% CI")

        # Identity line covering the union of the current x and y ranges
        x_lo, x_hi = panel.get_xlim()
        y_lo, y_hi = panel.get_ylim()
        lo = min(x_lo, y_lo)
        hi = max(x_hi, y_hi)
        panel.plot([lo, hi], [lo, hi], "k--")

        panel.grid(True, "major", linestyle=(0, (1, 5)), linewidth=0.5)

    plt.tight_layout()
    plt.show()




[docs]
def plotmatrix(data, colors=None):
    """
    Generates a matrix scatter plot from the given 2D numerical numpy array or matrix,
    optionally colorizing the scatter points based on an array of values.

    Off-diagonal panel (i, j) is a scatter of column j (x-axis) against
    column i (y-axis); diagonal panels show a 20-bin histogram of the
    corresponding column.

    Parameters:
    data (2D numpy array): A matrix of numerical data, one variable per column.
    colors (1D numpy array, optional): An array of values to color the scatter points.
        When given (and there is at least one scatter panel), a colorbar is
        added to the right of the grid.
    """
    num_vars = data.shape[1]

    # Create a grid of subplots.  squeeze=False keeps `axes` 2D even when
    # there is a single variable, so axes[i, j] is always valid.
    fig, axes = plt.subplots(
        nrows=num_vars, ncols=num_vars, figsize=(10, 10), squeeze=False
    )

    # Last colorized scatter artist (used as the colorbar's mappable)
    sc = None

    # Iterate over all variable pairs
    for i, j in itertools.product(range(num_vars), repeat=2):
        ax = axes[i, j]

        if i == j:
            # Diagonal: plot the histograms
            ax.hist(data[:, i], bins=20, alpha=0.7)
        elif colors is not None:
            # Off-diagonal scatter plot, colorized
            sc = ax.scatter(
                data[:, j], data[:, i], alpha=0.5, s=10, c=colors, cmap="viridis"
            )
        else:
            # Off-diagonal scatter plot, default color
            ax.scatter(data[:, j], data[:, i], alpha=0.5, s=10)

        # Set labels on the outer edge
        if i == num_vars - 1:
            ax.set_xlabel(f"Var {j+1}")
        if j == 0:
            ax.set_ylabel(f"Var {i+1}")

    # Lay out the subplot grid first, leaving the right-hand 10% of the
    # figure free.  This is done before the colorbar axes is created
    # because a manually placed axes is not part of the subplot grid that
    # tight_layout manages.
    plt.tight_layout(rect=[0, 0, 0.9, 1])

    # If colorization was applied, add a colorbar outside the plots.
    # `sc` is None when there was no scatter panel (single variable).
    if sc is not None:
        # [left, bottom, width, height] in figure coordinates
        cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.7])
        fig.colorbar(sc, cax=cbar_ax)

    plt.show()




[docs]
def parallel_coordinates_plot(
    x, z, p=None, show_p=False, xi=None, zi=None, ci=None, show_type=False
):
    """
    Creates an interactive parallel coordinates plot using go.Parcoords, with optional
    custom categories for the datasets and a flag to control the display of the type variable.

    Lines are colored by `p` when it is provided, otherwise by the first
    target column (`z_0`). The figure is displayed with `fig.show()` and
    also returned.

    Parameters:
    - x: numpy array, shape (n_samples, n_dimensions)
         The input dataset for parallel coordinates.
    - z: numpy array, shape (n_samples, n_targets)
         The target or latent data.
    - p: numpy array, shape (n_samples,), optional
         The latent variable used for colorization (if provided).
         Rows coming from xi/zi get NaN for this variable.
    - show_p: bool, optional (default: False)
         If True, p will be displayed as one of the dimensions in the plot.
    - xi: numpy array, shape (n_samples, n_dimensions), optional
         Additional dataset to be displayed as type 1 (optional).
    - zi: numpy array, shape (n_samples, n_targets), optional
         Additional target data for type 1 (optional).
    - ci: numpy array, shape (n_samples,), optional
         Custom type values for the xi, zi dataset (overrides the default categories).
         Only used when xi and zi are provided.
    - show_type: bool, optional (default: False)
         If True (and xi, zi are provided), the type variable will be shown
         as the first dimension.

    Returns:
    - Plotly figure object (go.Figure)

    Raises:
    - ImportError: if pandas or plotly is not installed.
    - AssertionError: if the input arrays have inconsistent shapes.
    """
    try:
        import pandas as pd
        import plotly.colors as plotly_colors
        import plotly.graph_objects as go
    except ImportError as exc:
        raise ImportError(
            "parallel_coordinates_plot requires optional dependencies "
            "'pandas' and 'plotly'. Install them to use this function."
        ) from exc

    # Ensure x and z are 2D arrays, p is 1D array (if provided)
    assert x.ndim == 2, "x must be a 2D numpy array"
    assert z.ndim == 2, "z must be a 2D numpy array"
    if p is not None:
        assert p.ndim == 1, "p must be a 1D numpy array if provided"
        assert p.shape[0] == x.shape[0], "p must have the same number of samples as x"

    # Ensure that xi and zi are provided together and are 2D arrays if present
    if xi is not None or zi is not None:
        assert xi is not None and zi is not None, "Both xi and zi must be provided"
        assert xi.ndim == 2 and zi.ndim == 2, "xi and zi must be 2D numpy arrays"
        assert (
            xi.shape[0] == zi.shape[0]
        ), "xi and zi must have the same number of samples"
        assert (
            xi.shape[1] == x.shape[1]
        ), "xi must have the same number of dimensions as x"
        assert zi.shape[1] == z.shape[1], "zi must have the same number of targets as z"

    # After the checks above, xi and zi are either both given or both None.
    has_highlight = xi is not None and zi is not None

    x_cols = [f"var_{i}" for i in range(x.shape[1])]
    z_cols = [f"z_{j}" for j in range(z.shape[1])]

    # Main dataset: x and z side by side, tagged with type 0
    df = pd.concat(
        [pd.DataFrame(x, columns=x_cols), pd.DataFrame(z, columns=z_cols)],
        axis=1,
    )
    df["type"] = 0

    # If xi and zi are provided, append them below the main dataset
    if has_highlight:
        df_highlight = pd.concat(
            [pd.DataFrame(xi, columns=x_cols), pd.DataFrame(zi, columns=z_cols)],
            axis=1,
        )

        if ci is not None:
            # If custom categories are provided for xi and zi, use ci
            assert (
                ci.shape[0] == xi.shape[0]
            ), "ci must have the same number of samples as xi and zi"
            df_highlight["type"] = ci
        else:
            # Assign default type 1 to the xi, zi dataset
            df_highlight["type"] = 1

        # Concatenate the main and highlighted datasets
        df = pd.concat([df, df_highlight], ignore_index=True)

    # If p is provided, add it as a column for colorization
    if p is not None:
        if has_highlight:
            # p is undefined for the xi/zi rows: pad with NaN so that the
            # column length matches the concatenated DataFrame
            p_highlight = np.nan * np.ones(xi.shape[0])
            df["p"] = np.concatenate([p, p_highlight])
        else:
            df["p"] = p

    # Set up the dimensions list for the plot
    dimensions = []

    # Add the type variable as the first axis if show_type is True
    if show_type and has_highlight:
        type_values = np.unique(df["type"])
        dimensions.append(
            dict(
                range=[df["type"].min(), df["type"].max()],
                tickvals=type_values,
                ticktext=[f"Type {int(val)}" for val in type_values],
                label="Type",
                values=df["type"],
            )
        )

    # Add x variables, then z variables, each on its own axis spanning the
    # observed range of the (concatenated) column
    for col in x_cols + z_cols:
        dimensions.append(
            dict(
                range=[np.min(df[col]), np.max(df[col])],
                label=col,
                values=df[col],
            )
        )

    # Optionally add p as a dimension, based on show_p
    if p is not None and show_p:
        dimensions.append(
            dict(
                # nan-aware bounds: xi/zi rows carry NaN in this column
                range=[np.nanmin(df["p"]), np.nanmax(df["p"])],
                label="p (Latent Variable)",
                values=df["p"],
            )
        )

    # Line colors must have one entry per row of `df`, i.e. they must be
    # taken from the concatenated DataFrame and not from the main dataset
    # alone, otherwise the lengths disagree whenever xi/zi are given.
    line_color = df["p"] if p is not None else df["z_0"]

    # Create the main parallel coordinates plot
    fig = go.Figure()

    # Add the dataset with colorization
    fig.add_trace(
        go.Parcoords(
            line=dict(
                color=line_color,
                # 'Sunsetdark',  # Color scale can be customized
                colorscale=plotly_colors.diverging.Tealrose,
                showscale=True,  # Show the color bar
            ),
            dimensions=dimensions,
        )
    )

    # Customize the layout (optional)
    fig.update_layout(
        title="Interactive Parallel Coordinates Plot with Categories",
        plot_bgcolor="white",
        paper_bgcolor="white",
    )

    # Display the plot and hand the figure back to the caller
    fig.show()
    return fig