Source code for gpmpcontrib.samplingcriteria

# --------------------------------------------------------------
# Author: Emmanuel Vazquez <emmanuel.vazquez@centralesupelec.fr>
# Copyright (c) 2022-2026, CentraleSupelec
# License: GPLv3 (see LICENSE)
# --------------------------------------------------------------
import gpmp.num as gnp


def isinbox(box, x):
    b = gnp.logical_and(gnp.all(x >= box[0], axis=1), gnp.all(x <= box[1], axis=1))
    return b



[docs]
def excursion_probability(t, zpm, zpv):
    """Computes the probabilities of exceeding the threshold t for
    Gaussian predictive distributions with means zpm and variances
    zpv. The input argument must have the following sizes:

        * zpm        M x 1,
        * zpv        M x 1,

     where M is the number of points where the EI must be
     computed. The output has size M x 1.
    """
    p = gnp.empty(zpm.shape)
    delta = zpm - t
    sigma = gnp.sqrt(zpv)
    b = sigma > 0.0

    # Where sigma > 0
    u = gnp.where(b, delta / sigma, 0.0)  # Avoid division by zero
    p = gnp.where(b, gnp.normal.cdf(u), 0.0)  # Compute p where sigma > 0

    # Condition where sigma == 0
    p = gnp.where(b, p, delta > 0.0)

    # # Compute p where sigma > 0
    # u = delta[b] / sigma[b]
    # p[b] = gnp.normal.cdf(u)

    # # Compute p where sigma == 0
    # b = gnp.logical_not(b)
    # p[b] = gnp.asdouble(delta[b] > 0)

    return p




[docs]
def excursion_logprobability(t, zpm, zpv):
    """Computes the log probabilities of exceeding the threshold t for
    Gaussian predictive distributions with means zpm and variances zpv.

    Parameters:
      t (float): Threshold value.
      zpm (gnp.array): Predictive means, shape (M, 1).
      zpv (gnp.array): Predictive variances, shape (M, 1).

    Returns:
      gnp.array: Log probabilities of exceeding the threshold, shape (M, 1).
    """
    delta = zpm - t
    sigma = gnp.sqrt(zpv)
    b = sigma > 0.0

    u = gnp.where(b, delta / sigma, 0.0)  # Avoid division by zero
    log_p = gnp.where(b, gnp.normal.logcdf(u), gnp.where(delta > 0, 0.0, -gnp.inf))

    return log_p




[docs]
def excursion_misclassification_probability(t, zpm, zpv):
    """
    Computes the probability of misclassification for the excursion set.

    The misclassification probability is defined as:
        tau_n(x) = min(P_n(ξ(x) > t), 1 - P_n(ξ(x) > t))

    This measures the uncertainty in classifying whether a point belongs
    to the excursion set or not assuming a hard classifier eta_n(x) = 1_{p_n(x) > 1/2}

    See Sequential design of computer experiments for the estimation of a probability of failure.
    J Bect, D Ginsbourger, L Li, V Picheny, E Vazquez. Statistics and Computing 22, 773-793
    https://arxiv.org/pdf/1009.5177

    Parameters
    ----------
    t : float
        Threshold value defining the excursion set.
    zpm : gnp.array, shape (M, 1)
        Predictive mean values at M points.
    zpv : gnp.array, shape (M, 1)
        Predictive variance values at M points.

    Returns
    -------
    gnp.array, shape (M, 1)
        Misclassification probabilities for each point.
    """
    g = excursion_probability(t, zpm, zpv)
    return gnp.minimum(g, 1 - g)



[docs]
def excursion_wMSE(t, zpm, zpv, alpha=1.0, beta=0.5):
    """
    Computes the weighted mean squared error (wMSE) for excursion set estimation.

    The wMSE is defined as:
        wMSE(x) = tau(x) * zpv

    where:
      - tau(x) is the misclassification probability,
      - zpv is the predictive variance.

    Parameters
    ----------
    t : float
        Threshold value defining the excursion set.
    zpm : gnp.array, shape (M, 1)
        Predictive mean values at M points.
    zpv : gnp.array, shape (M, 1)
        Predictive variance values at M points.

    Returns
    -------
    gnp.array, shape (M, 1)
        Weighted mean squared error at each point.

    Notes
    -----
    - Weighting behavior:
      * α controls misclassification sensitivity
      * Higher α emphasizes uncertain classifications (boundary regions)
      * Higher β emphasizes high-variance regions
      * β=1.0: Strong focus on high-variance regions (σ² term
        dominates). For global exploration, β=1.0 may be preferred.
      * β=0.5: Balanced exploration (σ term, recommended default). For
        excursion set estimation, β=0.5 typically provides better
        boundary detection.
    """
    tau = excursion_misclassification_probability(t, zpm, zpv)
    return tau**alpha * zpv**beta




[docs]
def box_probability(box, zpm, zpv):
    """Compute probability that predictions fall within given bounds
    for each dimension.

    For Gaussian predictions N(zpm, zpv), calculates P(lower <= value
    <= upper) per dimension.

    Parameters
    ----------
    box : (2, dim_output) array-like
        [lower, upper] bounds for each output dimension
    zpm : (n, dim_output) gnp.array
        Predictive means
    zpv : (n, dim_output) gnp.array
        Predictive variances (non-negative)

    Returns
    -------
    Returns
    -------
    tuple of (gnp.array, gnp.array)
        - product_probs: (n, 1) array of product probabilities across dimensions
        - probs: (n, dim_output) array of probabilities per dimension

    Notes
    -----
    - Returns 1 when zpv=0 and zpm is within bounds, 0 otherwise
    - Handles infinite bounds

    """
    lower, upper = gnp.asarray(box[0]), gnp.asarray(box[1])
    delta_min, delta_max = lower - zpm, upper - zpm
    sigma = gnp.sqrt(zpv)
    b = sigma > 0

    # Compute probabilities
    u_min = gnp.where(b, delta_min / sigma, 0.0)
    u_max = gnp.where(b, delta_max / sigma, 0.0)

    probs = gnp.where(
        b, 
        gnp.normal.cdf(u_max) - gnp.normal.cdf(u_min), 
        gnp.asdouble((delta_max > 0) & (delta_min < 0))
    )

    # Compute product across dimensions (axis=1)
    probs_product = gnp.prod(probs, axis=1, keepdims=True)

    return probs_product, probs




[docs]
def box_logprobability(box, zpm, zpv):
    """Compute log probabilities of predictions being within given bounds.

    Returns
    -------
    tuple of (gnp.array, gnp.array)
        - sum_log_probs: (n, 1) array of summed log probabilities across dimensions
        - log_probs: (n, dim_output) array of log probabilities per dimension
    """
    lower, upper = gnp.asarray(box[0]), gnp.asarray(box[1])
    delta_min, delta_max = lower - zpm, upper - zpm
    sigma = gnp.sqrt(zpv)
    b = sigma > 0

    # Compute log probabilities
    u_min = gnp.where(b, delta_min / sigma, 0)
    u_max = gnp.where(b, delta_max / sigma, 0)

    log_phi_max = gnp.normal.logcdf(u_max)
    log_phi_min = gnp.normal.logcdf(u_min)

    mask = log_phi_max > log_phi_min
    log_probs = gnp.where(
        b & mask,
        log_phi_max + gnp.log1p(-gnp.exp(log_phi_min - log_phi_max)),
        gnp.where((delta_max > 0) & (delta_min < 0), 0.0, -gnp.inf),
    )

    # Compute sum across dimensions (axis=1)
    sum_logprobs = gnp.sum(log_probs, axis=1, keepdims=True)

    return sum_logprobs, log_probs




[docs]
def box_misclassification_probability(box, zpm, zpv):
    """
    Computes the probability of misclassification for a box.

    Returns
    -------
    gnp.array, shape (M, 1)
        Misclassification probabilities for each point.
    """
    g_prod, g = box_probability(box, zpm, zpv)
    return gnp.minimum(g_prod, 1 - g_prod), gnp.minimum(g, 1 - g)




[docs]
def box_misclassification_logprobability(box, zpm, zpv, clip_logthreshold=1e-6):
    """Computes log misclassification probabilities for box estimation.
    
    Numerically stable version using log probabilities to avoid underflow.
    
    Returns
    -------
    tuple of (gnp.array, gnp.array)
        - sum_log_tau: (n, 1) array of summed log misclassification probs
        - log_tau: (n, dim_output) array of log misclassification probs
    """
    # Get log probabilities (more stable than log(probability))
    sum_log_probs, log_probs = box_logprobability(box, zpm, zpv)
    
    # Compute log(1 - exp(log_probs)) using log1mexp for stability
    probs = gnp.exp(log_probs)
    probs = gnp.clip(probs, clip_logthreshold, 1 - clip_logthreshold)
    log_1_minus_p = gnp.log1p(-probs)  # More accurate than log(1-p)
    
    # log_tau = log(min(p, 1-p)) = min(log_probs, log_1_minus_p)
    log_tau = gnp.minimum(log_probs, log_1_minus_p)
    
    # Sum across dimensions
    sum_log_tau = gnp.sum(log_tau, axis=1, keepdims=True)
    
    return sum_log_tau, log_tau



[docs]
def box_wMSE(box, zpm, zpv, normalization=1.0, alpha=1.0, beta=0.5):
    """Computes the weighted mean squared error (wMSE) for box/bounds estimation.

    The wMSE combines misclassification uncertainty and predictive variance:
        wMSE = τ^α * (σ² * normalization)^β
    where:
        τ = misclassification probability (min(p, 1-p))
        σ² = predictive variance
        α, β = weighting exponents

    Parameters
    ----------
    box : (2, dim_output) array-like
        [lower, upper] bounds for each output dimension
    zpm : (n, dim_output) gnp.array
        Predictive means
    zpv : (n, dim_output) gnp.array
        Predictive variances (must be non-negative)
    normalization : float or (dim_output,) array-like, optional
        Scaling factor for variance (default: 1.0). Can be either:
        - Single float (applied to all dimensions)
        - Per-dimension scaling factors
    alpha : float, optional
        Exponent for misclassification term (default: 1.0)
    beta : float, optional
        Exponent for variance term (default: 1.0)

    Returns
    -------
    tuple of (gnp.array, gnp.array)
        - wmse_sum: (n, 1) array of summed wMSE across dimensions
        - wmse: (n, dim_output) array of wMSE per dimension

    Notes
    -----
    - The normalization parameter allows per-dimension scaling
    - Weighting behavior:
      * α controls misclassification sensitivity
      * Higher α emphasizes uncertain classifications (boundary regions)
      * Higher β emphasizes high-variance regions
      * β=1.0: Strong focus on high-variance regions (σ² term
        dominates). For global exploration, β=1.0 may be preferred.
      * β=0.5: Balanced exploration (σ term, recommended default). For
        excursion set estimation, β=0.5 typically provides better
        boundary detection.
    """
    # Get log misclassification probabilities
    sum_log_tau, log_tau = box_misclassification_logprobability(box, zpm, zpv)
    # Convert back from log space with proper scaling
    tau_alpha = gnp.exp(alpha * log_tau)
    # Compute variance term
    var_term = (zpv * normalization)**beta
    # Compute per-dimension wMSE
    wmse = tau_alpha * var_term
    # Sum across dimensions (in original space)
    wmse_sum = gnp.sum(wmse, axis=1, keepdims=True)
    
    return wmse_sum, wmse




[docs]
def expected_improvement(t, zpm, zpv):
    """Computes the Expected Improvement (EI) criterion for a
     maximization problem given a threshold t and Gaussian predictive
     distributions with means zpm and variances zpv. The input
     argument must have the following sizes:

        * zpm        M x 1,
        * zpv        M x 1,

     where M is the number of points where the EI must be
     computed. The output has size M x 1.

     REFERENCES

    [1] D. R. Jones, M. Schonlau and William J. Welch. Efficient global
        optimization of expensive black-box functions.  Journal of Global
        Optimization, 13(4):455-492, 1998.

    [2] J. Mockus, V. Tiesis and A. Zilinskas. The application of Bayesian
        methods for seeking the extremum. In L.C.W. Dixon and G.P. Szego,
        editors, Towards Global Optimization, volume 2, pages 117-129, North
        Holland, New York, 1978.

    """
    ei = gnp.empty(zpm.shape)
    delta = zpm - t
    sigma = gnp.sqrt(zpv)
    b = sigma > 0

    # Compute the EI where sigma > 0
    u = gnp.where(b, delta / sigma, 0)
    ei = gnp.where(b, sigma * (gnp.normal.pdf(u) + u * gnp.normal.cdf(u)), 0)

    # Compute the EI where sigma == 0
    ei = gnp.where(b, ei, gnp.maximum(0, delta))

    # Correct numerical inaccuracies
    ei = gnp.where(ei > 0, ei, 0)

    return ei