#!/usr/bin/env python3

"""Emulator module for PyMimic

This module provides :class:`Blp` and :class:`Blup` classes for use in
computing the best linear predictor and best linear unbiased predictor
of a random variable based on a finite random process.

"""

import sys
import warnings
import numpy as np
import scipy.optimize as optimize
import scipy.linalg as linalg
import pymimic as mim

def _dot(x, y):
    r"""Return the column-wise dot product of two arrays"""
    x = np.atleast_1d(x)
    y = np.atleast_1d(y)
    try:
        # If x and y are not scalars
        return np.einsum("ji, ji -> i", x, y)
    except ValueError:
        # If x and y are scalars
        return np.einsum("i, i -> i", x, y)

def _opt(linear_predictor, opt_method="mle", n_start=None,
         method="Powell", bounds=None, tol=None, callback=None,
         options=None):
    r"""Optimize the parameter of the covariance kernel model"""
    class _Obj:
        def __init__(self, opt_method):
            if opt_method == "mle":
                self.obj = self._obj_lnL
            elif opt_method == "loocv":
                self.obj = self._obj_R2
            else:
                raise ValueError(
                    "unknown optimization method {}.".format(opt_method)
                )

        def __call__(self, x):
            return self.obj(x)

        def _obj_R2(self, x):
            r"""Return sum of squares of LOO residuals for given covfunc arg"""
            linear_predictor.args = tuple(
                [theta[i](x[i]) for i in range(len(x))]
            )
            return linear_predictor.R2

        def _obj_lnL(self, x):
            r"""Return negative of log-likelihood of covfunc arg"""
            # TO DO Be able to pass arbitrary probability distribution
            linear_predictor.args = tuple(
                [theta[i](x[i]) for i in range(len(x))]
            )
            return - linear_predictor.lnL

    class _Param:
        r"""Parameter transformation"""
        def __init__(self, inf_bound=False):
            if inf_bound:
                self.trans = np.tan
            else:
                self.trans = lambda x: np.sinh(10.*x)/10.

        def __call__(self, x):
            return self.trans(x)

    # Specify objective function
    obj = _Obj(opt_method)
    # Set bounds
    if bounds:
        linear_predictor.argbounds = bounds
    elif linear_predictor.argbounds is None:
        linear_predictor.argbounds = linear_predictor._argbounds()
    bounds = np.copy(linear_predictor.argbounds)
    # Transform infinite search bounds to finite using arctan.
    ind_infinite = np.isinf(bounds)
    bounds[ind_infinite] = np.pi/2.
    # Transform finite search bounds using arcsinh
    row_infinite = np.any(ind_infinite, axis=1)
    bounds[~row_infinite] = np.arcsinh(10.*bounds[~row_infinite])/10.
    # Transform parameter
    theta = [_Param(val) for val in row_infinite]
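    # Example (illustrative): for a parameter with bounds [0., np.inf] the
    # search interval becomes [0., np.pi/2.] and a search value u is mapped
    # back to the parameter as np.tan(u); for finite bounds such as [0., 4.]
    # the search interval is np.arcsinh(10.*np.array([0., 4.]))/10.
    # (approximately [0., 0.44]) and u is mapped back as np.sinh(10.*u)/10.
    # Both transforms are monotonic, so bounds and parameters transform
    # consistently.
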
    # Multistart optimization
    if n_start is None:
        n_start = 10*linear_predictor.ndim
    try:
        # If an LHS design is available as a file (i.e. if n_start > ndim)
        x0 = mim.design(bounds, "lhs", n=n_start)
    except ValueError:
        # If LHS design is not available as a file
        x0 = mim.design(bounds, "random", n=n_start)
    fun_best = np.inf
    for i, x0_i in enumerate(x0):
        try:
            res = optimize.minimize(obj, x0=x0_i, method=method,
                                    jac=None, hess=None, hessp=None,
                                    bounds=bounds, tol=tol,
                                    callback=callback,
                                    options=options)
            if res.success and res.fun < fun_best:
                fun_best = res.fun
                res_best = res
                # args_best = linear_predictor.args
                # Copy the state so that later iterations do not overwrite it
                vars_best = vars(linear_predictor).copy()
            elif not res.success:
                warnings.warn("multistart iteration failed.", UserWarning)
        except ValueError:
            pass
    # linear_predictor.args = args_best
    for key, val in vars_best.items():
        setattr(linear_predictor, key, val)
    return res_best

class _CovfuncWrapper:
    r"""Wrap positive-definite kernel so it can take additional arguments"""
    def __init__(self, f, args):
        self.f = f
        if args is None:
            self.args = ()
        elif np.isscalar(args):
            self.args = (args,)
        else:
            self.args = args
        
    def __call__(self, s, t):
        return self.f(s, t, *self.args)
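
# Example (illustrative): _CovfuncWrapper fixes the extra kernel arguments
# so that the wrapped kernel can be called with two index arrays only, e.g.
#
#     >>> kernel = _CovfuncWrapper(mim.kernel.squared_exponential, (1., 1.))
#     >>> kernel(ttrain, ttrain)  # same as squared_exponential(ttrain, ttrain, 1., 1.)
#
# A scalar ``args`` is promoted to a one-element tuple and ``None`` to an
# empty tuple before unpacking.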

class _LinearPredictor():
    r"""Base class for classes Blp and Blup"""
    # This class allows for dependent attributes to be recomputed if
    # and only if the independent attributes on which they depend are
    # changed. See: https://stackoverflow.com/questions/57439169.
    _independent_attribs = {"args"}
    _dependent_attribs = {"kernel", "K", "U_K"}
    def __init__(self, ttrain, xtrain, var_error=None,
                 covfunc=mim.kernel.squared_exponential, args=()):
        self._ttrain = ttrain
        self._xtrain = xtrain
        self._var_error = var_error
        self._covfunc = covfunc
        self._args = args
        self._kernel = None
        self._K = None
        self._U_K = None
        self.argbounds = None
        
    def __setattr__(self, name, value):
        # Update dependent attribute if and only if self.args changes
        if name in self._independent_attribs:
            name = f"_{name}"
            for var in self._dependent_attribs:
                super().__setattr__(f"_{var}", None)
        super().__setattr__(name, value)
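
    # Example (illustrative): assigning to an independent attribute clears
    # the cached dependent attributes, which are then recomputed lazily, e.g.
    #
    #     >>> blp = Blp(ttrain, xtrain, args=(1., 1.))
    #     >>> K1 = blp.K          # computed and cached
    #     >>> blp.args = (2., 1.)
    #     >>> K2 = blp.K          # cache was reset, so K is recomputed
    #
    # Attributes not listed in ``_independent_attribs`` are set as usual.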

    @property
    def ttrain(self):
        r"""Training inputs"""
        return self._ttrain

    @property
    def ntrain(self):
        r"""Number of training data"""
        return len(self._ttrain)

    @property
    def ndim(self):
        r"""Dimension of the training inputs"""
        try:
            return self._ttrain.shape[1]
        except IndexError:
            return 1

    @property
    def xtrain(self):
        r"""Training outputs"""
        return self._xtrain

    @property
    def var_error(self):
        r"""Variance of the error in the training outputs"""
        if isinstance(self._var_error, (list, tuple, np.ndarray)):
            # If var_error is array_like
            return self._var_error
        else:
            # If var_error is None or scalar
            if self._var_error is None:
                self._var_error = 0.
            return self._var_error*np.ones(self.ntrain)
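
    # Example (illustrative): a scalar ``var_error`` of 0.1 with three
    # training points is broadcast to array([0.1, 0.1, 0.1]); an array_like
    # value is returned unchanged.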
        
    @property
    def args(self):
        r"""Additional arguments required to specify the `covfunc`"""
        return self._args
    
    @property
    def kernel(self):
        r"""Wrapper for `covfunc`"""
        if self._kernel is None:
            self._kernel = _CovfuncWrapper(self._covfunc, self.args)
        return self._kernel

    @property
    def K(self):
        r"""Covariance matrix"""
        if self._K is None:
            K = self.kernel(self.ttrain, self.ttrain)
            if self.var_error is not None:
                K = K + np.diag(self.var_error)
            self._K = K
        return self._K
        
    @property
    def U_K(self):
        r"""Cholesky decomposition of the covariance matrix"""
        if self._U_K is None:
            self._U_K = linalg.cho_factor(self.K)
        return self._U_K
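
    # Example (illustrative): the factor returned by
    # ``scipy.linalg.cho_factor`` is meant to be consumed by
    # ``scipy.linalg.cho_solve``, as in the computation of Gamma below, e.g.
    #
    #     >>> c = linalg.cho_factor(blp.K)           # (c, lower) pair
    #     >>> x = linalg.cho_solve(c, blp.xtrain)    # solves K @ x = xtrain
    #
    # Caching the factorization avoids refactorizing K for every quantity
    # that needs K^{-1} applied to a vector.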
        
    def _argbounds(self):
        """Return kernel parameter bounds"""
        domain = np.max(self.ttrain, axis=0) - np.min(self.ttrain, axis=0)
        l_min = (np.prod(domain)/self.ntrain)**(1./self.ndim)
        if self._covfunc is mim.kernel.exponential:
            return np.vstack(
                ([0., np.inf],
                 [0., 2.    ],
                 np.repeat([[0., l_min**(-2.)]], self.ndim, axis=0))
            )
        elif self._covfunc is mim.kernel.gneiting:
            return np.vstack(
                ([0., np.inf],
                 [0., 2.    ],
                 np.repeat([[0., l_min**(-2.)]], self.ndim, axis=0))
            )
        elif self._covfunc is mim.kernel.matern:
            return np.vstack(
                ([0., np.inf],
                 [0., np.inf],
                 np.repeat([[0., l_min**(-2.)]], self.ndim, axis=0))
            )
        elif self._covfunc is mim.kernel.neural_network:
            return np.vstack(
                ([0., np.inf],
                 np.repeat([[0., l_min**(-2.)]], self.ndim + 1, axis=0))
            )
        elif self._covfunc is mim.kernel.periodic:
            return np.vstack(
                ([0., np.inf],
                 np.repeat([[l_min, np.inf]], self.ndim, axis=0),
                 np.repeat([[0., l_min**(-2.)]], self.ndim, axis=0))
            )
        elif self._covfunc is mim.kernel.rational_quadratic:
            return np.vstack(
                ([0., np.inf],
                 [0., np.inf],
                 np.repeat([[0., l_min**(-2.)]], self.ndim, axis=0))
            )
        elif self._covfunc is mim.kernel.squared_exponential:
            return np.vstack(
                ([0., np.inf],
                 np.repeat([[0., l_min**(-2.)]], self.ndim, axis=0))
            )
        else:
            raise TypeError(
                "unknown covariance kernel."
            )
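
    # Example (illustrative): for the default squared-exponential kernel on
    # a two-dimensional index set, ``_argbounds`` returns
    #
    #     [[0., np.inf     ],
    #      [0., l_min**-2. ],
    #      [0., l_min**-2. ]]
    #
    # i.e. one unbounded row followed by one row per input dimension, where
    # ``l_min`` estimates the typical spacing of the training inputs,
    # (np.prod(domain)/ntrain)**(1./ndim). The interpretation of the rows
    # follows the argument order of the corresponding kernel in
    # ``pymimic.kernel``.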

class Blp(_LinearPredictor):
    r"""Best linear predictor of a random variable

    Let :math:`X := \{X_{t}\}_{t \in T}` be a random process
    (considered as a column vector) with finite index set :math:`T`
    and second-moment kernel :math:`k`. Let :math:`Z` be a random
    variable to which the second-moment kernel extends by writing
    :math:`Z := X_{t_{0}}`. The best linear predictor (BLP) of
    :math:`Z` based on :math:`X` is

    .. math::

       Z^{*} = \sigma^{\mathrm{t}}K^{-1}X

    with mean-squared error

    .. math::

       \operatorname{MSE}(Z^{*}) = k(t_{0}, t_{0})
       - \sigma^{\mathrm{t}}K^{-1}\sigma,

    where :math:`\sigma := (k(t_{0}, t_{i}))_{i}` and :math:`K :=
    (k(t_{i}, t_{j}))_{ij}`.

    Given a realization of :math:`X`, namely :math:`x`, we may
    compute the realization of :math:`Z^{*}`, namely :math:`z^{*}`,
    by making the substitution :math:`X = x`.

    Parameters
    ----------
    ttrain : (n, d) array_like
        Training input.
    xtrain : (n,) array_like
        Training output.
    var_error : scalar or (n,) array_like, optional
        Variance of error in training output.
    covfunc : callable, optional
        Second-moment kernel, ``covfunc(s, t, *args) -> array_like``
        with signature ``(n, d), (m, d) -> (n, m)``, where ``args``
        is a tuple needed to completely specify the function.
    args : (p,) tuple, optional
        Extra arguments to be passed to the second-moment kernel,
        ``covfunc``.

    Attributes
    ----------
    ttrain : (n, d) array_like
        Training inputs
    xtrain : (n,) array_like
        Training outputs
    ntrain : int
        Number of training data
    ndim : int
        Dimension of the training inputs
    var_error : scalar or (n,) array_like
        Variance of the error in the training outputs
    args : tuple
        Additional arguments required to specify ``covfunc``
    kernel : callable
        Second-moment kernel, additional arguments specified
    K : (n, n) ndarray
        Second-moment matrix for the training inputs.
    argbounds : (m, n) ndarray or None
        Bounds of the parameter space.
    loocv : (n,) ndarray
        Leave-one-out residuals.
    R2 : float
        Leave-one-out cross-validation score.
    lnL : float
        Log-likelihood of the parameter tuple under the assumption
        that training data are a realization of a Gaussian random
        process.

    Raises
    ------

    Warns
    -----

    Warnings
    --------
    If the matrix :math:`K` is singular then the BLP does not
    exist. This happens when any two elements of the random process
    :math:`X` are perfectly correlated (in this case the rows of
    :math:`K` are linearly dependent).

    If the matrix :math:`K` is nearly singular rather than singular
    then the BLP is numerically unstable. This happens when any two
    elements of the random process :math:`X` are highly correlated
    rather than perfectly correlated (in this case the rows of
    :math:`K` are nearly linearly dependent).

    There are several possible solutions to these problems: (1)
    remove all but one of the offending elements of the training
    data; (2) resample the random process using different training
    inputs; (3) use a different second-moment kernel; (4) add random
    noise to the training output.

    Optimization of the BLP (implemented by the class method
    :func:`Blp.opt`) will also fail when the second-moment matrix
    becomes nearly singular.

    See also
    --------
    :class:`Blup`
        Best linear unbiased predictor.

    Notes
    -----
    The quantities :math:`z^{*}` and
    :math:`\operatorname{MSE}(Z^{*})` are computed using the class
    method :func:`Blp.xtest`.

    References
    ----------

    Examples
    --------
    One-dimensional index set:

    .. sourcecode:: python

       >>> ttrain = [0., 1.]
       >>> xtrain = [0., 1.]
       >>> mim.Blp(ttrain, xtrain, args=[1., 1.])
       <pymimic.emulator.Blp object at 0x7f87c9f8af28>

    Second-moment kernel provided explicitly:

    .. sourcecode:: python

       >>> mim.Blp(ttrain, xtrain, covfunc=mim.kernel.squared_exponential, args=[1., 1.])
       <pymimic.emulator.Blp object at 0x7f1c519c0a20>

    Two-dimensional index set:

    .. sourcecode:: python

       >>> ttrain = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]
       >>> xtrain = [0., 1., 1., 2.]
       >>> mim.Blp(ttrain, xtrain, args=[1., 1., 1.])
       <pymimic.emulator.Blp object at 0x7f1c519c0940>

    Second-moment kernel provided explicitly:

    .. sourcecode:: python

       >>> mim.Blp(ttrain, xtrain, covfunc=mim.kernel.squared_exponential, args=[1., 1., 1.])
       <pymimic.emulator.Blp object at 0x7f970017fd68>

    """
    _independent_attribs = {"args"}
    _dependent_attribs = {"kernel", "K", "U_K", "Gamma", "loocv",
                          "R2", "lnL"}

    def __init__(self, ttrain, xtrain, var_error=None,
                 covfunc=mim.kernel.squared_exponential, args=()):
        _LinearPredictor.__init__(self, ttrain, xtrain, var_error,
                                  covfunc, args)
        self._Gamma = None
        self._loocv = None
        self._R2 = None
        self._lnL = None

    @property
    def Gamma(self):
        r"""Matrix Gamma

        :meta private:

        """
        if self._Gamma is None:
            self._Gamma = linalg.cho_solve(self.U_K, self.xtrain)
        return self._Gamma

    @property
    def loocv(self):
        r"""LOO residuals and their variances

        :meta private:

        """
        if self._loocv is None:
            var_residuals = 1./np.diag(np.linalg.inv(self.K))
            residuals = self.Gamma*var_residuals
            self._loocv = residuals, var_residuals
        return self._loocv

    @property
    def R2(self):
        r"""Leave-one-out cross-validation score

        :meta private:

        """
        if self._R2 is None:
            try:
                self._R2 = np.sum(self.loocv[0]**2.)/self.ntrain
            except (linalg.LinAlgError, ValueError):
                self._R2 = np.inf
        return self._R2

    @property
    def lnL(self):
        r"""Log-likelihood of the covariance function parameter

        :meta private:

        """
        if self._lnL is None:
            try:
                log_det_K = 2.*np.sum(np.log(np.diag(self.U_K[0])))
                Q = linalg.cho_solve(self.U_K, self.xtrain)
                self._lnL = - self.xtrain.T@Q - log_det_K
            except (linalg.LinAlgError, ValueError):
                self._lnL = - np.inf
        return self._lnL

    def opt(self, opt_method="mle", n_start=None, method="Powell",
            bounds=None, tol=None, callback=None, options=None):
        r"""Optimize the parameter of the second-moment kernel model

        Set the second-moment kernel arguments, ``*args``, to their
        optimum values using multistart local optimization. If the
        second-moment kernel is user defined, ``*args`` must be a
        sequence of scalars.

        Parameters
        ----------
        opt_method : str, optional
            Optimization method to be used. Should be either:

            - ``"mle"`` (maximum-likelihood method)
            - ``"loocv"`` (leave-one-out cross-validation method).

        n_start : int, optional
            Number of multistart iterations to be used. If ``None``
            then :math:`10d` iterations are used, where :math:`d` is
            the dimension of the parameter space.
        method : str or callable, optional
            Type of solver to be used by the minimizer.
        bounds : sequence or scipy.optimize.Bounds, optional
            Bounds on variables for Nelder-Mead, L-BFGS-B, TNC,
            SLSQP, Powell, and trust-constr methods. There are two
            ways to specify the bounds:

            - instance of the scipy.optimize.Bounds class;
            - sequence of ``(min, max)`` pairs for each parameter.

            Bounds must be provided if ``covfunc`` is not included in
            the PyMimic module.
        tol : float, optional
            Tolerance for termination. When ``tol`` is specified, the
            selected minimization algorithm sets some relevant
            solver-specific tolerance(s) equal to ``tol``.
        callback : callable, optional
            A function to be called after each iteration.
        options : dict, optional
            A dictionary of solver options.

        Returns
        -------
        res : OptimizeResult
            The best of ``n_start`` optimization results, represented
            as an ``OptimizeResult`` object.

        Raises
        ------

        Warns
        -----

        Warnings
        --------
        Suppose that the second-moment kernel model is of the form
        :math:`k(s, t) = a{}f(s, t)` for some constant :math:`a` and
        some positive-semidefinite kernel :math:`f`. In this case the
        method of leave-one-out cross-validation will fail to find
        :math:`a` if there are no errors on the training outputs
        (i.e. if ``var_error`` is ``None``, zero, or a tuple of
        zeros). This is because the BLP of a random variable is
        independent of :math:`a`.

        Typically, the likelihood and leave-one-out cross-validation
        score have multiple extrema. Use :func:`Blp.opt` in the
        knowledge that the global optimum may not be found, and that
        the global optimum may in any case result in over- or
        under-fitting of the data [RW06_].

        See also
        --------
        :func:`scipy.optimize.minimize`
            The Scipy function for the minimization of a scalar
            function of one or more variables.

        Notes
        -----
        The function optimizes the parameter of the second-moment
        kernel model using either (i) the method of maximum
        likelihood or (ii) the method of leave-one-out
        cross-validation. In the first case, the likelihood of the
        parameter is maximized under the assumption that training and
        test data are a realization of a centred Gaussian random
        process. In the second case, the parameter is chosen so as to
        minimize the leave-one-out cross-validation score (i.e. the
        sum of the squares of the leave-one-out residuals).

        The optimization is done using multiple starts of
        :meth:`scipy.optimize.minimize`. The keywords of
        :meth:`Blp.opt` (``method``, ``bounds``, ``tol``,
        ``callback``, and ``options``) are passed directly to this
        function. The starting points are chosen using Latin
        hypercube sampling, unless the training data are scalars or
        the number of training data is greater than the number of
        starts, when uniform random sampling is used.

        The optimization is performed in a transformed parameter
        space. If either bound for an element of the parameter tuple
        is infinite (``np.inf``) then that element is transformed
        using :math:`\mathrm{tan}`. If both bounds for an element of
        the parameter tuple are finite then that element is
        transformed using :math:`\mathrm{sinh}(10\,\cdot)/10`.

        References
        ----------
        .. [RW06] Rasmussen, C. E., and C. K. I. Williams. 2006.
           *Gaussian processes for machine learning*. Cambridge, MA:
           MIT Press.

        Examples
        --------
        First create an instance of :class:`Blp`.

        .. sourcecode:: python

           >>> ttrain = np.linspace(0., np.pi, 10)
           >>> xtrain = np.sin(ttrain)
           >>> blp = mim.Blp(ttrain, xtrain)

        Now perform the optimization.

        .. sourcecode:: python

           >>> blp.opt()
                 fun: -52.99392000860139
            hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
                 jac: array([276.77485929, 585.7648087 ])
             message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
                nfev: 9
                 nit: 1
                njev: 3
              status: 0
             success: True
                   x: array([1.49225651, 0.23901602])

        Inspect the instance attribute ``args`` to see that it has
        been set to its maximum-likelihood estimate.

        .. sourcecode:: python

           >>> blp.args
           [12.706204736174696, 0.5411814081739366]

        See Notes for an explanation of the discrepancy between the
        values of ``x`` and ``blp.args``.

        """
        return _opt(self, opt_method, n_start, method, bounds, tol,
                    callback, options)

    def xtest(self, t):
        r"""Return the best linear predictor of a random variable

        Returns the realization, :math:`z^{*}`, of the best linear
        predictor of a random variable based on the realization of a
        finite random process, :math:`x`.

        Parameters
        ----------
        t : (m, d) array_like
            Test input.

        Returns
        -------
        xtest : (m,) ndarray
            Test output.
        mse : (m,) ndarray, optional
            Mean-squared error in the test output.

        Raises
        ------

        Warns
        -----

        Warnings
        --------

        See also
        --------
        :func:`Blup.xtest`
            Best linear unbiased predictor

        Notes
        -----

        Examples
        --------
        One-dimensional index set, one test input:

        .. sourcecode:: python

           >>> ttrain = [0., 1.]
           >>> xtrain = [0., 1.]
           >>> blp = mim.Blp(ttrain, xtrain, args=[1., 1.])
           >>> blp.xtest(0.62)
           (array(0.6800459), array(0.02680896))

        Two test inputs:

        .. sourcecode:: python

           >>> blp.xtest([0.62, 0.89])
           (array([0.6800459, 0.92670531]), array([0.02680896, 0.00425306]))

        Two-dimensional index set, one test input:

        .. sourcecode:: python

           >>> ttrain = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]
           >>> xtrain = [0., 1., 1., 2.]
           >>> blp = Blp(ttrain, xtrain, args=[1., 1., 1.])
           >>> blp.xtest([[0.43, 0.27]])
           (array([0.82067282]), array([0.04697306]))

        Two test inputs at once:

        .. sourcecode:: python

           >>> blp.xtest([[0.43, 0.27], [0.16, 0.93]])
           (array([0.82067282, 1.17355541]), array([0.04697306, 0.0100251]))

        """
        if np.isscalar(t):
            # Allow for a single scalar index
            t = np.atleast_1d(t)
            scalar_input = True
        else:
            # Normally, there will be multiple scalar or vector indices
            scalar_input = False
        # BLP
        k = self.kernel(self.ttrain, t)
        # k = np.atleast_2d(self.kernel(self.ttrain, t))
        xtest = k.T@self.Gamma
        # MSE of BLP
        alpha = np.linalg.solve(self.K, k)
        mse = np.diag(self.kernel(t, t)) - _dot(alpha, k)
        # mse = ([self.kernel(t_i, t_i).item() for t_i in t]
        #        - _dot(alpha, k))
        # A very small MSE may be computed as slightly negative due
        # to a lack of numerical precision
        mse[mse < 0.] = 0.
        if scalar_input:
            return np.squeeze(xtest), np.squeeze(mse)
        return xtest, mse

class Blup(_LinearPredictor):
    r"""Best linear unbiased predictor of a random variable [G62_].

    Let :math:`X := \{X_{t}\}_{t \in T}` be a random process
    (considered as a column vector) with finite index set :math:`T`
    and covariance kernel :math:`k`. Let :math:`Z` be a random
    variable to which the covariance kernel extends by writing
    :math:`Z := X_{t_{0}}`. Let :math:`(\phi_{i})_{i}` be a basis for
    the space of possible mean-value functions for :math:`Z`. The
    best linear unbiased predictor (BLUP) of :math:`Z` based on
    :math:`X` is

    .. math::

       Z^{\dagger} = \Theta^{\dagger}
       + \sigma^{\mathrm{t}}K^{-1}\mathrm{D}

    where :math:`\sigma := (k(t_{0}, t_{i}))_{i}`, :math:`K :=
    (k(t_{i}, t_{j}))_{ij}`, :math:`\Theta^{\dagger} =
    \phi^{\mathrm{t}}\mathrm{B}`, :math:`\mathrm{D} := X -
    \Phi\mathrm{B}`, and where :math:`\phi := (\phi_{i}(t_{0}))_{i}`,
    :math:`\Phi := (\phi_{i}(t_{j}))_{ij}`, and :math:`\mathrm{B} :=
    (\Phi^{\mathrm{t}}K^{\,-1}\Phi)^{-1}\Phi^{\mathrm{t}}K^{\,-1}X`.

    Given a realization of :math:`X`, namely :math:`x`, we may
    compute the realization of :math:`Z^{\dagger}`, namely
    :math:`z^{\dagger}`, by making the substitution :math:`X = x`.

    Parameters
    ----------
    ttrain : (n, d) array_like
        Training input.
    xtrain : (n,) array_like
        Training output.
    var_error : scalar or (n,) array_like, optional
        Variance of error in training output.
    covfunc : callable, optional
        Covariance kernel, ``covfunc(s, t, *args) -> array_like``
        with signature ``(n, d), (m, d) -> (n, m)``, where ``args``
        is a tuple needed to completely specify the function.
    args : (q,) tuple, optional
        Extra arguments to be passed to the covariance kernel,
        ``covfunc``.
    basis : (p,) tuple, optional
        Basis for the space of mean-value functions. Each element of
        the tuple is a basis function, namely a callable object,
        ``fun(t) -> array_like`` with signature ``(n, d) -> (n,)``.

    Attributes
    ----------
    ttrain : (n, d) array_like
        Training inputs
    xtrain : (n,) array_like
        Training outputs
    ntrain : int
        Number of training data
    ndim : int
        Dimension of the training inputs
    var_error : scalar or (n,) array_like
        Variance of the error in the training outputs
    args : tuple
        Additional arguments required to specify ``covfunc``
    kernel : callable
        Covariance kernel, additional arguments specified
    K : (n, n) ndarray
        Covariance matrix for the training inputs.
    argbounds : (m, n) ndarray or None
        Bounds of the parameter space.
    Phi : (n, p) ndarray
        Basis matrix.
    Beta : (p,) ndarray
        Best linear unbiased estimator of the coefficients of the
        basis functions.
    Delta : (n,) ndarray
        Residuals of fit of BLUE to training data.
    loocv : (n,) ndarray
        Leave-one-out residuals.
    R2 : float
        Leave-one-out cross-validation score.
    lnL : float
        Log-likelihood of the parameter tuple under the assumption
        that training data are a realization of a Gaussian random
        process.

    Raises
    ------

    Warns
    -----

    Warnings
    --------
    If the matrix :math:`K` is singular then the BLUP does not
    exist. This happens when any two elements of the random process
    :math:`X` are perfectly correlated (in this case the rows of
    :math:`K` are linearly dependent).

    If the matrix :math:`K` is nearly singular rather than singular
    then the BLUP is numerically unstable. This happens when any two
    elements of the random process :math:`X` are highly correlated
    rather than perfectly correlated (in this case the rows of
    :math:`K` are nearly linearly dependent).

    There are several possible solutions to these problems: (1)
    remove all but one of the offending elements of the training
    data; (2) resample the random process using different training
    inputs; (3) use a different covariance kernel; (4) add random
    noise to the training output.

    Optimization of the BLUP (implemented by the class method
    :func:`Blup.opt`) will also fail when the covariance matrix
    becomes nearly singular.

    See also
    --------
    :class:`Blp`
        Best linear predictor.

    Notes
    -----
    The quantities :math:`z^{\dagger}` and
    :math:`\operatorname{MSE}(Z^{\dagger})` are computed using the
    class method :func:`Blup.xtest`.

    References
    ----------
    .. [G62] Goldberger, A. S. 1962. 'Best linear unbiased prediction
       in the generalized linear regression model' in *Journal of the
       American Statistical Association*, 57 (298): 369--75. Available
       at https://www.doi.org/10.1080/01621459.1962.10480665.

    Examples
    --------
    One-dimensional index set:

    .. sourcecode:: python

       >>> ttrain = [0., 1.]
       >>> xtrain = [0., 1.]
       >>> mim.Blup(ttrain, xtrain, args=[1., 1.])
       <pymimic.emulator.Blup object at 0x7f87c9f8af28>

    Covariance kernel function provided explicitly:

    .. sourcecode:: python

       >>> mim.Blup(ttrain, xtrain, covfunc=mim.kernel.squared_exponential, args=[1., 1.])
       <pymimic.emulator.Blup object at 0x7f1c519c0a20>

    Two-dimensional index set:

    .. sourcecode:: python

       >>> ttrain = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]
       >>> xtrain = [0., 1., 1., 2.]
       >>> mim.Blup(ttrain, xtrain, args=[1., 1., 1.])
       <pymimic.emulator.Blup object at 0x7f1c519c0940>

    Covariance kernel function provided explicitly:

    .. sourcecode:: python

       >>> mim.Blup(ttrain, xtrain, covfunc=mim.kernel.squared_exponential, args=[1., 1., 1.])
       <pymimic.emulator.Blup object at 0x7f970017fd68>

    """
    _independent_attribs = {"args"}
    _dependent_attribs = {"kernel", "K", "U_K", "alpha", "beta",
                          "gamma", "G", "U_G", "Beta", "Delta",
                          "Gamma", "Q", "loocv", "R2", "lnL"}

    def __init__(self, ttrain, xtrain, var_error=None,
                 covfunc=mim.kernel.squared_exponential, args=(),
                 basis=mim.basis.const()):
        _LinearPredictor.__init__(self, ttrain, xtrain, var_error,
                                  covfunc, args)
        self._alpha = None
        self._beta = None
        self._gamma = None
        self._G = None
        self._U_G = None
        self._Beta = None
        self._Delta = None
        self._Gamma = None
        self._Q = None
        self._loocv = None
        self._R2 = None
        self._lnL = None
        self.basis = basis
        self.Phi = self._phi(ttrain)

    @property
    def alpha(self):
        r"""Matrix alpha

        :meta private:

        """
        if self._alpha is None:
            self._alpha = linalg.cho_solve(self.U_K, self.Phi)
        return self._alpha

    @property
    def beta(self):
        r"""Matrix beta

        :meta private:

        """
        if self._beta is None:
            self._beta = linalg.cho_solve(self.U_G, self.alpha.T)
        return self._beta

    @property
    def gamma(self):
        r"""Matrix gamma

        :meta private:

        """
        if self._gamma is None:
            self._gamma = np.eye(self.ntrain) - self.Phi@self.beta
        return self._gamma

    @property
    def Beta(self):
        r"""Coefficients of best linear unbiased estimator

        :meta private:

        """
        if self._Beta is None:
            self._Beta = self.beta@self.xtrain
        return self._Beta

    @property
    def G(self):
        r"""Gram matrix

        :meta private:

        """
        if self._G is None:
            self._G = self.Phi.T@self.alpha
        return self._G

    @property
    def U_G(self):
        r"""Cholesky decomposition of the Gram matrix

        :meta private:

        """
        if self._U_G is None:
            self._U_G = linalg.cho_factor(self.G)
        return self._U_G

    @property
    def Delta(self):
        r"""Residuals of generalized least-squares fit

        :meta private:

        """
        if self._Delta is None:
            self._Delta = self.gamma@self.xtrain
        return self._Delta

    @property
    def Gamma(self):
        r"""Matrix Gamma

        :meta private:

        """
        if self._Gamma is None:
            self._Gamma = self.Q@self.xtrain
        return self._Gamma

    @property
    def Q(self):
        r"""Matrix Q

        :meta private:

        """
        if self._Q is None:
            self._Q = linalg.cho_solve(self.U_K, self.gamma)
        return self._Q

    @property
    def loocv(self):
        r"""LOO residuals and their variances

        :meta private:

        """
        if self._loocv is None:
            var_residuals = 1./np.diag(self.Q)
            residuals = self.Gamma*var_residuals
            self._loocv = residuals, var_residuals
        return self._loocv

    @property
    def R2(self):
        r"""Leave-one-out cross-validation score

        :meta private:

        """
        if self._R2 is None:
            try:
                self._R2 = np.sum(self.loocv[0]**2.)/self.ntrain
            except (linalg.LinAlgError, ValueError):
                self._R2 = np.inf
        return self._R2

    @property
    def lnL(self):
        r"""Log-likelihood of the covariance function parameter

        :meta private:

        """
        # Assumes the random process is Gaussian
        if self._lnL is None:
            try:
                log_det_K = 2.*np.sum(np.log(np.diag(self.U_K[0])))
                Q = linalg.cho_solve(self.U_K, self.Delta)
                self._lnL = - self.Delta.T@Q - log_det_K
            except (linalg.LinAlgError, ValueError):
                self._lnL = - np.inf
        return self._lnL

    def _phi(self, t):
        r"""Return the basis function values for given argument"""
        return np.array([basis_j(t) for basis_j in self.basis]).T
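
    # Example (illustrative): with the default constant basis,
    # ``mim.basis.const()``, ``_phi`` presumably returns a single column of
    # constants (conventionally ones), so ``Phi`` has shape (ntrain, 1) and
    # the best linear unbiased estimator reduces to a generalized
    # least-squares estimate of a constant mean.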

    def opt(self, opt_method="mle", n_start=None, method="Powell",
            bounds=None, tol=None, callback=None, options=None):
        r"""Optimize the parameter of the covariance kernel model

        Set the covariance kernel arguments, ``*args``, to their
        optimum values using multistart local optimization.

        Parameters
        ----------
        opt_method : str, optional
            Optimization method to be used. Should be either:

            - ``"mle"`` (maximum-concentrated likelihood method)
            - ``"loocv"`` (leave-one-out cross-validation method).

        n_start : int, optional
            Number of multistart iterations to be used. If ``None``
            then :math:`10d` iterations are used, where :math:`d` is
            the dimension of the parameter space.
        method : str or callable, optional
            Type of solver to be used by the minimizer.
        bounds : sequence or scipy.optimize.Bounds, optional
            Bounds on variables for Nelder-Mead, L-BFGS-B, TNC,
            SLSQP, Powell, and trust-constr methods. There are two
            ways to specify the bounds:

            - instance of the scipy.optimize.Bounds class;
            - sequence of ``(min, max)`` pairs for each parameter.

            Bounds must be provided if ``covfunc`` is not included in
            the PyMimic module.
        tol : float, optional
            Tolerance for termination. When ``tol`` is specified, the
            selected minimization algorithm sets some relevant
            solver-specific tolerance(s) equal to ``tol``.
        callback : callable, optional
            A function to be called after each iteration.
        options : dict, optional
            A dictionary of solver options.

        Returns
        -------
        res : OptimizeResult
            The best of ``n_start`` optimization results, represented
            as an ``OptimizeResult`` object.

        Raises
        ------

        Warns
        -----

        Warnings
        --------
        Suppose that the covariance kernel model is of the form
        :math:`k(s, t) = a{}f(s, t)` for some constant :math:`a` and
        some positive-semidefinite kernel :math:`f`. In this case the
        method of leave-one-out cross-validation will fail to find
        :math:`a` if there are no errors on the training outputs
        (i.e. if ``var_error`` is ``None``, zero, or a tuple of
        zeros). This is because the BLUP of a random variable is
        independent of :math:`a`.

        Typically, the likelihood and leave-one-out cross-validation
        score have multiple extrema. Use :meth:`Blup.opt` in the
        knowledge that the global optimum may not be found, and that
        the global optimum may in fact result in over- or
        under-fitting of the data [RW06_].

        See also
        --------
        :func:`scipy.optimize.minimize`
            The Scipy function for the minimization of a scalar
            function of one or more variables.

        Notes
        -----
        The function optimizes the parameter of the covariance kernel
        model using either (i) the method of maximum-concentrated
        likelihood or (ii) the method of leave-one-out
        cross-validation. In the first case, the likelihood of the
        parameter is maximized under the assumption that training and
        test data are a realization of a Gaussian random process with
        mean given by its best linear unbiased estimator. In the
        second case, the parameter is chosen so as to minimize the
        leave-one-out cross-validation score (i.e. the sum of the
        squares of the leave-one-out residuals).

        The optimization is done using multiple starts of
        :meth:`scipy.optimize.minimize`. The keywords of
        :meth:`Blup.opt` (``method``, ``bounds``, ``tol``,
        ``callback``, and ``options``) are passed directly to this
        function. The starting points are chosen using Latin
        hypercube sampling, unless the training data are scalars or
        the number of training data is greater than the number of
        starts, when uniform random sampling is used.

        The optimization is performed in a transformed parameter
        space. If either bound for an element of the parameter tuple
        is infinite (``np.inf``) then that element is transformed
        using :math:`\mathrm{tan}`. If both bounds for an element of
        the parameter tuple are finite then that element is
        transformed using :math:`\mathrm{sinh}(10\,\cdot)/10`.

        References
        ----------
        .. [RW06] Rasmussen, C. E., and C. K. I. Williams. 2006.
           *Gaussian processes for machine learning*. Cambridge, MA:
           MIT Press.

        Examples
        --------
        First create an instance of :class:`Blup`.

        .. sourcecode:: python

           >>> ttrain = np.linspace(0., np.pi, 10)
           >>> xtrain = np.sin(ttrain)
           >>> blup = mim.Blup(ttrain, xtrain)

        Now perform the optimization.

        .. sourcecode:: python

           >>> blup.opt()
                 fun: -52.99392000860139
            hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
                 jac: array([276.77485929, 585.7648087 ])
             message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
                nfev: 9
                 nit: 1
                njev: 3
              status: 0
             success: True
                   x: array([1.49225651, 0.23901602])

        Inspect the instance attribute ``args`` to see that it has
        been set to its maximum-likelihood estimate.

        .. sourcecode:: python

           >>> blup.args
           [12.706204736174696, 0.5411814081739366]

        See Notes for an explanation of the discrepancy between the
        values of ``x`` and ``blup.args``.

        """
        return _opt(self, opt_method, n_start, method, bounds, tol,
                    callback, options)

    def xtest(self, t):
        r"""Return the best linear unbiased predictor of a random variable

        Returns the realization, :math:`z^{\dagger}`, of the best
        linear unbiased predictor of a random variable based on the
        realization of a finite random process, :math:`x`.

        Parameters
        ----------
        t : (m, d) array_like
            Test input.

        Returns
        -------
        xtest : (m,) ndarray
            Test output.
        mse : (m,) ndarray, optional
            Mean-squared error in the test output.

        Raises
        ------

        Warns
        -----

        Warnings
        --------

        See also
        --------
        :func:`Blp.xtest`
            Best linear predictor

        Notes
        -----

        Examples
        --------
        One-dimensional index set, one test input:

        .. sourcecode:: python

           >>> ttrain = [0., 1.]
           >>> xtrain = [0., 1.]
           >>> blup = mim.Blup(ttrain, xtrain, args=[1., 1.])
           >>> blup.xtest(0.62)
           (array(0.63368638), array(0.03371449))

        Two test inputs:

        .. sourcecode:: python

           >>> blup.xtest([0.62, 0.89])
           (array([0.63368638, 0.90790372]), array([0.03371449, 0.00538887]))

        Two-dimensional index set, one test input:

        .. sourcecode:: python

           >>> ttrain = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]
           >>> xtrain = [0., 1., 1., 2.]
           >>> blup = Blup(ttrain, xtrain, args=[1., 1., 1.])
           >>> blup.xtest([[0.43, 0.27]])
           (array([0.63956746]), array([0.06813622]))

        Two test inputs at once:

        .. sourcecode:: python

           >>> blup.xtest([[0.43, 0.27], [0.16, 0.93]])
           (array([0.63956746, 1.09544711]), array([0.06813622, 0.01396161]))

        """
        if np.isscalar(t):
            # Allow for a single scalar index
            t = np.atleast_1d(t)
            scalar_input = True
        else:
            # Normally, there will be multiple scalar or vector indices
            scalar_input = False
        # BLUP
        k = self.kernel(self.ttrain, t)
        # k = np.atleast_2d(self.kernel(self.ttrain, t))
        phi = self._phi(t)
        xtest = phi@self.Beta + k.T@self.Gamma
        # MSE of BLUP
        delta = linalg.cho_solve(self.U_K, k)
        epsilon = phi.T - self.Phi.T@delta
        zeta = linalg.cho_solve(self.U_G, epsilon)
        mse = (np.diag(self.kernel(t, t))
               - _dot(k, delta)
               + _dot(epsilon, zeta))
        # mse = ([self.kernel(t_i, t_i).item() for t_i in t]
        #        - _dot(k, delta)
        #        + _dot(epsilon, zeta))
        # A very small MSE may be computed as slightly negative due
        # to a lack of numerical precision
        mse[mse < 0.] = 0.
        if scalar_input:
            return np.squeeze(xtest), np.squeeze(mse)
        return xtest, mse