#!/usr/bin/env python3
"""Emulator module for PyMimic
This module provides :class:`Blp` and :class:`Blup` classes for use in
computing the best linear predictor and best linear unbiased predictor
of a random variable based on a finite random process.
"""
import sys
import warnings
import numpy as np
import scipy.optimize as optimize
import scipy.linalg as linalg
import pymimic as mim
def _dot(x, y):
r"""Return the column-wise dot product of two arrays"""
x = np.atleast_1d(x)
y = np.atleast_1d(y)
try:
# x and y are two-dimensional arrays (columns of vectors)
return np.einsum("ji, ji -> i", x, y)
except ValueError:
# x and y are one-dimensional arrays (including promoted scalars)
return np.einsum("i, i -> i", x, y)
def _opt(linear_predictor, opt_method="mle", n_start=None,
method="Powell", bounds=None, tol=None, callback=None,
options=None):
r"""Optimize the parameter of the covariance kernel model"""
class _Obj:
def __init__(self, opt_method):
if opt_method == "mle":
self.obj = self._obj_lnL
elif opt_method == "loocv":
self.obj = self._obj_R2
else:
raise ValueError(
"unknown optimization method {}.".format(opt_method)
)
def __call__(self, x):
return self.obj(x)
def _obj_R2(self, x):
r"""Return sum of squares of LOO residuals for given covfunc arg"""
linear_predictor.args = tuple(
[theta[i](x[i]) for i in range(len(x))]
)
return linear_predictor.R2
def _obj_lnL(self, x):
r"""Return negative of log-likelihood of covfunc arg"""
# TO DO Be able to pass arbitrary probability distribution
linear_predictor.args = tuple(
[theta[i](x[i]) for i in range(len(x))]
)
return - linear_predictor.lnL
class _Param:
r"""Parameter transformation"""
def __init__(self, inf_bound=False):
if inf_bound:
self.trans = np.tan
else:
self.trans = lambda x: np.sinh(10.*x)/10.
def __call__(self, x):
return self.trans(x)
# Specify objective function
obj = _Obj(opt_method)
# Set bounds
if bounds:
linear_predictor.argbounds = bounds
elif linear_predictor.argbounds is None:
linear_predictor.argbounds = linear_predictor._argbounds()
bounds = np.copy(linear_predictor.argbounds)
# Transform infinite search bounds to finite using arctan.
ind_infinite = np.isinf(bounds)
bounds[ind_infinite] = np.pi/2.
# Transform finite search bounds using arcsinh
row_infinite = np.any(ind_infinite, axis=1)
bounds[~row_infinite] = np.arcsinh(10.*bounds[~row_infinite])/10.
# Transform parameter
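# theta[i] maps an element of the transformed (search) space back to
# the original parameter space: tan where a bound is infinite,
# sinh(10*x)/10 otherwise, inverting the arctan/arcsinh transformation
# of the bounds above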
theta = [_Param(val) for val in row_infinite]
# Multistart optimization
if n_start is None:
n_start = 10*linear_predictor.ndim
try:
# Use a Latin-hypercube design for the starting points (i.e. when
# n_start > ndim)
x0 = mim.design(bounds, "lhs", n=n_start)
except ValueError:
# Otherwise fall back to uniform random sampling
x0 = mim.design(bounds, "random", n=n_start)
fun_best = np.inf
res_best = None
for i, x0_i in enumerate(x0):
try:
res = optimize.minimize(obj, x0=x0_i, method=method,
jac=None, hess=None, hessp=None,
bounds=bounds, tol=tol,
callback=callback,
options=options)
if res.success and res.fun < fun_best:
# Keep the best result so far and a snapshot of the
# predictor's state (including the optimal args)
fun_best = res.fun
res_best = res
# args_best = linear_predictor.args
vars_best = dict(vars(linear_predictor))
elif not res.success:
warnings.warn("multistart iteration failed.", UserWarning)
except ValueError:
pass
if res_best is None:
raise RuntimeError("all multistart iterations failed.")
# Restore the state corresponding to the best result
# linear_predictor.args = args_best
for key, val in vars_best.items():
setattr(linear_predictor, key, val)
return res_best
class _CovfuncWrapper:
r"""Wrap positive-definite kernel so it can take additional arguments"""
def __init__(self, f, args):
self.f = f
if args is None:
self.args = ()
elif np.isscalar(args):
self.args = (args,)
else:
self.args = args
def __call__(self, s, t):
return self.f(s, t, *self.args)
class _LinearPredictor():
r"""Base class for classes Blp and Blup"""
# This class allows for dependent attributes to be recomputed if
# and only if the independent attributes on which they depend are
# changed. See: https://stackoverflow.com/questions/57439169.
_independent_attribs = {"args"}
_dependent_attribs = {"kernel", "K", "U_K"}
def __init__(self, ttrain, xtrain, var_error=None,
covfunc=mim.kernel.squared_exponential, args=()):
self._ttrain = ttrain
self._xtrain = xtrain
self._var_error = var_error
self._covfunc = covfunc
self._args = args
self._kernel = None
self._K = None
self._U_K = None
self.argbounds = None
def __setattr__(self, name, value):
# Update dependent attribute if and only if self.args changes
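# (e.g. assigning to self.args clears the cached kernel, K and U_K,
# together with any further dependent attributes defined by a
# subclass, so that they are recomputed lazily on next access)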
if name in self._independent_attribs:
name = f"_{name}"
for var in self._dependent_attribs:
super().__setattr__(f"_{var}", None)
super().__setattr__(name, value)
@property
def ttrain(self):
r"""Training inputs"""
return self._ttrain
@property
def ntrain(self):
r"""Number of training data"""
return len(self._ttrain)
@property
def ndim(self):
r"""Dimension of the training inputs"""
try:
return self._ttrain.shape[1]
except IndexError:
return 1
@property
def xtrain(self):
r"""Training outputs"""
return self._xtrain
@property
def var_error(self):
r"""Variance of the error in the training outputs"""
if isinstance(self._var_error, (list, tuple, np.ndarray)):
# If var_error is array_like
return self._var_error
else:
# If var_error is None or scalar
if self._var_error is None:
self._var_error = 0.
return self._var_error*np.ones(self.ntrain)
@property
def args(self):
r"""Additional arguments required to specify the `covfunc`"""
return self._args
@property
def kernel(self):
r"""Wrapper for `covfunc`"""
if self._kernel is None:
self._kernel = _CovfuncWrapper(self._covfunc, self.args)
return self._kernel
@property
def K(self):
r"""Covariance matrix"""
if self._K is None:
K = self.kernel(self.ttrain, self.ttrain)
if self.var_error is not None:
K = K + np.diag(self.var_error)
self._K = K
return self._K
@property
def U_K(self):
r"""Cholesky decomposition of the covariance matrix"""
if self._U_K is None:
self._U_K = linalg.cho_factor(self.K)
return self._U_K
def _argbounds(self):
"""Return kernel parameter bounds"""
domain = np.max(self.ttrain, axis=0) - np.min(self.ttrain, axis=0)
l_min = (np.prod(domain)/self.ntrain)**(1./self.ndim)
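# l_min is the typical spacing of the training inputs (the d-th root
# of the input volume per training point); it sets the default bound
# l_min**(-2.) used below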
if self._covfunc is mim.kernel.exponential:
return np.vstack(
([0., np.inf],
[0., 2. ],
np.repeat([[0., l_min**(-2.)]], self.ndim, axis=0))
)
elif self._covfunc is mim.kernel.gneiting:
return np.vstack(
([0., np.inf],
[0., 2. ],
np.repeat([[0., l_min**(-2.)]], self.ndim, axis=0))
)
elif self._covfunc is mim.kernel.matern:
return np.vstack(
([0., np.inf],
[0., np.inf],
np.repeat([[0., l_min**(-2.)]], self.ndim, axis=0))
)
elif self._covfunc is mim.kernel.neural_network:
return np.vstack(
([0., np.inf],
np.repeat([[0., l_min**(-2.)]], self.ndim + 1, axis=0))
)
elif self._covfunc is mim.kernel.periodic:
return np.vstack(
([0., np.inf],
np.repeat([[l_min, np.inf]], self.ndim, axis=0),
np.repeat([[0., l_min**(-2.)]], self.ndim, axis=0))
)
elif self._covfunc is mim.kernel.rational_quadratic:
return np.vstack(
([0., np.inf],
[0., np.inf],
np.repeat([[0., l_min**(-2.)]], self.ndim, axis=0))
)
elif self._covfunc is mim.kernel.squared_exponential:
return np.vstack(
([0., np.inf],
np.repeat([[0., l_min**(-2.)]], self.ndim, axis=0))
)
else:
raise TypeError(
"unknown covariance kernel."
)
class Blp(_LinearPredictor):
r"""Best linear predictor of a random variable
Let :math:`X := \{X_{t}\}_{t \in T}` be a random process
(considered as a column vector) with finite index set :math:`T`
and second-moment kernel :math:`k`. Let :math:`Z` be a random
variable to which the second-moment kernel extends by writing
:math:`Z := X_{t_{0}}`. The best linear predictor (BLP) of
:math:`Z` based on :math:`X` is
.. math::
Z^{*} = \sigma{}^{\mathrm{t}}K^{-1}X
with mean-squared error
.. math::
\operatorname{MSE}(Z^{*}) = k(t_{0}, t_{0}) -
\sigma^{\mathrm{t}}K^{-1}\sigma,
where :math:`\sigma := (k(t_{0}, t_{i}))_{i}` and :math:`K :=
(k(t_{i}, t_{j}))_{ij}`.
Given a realization of :math:`X`, namely :math:`x`, then we may
compute the realization of :math:`Z^{*}`, namely :math:`z^{*}`, by
making the substitution :math:`X = x`.
Parameters
----------
ttrain : (n, d) array_like
Training input.
xtrain : (n,) array_like
Training output.
var_error : scalar or (n,) array_like, optional
Variance of error in training output.
covfunc : callable, optional
Second-moment kernel, ``covfunc(s, t, *args) -> array_like``
with signature ``(n, d), (m, d) -> (n, m)``, where ``args`` is
a tuple needed to completely specify the function.
args : (p,) tuple, optional
Extra arguments to be passed to the second-moment kernel,
``covfunc``.
Attributes
----------
ttrain : (n, d) array_like
Training inputs
xtrain : (n,) array_like
Training outputs
ntrain : int
Number of training data
ndim : int
Dimension of the training inputs
var_error : (n,) ndarray
Variance of the error in the training outputs
args : tuple
Additional arguments required to specify ``covfunc``
kernel : callable
Second-moment kernel with additional arguments specified
K : (n, n) ndarray
Second-moment matrix for the training inputs.
argbounds : (p, 2) ndarray or None
Bounds of the parameter space.
loocv : tuple of (n,) ndarray
Leave-one-out residuals and their variances.
R2 : float
Leave-one-out cross-validation score.
lnL : float
Log-likelihood of the parameter tuple under the assumption
that training data are a realization of a Gaussian random
process.
Warnings
--------
If the matrix :math:`K` is singular then the BLP does not exist.
This happens when any two elements of the random process :math:`X`
are perfectly correlated (in this case the rows of :math:`K` are
linearly dependent). If the matrix :math:`K` is nearly-singular
rather than singular then the BLP is numerically unstable. This
happens when any two elements of the random process :math:`X` are
highly correlated rather than perfectly correlated (in this case
the rows of :math:`K` are nearly linearly dependent). There are
several possible solutions to these problems:
(1) remove all but one of the offending elements of the training data;
(2) resample the random process using different training inputs;
(3) use a different second-moment kernel;
(4) add random noise to the training output.
Optimization of the BLP (implemented by the class method
:func:`Blp.opt`) will also fail when the second-moment matrix
becomes nearly singular.
See also
--------
:class:`Blup`
Best linear unbiased predictor.
Notes
-----
The quantities :math:`z^{*}` and :math:`\operatorname{MSE}(Z^{*})`
are computed using the class method :func:`Blp.xtest`.
Examples
--------
One-dimensional index set:
.. sourcecode:: python
>>> ttrain = [0., 1.]
>>> xtrain = [0., 1.]
>>> mim.Blp(ttrain, xtrain, args=[1., 1.])
<pymimic.emulator.Blp object at 0x7f87c9f8af28>
Second-moment kernel provided explicitly:
>>> mim.Blp(ttrain, xtrain, covfunc=mim.kernel.squared_exponential, args=[1., 1.])
<pymimic.emulator.Blp object at 0x7f1c519c0a20>
Two-dimensional index set:
.. sourcecode:: python
>>> ttrain = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]
>>> xtrain = [0., 1., 1., 2.]
>>> mim.Blp(ttrain, xtrain, args=[1., 1., 1.])
<pymimic.emulator.Blp object at 0x7f1c519c0940>
Second-moment kernel provided explicitly:
.. sourcecode:: python
>>> mim.Blp(ttrain, xtrain, covfunc=mim.kernel.squared_exponential, args=[1., 1., 1.])
<pymimic.emulator.Blp object at 0x7f970017fd68>
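A user-defined second-moment kernel may also be used, provided it
has the signature documented above. The following is a minimal
sketch using a hand-written squared-exponential kernel with a
single inverse-squared length scale (the kernel and its arguments
are illustrative only; output not shown):
.. sourcecode:: python
>>> def my_covfunc(s, t, a, b):
...     s, t = np.atleast_2d(s), np.atleast_2d(t)
...     r2 = np.sum((s[:, None, :] - t[None, :, :])**2., axis=-1)
...     return a*np.exp(-b*r2)
>>> mim.Blp(ttrain, xtrain, covfunc=my_covfunc, args=[1., 1.])
If :meth:`Blp.opt` is to be used with such a kernel, ``bounds``
must be given explicitly.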
"""
_independent_attribs = {"args"}
_dependent_attribs = {"kernel", "K", "U_K", "Gamma", "loocv",
"R2", "lnL"}
def __init__(self, ttrain, xtrain, var_error=None,
covfunc=mim.kernel.squared_exponential, args=()):
_LinearPredictor.__init__(self, ttrain, xtrain, var_error,
covfunc, args)
self._Gamma = None
self._loocv = None
self._R2 = None
self._lnL = None
@property
def Gamma(self):
r"""Matrix Gamma
:meta private:
"""
if self._Gamma is None:
self._Gamma = linalg.cho_solve(self.U_K, self.xtrain)
return self._Gamma
@property
def loocv(self):
r"""LOO residuals and their variances
:meta private:
"""
if self._loocv is None:
var_residuals = 1./np.diag(np.linalg.inv(self.K))
residuals = self.Gamma*var_residuals
self._loocv = residuals, var_residuals
return self._loocv
@property
def R2(self):
r"""Leave-one-out cross-validation score
:meta private:
"""
if self._R2 is None:
try:
self._R2 = np.sum(self.loocv[0]**2.)/self.ntrain
except (linalg.LinAlgError, ValueError):
self._R2 = np.inf
return self._R2
@property
def lnL(self):
r"""Log-likelihood of the covariance function parameter
:meta private:
"""
if self._lnL is None:
try:
log_det_K = 2.*np.sum(np.log(np.diag(self.U_K[0])))
Q = linalg.cho_solve(self.U_K, self.xtrain)
self._lnL = - self.xtrain.T@Q - log_det_K
except (linalg.LinAlgError, ValueError):
self._lnL = - np.inf
return self._lnL
def opt(self, opt_method="mle", n_start=None, method="Powell",
bounds=None, tol=None, callback=None, options=None):
r"""Optimize the parameter of the second-moment kernel model
Set the second-moment kernel arguments, ``*args``, to their optimum
values using multistart local optimization.
If the second-moment kernel is user defined, ``*args`` must be
a sequence of scalars.
Parameters
----------
opt_method : str, optional
Optimization method to be used. Should be either:
- ``"mle"`` (maximum-likelihood method)
- ``"loocv"`` (leave-one-out cross-validation method).
n_start : int, optional
Number of multistart iterations to be used. If ``None``
then :math:`10d` iterations are used, where :math:`d` is
the dimension of the training inputs.
method : str or callable, optional
Type of solver to be used by the minimizer.
bounds : sequence or scipy.optimize.Bounds, optional
Bounds on variables for Nelder-Mead, L-BFGS-B, TNC, SLSQP,
Powell, and trust-constr methods. There are two ways to
specify the bounds:
- instance of scipy.optimize.Bounds class;
- sequence of `(min, max)` pairs for each parameter.
Bounds must be provided if ``covfunc`` is not
included in the PyMimic module.
tol : float, optional
Tolerance for termination. When `tol` is specified, the
selected minimization algorithm sets some relevant
solver-specific tolerance(s) equal to `tol`.
callback : callable, optional
A function to be called after each iteration.
options : dict, optional
A dictionary of solver options.
Returns
-------
res : OptimizeResult
The best of `n_start` optimization results, represented as
an `OptimizeResult` object.
Warnings
--------
Suppose that the second-moment kernel model is of the form
:math:`k(s, t) = a{}f(s, t)` for some constant :math:`a` and
some positive-semidefinite kernel :math:`f`. In this case the
method of leave-one-out cross-validation will fail to find
:math:`a` if there are no errors on the training outputs
(i.e. if ``var_error`` is ``None``, zero, or a tuple of
zeros). This is because the BLP of a random variable is
independent of :math:`a`.
Typically, the likelihood and leave-one-out cross-validation
score have multiple extrema. Use :func:`Blp.opt` in the
knowledge that the global optimum may not be found, and that
the global optimum may in any case result in over- or
under-fitting of the data [RW06]_.
See also
--------
:func:`scipy.optimize.minimize`
The Scipy function for the minimization of a scalar
function of one or more variables.
Notes
-----
The function optimizes the parameter of the second-moment kernel
model using either (i) the method of maximum likelihood or
(ii) the method of leave-one-out cross-validation.
In the first case, the likelihood of the parameter is
maximized under the assumption that training and test data are
a realization of a centred Gaussian random process.
In the second case, the parameter is chosen so as to minimize
the leave-one-out cross-validation score (i.e. the sum of the
squares of the leave-one-out residuals).
The optimization is done using multiple starts of
:meth:`scipy.optimize.minimize`. The keywords of
:meth:`Blp.opt()` (``method``, ``bounds``, ``tol``,
``callback``, and ``options``) are passed directly to this
function. The starting points are chosen using Latin
hypercube sampling, unless the training data are scalars or
the number of training data is greater than the number of
starts, in which case uniform random sampling is used.
The optimization is performed in a transformed parameter
space. If either bound for an element of the parameter tuple
is infinite (``np.inf``) then that element is transformed
using :math:`\mathrm{tan}`. If both bounds for an element of
the parameter tuple are finite then that element is
transformed using :math:`\mathrm{sinh}(10\,\cdot)/10`.
References
----------
.. [RW06]
Rasmussen, C.E., and C.K.I. Williams. 2006. *Gaussian
Processes for Machine Learning*. Cambridge, MA: MIT Press.
Examples
--------
First create an instance of :class:`Blp`.
.. sourcecode:: python
>>> ttrain = np.linspace(0., np.pi, 10)
>>> xtrain = np.sin(ttrain)
>>> blp = mim.Blp(ttrain, xtrain)
Now perform the optimization.
>>> blp.opt()
fun: -52.99392000860139
hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
jac: array([276.77485929, 585.7648087 ])
message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
nfev: 9
nit: 1
njev: 3
status: 0
success: True
x: array([1.49225651, 0.23901602])
Inspect the instance attribute ``args`` to see that it has
been set to its maximum-likelihood estimate.
.. sourcecode:: python
>>> blp.args
[12.706204736174696, 0.5411814081739366]
See Notes for an explanation of the discrepancy between the
values of ``x`` and ``blp.args``.
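The leave-one-out cross-validation method is selected with the
``opt_method`` keyword (a sketch; the result, which in general
differs from the maximum-likelihood estimate above, is not shown).
.. sourcecode:: python
>>> blp.opt(opt_method="loocv")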
"""
return _opt(self, opt_method, n_start, method, bounds, tol,
callback, options)
def xtest(self, t):
r"""Return the best linear predictor of a random variable.
Returns the realization, :math:`z^{*}`, of the best linear
predictor of a random variable based on the realization of a
finite random process, :math:`x`.
Parameters
----------
t : (m, d) array_like
Test input.
Returns
-------
xtest : (m,) ndarray
Test output.
mse : (m,) ndarray, optional
Mean-squared error in the test output.
See also
--------
:func:`Blup.xtest`
Best linear unbiased predictor
Examples
--------
One-dimensional index set, one test input:
.. sourcecode:: python
>>> ttrain = [0., 1.]
>>> xtrain = [0., 1.]
>>> blp = mim.Blp(ttrain, xtrain, args=[1., 1.])
>>> blp.xtest(0.62)
(array(0.6800459), array(0.02680896))
Two test inputs:
.. sourcecode:: python
>>> blp.xtest([0.62, 0.89])
(array([0.6800459, 0.92670531]), array([0.02680896, 0.00425306]))
Two-dimensional index set, one test input:
.. sourcecode:: python
>>> ttrain = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]
>>> xtrain = [0., 1., 1., 2.]
>>> blp = mim.Blp(ttrain, xtrain, args=[1., 1., 1.])
>>> blp.xtest([[0.43, 0.27]])
(array([0.82067282]), array([0.04697306]))
Two test inputs at once:
.. sourcecode:: python
>>> blp.xtest([[0.43, 0.27], [0.16, 0.93]])
(array([0.82067282, 1.17355541]), array([0.04697306, 0.0100251]))
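The square root of the mean-squared error may be used as a nominal
error bar on the prediction (a sketch; output not shown):
.. sourcecode:: python
>>> xtest, mse = blp.xtest([[0.43, 0.27], [0.16, 0.93]])
>>> err = np.sqrt(mse)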
"""
if np.isscalar(t):
# Allow for a single scalar index
t = np.atleast_1d(t)
scalar_input = True
else:
# Normally, there will be multiple scalar or vector indices
scalar_input = False
# BLP
k = self.kernel(self.ttrain, t)
# k = np.atleast_2d(self.kernel(self.ttrain, t))
xtest = k.T@self.Gamma
# MSE of BLP
alpha = np.linalg.solve(self.K, k)
mse = np.diag(self.kernel(t, t)) - _dot(alpha, k)
# mse = ([self.kernel(t_i, t_i).item() for t_i in t]
# - _dot(alpha, k))
# A very small MSE may be computed as slightly negative due
# to a lack of numerical precision
mse[mse<0.] = 0.
if scalar_input:
return np.squeeze(xtest), np.squeeze(mse)
return xtest, mse
class Blup(_LinearPredictor):
r"""Best linear unbiased predictor of a random variable [G62_].
Let :math:`X := \{X_{t}\}_{t \in T}` be a random process
(considered as a column vector) with finite index set :math:`T`
and covariance kernel :math:`k`. Let :math:`Z` be a random
variable to which the covariance kernel extends by writing
:math:`Z := X_{t_{0}}`. Let :math:`(\phi_{i})_{i}` be a basis for
the space of possible mean-value functions for :math:`Z`. The best
linear unbiased predictor (BLUP) of :math:`Z` based on :math:`X` is
.. math::
Z^{\dagger} = \Theta^{\dagger} + \sigma^{\mathrm{t}}K^{-1}\mathrm{D}
where
:math:`\sigma := (k(t_{0}, t_{i}))_{i}`,
:math:`K := (k(t_{i}, t_{j}))_{ij}`,
:math:`\Theta^{\dagger} = \phi^{\mathrm{t}}\mathrm{B}`,
:math:`\mathrm{D} := X - \Phi\mathrm{B}`, and where
:math:`\phi := (\phi_{i}(t_{0}))_{i}`,
:math:`\Phi := (\phi_{j}(t_{i}))_{ij}`, and
:math:`\mathrm{B} :=
(\Phi^{\mathrm{t}}K^{\,-1}\Phi)^{-1}\Phi^{\mathrm{t}}K^{\,-1}X`.
Given a realization of :math:`X`, namely :math:`x`, then we may
compute the realization of :math:`Z^{\dagger}`, namely
:math:`z^{\dagger}`, by making the substitution :math:`X = x`.
Parameters
----------
ttrain : (n, d) array_like
Training input.
xtrain : (n,) array_like
Training output.
var_error : scalar or (n,) array_like, optional
Variance of error in training output.
covfunc : callable, optional
Covariance kernel, ``covfunc(s, t, *args) -> array_like`` with
signature ``(n, d), (m, d) -> (n, m)``, where ``args`` is a
tuple needed to completely specify the function.
args : (q,) tuple, optional
Extra arguments to be passed to the covariance kernel,
``covfunc``.
basis : (p,) tuple, optional
Basis for the space of mean-value functions. Each element of
the tuple is a basis function, namely a callable object,
``fun(t) -> array_like`` with signature ``(n, d) -> (n,)``.
Attributes
----------
ttrain : (n, d) array_like
Training inputs
xtrain : (n,) array_like
Training outputs
ntrain : int
Number of training data
ndim : int
Dimension of the training inputs
var_error : (n,) ndarray
Variance of the error in the training outputs
args : tuple
Additional arguments required to specify ``covfunc``
kernel : callable
Covariance kernel with additional arguments specified
K : (n, n) ndarray
Covariance matrix for the training inputs.
argbounds : (q, 2) ndarray or None
Bounds of the parameter space.
Phi : (n, p) ndarray
Basis matrix.
Beta : (p,) ndarray
Best linear unbiased estimator of the coefficients of the
basis functions.
Delta: (n,) ndarray
Residuals of fit of BLUE to training data.
loocv : tuple of (n,) ndarray
Leave-one-out residuals and their variances.
R2 : float
Leave-one-out cross-validation score.
lnL : float
Log-likelihood of the parameter tuple under the assumption
that training data are a realization of a Gaussian random
process.
Warnings
--------
If the matrix :math:`K` is singular then the BLUP does not exist.
This happens when any two elements of the random process :math:`X`
are perfectly correlated (in this case the rows of :math:`K` are
linearly dependent). If the matrix :math:`K` is nearly-singular
rather than singular then the BLUP is numerically unstable. This
happens when any two elements of the random process :math:`X` are
highly correlated rather than perfectly correlated (in this case
the rows of :math:`K` are nearly linearly dependent). There are
several possible solutions to these problems:
(1) remove all but one of the offending elements of the training data;
(2) resample the random process using different training inputs;
(3) use a different covariance kernel;
(4) add random noise to the training output.
Optimization of the BLUP (implemented by the class method
:func:`Blup.opt`) will also fail when the covariance matrix
becomes nearly singular.
See also
--------
:class:`Blp`
Best linear predictor.
Notes
-----
The quantities :math:`z^{\dagger}` and
:math:`\operatorname{MSE}(Z^{\dagger})` are computed using the
class method :func:`Blup.xtest`.
References
----------
.. [G62]
Goldberger, A. S. 1962. 'Best linear unbiased prediction in
the generalized linear regression model' in *Journal of the
American Statistical Association*, 57 (298): 369--75. Available
at https://www.doi.org/10.1080/01621459.1962.10480665.
Examples
--------
One-dimensional index set:
.. sourcecode:: python
>>> ttrain = [0., 1.]
>>> xtrain = [0., 1.]
>>> mim.Blup(ttrain, xtrain, args=[1., 1.])
<pymimic.emulator.Blup object at 0x7f87c9f8af28>
Covariance kernel function provided explicitly:
>>> mim.Blup(ttrain, xtrain, covfunc=mim.kernel.squared_exponential, args=[1., 1.])
<pymimic.emulator.Blup object at 0x7f1c519c0a20>
Two-dimensional index set:
.. sourcecode:: python
>>> ttrain = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]
>>> xtrain = [0., 1., 1., 2.]
>>> mim.Blup(ttrain, xtrain, args=[1., 1., 1.])
<pymimic.emulator.Blup object at 0x7f1c519c0940>
Covariance kernel function provided explicitly:
.. sourcecode:: python
>>> mim.Blup(ttrain, xtrain, covfunc=mim.kernel.squared_exponential, args=[1., 1., 1.])
<pymimic.emulator.Blup object at 0x7f970017fd68>
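A basis for the space of mean-value functions other than the
default constant basis may be supplied using the ``basis`` keyword.
Each basis function must be a callable with signature ``(n, d) ->
(n,)`` (a minimal sketch using hand-written constant and linear
basis functions; output not shown):
.. sourcecode:: python
>>> basis = (lambda t: np.ones(len(t)), lambda t: np.asarray(t)[:, 0])
>>> mim.Blup(ttrain, xtrain, args=[1., 1., 1.], basis=basis)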
"""
_independent_attribs = {"args"}
_dependent_attribs = {"kernel", "K", "U_K", "alpha", "beta",
"gamma", "G", "U_G", "Beta", "Delta",
"Gamma", "Q", "loocv", "R2", "lnL"}
def __init__(self, ttrain, xtrain, var_error=None,
covfunc=mim.kernel.squared_exponential, args=(),
basis=mim.basis.const()):
_LinearPredictor.__init__(self, ttrain, xtrain, var_error,
covfunc, args)
self._alpha = None
self._beta = None
self._gamma = None
self._G = None
self._U_G = None
self._Beta = None
self._Delta = None
self._Gamma = None
self._Q = None
self._loocv = None
self._R2 = None
self._lnL = None
self.basis = basis
self.Phi = self._phi(ttrain)
@property
def alpha(self):
r"""Matrix alpha
:meta private:
"""
if self._alpha is None:
self._alpha = linalg.cho_solve(self.U_K, self.Phi)
return self._alpha
@property
def beta(self):
r"""Matrix beta
:meta private:
"""
if self._beta is None:
self._beta = linalg.cho_solve(self.U_G, self.alpha.T)
return self._beta
@property
def gamma(self):
r"""Matrix gamma
:meta private:
"""
if self._gamma is None:
self._gamma = np.eye(self.ntrain) - self.Phi@self.beta
return self._gamma
@property
def Beta(self):
r"""Coefficients of best linear unbiased estimator
:meta private:
"""
if self._Beta is None:
self._Beta = self.beta@self.xtrain
return self._Beta
@property
def G(self):
r"""Gram matrix
:meta private:
"""
if self._G is None:
self._G = self.Phi.T@self.alpha
return self._G
@property
def U_G(self):
r"""Cholesky decomposition of the Gram matrix
:meta private:
"""
if self._U_G is None:
self._U_G = linalg.cho_factor(self.G)
return self._U_G
@property
def Delta(self):
r"""Residuals of generalized least-square fit
:meta private:
"""
if self._Delta is None:
self._Delta = self.gamma@self.xtrain
return self._Delta
@property
def Gamma(self):
r"""Matrix Gamma
:meta private:
"""
if self._Gamma is None:
self._Gamma = self.Q@self.xtrain
return self._Gamma
@property
def Q(self):
r"""Matrix Q
:meta private:
"""
if self._Q is None:
self._Q = linalg.cho_solve(self.U_K, self.gamma)
return self._Q
@property
def loocv(self):
r"""LOO residuals and their variances
:meta private:
"""
if self._loocv is None:
var_residuals = 1./np.diag(self.Q)
residuals = self.Gamma*var_residuals
self._loocv = residuals, var_residuals
return self._loocv
@property
def R2(self):
r"""Leave-one-out cross-validation score
:meta private:
"""
if self._R2 is None:
try:
self._R2 = np.sum(self.loocv[0]**2.)/self.ntrain
except (linalg.LinAlgError, ValueError):
self._R2 = np.inf
return self._R2
@property
def lnL(self):
r"""Log-likelihood of the covariance function parameter
:meta private:
"""
# Assumes the random process is Gaussian
if self._lnL is None:
try:
log_det_K = 2.*np.sum(np.log(np.diag(self.U_K[0])))
Q = linalg.cho_solve(self.U_K, self.Delta)
self._lnL = - self.Delta.T@Q - log_det_K
except (linalg.LinAlgError, ValueError):
self._lnL = - np.inf
return self._lnL
def _phi(self, t):
r"""Return the basis function values for given argument"""
return np.array([basis_j(t) for basis_j in self.basis]).T
def opt(self, opt_method="mle", n_start=None, method="Powell",
bounds=None, tol=None, callback=None, options=None):
r"""Optimize the parameter of the covariance kernel model
Set the covariance kernel arguments, ``*args``, to their
optimum values using multistart local optimization.
Parameters
----------
opt_method : str, optional
Optimization method to be used. Should be either:
- ``"mle"`` (maximum-concentrated likelihood method)
- ``"loocv"`` (leave-one-out cross-validation method).
n_start : int, optional
Number of multistart iterations to be used. If ``None``
then :math:`10d` iterations are used, where :math:`d` is
the dimension of the training inputs.
method : str or callable, optional
Type of solver to be used by the minimizer.
bounds : sequence or scipy.optimize.Bounds, optional
Bounds on variables for Nelder-Mead, L-BFGS-B, TNC, SLSQP,
Powell, and trust-constr methods. There are two ways to
specify the bounds:
- instance of scipy.optimize.Bounds class;
- sequence of `(min, max)` pairs for each parameter.
Bounds must be provided if ``covfunc`` is not
included in the PyMimic module.
tol : float, optional
Tolerance for termination. When `tol` is specified, the
selected minimization algorithm sets some relevant
solver-specific tolerance(s) equal to `tol`.
callback : callable, optional
A function to be called after each iteration.
options : dict, optional
A dictionary of solver options.
Returns
-------
res : OptimizeResult
The best of `n_start` optimization results, represented as
an `OptimizeResult` object.
Warnings
--------
Suppose that the covariance kernel model is of the form
:math:`k(s, t) = a{}f(s, t)` for some constant :math:`a` and
some positive-semidefinite kernel :math:`f`. In this case the
method of leave-one-out cross-validation will fail to find
:math:`a` if there are no errors on the training outputs
(i.e. if ``var_error`` is ``None``, zero, or a tuple of
zeros). This is because the BLUP of a random variable is
independent of :math:`a`.
Typically, the likelihood and leave-one-out cross-validation
score have multiple extrema. Use :meth:`Blup.opt()` in the
knowledge that the global optimum may not be found, and that
the global optimum may in fact result in over- or
under-fitting of the data [RW06]_.
See also
--------
:func:`scipy.optimize.minimize`
The Scipy function for the minimization of a scalar function of
one or more variables.
Notes
-----
The function optimizes the parameter of the covariance kernel
model using either (i) the method of maximum-concentrated
likelihood or (ii) the method of leave-one-out
cross-validation.
In the first case, the likelihood of the parameter is
maximized under the assumption that training and test data are
a realization of a Gaussian random process with mean given by
its best linear unbiased estimator.
In the second case, the parameter is chosen so as to minimize
the leave-one-out cross-validation score (i.e. the sum of the
squares of the leave-one-out residuals).
The optimization is done using multiple starts of
:meth:`scipy.optimize.minimize`. The keywords of
:meth:`Blup.opt()` (``method``, ``bounds``, ``tol``,
``callback``, and ``options``) are passed directly to this
function. The starting points are chosen using Latin
hypercube sampling, unless the training data are scalars or
the number of training data is greater than the number of
starts, in which case uniform random sampling is used.
The optimization is performed in a transformed parameter
space. If either bound for an element of the parameter tuple
is infinite (``np.inf``) then that element is transformed
using :math:`\mathrm{tan}`. If both bounds for an element of
the parameter tuple are finite then that element is
transformed using :math:`\mathrm{sinh}(10\cdot)/10`.
References
----------
.. [RW06]
Rasmussen, C.E., and C.K.I. Williams. 2006. *Gaussian
Processes for Machine Learning*. Cambridge, MA: MIT Press.
Examples
--------
First create an instance of :class:`Blup`.
.. sourcecode:: python
>>> ttrain = np.linspace(0., np.pi, 10)
>>> xtrain = np.sin(ttrain)
>>> blup = mim.Blup(ttrain, xtrain)
Now perform the optimization.
>>> blup.opt()
fun: -52.99392000860139
hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
jac: array([276.77485929, 585.7648087 ])
message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
nfev: 9
nit: 1
njev: 3
status: 0
success: True
x: array([1.49225651, 0.23901602])
Inspect the instance attribute ``args`` to see that it has
been set to its maximum-likelihood estimate.
.. sourcecode:: python
>>> blup.args
[12.706204736174696, 0.5411814081739366]
See Notes for an explanation of the discrepancy between the
values of ``x`` and ``blup.args``.
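Explicit parameter bounds may be supplied, one ``(min, max)`` pair
per kernel parameter (a sketch with arbitrary bounds; output not
shown):
.. sourcecode:: python
>>> blup.opt(bounds=[[0., 10.], [0., 10.]])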
"""
return _opt(self, opt_method, n_start, method, bounds, tol,
callback, options)
def xtest(self, t):
r"""Return the best linear predictor of a random variable.
Returns the realization, :math:`z^{*}`, of the best linear
predictor of a random variable based on the realization of a
finite random process, :math:`x`.
Parameters
----------
t : (m, d) array_like
Test input.
Returns
-------
xtest : (m,) ndarray
Test output.
mse : (m,) ndarray, optional
Mean-squared error in the test output.
See also
--------
:func:`Blp.xtest`
Best linear predictor
Examples
--------
One-dimensional index set, one test input:
.. sourcecode:: python
>>> ttrain = [0., 1.]
>>> xtrain = [0., 1.]
>>> blup = mim.Blup(ttrain, xtrain, args=[1., 1.])
>>> blup.xtest(0.62)
(array(0.63368638), array(0.03371449))
Two test inputs:
.. sourcecode:: python
>>> blup.xtest([0.62, 0.89])
(array([0.63368638, 0.90790372]), array([0.03371449, 0.00538887]))
Two-dimensional index set, one test input:
.. sourcecode:: python
>>> ttrain = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]
>>> xtrain = [0., 1., 1., 2.]
>>> blup = mim.Blup(ttrain, xtrain, args=[1., 1., 1.])
>>> blup.xtest([[0.43, 0.27]])
(array([0.63956746]), array([0.06813622]))
Two test inputs at once:
.. sourcecode:: python
>>> blup.xtest([[0.43, 0.27], [0.16, 0.93]])
(array([0.63956746, 1.09544711]), array([0.06813622, 0.01396161]))
"""
if np.isscalar(t):
# Allow for a single scalar index
t = np.atleast_1d(t)
scalar_input = True
else:
# Normally, there will be multiple scalar or vector indices
scalar_input = False
# BLUP
k = self.kernel(self.ttrain, t)
# k = np.atleast_2d(self.kernel(self.ttrain, t))
phi = self._phi(t)
xtest = phi@self.Beta + k.T@self.Gamma
# MSE of BLUP
delta = linalg.cho_solve(self.U_K, k)
epsilon = phi.T - self.Phi.T@delta
zeta = linalg.cho_solve(self.U_G, epsilon)
mse = (np.diag(self.kernel(t, t))
- _dot(k, delta)
+ _dot(epsilon, zeta))
# mse = ([self.kernel(t_i, t_i).item() for t_i in t]
# - _dot(k, delta)
# + _dot(epsilon, zeta))
# A very small MSE may be computed as slightly negative due
# to a lack of numerical precision
mse[mse<0.] = 0.
if scalar_input:
return np.squeeze(xtest), np.squeeze(mse)
return xtest, mse