# Source code for runlmc.models.gpy_lmc

# Copyright (c) 2016, Vladimir Feinberg
# Licensed under the BSD 3-clause license (see LICENSE)

# This file was modified from the GPy project. Its file header is replicated
# below. Its LICENSE.txt is replicated in the LICENSE file for this directory.

# Copyright (c) 2012 - 2014 the GPy Austhors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from itertools import count

import numpy as np

from .multigp import MultiGP
from ..util.docs import inherit_doc
from ..util.numpy_convenience import tesselate


@inherit_doc
class GPyLMC(MultiGP):
    """
    This wraps GPy for the Gaussian Process model for multioutput regression
    under a Linear Model of Coregionalization.

    This performs the inversion-based cubic-time algorithm.

    .. note:: Because this implementation uses GPy, mean
              functions and normalization are unsupported.

    Uses the Gaussian likelihood.

    See :class:`runlmc.lmc.functional_kernel` for the explicit LMC formula.

    The DTCVAR algorithm (the `sparse` parameter) is based on
    Efficient Multioutput Gaussian Processes through Variational Inducing
    Kernels by Álvarez et al. 2010.

    :param Xs: input observations, should be a list of numpy arrays,
               where each numpy array is a design matrix for the inputs to
               output :math:`i`. If the :math:`i`-th input has :math:`n_i`
               data points, then this matrix can be :math:`n_i` or
               :math:`n_i\\times P` shape for input dimension :math:`P`,
               with the former re-interpreted as :math:`P=1`.
    :param Ys: output observations, this must be a list of one-dimensional
               numpy arrays, matching up with the number of rows in `Xs`.
    :param kernels: a list of (stationary) kernels which constitute the
                    terms of the LMC sums prior to coregionalization.
    :param ranks: list of ranks for coregionalization factors
    :type ranks: list of integer
    :param name: model name
    :type name: string
    :param sparse: an integer. If 0, uses
                   :py:class:`GPy.models.GPCoregionalizedRegression`,
                   the typical cholesky algorithm. If >0, then this
                   determines the number of inducing points used by the
                   DTCVAR algorithm in
                   :py:class:`GPy.models.SparseGPCoregionalizedRegression`
    """

    # NOTE(review): the overridden public methods below are documented with
    # comments rather than docstrings, on the assumption that @inherit_doc
    # fills in documentation from the parent class only when a method has
    # none of its own -- confirm against runlmc.util.docs.

    def __init__(self, Xs, Ys, kernels, ranks, name='GPyLMC', sparse=0):
        # Normalization is switched off: it is unsupported by this wrapper
        # (see the class-level note).
        super().__init__(Xs, Ys, normalize=False, name=name)
        # The GPy model is built from the inputs as stored by the parent
        # constructor (self.Xs) and the outputs exactly as supplied.
        self.gpy_model = GPyLMC._construct_gpy(
            self.Xs, Ys, kernels, ranks, sparse)

    def _raw_predict(self, Xs):
        # Intentionally a no-op (returns None); prediction is delegated to
        # the wrapped GPy model in predict() / predict_quantiles().
        pass

    def parameters_changed(self):
        # Intentionally a no-op: this class keeps no state of its own that
        # would need refreshing; everything lives in self.gpy_model.
        pass

    def log_likelihood(self):
        # Delegates directly to the wrapped GPy model.
        return self.gpy_model.log_likelihood()

    def optimize(self, **kwargs):
        # All keyword arguments are forwarded untouched to GPy's optimizer.
        self.gpy_model.optimize(**kwargs)

    def _stack_inputs(self, Xs):
        """
        Stack per-output inputs into the single matrix passed to the GPy
        model, with the output index appended as the last column.

        :param Xs: list of per-output input arrays, one entry per output
        :returns: a tuple `(X, meta, lenls)`: the stacked inputs with the
                  output-index column appended, the output-index column on
                  its own (one row per stacked input row), and the list of
                  per-output input counts used to split predictions back up.
        """
        Xs = self._pad_dims(Xs)
        # Materialized once as a list so it can be reused by every consumer;
        # a lazy `map` object would be exhausted after a single pass.
        lenls = [len(X) for X in Xs]
        # np.vstack always yields an array with at least two dimensions, so
        # no extra reshape of a one-dimensional result is needed here.
        stacked = np.vstack(Xs)
        meta = np.repeat(np.arange(len(Xs)), lenls).reshape(-1, 1)
        return np.hstack([stacked, meta]), meta, lenls

    def predict(self, Xs):
        # Returns a pair (means, variances); each is the flat GPy prediction
        # split back into per-output pieces by tesselate().
        X, meta, lenls = self._stack_inputs(Xs)
        mu, var = self.gpy_model.predict(
            X, Y_metadata={'output_index': meta})
        return (tesselate(mu.reshape(-1), lenls),
                tesselate(var.reshape(-1), lenls))

    def predict_quantiles(self, Xs, quantiles=(2.5, 97.5)):
        # Returns the requested predictive quantiles, split per output by
        # tesselate() in the same way as predict().
        X, meta, lenls = self._stack_inputs(Xs)
        Qs = np.array(self.gpy_model.predict_quantiles(
            X, quantiles=quantiles, Y_metadata={'output_index': meta}))
        # Sanity check on GPy's return layout: a trailing axis of size one.
        assert Qs.shape[2] == 1
        Qs = Qs.reshape(len(quantiles), -1)
        # Qs is a (quantiles) x (num examples) array
        # We want (num examples) x (quantiles)
        return tesselate(Qs.T, lenls)

    @staticmethod
    def _construct_gpy(Xs, Ys, kernels, ranks, sparse):
        """
        Build the GPy coregionalized regression model backing this class.

        :param Xs: list of per-output input arrays
        :param Ys: list of per-output observation arrays; each is reshaped
                   into a single column before being handed to GPy
        :param kernels: kernels exposing a `to_gpy()` conversion, one per
                        LMC term
        :param ranks: coregionalization rank for each kernel, in the same
                      order as `kernels`
        :param sparse: if >0, the number of inducing points for the sparse
                       model; otherwise the dense model is built
        :returns: a :py:class:`GPy.models.SparseGPCoregionalizedRegression`
                  when `sparse > 0`, else a
                  :py:class:`GPy.models.GPCoregionalizedRegression`
        """
        # GPy is imported lazily so that it is only required when this
        # wrapper is actually instantiated.
        import GPy.models as models
        from GPy.util.multioutput import ICM

        gpy_kernels = [k.to_gpy() for k in kernels]
        # NOTE(review): the input dimension is fixed at 1 even though the
        # class docstring allows n_i x P inputs -- confirm whether P > 1
        # is meant to be supported through this wrapper.
        input_dim = 1
        num_outputs = len(Ys)
        columns = [Y.reshape(-1, 1) for Y in Ys]

        # The LMC kernel is the sum of one ICM term per (kernel, rank) pair.
        K = ICM(input_dim, num_outputs, gpy_kernels[0], ranks[0], name='ICM0')
        for idx, (kernel, rank) in enumerate(
                zip(gpy_kernels[1:], ranks[1:]), start=1):
            K += ICM(
                input_dim, num_outputs, kernel, rank,
                name='ICM{}'.format(idx))
        K.name = 'LCM'

        if sparse > 0:
            return models.SparseGPCoregionalizedRegression(
                Xs, columns, kernel=K, num_inducing=sparse)
        return models.GPCoregionalizedRegression(Xs, columns, kernel=K)