# Source code for runlmc.models.gpy_lmc

# Copyright (c) 2016, Vladimir Feinberg
# Licensed under the BSD 3-clause license (see LICENSE)

# This file was modified from the GPy project. Its file header is replicated
# below. Its LICENSE.txt is replicated in the LICENSE file for this directory.

# Copyright (c) 2012 - 2014 the GPy Austhors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from itertools import count

import numpy as np

from .multigp import MultiGP
from ..util.docs import inherit_doc
from ..util.numpy_convenience import tesselate


@inherit_doc
class GPyLMC(MultiGP):
    """
    This wraps GPy for the Gaussian Process model for multioutput regression
    under a Linear Model of Coregionalization.

    This performs the inversion-based cubic-time algorithm.

    .. Note:: Because this implementation uses GPy, mean functions and
              normalization are unsupported.

    Uses the Gaussian likelihood. See :class:`runlmc.lmc.functional_kernel`
    for the explicit LMC formula.

    The DTCVAR algorithm (the `sparse` parameter) is based on Efficient
    Multioutput Gaussian Processes through Variational Inducing Kernels
    by Álvarez et al. 2010.

    :param Xs: input observations, should be a list of numpy arrays,
               where each numpy array is a design matrix for the inputs to
               output :math:`i`. If the :math:`i`-th input has :math:`n_i`
               data points, then this matrix can be :math:`n_i` or
               :math:`n_i\\times P` shape for input dimension :math:`P`,
               with the former re-interpreted as :math:`P=1`.
    :param Ys: output observations, this must be a list of one-dimensional
               numpy arrays, matching up with the number of rows in `Xs`.
    :param kernels: a list of (stationary) kernels which constitute the
                    terms of the LMC sums prior to coregionalization.
    :param ranks: list of ranks for coregionalization factors
    :type ranks: list of integer
    :param name: model name
    :type name: string
    :param sparse: an integer. If 0, uses
                   :py:class:`GPy.models.GPCoregionalizedRegression`,
                   the typical Cholesky algorithm.
                   If >0, then this determines the number of inducing points
                   used by the DTCVAR algorithm in
                   :py:class:`GPy.models.SparseGPCoregionalizedRegression`
    """

    # NOTE: the overridden public methods below are documented with `#`
    # comments rather than docstrings so that whatever `inherit_doc` does
    # with missing docstrings (presumably copying them from MultiGP --
    # confirm in util.docs) keeps working exactly as before.

    def __init__(self, Xs, Ys, kernels, ranks, name='GPyLMC', sparse=0):
        # Normalization is always disabled for this wrapper.
        super().__init__(Xs, Ys, normalize=False, name=name)
        # The inputs stored on the instance by the parent constructor
        # (self.Xs) are handed to GPy; the outputs are passed through as
        # given by the caller.
        self.gpy_model = GPyLMC._construct_gpy(
            self.Xs, Ys, kernels, ranks, sparse)

    def _raw_predict(self, Xs):
        # Intentionally empty: `predict` is overridden in this class and
        # talks to the wrapped GPy model directly.
        pass

    def parameters_changed(self):
        # Intentionally empty: this wrapper computes nothing itself, all
        # model evaluation is delegated to `self.gpy_model`.
        pass

    def log_likelihood(self):
        # Delegates to the wrapped GPy model.
        return self.gpy_model.log_likelihood()

    def optimize(self, **kwargs):
        # All keyword arguments are forwarded untouched to the wrapped GPy
        # model's optimizer; nothing is returned.
        self.gpy_model.optimize(**kwargs)

    def _stack_inputs(self, Xs):
        """
        Convert per-output inputs into the stacked layout handed to GPy.

        :param Xs: list of per-output input arrays, one entry per output
        :return: a triple ``(X, meta, lens)`` where ``X`` is every input
                 row stacked vertically with the output index appended as
                 the last column, ``meta`` is that index column on its own
                 and ``lens`` is the list of per-output row counts.
        """
        Xs = self._pad_dims(Xs)
        lens = [len(X) for X in Xs]
        # np.vstack always yields an array with at least two dimensions,
        # so no reshape is required before appending the index column.
        stacked = np.vstack(Xs)
        # Row j of `meta` holds the index of the output that row j of
        # `stacked` belongs to.
        meta = np.repeat(np.arange(len(Xs)), lens).reshape(-1, 1)
        return np.hstack([stacked, meta]), meta, lens

    def predict(self, Xs):
        # Returns a pair (means, variances); each is the flat GPy
        # prediction split back into one segment per output.
        X, meta, lens = self._stack_inputs(Xs)
        mu, var = self.gpy_model.predict(
            X, Y_metadata={'output_index': meta})
        return tesselate(mu.reshape(-1), lens), tesselate(var.reshape(-1), lens)

    def predict_quantiles(self, Xs, quantiles=(2.5, 97.5)):
        X, meta, lens = self._stack_inputs(Xs)
        Qs = np.array(self.gpy_model.predict_quantiles(
            X,
            quantiles=quantiles,
            Y_metadata={'output_index': meta}))
        # GPy is expected to hand back one column per quantile.
        assert Qs.shape[2] == 1
        Qs = Qs.reshape(len(quantiles), -1)
        # Qs is a (quantiles) x (num examples) array
        # We want (num examples) x (quantiles)
        # The segment lengths are passed as a concrete list (as in
        # `predict`) rather than a one-shot `map` iterator.
        return tesselate(Qs.T, lens)

    @staticmethod
    def _construct_gpy(Xs, Ys, kernels, ranks, sparse):
        """
        Build the GPy coregionalized regression model wrapped by this class.

        :param Xs: list of per-output input arrays, passed to GPy as-is
        :param Ys: list of per-output observations; each is reshaped into
                   a single column before being passed to GPy
        :param kernels: list of kernels exposing ``to_gpy()``; one ICM term
                        is created per (kernel, rank) pair
        :param ranks: list of coregionalization ranks, paired with
                      `kernels` by position
        :param sparse: if >0, the number of inducing points for
                       ``SparseGPCoregionalizedRegression``; otherwise
                       ``GPCoregionalizedRegression`` is used
        :return: the constructed GPy model
        """
        # GPy is imported lazily, only when a model is actually built.
        import GPy.models as models
        from GPy.util.multioutput import ICM

        kernels = [k.to_gpy() for k in kernels]
        # NOTE(review): the class docstring advertises P-dimensional
        # inputs, yet every ICM term is built with input dimension 1 --
        # confirm whether P > 1 is really supported through this wrapper.
        input_dim = 1
        num_outputs = len(Ys)
        Ys = [Y.reshape(-1, 1) for Y in Ys]

        # The LMC kernel is the sum of one ICM term per (kernel, rank)
        # pair, named ICM0, ICM1, ...
        K = ICM(input_dim, num_outputs, kernels[0], ranks[0], name='ICM0')
        for kernel, rank, idx in zip(kernels[1:], ranks[1:], count(1)):
            K += ICM(
                input_dim, num_outputs, kernel, rank, name='ICM{}'.format(idx))
        K.name = 'LCM'
        if sparse > 0:
            return models.SparseGPCoregionalizedRegression(
                Xs, Ys, kernel=K, num_inducing=sparse)
        return models.GPCoregionalizedRegression(Xs, Ys, kernel=K)