Excellent variational approximations to Gaussian process posteriors have been
developed which avoid the $\mathcal{O}\left(N^3\right)$ scaling with dataset
size $N$. They reduce the computational cost to $\mathcal{O}\left(NM^2\right)$,
with $M \ll N$ being the number of inducing variables, which summarise the
process. While the computational cost seems to be linear in $N$, the true
complexity of the algorithm depends on how $M$ must increase to ensure a
certain quality of approximation. We address this by characterising the
behavior of an upper bound on the KL divergence to the posterior. We show that
with high probability the KL divergence can be made arbitrarily small by
growing $M$ more slowly than $N$. A particular case of interest is that for
regression with normally distributed inputs in $D$ dimensions with the popular
Squared Exponential kernel, $M=\mathcal{O}(\log^D N)$ is sufficient. Our
results show that as datasets grow, Gaussian process posteriors can truly be
approximated cheaply, and provide a concrete rule for how to increase $M$ in
continual learning scenarios.
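
A minimal numerical sketch (not the authors' code) of the quantity the abstract refers to: since the collapsed ELBO lower-bounds $\log p(y)$ and Titsias' upper bound $U$ upper-bounds it, the gap $U - \mathrm{ELBO}$ upper-bounds the KL divergence from the sparse variational approximation to the posterior. The Squared Exponential kernel hyperparameters, the synthetic data, and the random inducing-point selection below are illustrative assumptions.

import numpy as np

def se_kernel(A, B, lengthscale=1.0, variance=1.0):
    # Squared Exponential kernel matrix k(A, B).
    d2 = np.sum(A**2, 1)[:, None] + np.sum(B**2, 1)[None, :] - 2 * A @ B.T
    return variance * np.exp(-0.5 * d2 / lengthscale**2)

def kl_upper_bound(X, y, Z, noise=0.1):
    # Gap between Titsias' upper bound U on log p(y) and the collapsed
    # ELBO; since ELBO <= log p(y) <= U, the gap upper-bounds the KL
    # divergence to the posterior.
    N = X.shape[0]
    Kuu = se_kernel(Z, Z) + 1e-8 * np.eye(len(Z))      # jitter for stability
    Kuf = se_kernel(Z, X)
    V = np.linalg.solve(np.linalg.cholesky(Kuu), Kuf)  # so Q_ff = V^T V
    Qff = V.T @ V                                      # Nystrom approximation
    t = N - np.trace(Qff)                              # tr(K_ff - Q_ff); k(x,x) = 1
    A = Qff + noise**2 * np.eye(N)
    _, logdet = np.linalg.slogdet(A)
    const = N * np.log(2 * np.pi)
    elbo = -0.5 * (const + logdet + y @ np.linalg.solve(A, y)) - t / (2 * noise**2)
    B = Qff + (noise**2 + t) * np.eye(N)               # trace-inflated covariance
    upper = -0.5 * (const + logdet + y @ np.linalg.solve(B, y))
    return upper - elbo

rng = np.random.default_rng(0)
N, D = 1000, 1
X = rng.normal(size=(N, D))                            # normally distributed inputs
y = np.sin(3 * X[:, 0]) + 0.1 * rng.normal(size=N)
for M in [5, 10, 20, 40]:                              # M grows far more slowly than N
    Z = X[rng.choice(N, M, replace=False)]             # inducing points from the data
    print(M, kl_upper_bound(X, y, Z))

On data like this the printed gap shrinks rapidly as $M$ grows, consistent with the $M=\mathcal{O}(\log^D N)$ rate the paper proves for this input/kernel combination.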
@article{burt2019rates,
abstract = {Excellent variational approximations to Gaussian process posteriors have been
developed which avoid the $\mathcal{O}\left(N^3\right)$ scaling with dataset
size $N$. They reduce the computational cost to $\mathcal{O}\left(NM^2\right)$,
with $M\ll N$ being the number of inducing variables, which summarise the
process. While the computational cost seems to be linear in $N$, the true
complexity of the algorithm depends on how $M$ must increase to ensure a
certain quality of approximation. We address this by characterising the
behavior of an upper bound on the KL divergence to the posterior. We show that
with high probability the KL divergence can be made arbitrarily small by
growing $M$ more slowly than $N$. A particular case of interest is that for
regression with normally distributed inputs in D-dimensions with the popular
Squared Exponential kernel, $M=\mathcal{O}(\log^D N)$ is sufficient. Our
results show that as datasets grow, Gaussian process posteriors can truly be
approximated cheaply, and provide a concrete rule for how to increase $M$ in
continual learning scenarios.},
added-at = {2019-06-12T17:46:16.000+0200},
author = {Burt, David R. and Rasmussen, Carl E. and van der Wilk, Mark},
biburl = {https://www.bibsonomy.org/bibtex/2d0eb92eb8ef5723dcb20c228021ba360/kirk86},
description = {[1903.03571] Rates of Convergence for Sparse Variational Gaussian Process Regression},
interhash = {31df05d6ccbe4ce1f1d0e699832802fb},
intrahash = {d0eb92eb8ef5723dcb20c228021ba360},
  keywords = {best_paper complexity convergence gaussian-processes icml2019 sparsity},
note = {cite arxiv:1903.03571},
timestamp = {2019-11-07T16:05:51.000+0100},
title = {Rates of Convergence for Sparse Variational Gaussian Process Regression},
url = {http://arxiv.org/abs/1903.03571},
year = 2019
}