Variational inference is becoming more and more popular for approximating
intractable posterior distributions in Bayesian statistics and machine
learning. Meanwhile, a few recent works have provided theoretical justification
and new insights on deep neural networks for estimating smooth functions in
usual settings such as nonparametric regression. In this paper, we show that
variational inference for sparse deep learning retains the same generalization
properties than exact Bayesian inference. In particular, we highlight the
connection between estimation and approximation theories via the classical
bias-variance trade-off and show that it leads to near-minimax rates of
convergence for Hölder smooth functions. Additionally, we show that the model
selection framework over the neural network architecture via ELBO maximization
does not overfit and adaptively achieves the optimal rate of convergence.
Description
[1908.04847] Convergence Rates of Variational Inference in Sparse Deep Learning
%0 Journal Article
%1 cheriefabdellatif2019convergence
%A Chérief-Abdellatif, Badr-Eddine
%D 2019
%K convergence deep-learning inference readings sparsity variational
%T Convergence Rates of Variational Inference in Sparse Deep Learning
%U http://arxiv.org/abs/1908.04847
%X Variational inference is becoming more and more popular for approximating
intractable posterior distributions in Bayesian statistics and machine
learning. Meanwhile, a few recent works have provided theoretical justification
and new insights on deep neural networks for estimating smooth functions in
usual settings such as nonparametric regression. In this paper, we show that
variational inference for sparse deep learning retains the same generalization
properties than exact Bayesian inference. In particular, we highlight the
connection between estimation and approximation theories via the classical
bias-variance trade-off and show that it leads to near-minimax rates of
convergence for Hölder smooth functions. Additionally, we show that the model
selection framework over the neural network architecture via ELBO maximization
does not overfit and adaptively achieves the optimal rate of convergence.
@misc{cheriefabdellatif2019convergence,
  abstract      = {Variational inference is becoming more and more popular for approximating
intractable posterior distributions in Bayesian statistics and machine
learning. Meanwhile, a few recent works have provided theoretical justification
and new insights on deep neural networks for estimating smooth functions in
usual settings such as nonparametric regression. In this paper, we show that
variational inference for sparse deep learning retains the same generalization
properties than exact Bayesian inference. In particular, we highlight the
connection between estimation and approximation theories via the classical
bias-variance trade-off and show that it leads to near-minimax rates of
convergence for H{\"o}lder smooth functions. Additionally, we show that the model
selection framework over the neural network architecture via ELBO maximization
does not overfit and adaptively achieves the optimal rate of convergence.},
  added-at      = {2020-06-03T09:54:37.000+0200},
  archiveprefix = {arXiv},
  author        = {Ch{\'e}rief-Abdellatif, Badr-Eddine},
  biburl        = {https://www.bibsonomy.org/bibtex/2760b3a64a93f846be78468a52940a951/kirk86},
  description   = {[1908.04847] Convergence Rates of Variational Inference in Sparse Deep Learning},
  eprint        = {1908.04847},
  interhash     = {48033d108c90c60d81014e090dac8213},
  intrahash     = {760b3a64a93f846be78468a52940a951},
  keywords      = {convergence deep-learning inference readings sparsity variational},
  note          = {cite arxiv:1908.04847},
  timestamp     = {2020-06-03T09:54:37.000+0200},
  title         = {Convergence Rates of Variational Inference in Sparse Deep Learning},
  url           = {http://arxiv.org/abs/1908.04847},
  year          = {2019}
}