Bayesian methods promise to fix many shortcomings of deep learning, but they
are impractical and rarely match the performance of standard methods, let alone
improve them. In this paper, we demonstrate practical training of deep networks
with natural-gradient variational inference. By applying techniques such as
batch normalisation, data augmentation, and distributed training, we achieve
similar performance in about the same number of epochs as the Adam optimiser,
even on large datasets such as ImageNet. Importantly, the benefits of Bayesian
principles are preserved: predictive probabilities are well-calibrated,
uncertainties on out-of-distribution data are improved, and continual-learning
performance is boosted. This work enables practical deep learning while
preserving benefits of Bayesian principles. A PyTorch implementation is
available as a plug-and-play optimiser.
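The released plug-and-play optimiser itself is not reproduced here; as a rough illustration only, the sketch below assumes a VOGN-style natural-gradient variational-inference update for a mean-field Gaussian posterior over the weights. All names, hyper-parameters, and the toy usage are illustrative assumptions, not the authors' API.

import torch

def ngvi_step(mu, m, s, grad_fn, *, n_data, lr=1e-3,
              beta1=0.9, beta2=0.999, prior_prec=1.0):
    # One natural-gradient VI update for q(w) = N(mu, sigma^2) with diagonal
    # covariance, using a squared-gradient curvature estimate
    # (a Vadam/VOGN-flavoured approximation; hypothetical, simplified code).
    delta = prior_prec / n_data                           # prior precision per data point
    sigma2 = 1.0 / (n_data * (s + delta))                 # current posterior variance
    w = mu + sigma2.sqrt() * torch.randn_like(mu)         # sample weights from q
    g = grad_fn(w)                                        # minibatch gradient at the sample
    m.mul_(beta1).add_(g + delta * mu, alpha=1 - beta1)   # first moment (with prior term)
    s.mul_(beta2).add_(g * g, alpha=1 - beta2)            # second-moment curvature estimate
    mu.sub_(lr * m / (s + delta))                         # natural-gradient update of the mean
    return mu, m, s

# Toy usage (also hypothetical): infer the mean of 1-D data under a squared loss.
data = torch.randn(1000) + 3.0
mu, m, s = torch.zeros(1), torch.zeros(1), torch.ones(1)
for _ in range(2000):
    batch = data[torch.randint(len(data), (32,))]
    mu, m, s = ngvi_step(mu, m, s, lambda w: w - batch.mean(), n_data=len(data))

Compared with Adam, the key differences in such an update are that weights are sampled from q before each gradient evaluation, and the same scale vector that preconditions the step also provides the posterior variance used for uncertainty estimates.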
@misc{osawa2019practical,
author = {Osawa, Kazuki and Swaroop, Siddharth and Jain, Anirudh and Eschenhagen, Runa and Turner, Richard E. and Yokota, Rio and Khan, Mohammad Emtiyaz},
keywords = {bayesian},
note = {arXiv:1906.02506, NeurIPS 2019},
title = {Practical Deep Learning with Bayesian Principles},
url = {http://arxiv.org/abs/1906.02506},
year = 2019
}