Learning algorithms for artificial neural networks, and for Deep Learning in
particular, may seem to involve many bells and whistles, called
hyper-parameters. This chapter is meant as a practical guide with
recommendations for some of the most commonly used hyper-parameters,
particularly in the context of learning algorithms based on back-propagated
gradients and gradient-based optimization. It also discusses how to cope with
the fact that better results can often be obtained when many hyper-parameters
are allowed to vary and must therefore be tuned. Overall, it describes elements
of the practice used to successfully and efficiently train and debug
large-scale, and often deep, multi-layer neural networks. It closes with open
questions about the training difficulties observed with deeper architectures.
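As a minimal illustration of the setting the chapter addresses, the sketch
below shows where the most commonly tuned hyper-parameters (learning rate,
mini-batch size, number of training epochs, hidden-layer size) enter a plain
back-propagation training loop. This is a NumPy sketch under illustrative
assumptions, not code from the chapter; the data, architecture, and values
are all hypothetical.

    import numpy as np

    # Hypothetical toy data: 256 examples, 10 features, synthetic binary targets.
    rng = np.random.default_rng(0)
    X = rng.normal(size=(256, 10))
    y = (X.sum(axis=1, keepdims=True) > 0).astype(float)

    # Hyper-parameters of the kind the chapter surveys (illustrative values).
    learning_rate = 0.1   # step size of the gradient update
    batch_size = 32       # mini-batch size trades gradient noise for speed
    n_epochs = 50         # training duration; often set via early stopping instead
    n_hidden = 16         # capacity of the single hidden layer

    # Small random initialization, another choice the chapter discusses.
    W1 = rng.normal(scale=0.1, size=(10, n_hidden))
    b1 = np.zeros(n_hidden)
    W2 = rng.normal(scale=0.1, size=(n_hidden, 1))
    b2 = np.zeros(1)

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    for epoch in range(n_epochs):
        perm = rng.permutation(len(X))  # fresh shuffle each epoch
        for start in range(0, len(X), batch_size):
            idx = perm[start:start + batch_size]
            xb, yb = X[idx], y[idx]

            # Forward pass: one tanh hidden layer, sigmoid output.
            h = np.tanh(xb @ W1 + b1)
            p = sigmoid(h @ W2 + b2)

            # Backward pass for cross-entropy loss; the gradient of the
            # loss with respect to the output logits is (p - y).
            m = len(xb)
            d_logits = (p - yb) / m
            dW2 = h.T @ d_logits
            db2 = d_logits.sum(axis=0)
            dh = d_logits @ W2.T * (1.0 - h ** 2)  # tanh' = 1 - tanh^2
            dW1 = xb.T @ dh
            db1 = dh.sum(axis=0)

            # Plain SGD update: the learning rate scales every step.
            W1 -= learning_rate * dW1
            b1 -= learning_rate * db1
            W2 -= learning_rate * dW2
            b2 -= learning_rate * db2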
Description
Practical recommendations for gradient-based training of deep architectures
@misc{bengio2012practical,
author = {Bengio, Yoshua},
keywords = {learning-tips neural-network},
note = {arXiv:1206.5533},
timestamp = {2017-03-21T22:19:54.000+0100},
title = {Practical recommendations for gradient-based training of deep architectures},
url = {http://arxiv.org/abs/1206.5533},
year = 2012
}