The convergence of back-propagation learning is
analyzed so as to explain common phenomena observed by
practitioners. Many undesirable behaviors of backprop
can be avoided with tricks that are rarely exposed in
serious technical publications. This paper gives some
of those tricks, and offers explanations of why they
work. Many authors have suggested that second-order
optimization methods are advantageous for neural net
training. It is shown that most “classical”
second-order methods are impractical for large neural
networks. A few methods are proposed that do not have
these limitations.
%0 Book Section
%1 lecun-efficient-backprop-1998
%A LeCun, Yann
%A Bottou, Léon
%A Orr, Genevieve B.
%A Müller, Klaus-Robert
%B Neural Networks: Tricks of the Trade
%D 1998
%I Springer Berlin / Heidelberg
%K back-propagation character_recognition mnist network neural
%P 546
%R 10.1007/3-540-49430-8_2
%T Efficient BackProp
%U http://dx.doi.org/10.1007/3-540-49430-8_2
%X The convergence of back-propagation learning is
analyzed so as to explain common phenomena observed by
practitioners. Many undesirable behaviors of backprop
can be avoided with tricks that are rarely exposed in
serious technical publications. This paper gives some
of those tricks, and offers explanations of why they
work. Many authors have suggested that second-order
optimization methods are advantageous for neural net
training. It is shown that most “classical”
second-order methods are impractical for large neural
networks. A few methods are proposed that do not have
these limitations.
%& 2
%@ 978-3-540-65311-0
@incollection{lecun-efficient-backprop-1998,
abstract = {The convergence of back-propagation learning is
analyzed so as to explain common phenomena observed by
practitioners. Many undesirable behaviors of backprop
can be avoided with tricks that are rarely exposed in
serious technical publications. This paper gives some
of those tricks, and offers explanations of why they
work. Many authors have suggested that second-order
optimization methods are advantageous for neural net
training. It is shown that most ``classical''
second-order methods are impractical for large neural
networks. A few methods are proposed that do not have
these limitations.},
author = {LeCun, Yann and Bottou, L{\'{e}}on and Orr, Genevieve B. and M{\"{u}}ller, Klaus-Robert},
booktitle = {Neural Networks: Tricks of the Trade},
chapter = 2,
doi = {10.1007/3-540-49430-8_2},
isbn = {978-3-540-65311-0},
keywords = {back-propagation character_recognition mnist network neural},
pages = 546,
publisher = {Springer Berlin / Heidelberg},
series = {Lecture Notes in Computer Science},
title = {Efficient BackProp},
url = {http://dx.doi.org/10.1007/3-540-49430-8_2},
year = 1998
}