Training neural networks involves solving large-scale non-convex optimization
problems. This task has long been believed to be extremely difficult, with fear
of local minima and other obstacles motivating a variety of schemes to improve
optimization, such as unsupervised pretraining. However, modern neural networks
are able to achieve negligible training error on complex tasks, using only
direct training with stochastic gradient descent. We introduce a simple
analysis technique to look for evidence that such networks are overcoming local
optima. We find that, in fact, on a straight path from initialization to
solution, a variety of state-of-the-art neural networks never encounter any
significant obstacles.
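
The technique the abstract alludes to is linear interpolation in parameter
space: set theta(alpha) = (1 - alpha) * theta_init + alpha * theta_final and
evaluate the training loss as alpha sweeps from 0 to 1. The sketch below is a
minimal illustration of that idea in PyTorch, not the authors' code; the model,
data, and the helper name loss_on_line are placeholder assumptions.

# Minimal sketch of the linear-interpolation analysis described above.
# Assumes a PyTorch model; model, data, and loss are illustrative placeholders.
import copy
import torch
import torch.nn as nn

def loss_on_line(model_init, model_final, loss_fn, inputs, targets, num_alphas=25):
    """Evaluate the loss at theta(alpha) = (1 - alpha) * theta_init
    + alpha * theta_final along the straight path between two parameter sets."""
    probe = copy.deepcopy(model_init)  # scratch model to hold interpolated weights
    init_params = [p.detach() for p in model_init.parameters()]
    final_params = [p.detach() for p in model_final.parameters()]
    losses = []
    for alpha in torch.linspace(0.0, 1.0, num_alphas):
        with torch.no_grad():
            for p, p0, p1 in zip(probe.parameters(), init_params, final_params):
                p.copy_((1 - alpha) * p0 + alpha * p1)
            losses.append(loss_fn(probe(inputs), targets).item())
    return losses

# Illustrative usage with a toy regression problem (hypothetical setup).
torch.manual_seed(0)
model = nn.Sequential(nn.Linear(10, 32), nn.Tanh(), nn.Linear(32, 1))
model_init = copy.deepcopy(model)   # snapshot of the initialization
x, y = torch.randn(256, 10), torch.randn(256, 1)
loss_fn = nn.MSELoss()

opt = torch.optim.SGD(model.parameters(), lr=0.1)
for _ in range(200):                # train to a solution with plain SGD
    opt.zero_grad()
    loss_fn(model(x), y).backward()
    opt.step()

print(loss_on_line(model_init, model, loss_fn, x, y))

A curve produced this way that decreases roughly monotonically in alpha is the
kind of evidence the paper reports: no significant obstacle, such as a barrier
around a local minimum, lies on the straight path from initialization to
solution.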
@article{goodfellow2014qualitatively,
author = {Goodfellow, Ian J. and Vinyals, Oriol and Saxe, Andrew M.},
journal = {arXiv preprint arXiv:1412.6544},
title = {Qualitatively characterizing neural network optimization problems},
url = {http://arxiv.org/abs/1412.6544},
year = 2014
}