Numerous empirical studies have corroborated that noise plays a crucial
role in the effective and efficient training of neural networks. The theory
behind this, however, is still largely unknown. This paper studies this
fundamental problem by training a simple two-layer convolutional neural
network model. Although training such a network requires solving a nonconvex
optimization problem with a spurious local optimum and a global optimum, we
prove that perturbed gradient descent and perturbed mini-batch stochastic
gradient algorithms, in conjunction with noise annealing, are guaranteed to
converge to a global optimum in polynomial time from arbitrary
initialization. This implies that the noise enables the algorithms to escape
the spurious local optimum efficiently. Numerical experiments are provided to
support our theory.
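
For intuition, the following is a minimal sketch of the kind of procedure the
abstract describes: perturbed gradient descent with noise annealing, run here
on a hypothetical one-dimensional nonconvex loss with one spurious local
optimum and one global optimum. The toy loss, step size, noise scale, and
annealing schedule are illustrative assumptions, not the two-layer CNN
objective or the constants analyzed in the paper.

import numpy as np

def toy_grad(w):
    # Gradient of L(w) = w**4 - 2*w**2 - 0.5*w, a toy 1-D nonconvex loss
    # with a spurious local optimum near w = -0.93 and a global optimum
    # near w = 1.06 (a stand-in for the paper's two-layer CNN objective).
    return 4 * w**3 - 4 * w - 0.5

def perturbed_gd(w0, steps=3000, lr=0.01, sigma0=0.3, anneal=0.997):
    # Perturbed gradient descent: a plain gradient step plus isotropic
    # Gaussian noise whose scale sigma is geometrically annealed toward zero.
    rng = np.random.default_rng(0)
    w, sigma = w0, sigma0
    for _ in range(steps):
        w = w - lr * toy_grad(w) + sigma * rng.standard_normal()
        sigma *= anneal  # noise annealing: shrink the perturbation level
    return w

# Starting inside the spurious basin; the injected noise lets the iterate
# cross the barrier, so it typically settles near the global optimum
# w ~ 1.06 rather than the spurious one near w ~ -0.93.
print(perturbed_gd(w0=-0.95))

The annealing schedule matters here: constant noise would keep the iterate
diffusing indefinitely, while noise that decays to zero lets it first escape
the spurious basin and then converge, which mirrors the role the abstract
attributes to noise annealing.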
Description
[1909.03172] Towards Understanding the Importance of Noise in Training Neural Networks
%0 Conference Paper
%1 zhou2019towards
%A Zhou, Mo
%A Liu, Tianyi
%A Li, Yan
%A Lin, Dachao
%A Zhou, Enlu
%A Zhao, Tuo
%D 2019
%K deep-learning icml2019 learning noise readings
%T Towards Understanding the Importance of Noise in Training Neural Networks
%U http://arxiv.org/abs/1909.03172
%X Numerous empirical studies have corroborated that noise plays a crucial
role in the effective and efficient training of neural networks. The theory
behind this, however, is still largely unknown. This paper studies this
fundamental problem by training a simple two-layer convolutional neural
network model. Although training such a network requires solving a nonconvex
optimization problem with a spurious local optimum and a global optimum, we
prove that perturbed gradient descent and perturbed mini-batch stochastic
gradient algorithms, in conjunction with noise annealing, are guaranteed to
converge to a global optimum in polynomial time from arbitrary
initialization. This implies that the noise enables the algorithms to escape
the spurious local optimum efficiently. Numerical experiments are provided to
support our theory.
@inproceedings{zhou2019towards,
abstract = {Numerous empirical studies have corroborated that noise plays a
crucial role in the effective and efficient training of neural networks. The
theory behind this, however, is still largely unknown. This paper studies this
fundamental problem by training a simple two-layer convolutional neural
network model. Although training such a network requires solving a nonconvex
optimization problem with a spurious local optimum and a global optimum, we
prove that perturbed gradient descent and perturbed mini-batch stochastic
gradient algorithms, in conjunction with noise annealing, are guaranteed to
converge to a global optimum in polynomial time from arbitrary
initialization. This implies that the noise enables the algorithms to escape
the spurious local optimum efficiently. Numerical experiments are provided to
support our theory.},
added-at = {2020-07-16T12:56:54.000+0200},
author = {Zhou, Mo and Liu, Tianyi and Li, Yan and Lin, Dachao and Zhou, Enlu and Zhao, Tuo},
biburl = {https://www.bibsonomy.org/bibtex/251e24a11b08429fd8170dcd1bb1146fd/kirk86},
description = {[1909.03172] Towards Understanding the Importance of Noise in Training Neural Networks},
interhash = {b19293627709233f30a5ba246a09e13a},
intrahash = {51e24a11b08429fd8170dcd1bb1146fd},
keywords = {deep-learning icml2019 learning noise readings},
note = {cite arxiv:1909.03172; Comment: International Conference on Machine Learning (ICML), 2019},
timestamp = {2020-07-16T12:56:54.000+0200},
title = {Towards Understanding the Importance of Noise in Training Neural Networks},
url = {http://arxiv.org/abs/1909.03172},
year = 2019
}