We consider the problem of learning a one-hidden-layer neural network: we
assume the input $x\in \mathbb{R}^d$ is drawn from a Gaussian distribution and the
label $y = a^\top \sigma(Bx) + \xi$, where $a$ is a nonnegative vector in
$\mathbb{R}^m$ with $m\le d$, $B\in \mathbb{R}^{m\times d}$ is a full-rank
weight matrix, and $\xi$ is a noise vector. We first give an analytic formula
for the population risk of the standard squared loss and demonstrate that it
implicitly attempts to decompose a sequence of low-rank tensors simultaneously.
Inspired by the formula, we design a non-convex objective function $G(\cdot)$
whose landscape is guaranteed to have the following properties:
1. All local minima of $G$ are also global minima.
2. All global minima of $G$ correspond to the ground-truth parameters.
3. The value and gradient of $G$ can be estimated using samples.
With these properties, stochastic gradient descent on $G$ provably converges
to the global minimum and learns the ground-truth parameters. We also prove
a finite sample complexity result and validate the results with simulations.
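
For concreteness, below is a minimal Python sketch of the data-generating model
in the abstract, followed by plain SGD on the standard squared loss that the
paper analyzes first (the specially designed objective $G(\cdot)$ is not spelled
out in the abstract and is not reproduced here). The dimensions, the ReLU choice
for $\sigma$, the noise scale, and the step size are illustrative assumptions,
not values from the paper.

import numpy as np

rng = np.random.default_rng(0)

# Illustrative sizes; the paper only requires m <= d and full-rank B.
d, m, n = 20, 10, 5000

# Ground-truth parameters: a nonnegative vector a in R^m and a
# full-rank weight matrix B in R^{m x d}.
a_true = np.abs(rng.standard_normal(m))
B_true = rng.standard_normal((m, d))  # Gaussian matrix: full rank almost surely

def sigma(z):
    # ReLU, chosen here for illustration; the abstract leaves sigma unspecified.
    return np.maximum(z, 0.0)

# Gaussian inputs and labels y = a^T sigma(B x) + xi.
X = rng.standard_normal((n, d))
y = sigma(X @ B_true.T) @ a_true + 0.01 * rng.standard_normal(n)

# Plain SGD on the empirical squared loss (1/2) * (a^T sigma(B x) - y)^2.
a_hat = np.abs(rng.standard_normal(m))
B_hat = rng.standard_normal((m, d))
lr = 1e-3
for _ in range(50_000):
    i = rng.integers(n)
    x_i, y_i = X[i], y[i]
    pre = B_hat @ x_i                    # pre-activations, shape (m,)
    h = sigma(pre)                       # hidden activations
    r = a_hat @ h - y_i                  # residual
    grad_a = r * h                       # gradient w.r.t. a
    grad_B = r * np.outer(a_hat * (pre > 0), x_i)  # ReLU subgradient w.r.t. B
    a_hat -= lr * grad_a
    B_hat -= lr * grad_B

print("training MSE:", np.mean((sigma(X @ B_hat.T) @ a_hat - y) ** 2))

Note that nothing guarantees this vanilla squared-loss SGD avoids bad local
minima; the point of the paper's objective $G$ is precisely that every local
minimum of $G$ is global, so the same kind of stochastic gradient procedure
provably recovers the ground-truth parameters.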
@article{ge2017learning,
abstract = {We consider the problem of learning a one-hidden-layer neural network: we
assume the input $x\in \mathbb{R}^d$ is drawn from a Gaussian distribution and the
label $y = a^\top \sigma(Bx) + \xi$, where $a$ is a nonnegative vector in
$\mathbb{R}^m$ with $m\le d$, $B\in \mathbb{R}^{m\times d}$ is a full-rank
weight matrix, and $\xi$ is a noise vector. We first give an analytic formula
for the population risk of the standard squared loss and demonstrate that it
implicitly attempts to decompose a sequence of low-rank tensors simultaneously.
Inspired by the formula, we design a non-convex objective function $G(\cdot)$
whose landscape is guaranteed to have the following properties: 1. All local
minima of $G$ are also global minima.
2. All global minima of $G$ correspond to the ground-truth parameters.
3. The value and gradient of $G$ can be estimated using samples.
With these properties, stochastic gradient descent on $G$ provably converges
to the global minimum and learns the ground-truth parameters. We also prove
a finite sample complexity result and validate the results with simulations.},
added-at = {2019-09-25T05:03:48.000+0200},
author = {Ge, Rong and Lee, Jason D. and Ma, Tengyu},
biburl = {https://www.bibsonomy.org/bibtex/2ca6e78fd84ca5fa15eb0bced26a709ad/kirk86},
description = {[1711.00501] Learning One-hidden-layer Neural Networks with Landscape Design},
interhash = {dea305f00952dadea760438eb5d0f5e9},
intrahash = {ca6e78fd84ca5fa15eb0bced26a709ad},
keywords = {deep-learning readings theory},
note = {cite arxiv:1711.00501},
timestamp = {2019-09-25T05:03:48.000+0200},
title = {Learning One-hidden-layer Neural Networks with Landscape Design},
url = {http://arxiv.org/abs/1711.00501},
year = 2017
}