Stochastic Gradient Langevin Dynamics (SGLD) is a popular variant of
Stochastic Gradient Descent, where properly scaled isotropic Gaussian noise is
added to an unbiased estimate of the gradient at each iteration. This modest
change allows SGLD to escape local minima and suffices to guarantee asymptotic
convergence to global minimizers for sufficiently regular non-convex objectives
(Gelfand and Mitter, 1991). The present work provides a nonasymptotic analysis
in the context of non-convex learning problems, giving finite-time guarantees
for SGLD to find approximate minimizers of both empirical and population risks.
As in the asymptotic setting, our analysis relates the discrete-time SGLD
Markov chain to a continuous-time diffusion process. A new tool that drives the
results is the use of weighted transportation cost inequalities to quantify the
rate of convergence of SGLD to a stationary distribution in the Euclidean
$2$-Wasserstein distance.
Description
[1702.03849] Non-convex learning via Stochastic Gradient Langevin Dynamics: a nonasymptotic analysis
%0 Generic
%1 raginsky2017nonconvex
%A Raginsky, Maxim
%A Rakhlin, Alexander
%A Telgarsky, Matus
%D 2017
%K optimization sgld stochastic_gradient_langevin_dynamics
%T Non-convex learning via Stochastic Gradient Langevin Dynamics: a
nonasymptotic analysis
%U http://arxiv.org/abs/1702.03849
%X Stochastic Gradient Langevin Dynamics (SGLD) is a popular variant of
Stochastic Gradient Descent, where properly scaled isotropic Gaussian noise is
added to an unbiased estimate of the gradient at each iteration. This modest
change allows SGLD to escape local minima and suffices to guarantee asymptotic
convergence to global minimizers for sufficiently regular non-convex objectives
(Gelfand and Mitter, 1991). The present work provides a nonasymptotic analysis
in the context of non-convex learning problems, giving finite-time guarantees
for SGLD to find approximate minimizers of both empirical and population risks.
As in the asymptotic setting, our analysis relates the discrete-time SGLD
Markov chain to a continuous-time diffusion process. A new tool that drives the
results is the use of weighted transportation cost inequalities to quantify the
rate of convergence of SGLD to a stationary distribution in the Euclidean
$2$-Wasserstein distance.
@misc{raginsky2017nonconvex,
  abstract      = {Stochastic Gradient Langevin Dynamics (SGLD) is a popular variant of
Stochastic Gradient Descent, where properly scaled isotropic Gaussian noise is
added to an unbiased estimate of the gradient at each iteration. This modest
change allows SGLD to escape local minima and suffices to guarantee asymptotic
convergence to global minimizers for sufficiently regular non-convex objectives
(Gelfand and Mitter, 1991). The present work provides a nonasymptotic analysis
in the context of non-convex learning problems, giving finite-time guarantees
for SGLD to find approximate minimizers of both empirical and population risks.
As in the asymptotic setting, our analysis relates the discrete-time SGLD
Markov chain to a continuous-time diffusion process. A new tool that drives the
results is the use of weighted transportation cost inequalities to quantify the
rate of convergence of SGLD to a stationary distribution in the Euclidean
$2$-Wasserstein distance.},
  added-at      = {2017-06-06T11:27:25.000+0200},
  archiveprefix = {arXiv},
  author        = {Raginsky, Maxim and Rakhlin, Alexander and Telgarsky, Matus},
  biburl        = {https://www.bibsonomy.org/bibtex/2c166514fc56240e71700fa70304b782f/suqbar},
  description   = {[1702.03849] Non-convex learning via Stochastic Gradient Langevin Dynamics: a nonasymptotic analysis},
  eprint        = {1702.03849},
  interhash     = {8bdc72c8811bdd5c24cefcdd7d28bc03},
  intrahash     = {c166514fc56240e71700fa70304b782f},
  keywords      = {optimization sgld stochastic_gradient_langevin_dynamics},
  note          = {29 pages},
  timestamp     = {2017-06-06T11:27:25.000+0200},
  title         = {Non-convex Learning via {Stochastic Gradient Langevin Dynamics}: A Nonasymptotic Analysis},
  url           = {http://arxiv.org/abs/1702.03849},
  year          = {2017},
}