We propose and analyze a variant of the classic Polyak-Ruppert averaging
scheme, broadly used in stochastic gradient methods. Rather than a uniform
average of the iterates, we consider a weighted average, with weights decaying
in a geometric fashion. In the context of linear least squares regression, we
show that this averaging scheme has the same regularizing effect as, and is
indeed asymptotically equivalent to, ridge regression. In particular, we derive
finite-sample bounds for the proposed approach that match the best known
results for regularized stochastic gradient methods.
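As a rough illustration of the scheme the abstract describes, below is a minimal
sketch of SGD on a synthetic linear least-squares problem that maintains a
geometrically weighted average of the iterates alongside the last iterate. The
step size eta, the decay factor beta, and the synthetic data are illustrative
assumptions, not values taken from the paper.

# Sketch: SGD for linear least squares with a geometrically weighted
# iterate average (an exponential moving average of the iterates).
# All hyperparameters here are assumed for illustration only.
import numpy as np

rng = np.random.default_rng(0)

# Synthetic least-squares problem: y = X @ w_true + noise
n, d = 1000, 10
X = rng.standard_normal((n, d))
w_true = rng.standard_normal(d)
y = X @ w_true + 0.1 * rng.standard_normal(n)

eta = 0.01   # step size (assumed)
beta = 0.99  # geometric decay of the averaging weights (assumed)

w = np.zeros(d)      # current SGD iterate
w_avg = np.zeros(d)  # geometrically weighted average of the iterates

for t in range(n):
    i = rng.integers(n)
    # Stochastic gradient of the per-sample loss 0.5 * (x_i @ w - y_i)^2
    grad = (X[i] @ w - y[i]) * X[i]
    w = w - eta * grad
    # Exponential moving average: the weight on iterate w_t decays
    # geometrically, so w_avg is proportional to sum_t beta^(n-t) * w_t.
    w_avg = beta * w_avg + (1 - beta) * w

print("last iterate error:    ", np.linalg.norm(w - w_true))
print("weighted average error:", np.linalg.norm(w_avg - w_true))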
@article{neu2018iterate,
abstract = {We propose and analyze a variant of the classic Polyak-Ruppert averaging
scheme, broadly used in stochastic gradient methods. Rather than a uniform
average of the iterates, we consider a weighted average, with weights decaying
in a geometric fashion. In the context of linear least squares regression, we
show that this averaging scheme has the same regularizing effect as, and is
indeed asymptotically equivalent to, ridge regression. In particular, we derive
finite-sample bounds for the proposed approach that match the best known
results for regularized stochastic gradient methods.},
author = {Neu, Gergely and Rosasco, Lorenzo},
keywords = {deep-learning foundations machine-learning optimization stable theory},
note = {cite arxiv:1802.08009},
title = {Iterate averaging as regularization for stochastic gradient descent},
url = {http://arxiv.org/abs/1802.08009},
year = 2018
}