The success of deep learning has led to rising interest in the generalization
properties of the stochastic gradient descent (SGD) method, and stability is a
popular approach for studying them. Existing stability-based works have studied
nonconvex loss functions, but only bound the generalization error of SGD in
expectation. In this paper, we establish several generalization error bounds
with probabilistic guarantees for SGD. Specifically, for both general nonconvex
loss functions and gradient-dominant loss functions, we characterize the
on-average stability of the iterates generated by SGD in terms of the
on-average variance of the stochastic gradients. This characterization leads to
improved generalization error bounds for SGD. We then study the regularized
risk minimization problem with strongly convex regularizers and obtain improved
generalization error bounds for proximal SGD. With strongly convex
regularizers, we further establish generalization error bounds for nonconvex
loss functions under proximal SGD with a high-probability guarantee, i.e.,
exponential concentration in probability.
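As context for the proximal SGD results mentioned in the abstract, the sketch
below illustrates the standard proximal SGD update
w_{t+1} = prox_{eta r}(w_t - eta * grad f(w_t; z_t)) on a regularized empirical
risk, using the strongly convex regularizer r(w) = (lam/2)*||w||^2 as an
example. The loss, step size, and data are illustrative placeholders, not the
setting analyzed in the paper.

import numpy as np

def prox_l2(v, eta, lam):
    # Proximal operator of r(w) = (lam/2)*||w||^2:
    # argmin_w (lam/2)*||w||^2 + ||w - v||^2 / (2*eta)  =>  w = v / (1 + eta*lam).
    return v / (1.0 + eta * lam)

def proximal_sgd(grad_loss, data, dim, lam=0.1, eta=0.01, n_steps=1000, seed=0):
    # Proximal SGD on F(w) = (1/n) * sum_i f(w; z_i) + (lam/2)*||w||^2, where f may
    # be nonconvex. Each step samples one data point uniformly at random, takes a
    # gradient step on the loss, then applies the prox of the regularizer.
    rng = np.random.default_rng(seed)
    w = np.zeros(dim)
    for _ in range(n_steps):
        z = data[rng.integers(len(data))]
        w = prox_l2(w - eta * grad_loss(w, z), eta, lam)
    return w

# Toy usage with a smooth nonconvex per-sample loss f(w; (x, y)) = (sigmoid(w.x) - y)^2.
rng = np.random.default_rng(1)
X = rng.normal(size=(200, 5))
y = (X @ rng.normal(size=5) > 0).astype(float)

def grad_loss(w, z):
    x, label = z
    p = 1.0 / (1.0 + np.exp(-(x @ w)))
    return 2.0 * (p - label) * p * (1.0 - p) * x

w_hat = proximal_sgd(grad_loss, list(zip(X, y)), dim=5)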
@misc{zhou2018generalization,
author = {Zhou, Yi and Liang, Yingbin and Zhang, Huishuai},
biburl = {https://www.bibsonomy.org/bibtex/2943e6c94a50003a13d82104882fe7b32/jk_itwm},
description = {Generalization Error Bounds with Probabilistic Guarantee for SGD in
Nonconvex Optimization},
keywords = {SGD optimization theory},
note = {cite arxiv:1802.06903},
title = {Generalization Error Bounds with Probabilistic Guarantee for SGD in
Nonconvex Optimization},
url = {http://arxiv.org/abs/1802.06903},
year = 2018
}