Deep nets generalize well despite having more parameters than the number of
training samples. Recent works try to give an explanation using PAC-Bayes and
margin-based analyses, but do not as yet result in sample complexity bounds
better than naive parameter counting. The current paper shows generalization
bounds that are orders of magnitude better in practice. These rely upon new
succinct reparametrizations of the trained net --- a compression that is
explicit and efficient. These yield generalization bounds via a simple
compression-based framework introduced here. Our results also provide some
theoretical justification for the widespread empirical success in compressing
deep nets. Analysis of the correctness of our compression relies upon some
newly identified "noise stability" properties of trained deep nets, which are
also experimentally verified. The study of these properties and the resulting
generalization bounds is also extended to convolutional nets, which had
eluded earlier attempts at proving generalization.
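
A minimal NumPy sketch (not from the paper; the layer sizes, noise scale, and
the untrained toy net are illustrative assumptions) of how the "noise
stability" property mentioned above might be probed empirically: inject
Gaussian noise at an intermediate layer and compare the relative change in the
network's output to the relative size of the injected noise. The paper's
finding concerns trained nets, which attenuate such noise; a random net like
this one merely illustrates the measurement itself.

import numpy as np

rng = np.random.default_rng(0)

# Toy fully connected ReLU net with random weights (a stand-in for a
# trained net; the paper measures this property on trained networks).
layers = [rng.standard_normal((d_out, d_in)) / np.sqrt(d_in)
          for d_in, d_out in [(64, 128), (128, 128), (128, 10)]]

def forward(x, inject_at=None, noise_scale=0.0):
    """Run the net; optionally add Gaussian noise to the activation
    produced by layer `inject_at`, scaled relative to its norm."""
    h = x
    for i, W in enumerate(layers):
        h = np.maximum(W @ h, 0.0) if i < len(layers) - 1 else W @ h
        if i == inject_at:
            noise = rng.standard_normal(h.shape)
            # Rescale so ||noise|| = noise_scale * ||h||.
            noise *= noise_scale * np.linalg.norm(h) / np.linalg.norm(noise)
            h = h + noise
    return h

x = rng.standard_normal(64)
clean = forward(x)
for layer in range(2):
    noisy = forward(x, inject_at=layer, noise_scale=0.5)
    # Noise stability would mean the relative output change is much
    # smaller than the 50% relative noise injected at the hidden layer.
    rel_change = np.linalg.norm(noisy - clean) / np.linalg.norm(clean)
    print(f"noise at layer {layer}: relative output change = {rel_change:.3f}")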
Description: [1802.05296] Stronger generalization bounds for deep nets via a compression approach
%0 Journal Article
%1 arora2018stronger
%A Arora, Sanjeev
%A Ge, Rong
%A Neyshabur, Behnam
%A Zhang, Yi
%D 2018
%K bounds complexity compression generalization stable
%T Stronger generalization bounds for deep nets via a compression approach
%U http://arxiv.org/abs/1802.05296
%X Deep nets generalize well despite having more parameters than the number of
training samples. Recent works try to give an explanation using PAC-Bayes and
margin-based analyses, but do not as yet result in sample complexity bounds
better than naive parameter counting. The current paper shows generalization
bounds that are orders of magnitude better in practice. These rely upon new
succinct reparametrizations of the trained net --- a compression that is
explicit and efficient. These yield generalization bounds via a simple
compression-based framework introduced here. Our results also provide some
theoretical justification for the widespread empirical success in compressing
deep nets. Analysis of the correctness of our compression relies upon some
newly identified "noise stability" properties of trained deep nets, which are
also experimentally verified. The study of these properties and the resulting
generalization bounds is also extended to convolutional nets, which had
eluded earlier attempts at proving generalization.
@article{arora2018stronger,
abstract = {Deep nets generalize well despite having more parameters than the number of
training samples. Recent works try to give an explanation using PAC-Bayes and
margin-based analyses, but do not as yet result in sample complexity bounds
better than naive parameter counting. The current paper shows generalization
bounds that are orders of magnitude better in practice. These rely upon new
succinct reparametrizations of the trained net --- a compression that is
explicit and efficient. These yield generalization bounds via a simple
compression-based framework introduced here. Our results also provide some
theoretical justification for the widespread empirical success in compressing
deep nets. Analysis of the correctness of our compression relies upon some
newly identified \textquotedblleft noise stability\textquotedblright{} properties of
trained deep nets, which are also experimentally verified. The study of these
properties and the resulting generalization bounds is also extended to
convolutional nets, which had eluded earlier attempts at proving
generalization.},
added-at = {2019-06-23T21:50:11.000+0200},
author = {Arora, Sanjeev and Ge, Rong and Neyshabur, Behnam and Zhang, Yi},
biburl = {https://www.bibsonomy.org/bibtex/2912f6cd383f81d36c29e1252286cbd43/kirk86},
description = {[1802.05296] Stronger generalization bounds for deep nets via a compression approach},
interhash = {96c0fc565c224538fefcc3aa45586f88},
intrahash = {912f6cd383f81d36c29e1252286cbd43},
keywords = {bounds complexity compression generalization stable},
note = {cite arxiv:1802.05296},
timestamp = {2019-06-23T21:50:11.000+0200},
title = {Stronger generalization bounds for deep nets via a compression approach},
url = {http://arxiv.org/abs/1802.05296},
year = 2018
}