We study learning algorithms that are restricted to using a small amount of
information from their input sample. We introduce a category of learning
algorithms we term $d$-bit information learners, which are algorithms whose
output conveys at most $d$ bits of information of their input. A central theme
in this work is that such algorithms generalize.
We focus on the learning capacity of these algorithms, and prove sample
complexity bounds with tight dependencies on the confidence and error
parameters. We also observe connections with well studied notions such as
sample compression schemes, Occam's razor, PAC-Bayes and differential privacy.
We discuss an approach that allows us to prove upper bounds on the amount of
information that algorithms reveal about their inputs, and also provide a lower
bound by showing a simple concept class for which every (possibly randomized)
empirical risk minimizer must reveal a lot of information. On the other hand,
we show that in the distribution-dependent setting every VC class has empirical
risk minimizers that do not reveal a lot of information.
%0 Journal Article
%1 bassily2017learners
%A Bassily, Raef
%A Moran, Shay
%A Nachum, Ido
%A Shafer, Jonathan
%A Yehudayoff, Amir
%D 2017
%K bounds differential-privacy generalization information
%T Learners that Use Little Information
%U http://arxiv.org/abs/1710.05233
%X We study learning algorithms that are restricted to using a small amount of
information from their input sample. We introduce a category of learning
algorithms we term $d$-bit information learners, which are algorithms whose
output conveys at most $d$ bits of information of their input. A central theme
in this work is that such algorithms generalize.
We focus on the learning capacity of these algorithms, and prove sample
complexity bounds with tight dependencies on the confidence and error
parameters. We also observe connections with well studied notions such as
sample compression schemes, Occam's razor, PAC-Bayes and differential privacy.
We discuss an approach that allows us to prove upper bounds on the amount of
information that algorithms reveal about their inputs, and also provide a lower
bound by showing a simple concept class for which every (possibly randomized)
empirical risk minimizer must reveal a lot of information. On the other hand,
we show that in the distribution-dependent setting every VC class has empirical
risk minimizers that do not reveal a lot of information.
@article{bassily2017learners,
  abstract      = {We study learning algorithms that are restricted to using a small amount of
information from their input sample. We introduce a category of learning
algorithms we term $d$-bit information learners, which are algorithms whose
output conveys at most $d$ bits of information of their input. A central theme
in this work is that such algorithms generalize.
We focus on the learning capacity of these algorithms, and prove sample
complexity bounds with tight dependencies on the confidence and error
parameters. We also observe connections with well studied notions such as
sample compression schemes, Occam's razor, PAC-Bayes and differential privacy.
We discuss an approach that allows us to prove upper bounds on the amount of
information that algorithms reveal about their inputs, and also provide a lower
bound by showing a simple concept class for which every (possibly randomized)
empirical risk minimizer must reveal a lot of information. On the other hand,
we show that in the distribution-dependent setting every VC class has empirical
risk minimizers that do not reveal a lot of information.},
  added-at      = {2019-09-19T13:40:01.000+0200},
  archiveprefix = {arXiv},
  author        = {Bassily, Raef and Moran, Shay and Nachum, Ido and Shafer, Jonathan and Yehudayoff, Amir},
  biburl        = {https://www.bibsonomy.org/bibtex/213578652a266cf16a9d181be2883388a/kirk86},
  description   = {[1710.05233] Learners that Use Little Information},
  eprint        = {1710.05233},
  interhash     = {2de738067adaa59b84e21d0a0ba6bc5a},
  intrahash     = {13578652a266cf16a9d181be2883388a},
  journal       = {CoRR},
  keywords      = {bounds differential-privacy generalization information},
  timestamp     = {2019-09-19T13:41:20.000+0200},
  title         = {Learners that Use Little Information},
  url           = {http://arxiv.org/abs/1710.05233},
  volume        = {abs/1710.05233},
  year          = 2017
}