We give a new proof of the "transfer theorem" underlying adaptive data
analysis: that any mechanism for answering adaptively chosen statistical
queries that is differentially private and sample-accurate is also accurate
out-of-sample. Our new proof is elementary and gives structural insights that
we expect will be useful elsewhere. We show: 1) that differential privacy
ensures that the expectation of any query on the posterior distribution on
datasets induced by the transcript of the interaction is close to its true
value on the data distribution, and 2) sample accuracy on its own ensures that
any query answer produced by the mechanism is close to its posterior
expectation with high probability. This second claim follows from a thought
experiment in which we imagine that the dataset is resampled from the posterior
distribution after the mechanism has committed to its answers. The transfer
theorem then follows by summing these two bounds, and in particular, avoids the
"monitor argument" used to derive high probability bounds in prior work. An
upshot of our new proof technique is that the concrete bounds we obtain are
substantially better than the best previously known bounds, even though the
improvements are in the constants, rather than the asymptotics (which are known
to be tight). As we show, our new bounds outperform the naive
"sample-splitting" baseline at dramatically smaller dataset sizes compared to
the previous state of the art, bringing techniques from this literature closer
to practicality.
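
To make the final summing step concrete, the proof's decomposition is a triangle inequality (the notation below is ours, not the paper's): writing $\mathcal{P}$ for the data distribution, $\mathcal{Q}$ for the posterior over datasets induced by the transcript, $q$ for a query, and $a$ for the mechanism's answer to it,

\[
\underbrace{\lvert a - q(\mathcal{P}) \rvert}_{\text{out-of-sample error}}
\;\le\;
\underbrace{\bigl\lvert a - \mathbb{E}_{S \sim \mathcal{Q}}[q(S)] \bigr\rvert}_{\text{claim 2: sample accuracy, w.h.p.}}
\;+\;
\underbrace{\bigl\lvert \mathbb{E}_{S \sim \mathcal{Q}}[q(S)] - q(\mathcal{P}) \bigr\rvert}_{\text{claim 1: differential privacy}} ,
\]

so any mechanism satisfying both hypotheses is out-of-sample accurate, with the two error terms simply added.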
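
The practical claim in the last sentence can be illustrated with a toy simulation (our own sketch, not the paper's experiment; the dataset size n, query count k, and noise scale sigma below are hypothetical, and the queries are fixed rather than adaptively chosen, to keep the sketch short):

# Toy comparison (our construction): answer k statistical queries on
# n samples either by
#   (a) adding Gaussian noise to the full-sample mean (the DP-style
#       mechanism that transfer theorems cover), or
#   (b) sample splitting: each query gets an exact mean on n/k fresh points.
import numpy as np

rng = np.random.default_rng(0)
n, k = 10_000, 100            # dataset size, number of queries (hypothetical)
sigma = 1.0 / np.sqrt(n)      # noise scale (hypothetical; a real DP mechanism
                              # would calibrate sigma to (eps, delta) and k)

data = rng.binomial(1, 0.5, size=n)   # x_i ~ Bernoulli(1/2), so q(P) = 0.5
splits = np.array_split(data, k)      # disjoint n/k-point chunks for (b)

noisy_err, split_err = [], []
for j in range(k):
    # (a) noisy answer computed on the full sample
    noisy_err.append(abs(data.mean() + rng.normal(0, sigma) - 0.5))
    # (b) exact answer computed on the j-th fresh split
    split_err.append(abs(splits[j].mean() - 0.5))

print(f"max error, noisy full sample : {max(noisy_err):.4f}")
print(f"max error, sample splitting  : {max(split_err):.4f}")

On runs like this, the noisy full-sample answers have a smaller worst-case error than the split answers once n is modestly large, since sample splitting leaves each query only n/k points; the paper's contribution is tight enough constants that this crossover happens at dramatically smaller n than under prior bounds.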
@article{jung2019analysis,
author = {Jung, Christopher and Ligett, Katrina and Neel, Seth and Roth, Aaron and Sharifi-Malvajerdi, Saeed and Shenfeld, Moshe},
keywords = {bounds differential-privacy probability theory},
note = {arXiv:1909.03577},
title = {A New Analysis of Differential Privacy's Generalization Guarantees},
url = {http://arxiv.org/abs/1909.03577},
year = 2019
}