We study the problem of nding an outlier-free subset of a
set of points (or a probability distribution) in n-dimensional
Euclidean space. A point x is dened to be a -outlier if
there exists some direction w in which its squared distance
from the mean along w is greater than times the average
squared distance from the mean along w1. Our main theorem is that for any > 0, there exists a (1 ) fraction of
the original distribution that has no O(
n
(b+log
n
))-outliers,
improving on the previous bound of O(n
7
b=). This bound
is shown to b e nearly the best possible. The theorem is
constructive, and results in a
1
1
approximation to the following optimization problem: given a distribution (i.e. the
ability to sample from it), and a parameter > 0, nd the
minimum for which there exists a subset of probability at
least (1 ) with no -outliers.
%0 Conference Paper
%1 DBLP:conf/stoc/DunaganV01
%A Dunagan, John
%A Vempala, Santosh
%B STOC
%D 2001
%K database differential_privacy outlier privacy robust_statistics
%P 627-636
%R 10.1145/380752.380860
%T Optimal outlier removal in high-dimensional
%X We study the problem of nding an outlier-free subset of a
set of points (or a probability distribution) in n-dimensional
Euclidean space. A point x is dened to be a -outlier if
there exists some direction w in which its squared distance
from the mean along w is greater than times the average
squared distance from the mean along w1. Our main theorem is that for any > 0, there exists a (1 ) fraction of
the original distribution that has no O(
n
(b+log
n
))-outliers,
improving on the previous bound of O(n
7
b=). This bound
is shown to b e nearly the best possible. The theorem is
constructive, and results in a
1
1
approximation to the following optimization problem: given a distribution (i.e. the
ability to sample from it), and a parameter > 0, nd the
minimum for which there exists a subset of probability at
least (1 ) with no -outliers.
@inproceedings{DBLP:conf/stoc/DunaganV01,
abstract = {We study the problem of nding an outlier-free subset of a
set of points (or a probability distribution) in n-dimensional
Euclidean space. A point x is dened to be a -outlier if
there exists some direction w in which its squared distance
from the mean along w is greater than times the average
squared distance from the mean along w[1]. Our main theorem is that for any > 0, there exists a (1 ) fraction of
the original distribution that has no O(
n
(b+log
n
))-outliers,
improving on the previous bound of O(n
7
b=). This bound
is shown to b e nearly the best possible. The theorem is
constructive, and results in a
1
1
approximation to the following optimization problem: given a distribution (i.e. the
ability to sample from it), and a parameter > 0, nd the
minimum for which there exists a subset of probability at
least (1 ) with no -outliers.},
added-at = {2009-10-06T19:38:55.000+0200},
author = {Dunagan, John and Vempala, Santosh},
bibsource = {DBLP, http://dblp.uni-trier.de},
biburl = {https://www.bibsonomy.org/bibtex/206b96048f0cb89028255a80cfaa28ffd/ytyoun},
booktitle = {STOC},
doi = {10.1145/380752.380860},
interhash = {324e40ce7e17aa645091c32b2f79a3c8},
intrahash = {06b96048f0cb89028255a80cfaa28ffd},
keywords = {database differential_privacy outlier privacy robust_statistics},
pages = {627-636},
timestamp = {2017-12-08T03:17:43.000+0100},
title = {Optimal outlier removal in high-dimensional},
year = 2001
}