In robust estimation one frequently needs an initial or auxiliary estimate of scale. For this one usually takes the median absolute deviation <tex-math>\$\mathrm{MAD}\_n = 1.4826 \operatorname{med}\_i\{| x\_i - \operatorname{med}\_jx\_j|\}\$</tex-math>, because it has a simple explicit formula, needs little computation time, and is very robust as witnessed by its bounded influence function and its 50\% breakdown point. But there is still room for improvement in two areas: the fact that MAD<sub>n</sub> is aimed at symmetric distributions and its low (37\%) Gaussian efficiency. In this article we set out to construct explicit and 50\% breakdown scale estimators that are more efficient. We consider the estimator <tex-math>\$S\_n = 1.1926 \operatorname{med}\_i\{\operatorname{med}\_j|x\_i - x\_j|\}\$</tex-math> and the estimator Q<sub>n</sub> given by the .25 quantile of the distances <latex>\$\{|x\_i - x\_j|; i < j\}\$</latex>. Note that S<sub>n</sub> and Q<sub>n</sub> do not need any location estimate. Both S<sub>n</sub> and Q<sub>n</sub> can be computed using O(n log n) time and O(n) storage. The Gaussian efficiency of S<sub>n</sub> is 58\%, whereas Q<sub>n</sub> attains 82\%. We study S<sub>n</sub> and Q<sub>n</sub> by means of their influence functions, their bias curves (for implosion as well as explosion), and their finite-sample performance. Their behavior is also compared at non-Gaussian models, including the negative exponential model where S<sub>n</sub> has a lower gross-error sensitivity than the MAD.
Description
CiteULike: Alternatives to the Median Absolute Deviation
%0 Journal Article
%1 rousseeuw-qn-1993
%A Rousseeuw, Peter J.
%A Croux, Christophe
%D 1993
%I American Statistical Association
%J Journal of the American Statistical Association
%K robust scale statistics
%N 424
%R 10.2307/2291267
%T Alternatives to the Median Absolute Deviation
%U http://dx.doi.org/10.2307/2291267
%V 88
%X In robust estimation one frequently needs an initial or auxiliary estimate of scale. For this one usually takes the median absolute deviation <tex-math>\$\mathrm{MAD}\_n = 1.4826 \operatorname{med}\_i\{| x\_i - \operatorname{med}\_jx\_j|\}\$</tex-math>, because it has a simple explicit formula, needs little computation time, and is very robust as witnessed by its bounded influence function and its 50\% breakdown point. But there is still room for improvement in two areas: the fact that MAD<sub>n</sub> is aimed at symmetric distributions and its low (37\%) Gaussian efficiency. In this article we set out to construct explicit and 50\% breakdown scale estimators that are more efficient. We consider the estimator <tex-math>\$S\_n = 1.1926 \operatorname{med}\_i\{\operatorname{med}\_j|x\_i - x\_j|\}\$</tex-math> and the estimator Q<sub>n</sub> given by the .25 quantile of the distances <latex>\$\{|x\_i - x\_j|; i < j\}\$</latex>. Note that S<sub>n</sub> and Q<sub>n</sub> do not need any location estimate. Both S<sub>n</sub> and Q<sub>n</sub> can be computed using O(n log n) time and O(n) storage. The Gaussian efficiency of S<sub>n</sub> is 58\%, whereas Q<sub>n</sub> attains 82\%. We study S<sub>n</sub> and Q<sub>n</sub> by means of their influence functions, their bias curves (for implosion as well as explosion), and their finite-sample performance. Their behavior is also compared at non-Gaussian models, including the negative exponential model where S<sub>n</sub> has a lower gross-error sensitivity than the MAD.
@comment{
  Journal article record as exported through BibSonomy (see biburl) from a
  CiteULike posting (see the citeulike-* and description fields).
  The abstract uses real inline LaTeX math instead of the HTML-style
  tex-math / sub tags of the raw export, so it typesets if a style prints it.
  NOTE(review): pages and month were added from the published issue
  (JASA vol. 88, no. 424) -- confirm against the DOI metadata.
}
@article{rousseeuw-qn-1993,
  abstract = {In robust estimation one frequently needs an initial or auxiliary estimate of scale. For this one usually takes the median absolute deviation $\mathrm{MAD}_n = 1.4826\,\mathrm{med}_i\{|x_i - \mathrm{med}_j x_j|\}$, because it has a simple explicit formula, needs little computation time, and is very robust as witnessed by its bounded influence function and its 50\% breakdown point. But there is still room for improvement in two areas: the fact that $\mathrm{MAD}_n$ is aimed at symmetric distributions and its low (37\%) Gaussian efficiency. In this article we set out to construct explicit and 50\% breakdown scale estimators that are more efficient. We consider the estimator $S_n = 1.1926\,\mathrm{med}_i\{\mathrm{med}_j |x_i - x_j|\}$ and the estimator $Q_n$ given by the .25 quantile of the distances $\{|x_i - x_j|;\ i < j\}$. Note that $S_n$ and $Q_n$ do not need any location estimate. Both $S_n$ and $Q_n$ can be computed using $O(n \log n)$ time and $O(n)$ storage. The Gaussian efficiency of $S_n$ is 58\%, whereas $Q_n$ attains 82\%. We study $S_n$ and $Q_n$ by means of their influence functions, their bias curves (for implosion as well as explosion), and their finite-sample performance. Their behavior is also compared at non-Gaussian models, including the negative exponential model where $S_n$ has a lower gross-error sensitivity than the MAD.},
  added-at = {2012-03-28T00:07:03.000+0200},
  author = {Rousseeuw, Peter J. and Croux, Christophe},
  biburl = {https://www.bibsonomy.org/bibtex/2e8c61341278ddb3fbdb7dd9c5fa3f895/vivion},
  citeulike-article-id = {8177320},
  citeulike-linkout-0 = {http://dx.doi.org/10.2307/2291267},
  citeulike-linkout-1 = {http://www.jstor.org/stable/2291267},
  description = {CiteULike: Alternatives to the Median Absolute Deviation},
  doi = {10.2307/2291267},
  interhash = {7b1c090efc0c6f9dc7f99994a8e72408},
  intrahash = {e8c61341278ddb3fbdb7dd9c5fa3f895},
  issn = {0162-1459},
  journal = {Journal of the American Statistical Association},
  keywords = {robust scale statistics},
  month = dec,
  number = 424,
  pages = {1273--1283},
  posted-at = {2010-11-02 15:30:21},
  priority = {0},
  publisher = {American Statistical Association},
  timestamp = {2012-03-28T00:07:03.000+0200},
  title = {Alternatives to the Median Absolute Deviation},
  url = {https://doi.org/10.2307/2291267},
  volume = 88,
  year = 1993
}