Many man-made and natural phenomena, including the intensity of earthquakes,
population of cities and size of international wars, are believed to follow
power-law distributions. The accurate identification of power-law patterns has
significant consequences for correctly understanding and modeling complex
systems. However, statistical evidence for or against the power-law hypothesis
is complicated by large fluctuations in the empirical distribution's tail, and
these are worsened when information is lost from binning the data. We adapt the
statistically principled framework for testing the power-law hypothesis,
developed by Clauset, Shalizi and Newman, to the case of binned data. This
approach includes maximum-likelihood fitting, a hypothesis test based on the
Kolmogorov--Smirnov goodness-of-fit statistic and likelihood ratio tests for
comparing against alternative explanations. We evaluate the effectiveness of
these methods on synthetic binned data with known structure, quantify the loss
of statistical power due to binning, and apply the methods to twelve real-world
binned data sets with heavy-tailed patterns.
%0 Journal Article
%1 Virkar2014Powerlaw
%A Virkar, Yogesh
%A Clauset, Aaron
%D 2014
%J The Annals of Applied Statistics
%K power_laws data-analysis
%N 1
%P 89-119
%R 10.1214/13-aoas710
%T Power-law distributions in binned empirical data
%U http://dx.doi.org/10.1214/13-aoas710
%V 8
%X Many man-made and natural phenomena, including the intensity of earthquakes,
population of cities and size of international wars, are believed to follow
power-law distributions. The accurate identification of power-law patterns has
significant consequences for correctly understanding and modeling complex
systems. However, statistical evidence for or against the power-law hypothesis
is complicated by large fluctuations in the empirical distribution's tail, and
these are worsened when information is lost from binning the data. We adapt the
statistically principled framework for testing the power-law hypothesis,
developed by Clauset, Shalizi and Newman, to the case of binned data. This
approach includes maximum-likelihood fitting, a hypothesis test based on the
Kolmogorov--Smirnov goodness-of-fit statistic and likelihood ratio tests for
comparing against alternative explanations. We evaluate the effectiveness of
these methods on synthetic binned data with known structure, quantify the loss
of statistical power due to binning, and apply the methods to twelve real-world
binned data sets with heavy-tailed patterns.
@comment{Journal article by Virkar and Clauset, Annals of Applied Statistics 8(1), 2014; arXiv preprint 1208.3524. Bibliographic fields come first, followed by the abstract and the bookmarking-service metadata kept from the original export.}
@article{Virkar2014Powerlaw,
  author               = {Virkar, Yogesh and Clauset, Aaron},
  title                = {Power-law distributions in binned empirical data},
  journal              = {The Annals of Applied Statistics},
  volume               = {8},
  number               = {1},
  pages                = {89--119},
  year                 = {2014},
  month                = mar,
  day                  = {14},
  doi                  = {10.1214/13-aoas710},
  url                  = {https://doi.org/10.1214/13-aoas710},
  issn                 = {1932-6157},
  eprint               = {1208.3524},
  archiveprefix        = {arXiv},
  keywords             = {power\_laws data-analysis},
  abstract             = {Many man-made and natural phenomena, including the intensity of earthquakes,
population of cities and size of international wars, are believed to follow
power-law distributions. The accurate identification of power-law patterns has
significant consequences for correctly understanding and modeling complex
systems. However, statistical evidence for or against the power-law hypothesis
is complicated by large fluctuations in the empirical distribution's tail, and
these are worsened when information is lost from binning the data. We adapt the
statistically principled framework for testing the power-law hypothesis,
developed by Clauset, Shalizi and Newman, to the case of binned data. This
approach includes maximum-likelihood fitting, a hypothesis test based on the
Kolmogorov--Smirnov goodness-of-fit statistic and likelihood ratio tests for
comparing against alternative explanations. We evaluate the effectiveness of
these methods on synthetic binned data with known structure, quantify the loss
of statistical power due to binning, and apply the methods to twelve real-world
binned data sets with heavy-tailed patterns.},
  added-at             = {2019-06-10T14:53:09.000+0200},
  timestamp            = {2019-08-01T16:11:12.000+0200},
  posted-at            = {2012-08-27 16:45:13},
  priority             = {2},
  biburl               = {https://www.bibsonomy.org/bibtex/241607429e6ae7f2b11779e88845fbe51/nonancourt},
  interhash            = {568b0f2c589f935ef48a1296ad3ec33c},
  intrahash            = {41607429e6ae7f2b11779e88845fbe51},
  citeulike-article-id = {11095825},
  citeulike-linkout-0  = {http://arxiv.org/abs/1208.3524},
  citeulike-linkout-1  = {http://arxiv.org/pdf/1208.3524},
  citeulike-linkout-2  = {http://dx.doi.org/10.1214/13-aoas710},
}