Identifying interesting relationships between pairs of variables in large data sets is increasingly important. Here, we present a measure of dependence for two-variable relationships: the maximal information coefficient (MIC). MIC captures a wide range of associations both functional and not, and for functional relationships provides a score that roughly equals the coefficient of determination (R^2) of the data relative to the regression function. MIC belongs to a larger class of maximal information-based nonparametric exploration (MINE) statistics for identifying and classifying relationships. We apply MIC and MINE to data sets in global health, gene expression, major-league baseball, and the human gut microbiota and identify known and novel relationships.
%0 Journal Article
%1 reshef_detecting_2011
%A Reshef, David N
%A Reshef, Yakir A
%A Finucane, Hilary K
%A Grossman, Sharon R
%A McVean, Gilean
%A Turnbaugh, Peter J
%A Lander, Eric S
%A Mitzenmacher, Michael
%A Sabeti, Pardis C
%D 2011
%J Science
%K Big algorithms, cited, correlation, data data, highly information mining
%N 6062
%P 1518--1524
%R 10.1126/science.1205438
%T Detecting novel associations in large data sets
%U http://www.sciencemag.org/content/334/6062/1518
%V 334
%X Identifying interesting relationships between pairs of variables in large data sets is increasingly important. Here, we present a measure of dependence for two-variable relationships: the maximal information coefficient (MIC). MIC captures a wide range of associations both functional and not, and for functional relationships provides a score that roughly equals the coefficient of determination (R^2) of the data relative to the regression function. MIC belongs to a larger class of maximal information-based nonparametric exploration (MINE) statistics for identifying and classifying relationships. We apply MIC and MINE to data sets in global health, gene expression, major-league baseball, and the human gut microbiota and identify known and novel relationships.
@article{reshef_detecting_2011,
  abstract  = {Identifying interesting relationships between pairs of variables in large data sets is increasingly important. Here, we present a measure of dependence for two-variable relationships: the maximal information coefficient (MIC). MIC captures a wide range of associations both functional and not, and for functional relationships provides a score that roughly equals the coefficient of determination ($R^2$) of the data relative to the regression function. MIC belongs to a larger class of maximal information-based nonparametric exploration (MINE) statistics for identifying and classifying relationships. We apply MIC and MINE to data sets in global health, gene expression, major-league baseball, and the human gut microbiota and identify known and novel relationships.},
  added-at  = {2017-01-09T13:57:26.000+0100},
  author    = {Reshef, David N and Reshef, Yakir A and Finucane, Hilary K and Grossman, Sharon R and McVean, Gilean and Turnbaugh, Peter J and Lander, Eric S and Mitzenmacher, Michael and Sabeti, Pardis C},
  biburl    = {https://www.bibsonomy.org/bibtex/218303a66d4498017bd26aeb4c5c3c51e/yourwelcome},
  doi       = {10.1126/science.1205438},
  interhash = {bda723a0d064dc5131a81a246b1b5e74},
  intrahash = {18303a66d4498017bd26aeb4c5c3c51e},
  issn      = {0036-8075, 1095-9203},
  journal   = {Science},
  keywords  = {Big algorithms, cited, correlation, data data, highly information mining},
  language  = {en},
  month     = dec,
  number    = {6062},
  pages     = {1518--1524},
  timestamp = {2017-01-09T14:01:11.000+0100},
  title     = {Detecting novel associations in large data sets},
  url       = {http://www.sciencemag.org/content/334/6062/1518},
  urldate   = {2012-03-03},
  volume    = {334},
  year      = {2011},
}