Identifying interesting relationships between pairs of variables in large data sets is increasingly important. Here, we present a measure of dependence for two-variable relationships: the maximal information coefficient (MIC). MIC captures a wide range of associations both functional and not, and for functional relationships provides a score that roughly equals the coefficient of determination ($R^2$) of the data relative to the regression function. MIC belongs to a larger class of maximal information-based nonparametric exploration (MINE) statistics for identifying and classifying relationships. We apply MIC and MINE to data sets in global health, gene expression, major-league baseball, and the human gut microbiota and identify known and novel relationships.
Beschreibung
Detecting Novel Associations in Large Data Sets | Science
%0 Journal Article
%1 reshef2011detecting
%A Reshef, David N.
%A Reshef, Yakir A.
%A Finucane, Hilary K.
%A Grossman, Sharon R.
%A McVean, Gilean
%A Turnbaugh, Peter J.
%A Lander, Eric S.
%A Mitzenmacher, Michael
%A Sabeti, Pardis C.
%D 2011
%I American Association for the Advancement of Science
%J Science
%K association corr correlation correlations data distance large linear measure nalab non nonlinear paper:cordis paper:fastcor set um1
%N 6062
%P 1518--1524
%R 10.1126/science.1205438
%T Detecting Novel Associations in Large Data Sets
%U https://science.sciencemag.org/content/334/6062/1518
%V 334
%X Identifying interesting relationships between pairs of variables in large data sets is increasingly important. Here, we present a measure of dependence for two-variable relationships: the maximal information coefficient (MIC). MIC captures a wide range of associations both functional and not, and for functional relationships provides a score that roughly equals the coefficient of determination (R^2) of the data relative to the regression function. MIC belongs to a larger class of maximal information-based nonparametric exploration (MINE) statistics for identifying and classifying relationships. We apply MIC and MINE to data sets in global health, gene expression, major-league baseball, and the human gut microbiota and identify known and novel relationships.
@comment{
  Journal article record exported from BibSonomy (see the biburl field).
  Fields are grouped as: core bibliographic data, identifiers and links,
  free-text annotations, then BibSonomy bookkeeping (added-at, timestamp,
  interhash, intrahash, biburl), which bibliography styles normally ignore.
  The abstract writes the coefficient of determination as inline math, since
  the plain-text export had dropped the superscript.
  NOTE(review): eprint holds a PDF URL rather than an archive identifier; it
  is kept exactly as exported -- confirm whether any style in use prints it.
}
@article{reshef2011detecting,
  author      = {Reshef, David N. and Reshef, Yakir A. and Finucane, Hilary K. and Grossman, Sharon R. and McVean, Gilean and Turnbaugh, Peter J. and Lander, Eric S. and Mitzenmacher, Michael and Sabeti, Pardis C.},
  title       = {Detecting Novel Associations in Large Data Sets},
  journal     = {Science},
  volume      = {334},
  number      = {6062},
  pages       = {1518--1524},
  year        = {2011},
  publisher   = {American Association for the Advancement of Science},
  issn        = {0036-8075},
  doi         = {10.1126/science.1205438},
  url         = {https://science.sciencemag.org/content/334/6062/1518},
  eprint      = {https://science.sciencemag.org/content/334/6062/1518.full.pdf},
  abstract    = {Identifying interesting relationships between pairs of variables in large data sets is increasingly important. Here, we present a measure of dependence for two-variable relationships: the maximal information coefficient (MIC). MIC captures a wide range of associations both functional and not, and for functional relationships provides a score that roughly equals the coefficient of determination ($R^2$) of the data relative to the regression function. MIC belongs to a larger class of maximal information-based nonparametric exploration (MINE) statistics for identifying and classifying relationships. We apply MIC and MINE to data sets in global health, gene expression, major-league baseball, and the human gut microbiota and identify known and novel relationships.},
  keywords    = {association corr correlation correlations data distance large linear measure nalab non nonlinear paper:cordis paper:fastcor set um1},
  description = {Detecting Novel Associations in Large Data Sets | Science},
  added-at    = {2019-11-06T03:01:46.000+0100},
  timestamp   = {2022-01-28T01:39:40.000+0100},
  interhash   = {bda723a0d064dc5131a81a246b1b5e74},
  intrahash   = {fdcfce1ef16e058f237cce199ae9dd06},
  biburl      = {https://www.bibsonomy.org/bibtex/2fdcfce1ef16e058f237cce199ae9dd06/becker},
}