Inferring large-scale covariance matrices from sparse genomic data is an ubiquitous problem in bioinformatics. Clearly, the widely used standard covariance and correlation estimators are ill-suited for this purpose. As statistically efficient and computationally fast alternative we propose a novel shrinkage covariance estimator that exploits the Ledoit-Wolf (2003) lemma for analytic calculation of the optimal shrinkage intensity. Subsequently, we apply this improved covariance estimator (which has guaranteed minimum mean squared error, is well-conditioned, and is always positive definite even for small sample sizes) to the problem of inferring large-scale gene association networks. We show that it performs very favorably compared to competing approaches both in simulations as well as in application to real expression data.
%0 Journal Article
%1 schafer2005shrinkage
%A Schäfer, Juliane
%A Strimmer, Korbinian
%D 2005
%J Statistical Applications in Genetics and Molecular Biology
%K covariance_estimation covariance_matrix robust_statistics shrinkage statistics
%P Article32
%T A shrinkage approach to large-scale covariance matrix estimation and implications for functional genomics
%V 4
%X Inferring large-scale covariance matrices from sparse genomic data is an ubiquitous problem in bioinformatics. Clearly, the widely used standard covariance and correlation estimators are ill-suited for this purpose. As statistically efficient and computationally fast alternative we propose a novel shrinkage covariance estimator that exploits the Ledoit-Wolf (2003) lemma for analytic calculation of the optimal shrinkage intensity. Subsequently, we apply this improved covariance estimator (which has guaranteed minimum mean squared error, is well-conditioned, and is always positive definite even for small sample sizes) to the problem of inferring large-scale gene association networks. We show that it performs very favorably compared to competing approaches both in simulations as well as in application to real expression data.
@comment{
  Journal article record. The fields added-at, biburl, interhash, intrahash,
  keywords and timestamp are not standard BibTeX fields; they look like
  BibSonomy bookkeeping (the biburl path ends in the intrahash value) and are
  ignored by standard styles. The abstract is kept exactly as exported.
  The surname umlaut is written as a brace-wrapped accent command so that
  classic BibTeX treats it as one letter when sorting and building labels.
}
@article{schafer2005shrinkage,
  author    = {Sch{\"a}fer, Juliane and Strimmer, Korbinian},
  title     = {A shrinkage approach to large-scale covariance matrix estimation and implications for functional genomics},
  journal   = {Statistical Applications in Genetics and Molecular Biology},
  volume    = {4},
  number    = {1},
  pages     = {Article 32},
  year      = {2005},
  doi       = {10.2202/1544-6115.1175},
  abstract  = {Inferring large-scale covariance matrices from sparse genomic data is an ubiquitous problem in bioinformatics. Clearly, the widely used standard covariance and correlation estimators are ill-suited for this purpose. As statistically efficient and computationally fast alternative we propose a novel shrinkage covariance estimator that exploits the Ledoit-Wolf (2003) lemma for analytic calculation of the optimal shrinkage intensity. Subsequently, we apply this improved covariance estimator (which has guaranteed minimum mean squared error, is well-conditioned, and is always positive definite even for small sample sizes) to the problem of inferring large-scale gene association networks. We show that it performs very favorably compared to competing approaches both in simulations as well as in application to real expression data.},
  keywords  = {covariance_estimation covariance_matrix robust_statistics shrinkage statistics},
  added-at  = {2014-07-21T08:04:18.000+0200},
  timestamp = {2014-07-21T08:04:18.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/2ffaaba282f81237ded88c283f6dcf205/peter.ralph},
  interhash = {88dcbb43d8278288a1863fd1068a3ed4},
  intrahash = {ffaaba282f81237ded88c283f6dcf205},
}