Biobank-based genome-wide association studies are enabling exciting insights in complex trait genetics, but much uncertainty remains over best practices for optimizing statistical power and computational efficiency in GWAS while controlling confounders. Here, we introduce a much faster version of our BOLT-LMM Bayesian mixed model association method --- capable of running analyses of the full UK Biobank cohort in a few days on a single compute node --- and show that it produces highly powered, robust test statistics when run on all 459K European samples (retaining related individuals). When used to conduct a GWAS for height in UK Biobank, BOLT-LMM achieved power equivalent to linear regression on 650K samples --- a 93% increase in effective sample size versus the common practice of analyzing unrelated British samples using linear regression (UK Biobank documentation; Bycroft et al. bioRxiv). Across a broader set of 23 highly heritable traits, the total number of independent GWAS loci detected increased from 5,839 to 10,759, an 84% increase. We recommend the use of BOLT-LMM (retaining related individuals) for biobank-scale analyses, and we have publicly released BOLT-LMM summary association statistics for the 23 traits analyzed as a resource for all researchers.
%0 Journal Article
%1 loh2018mixed
%A Loh, Po-Ru
%A Kichaev, Gleb
%A Gazal, Steven
%A Schoech, Armin P.
%A Price, Alkes L.
%D 2018
%I Cold Spring Harbor Laboratory
%J bioRxiv
%K GWAS methods mixed_models software
%R 10.1101/194944
%T Mixed model association for biobank-scale data sets
%U https://www.biorxiv.org/content/early/2018/01/04/194944
%X Biobank-based genome-wide association studies are enabling exciting insights in complex trait genetics, but much uncertainty remains over best practices for optimizing statistical power and computational efficiency in GWAS while controlling confounders. Here, we introduce a much faster version of our BOLT-LMM Bayesian mixed model association method --- capable of running analyses of the full UK Biobank cohort in a few days on a single compute node --- and show that it produces highly powered, robust test statistics when run on all 459K European samples (retaining related individuals). When used to conduct a GWAS for height in UK Biobank, BOLT-LMM achieved power equivalent to linear regression on 650K samples --- a 93% increase in effective sample size versus the common practice of analyzing unrelated British samples using linear regression (UK Biobank documentation; Bycroft et al. bioRxiv). Across a broader set of 23 highly heritable traits, the total number of independent GWAS loci detected increased from 5,839 to 10,759, an 84% increase. We recommend the use of BOLT-LMM (retaining related individuals) for biobank-scale analyses, and we have publicly released BOLT-LMM summary association statistics for the 23 traits analyzed as a resource for all researchers.
@comment{Record for the 2018 bioRxiv posting of Loh et al. Bibliographic fields come first; the trailing group (keywords, added-at, timestamp, biburl, interhash, intrahash) is bookkeeping carried over from the bibsonomy.org export. The direct PDF link is stored under the non-standard field fulltext-url instead of eprint, because eprint is meant for an archive identifier and not for a URL.}
@article{loh2018mixed,
  author       = {Loh, Po-Ru and Kichaev, Gleb and Gazal, Steven and Schoech, Armin P. and Price, Alkes L.},
  title        = {Mixed model association for biobank-scale data sets},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = {2018},
  doi          = {10.1101/194944},
  url          = {https://www.biorxiv.org/content/early/2018/01/04/194944},
  fulltext-url = {https://www.biorxiv.org/content/early/2018/01/04/194944.full.pdf},
  abstract     = {Biobank-based genome-wide association studies are enabling exciting insights in complex trait genetics, but much uncertainty remains over best practices for optimizing statistical power and computational efficiency in GWAS while controlling confounders. Here, we introduce a much faster version of our BOLT-LMM Bayesian mixed model association method --- capable of running analyses of the full UK Biobank cohort in a few days on a single compute node --- and show that it produces highly powered, robust test statistics when run on all 459K European samples (retaining related individuals). When used to conduct a GWAS for height in UK Biobank, BOLT-LMM achieved power equivalent to linear regression on 650K samples --- a 93\% increase in effective sample size versus the common practice of analyzing unrelated British samples using linear regression (UK Biobank documentation; Bycroft et al. bioRxiv). Across a broader set of 23 highly heritable traits, the total number of independent GWAS loci detected increased from 5,839 to 10,759, an 84\% increase. We recommend the use of BOLT-LMM (retaining related individuals) for biobank-scale analyses, and we have publicly released BOLT-LMM summary association statistics for the 23 traits analyzed as a resource for all researchers.},
  keywords     = {GWAS methods mixed_models software},
  added-at     = {2018-07-05T05:29:03.000+0200},
  timestamp    = {2018-07-05T05:29:03.000+0200},
  biburl       = {https://www.bibsonomy.org/bibtex/2f2edc9110fc8ef5f775be306cdfb6c32/peter.ralph},
  interhash    = {c6819c5d71a69176f11c7a78aaf7fa00},
  intrahash    = {f2edc9110fc8ef5f775be306cdfb6c32},
}