The number of human genomes being genotyped or sequenced increases exponentially and efficient haplotype estimation methods able to handle this amount of data are now required. Here we present a method, SHAPEIT4, which substantially improves upon other methods to process large genotype and high coverage sequencing datasets. It notably exhibits sub-linear running times with sample size, provides highly accurate haplotypes and allows integrating external phasing information such as large reference panels of haplotypes, collections of pre-phased variants and long sequencing reads. We provide SHAPEIT4 in an open source format and demonstrate its performance in terms of accuracy and running times on two gold standard datasets: the UK Biobank data and the Genome In A Bottle.
%0 Journal Article
%1 delaneau2019accurate
%A Delaneau, Olivier
%A Zagury, Jean-François
%A Robinson, Matthew R.
%A Marchini, Jonathan L.
%A Dermitzakis, Emmanouil T.
%D 2019
%J Nature Communications
%K haplotype_inference methods phasing
%N 1
%P 5436
%R 10.1038/s41467-019-13225-y
%T Accurate, scalable and integrative haplotype estimation
%U https://doi.org/10.1038/s41467-019-13225-y
%V 10
%X The number of human genomes being genotyped or sequenced increases exponentially and efficient haplotype estimation methods able to handle this amount of data are now required. Here we present a method, SHAPEIT4, which substantially improves upon other methods to process large genotype and high coverage sequencing datasets. It notably exhibits sub-linear running times with sample size, provides highly accurate haplotypes and allows integrating external phasing information such as large reference panels of haplotypes, collections of pre-phased variants and long sequencing reads. We provide SHAPEIT4 in an open source format and demonstrate its performance in terms of accuracy and running times on two gold standard datasets: the UK Biobank data and the Genome In A Bottle.
@comment{Delaneau et al. 2019, the SHAPEIT4 paper in Nature Communications (BibSonomy export). The pages field holds the article number alone, because the exported value was an open-ended range with no end page. The ISSN is written in its hyphenated NNNN-NNNN form.}
@article{delaneau2019accurate,
  abstract  = {The number of human genomes being genotyped or sequenced increases exponentially and efficient haplotype estimation methods able to handle this amount of data are now required. Here we present a method, SHAPEIT4, which substantially improves upon other methods to process large genotype and high coverage sequencing datasets. It notably exhibits sub-linear running times with sample size, provides highly accurate haplotypes and allows integrating external phasing information such as large reference panels of haplotypes, collections of pre-phased variants and long sequencing reads. We provide SHAPEIT4 in an open source format and demonstrate its performance in terms of accuracy and running times on two gold standard datasets: the UK Biobank data and the Genome In A Bottle.},
  added-at  = {2021-07-09T21:37:07.000+0200},
  author    = {Delaneau, Olivier and Zagury, Jean-François and Robinson, Matthew R. and Marchini, Jonathan L. and Dermitzakis, Emmanouil T.},
  biburl    = {https://www.bibsonomy.org/bibtex/2cb3d9e73e219a12a0b554dd0b7e257b5/peter.ralph},
  doi       = {10.1038/s41467-019-13225-y},
  interhash = {eba97b13f7bd39eee94414647ede2166},
  intrahash = {cb3d9e73e219a12a0b554dd0b7e257b5},
  issn      = {2041-1723},
  journal   = {Nature Communications},
  keywords  = {haplotype_inference methods phasing},
  number    = 1,
  pages     = {5436},
  refid     = {Delaneau2019},
  timestamp = {2021-07-09T21:37:07.000+0200},
  title     = {Accurate, scalable and integrative haplotype estimation},
  url       = {https://doi.org/10.1038/s41467-019-13225-y},
  volume    = 10,
  year      = 2019
}