BACKGROUND: Effective population size (Ne) is related to genetic variability and is a basic parameter in many models of population genetics. A number of methods for inferring current and past population sizes from genetic data have been developed since JFC Kingman introduced the n-coalescent in 1982. Here we present the Extended Bayesian Skyline Plot, a non-parametric Bayesian Markov chain Monte Carlo algorithm that extends a previous coalescent-based method in several ways, including the ability to analyze multiple loci. RESULTS: Through extensive simulations we show the accuracy and limitations of inferring population size as a function of the amount of data, including recovering information about evolutionary bottlenecks. We also analyzed two real data sets to demonstrate the behavior of the new method; a single gene Hepatitis C virus data set sampled from Egypt and a 10 locus Drosophila ananassae data set representing 16 different populations. CONCLUSIONS: The results demonstrate the essential role of multiple loci in recovering population size dynamics. Multi-locus data from a small number of individuals can precisely recover past bottlenecks in population size which can not be characterized by analysis of a single locus. We also demonstrate that sequence data quality is important because even moderate levels of sequencing errors result in a considerable decrease in estimation accuracy for realistic levels of population genetic variability.
%0 Journal Article
%1 Heled:2008ty
%A Heled, J
%A Drummond, AJ
%D 2008
%J BMC Evol Biol
%K from:alexei.drummond
%N 1
%P 289
%R 10.1186/1471-2148-8-289
%T Bayesian inference of population size history from multiple loci.
%U http://dx.doi.org/10.1186/1471-2148-8-289
%V 8
%X BACKGROUND: Effective population size (Ne) is related to genetic variability and is a basic parameter in many models of population genetics. A number of methods for inferring current and past population sizes from genetic data have been developed since JFC Kingman introduced the n-coalescent in 1982. Here we present the Extended Bayesian Skyline Plot, a non-parametric Bayesian Markov chain Monte Carlo algorithm that extends a previous coalescent-based method in several ways, including the ability to analyze multiple loci. RESULTS: Through extensive simulations we show the accuracy and limitations of inferring population size as a function of the amount of data, including recovering information about evolutionary bottlenecks. We also analyzed two real data sets to demonstrate the behavior of the new method; a single gene Hepatitis C virus data set sampled from Egypt and a 10 locus Drosophila ananassae data set representing 16 different populations. CONCLUSIONS: The results demonstrate the essential role of multiple loci in recovering population size dynamics. Multi-locus data from a small number of individuals can precisely recover past bottlenecks in population size which can not be characterized by analysis of a single locus. We also demonstrate that sequence data quality is important because even moderate levels of sequencing errors result in a considerable decrease in estimation accuracy for realistic levels of population genetic variability.
% Journal article record for the Extended Bayesian Skyline Plot paper.
% Fields are kept in alphabetical order. Besides the standard bibliographic
% fields (author, title, journal, volume, number, pages, year, month, doi, url),
% the entry carries PubMed-style bookkeeping (da, dep, edat, jid, jt, mhda, own,
% phst, pii, pmid, pst, pt, pubm, so, stat) and Bibsonomy bookkeeping (added-at,
% biburl, interhash, intrahash, timestamp); standard styles do not know these
% extra fields and ignore them.
% pages holds the article number (the journal is electronic-only per issn/pubm).
@article{Heled:2008ty,
abstract = {BACKGROUND: Effective population size (Ne) is related to genetic variability and is a basic parameter in many models of population genetics. A number of methods for inferring current and past population sizes from genetic data have been developed since JFC Kingman introduced the n-coalescent in 1982. Here we present the Extended Bayesian Skyline Plot, a non-parametric Bayesian Markov chain Monte Carlo algorithm that extends a previous coalescent-based method in several ways, including the ability to analyze multiple loci. RESULTS: Through extensive simulations we show the accuracy and limitations of inferring population size as a function of the amount of data, including recovering information about evolutionary bottlenecks. We also analyzed two real data sets to demonstrate the behavior of the new method; a single gene Hepatitis C virus data set sampled from Egypt and a 10 locus Drosophila ananassae data set representing 16 different populations. CONCLUSIONS: The results demonstrate the essential role of multiple loci in recovering population size dynamics. Multi-locus data from a small number of individuals can precisely recover past bottlenecks in population size which can not be characterized by analysis of a single locus. We also demonstrate that sequence data quality is important because even moderate levels of sequencing errors result in a considerable decrease in estimation accuracy for realistic levels of population genetic variability.},
added-at = {2009-05-14T22:10:56.000+0200},
author = {Heled, Joseph and Drummond, Alexei J.},
biburl = {https://www.bibsonomy.org/bibtex/241f5b111c50ce5cc6ef7fb1c9bd6f8c5/compevol},
da = {20081024},
date-modified = {2008-11-28 10:40:25 -0800},
dep = {20081023},
doi = {10.1186/1471-2148-8-289},
edat = {2008/10/25 09:00},
interhash = {14496c421925d3c4e13ecea1a2101711},
intrahash = {41f5b111c50ce5cc6ef7fb1c9bd6f8c5},
issn = {1471-2148},
jid = {100966975},
journal = {BMC Evolutionary Biology},
jt = {BMC evolutionary biology},
keywords = {from:alexei.drummond},
language = {ENG},
mhda = {2008/10/25 09:00},
month = oct,
number = 1,
own = {NLM},
pages = 289,
phst = {2008/02/22 {$[$}received{$]$}; 2008/10/23 {$[$}accepted{$]$}; 2008/10/23 {$[$}aheadofprint{$]$}},
pii = {1471-2148-8-289},
pmid = {18947398},
pst = {aheadofprint},
pt = {JOURNAL ARTICLE},
pubm = {Print-Electronic},
so = {BMC Evol Biol. 2008 Oct 23;8(1):289.},
stat = {Publisher},
timestamp = {2009-05-14T22:10:57.000+0200},
title = {{Bayesian} inference of population size history from multiple loci},
url = {https://doi.org/10.1186/1471-2148-8-289},
volume = 8,
year = 2008
}