In phylogenetic analyses with combined multigene or multiprotein data sets, accounting for differing evolutionary dynamics at different loci is essential for accurate tree prediction. Existing maximum likelihood (ML) and Bayesian approaches are computationally intensive. We present an alternative approach that is orders of magnitude faster. The method, Distance Rates (DistR), estimates rates based upon distances derived from gene/protein sequence data. Simulation studies indicate that this technique is accurate compared with other methods and robust to missing sequence data. The DistR method was applied to a fungal mitochondrial data set, and the rate estimates compared well to those obtained using existing ML and Bayesian approaches. Inclusion of the protein rates estimated from the DistR method into the ML calculation of trees as a branch length multiplier resulted in a significantly improved fit as measured by the Akaike Information Criterion (AIC). Furthermore, bootstrap support for the ML topology was significantly greater when protein rates were used, and some evident errors in the concatenated ML tree topology (i.e., without protein rates) were corrected. [Bayesian credible intervals; DistR method; multigene phylogeny; PHYML; rate heterogeneity.]
%0 Journal Article
%1 Bevan05
%A Bevan, Rachel B
%A Lang, B Franz
%A Bryant, David
%C McGill Centre for Bioinformatics, Duff Medical Building, 3775 University Street, Montreal, Quebec, H3A 2B4, Canada. rachel@mcb.mcgill.ca
%D 2005
%J Syst Biol
%K *Computer *Evolution *Models *Phylogeny Algorithms Analysis Biology/methods Classification/methods Computational Functions Fungal Likelihood Molecular Protein/*methods Proteins/classification Sequence Simulation Statistical Statistics Topic/methods as from:davidjamesbryant
%N 6
%P 900--915
%R 10.1080/10635150500354829
%T Calculating the evolutionary rates of different genes: a fast, accurate estimator with applications to maximum likelihood phylogenetic analysis
%V 54
%X In phylogenetic analyses with combined multigene or multiprotein data sets, accounting for differing evolutionary dynamics at different loci is essential for accurate tree prediction. Existing maximum likelihood (ML) and Bayesian approaches are computationally intensive. We present an alternative approach that is orders of magnitude faster. The method, Distance Rates (DistR), estimates rates based upon distances derived from gene/protein sequence data. Simulation studies indicate that this technique is accurate compared with other methods and robust to missing sequence data. The DistR method was applied to a fungal mitochondrial data set, and the rate estimates compared well to those obtained using existing ML and Bayesian approaches. Inclusion of the protein rates estimated from the DistR method into the ML calculation of trees as a branch length multiplier resulted in a significantly improved fit as measured by the Akaike Information Criterion (AIC). Furthermore, bootstrap support for the ML topology was significantly greater when protein rates were used, and some evident errors in the concatenated ML tree topology (i.e., without protein rates) were corrected. [Bayesian credible intervals; DistR method; multigene phylogeny; PHYML; rate heterogeneity.]
@comment{
  Bevan05: journal article in Systematic Biology 54(6), 2005.
  Besides the standard article fields, this record carries MEDLINE-style
  tags (au, crdt, da, dcom, edat, jid, jt, lr, mhda, own, pii, pl, pmid,
  pst, pt, rn, sb, so, stat) and bookmarking metadata (added-at, biburl,
  interhash, intrahash, timestamp). Standard styles do not recognise these
  field names and silently ignore them.
  The "affiliation" field holds the corresponding author's institution and
  contact address; it is not a publisher location.
}
@article{Bevan05,
  abstract      = {In phylogenetic analyses with combined multigene or multiprotein data sets, accounting for differing evolutionary dynamics at different loci is essential for accurate tree prediction. Existing maximum likelihood (ML) and Bayesian approaches are computationally intensive. We present an alternative approach that is orders of magnitude faster. The method, Distance Rates (DistR), estimates rates based upon distances derived from gene/protein sequence data. Simulation studies indicate that this technique is accurate compared with other methods and robust to missing sequence data. The DistR method was applied to a fungal mitochondrial data set, and the rate estimates compared well to those obtained using existing ML and Bayesian approaches. Inclusion of the protein rates estimated from the DistR method into the ML calculation of trees as a branch length multiplier resulted in a significantly improved fit as measured by the Akaike Information Criterion (AIC). Furthermore, bootstrap support for the ML topology was significantly greater when protein rates were used, and some evident errors in the concatenated ML tree topology (i.e., without protein rates) were corrected. [Bayesian credible intervals; DistR method; multigene phylogeny; PHYML; rate heterogeneity.]},
  added-at      = {2009-01-28T01:17:14.000+0100},
  affiliation   = {McGill Centre for Bioinformatics, Duff Medical Building, 3775 University Street, Montreal, Quebec, H3A 2B4, Canada. rachel@mcb.mcgill.ca},
  au            = {Bevan, RB and Lang, BF and Bryant, D},
  author        = {Bevan, Rachel B. and Lang, B. Franz and Bryant, David},
  biburl        = {https://www.bibsonomy.org/bibtex/2d2fa95567a87a4e214a83f3a9b37ea69/compevol},
  crdt          = {2005/11/12 09:00},
  da            = {20051111},
  date-added    = {2009-01-28 12:07:05 +1300},
  date-modified = {2009-01-28 13:04:38 +1300},
  dcom          = {20060126},
  doi           = {10.1080/10635150500354829},
  edat          = {2005/11/12 09:00},
  interhash     = {908a7d0c2d35cc47235351653c8038c6},
  intrahash     = {d2fa95567a87a4e214a83f3a9b37ea69},
  issn          = {1063-5157},
  jid           = {9302532},
  journal       = {Systematic Biology},
  jt            = {Systematic biology},
  keywords      = {*Computer *Evolution *Models *Phylogeny Algorithms Analysis Biology/methods Classification/methods Computational Functions Fungal Likelihood Molecular Protein/*methods Proteins/classification Sequence Simulation Statistical Statistics Topic/methods as from:davidjamesbryant},
  language      = {eng},
  lr            = {20071115},
  mhda          = {2006/01/27 09:00},
  month         = dec,
  number        = {6},
  own           = {NLM},
  pages         = {900--915},
  pii           = {M525300127M3P541},
  pl            = {England},
  pmid          = {16282169},
  pst           = {ppublish},
  pt            = {Comparative Study; Journal Article; Research Support, Non-U.S. Gov't},
  rn            = {0 (Fungal Proteins)},
  sb            = {IM},
  so            = {Syst Biol. 2005 Dec;54(6):900-15.},
  stat          = {MEDLINE},
  timestamp     = {2009-01-28T01:29:25.000+0100},
  title         = {Calculating the evolutionary rates of different genes: a fast, accurate estimator with applications to maximum likelihood phylogenetic analysis},
  volume        = {54},
  year          = {2005},
}