Several methods have been proposed to test for introgression across genomes. One method tests for a genome-wide excess of shared derived alleles between taxa using Patterson's D statistic, but does not establish which loci show such an excess or whether the excess is due to introgression or ancestral population structure. Several recent studies have extended the use of D by applying the statistic to small genomic regions, rather than genome-wide. Here, we use simulations and whole genome data from Heliconius butterflies to investigate the behavior of D in small genomic regions. We find that D is unreliable in this situation as it gives inflated values when effective population size is low, causing D outliers to cluster in genomic regions of reduced diversity. As an alternative, we propose a related statistic f̂d, a modified version of a statistic originally developed to estimate the genome-wide fraction of admixture. f̂d is not subject to the same biases as D, and is better at identifying introgressed loci. Finally, we show that both D and f̂d outliers tend to cluster in regions of low absolute divergence (dXY), which can confound a recently proposed test for differentiating introgression from shared ancestral variation at individual loci.
%0 Journal Article
%1 martin2014evaluating
%A Martin, Simon H.
%A Davey, John W.
%A Jiggins, Chris D.
%D 2014
%I Oxford University Press
%J Molecular Biology and Evolution
%K ABBA-BABA Heliconius introgression local_statistics methods regions_of_introgression
%N 1
%P 244-257
%R doi:10.1093/molbev/msu269
%T Evaluating the use of ABBA-BABA statistics to locate introgressed loci
%U http://mbe.oxfordjournals.org/content/32/1/244
%V 32
%X Several methods have been proposed to test for introgression across genomes. One method tests for a genome-wide excess of shared derived alleles between taxa using Patterson's D statistic, but does not establish which loci show such an excess or whether the excess is due to introgression or ancestral population structure. Several recent studies have extended the use of D by applying the statistic to small genomic regions, rather than genome-wide. Here, we use simulations and whole genome data from Heliconius butterflies to investigate the behavior of D in small genomic regions. We find that D is unreliable in this situation as it gives inflated values when effective population size is low, causing D outliers to cluster in genomic regions of reduced diversity. As an alternative, we propose a related statistic f̂d, a modified version of a statistic originally developed to estimate the genome-wide fraction of admixture. f̂d is not subject to the same biases as D, and is better at identifying introgressed loci. Finally, we show that both D and f̂d outliers tend to cluster in regions of low absolute divergence (dXY), which can confound a recently proposed test for differentiating introgression from shared ancestral variation at individual loci.
@article{martin2014evaluating,
  author        = {Martin, Simon H. and Davey, John W. and Jiggins, Chris D.},
  title         = {Evaluating the use of {ABBA-BABA} statistics to locate introgressed loci},
  journal       = {Molecular Biology and Evolution},
  volume        = {32},
  number        = {1},
  pages         = {244--257},
  year          = {2014},
  publisher     = {Oxford University Press},
  doi           = {10.1093/molbev/msu269},
  url           = {http://mbe.oxfordjournals.org/content/32/1/244},
  biorxiv       = {10.1101/001347},
  abstract      = {Several methods have been proposed to test for introgression across genomes. One method tests for a genome-wide excess of shared derived alleles between taxa using Patterson's D statistic, but does not establish which loci show such an excess or whether the excess is due to introgression or ancestral population structure. Several recent studies have extended the use of D by applying the statistic to small genomic regions, rather than genome-wide. Here, we use simulations and whole genome data from Heliconius butterflies to investigate the behavior of D in small genomic regions. We find that D is unreliable in this situation as it gives inflated values when effective population size is low, causing D outliers to cluster in genomic regions of reduced diversity. As an alternative, we propose a related statistic f̂d, a modified version of a statistic originally developed to estimate the genome-wide fraction of admixture. f̂d is not subject to the same biases as D, and is better at identifying introgressed loci. Finally, we show that both D and f̂d outliers tend to cluster in regions of low absolute divergence (dXY), which can confound a recently proposed test for differentiating introgression from shared ancestral variation at individual loci.},
  keywords      = {ABBA-BABA Heliconius introgression local_statistics methods regions_of_introgression},
  added-at      = {2014-09-30T23:28:04.000+0200},
  timestamp     = {2015-02-18T06:36:57.000+0100},
  biburl        = {https://www.bibsonomy.org/bibtex/2677e10e1295a43ba8aedcbd938421880/peter.ralph},
  interhash     = {34de453c49ab6cd2d1df3a311b070c78},
  intrahash     = {677e10e1295a43ba8aedcbd938421880},
  internal-note = {Review: (1) year kept as exported (2014); volume 32, number 1 may correspond to a 2015 issue date, confirm against the publisher record. (2) publisher changed from the exported value "Cold Spring Harbor Labs Journals", presumably inherited from the bioRxiv preprint record, to match the oxfordjournals.org URL; confirm. (3) doi stored bare, without the "doi:" prefix.},
}