One possible explanation for the substantial organismal differences between
humans and chimpanzees is that there have been changes in gene regulation.
Given what is known about transcription factor binding sites, this motivates
the following probability question: given a 1000 nucleotide region in our
genome, how long does it take for a specified six to nine letter word to
appear in that region in some individual? Stone and Wray [Mol. Biol. Evol. 18
(2001) 1764–1770] computed 5,950 years as the answer for six letter words.
Here, we will show that for words of length 6, the average waiting time is
100,000 years, while for words of length 8, the waiting time has mean 375,000
years when there is a 7 out of 8 letter match in the population consensus
sequence (an event of probability roughly 5/16) and has mean 650 million years
when there is not. Fortunately, in biological reality, the match to the target
word does not have to be perfect for binding to occur. If we model this by
saying that a 7 out of 8 letter match is good enough, the mean reduces to
about 60,000 years.
%0 Journal Article
%1 durrett2007waiting
%A Durrett, Richard
%A Schmidt, Deena
%D 2007
%J Ann. Appl. Probab.
%K binding_sites regulatory_sequence_turnover sequence_Markov_chain waiting_times
%N 1
%P 1--32
%R 10.1214/105051606000000619
%T Waiting for regulatory sequences to appear
%U http://dx.doi.org/10.1214/105051606000000619
%V 17
%X One possible explanation for the substantial organismal differences between
humans and chimpanzees is that there have been changes in gene regulation.
Given what is known about transcription factor binding sites, this motivates
the following probability question: given a 1000 nucleotide region in our
genome, how long does it take for a specified six to nine letter word to
appear in that region in some individual? Stone and Wray [Mol. Biol. Evol. 18
(2001) 1764–1770] computed 5,950 years as the answer for six letter words.
Here, we will show that for words of length 6, the average waiting time is
100,000 years, while for words of length 8, the waiting time has mean 375,000
years when there is a 7 out of 8 letter match in the population consensus
sequence (an event of probability roughly 5/16) and has mean 650 million years
when there is not. Fortunately, in biological reality, the match to the target
word does not have to be perfect for binding to occur. If we model this by
saying that a 7 out of 8 letter match is good enough, the mean reduces to
about 60,000 years.
@comment{
  durrett2007waiting: Durrett and Schmidt, "Waiting for regulatory sequences
  to appear", Ann. Appl. Probab. 17(1), 2007, pp. 1--32.

  The abstract is stored as running text: no word is split across a line
  break, because BibTeX turns a newline into a space and a wrapped "be-/tween"
  would be typeset as "be- tween". The page range of the reference cited in
  the abstract is written with "--" rather than a Unicode en dash so that
  8-bit BibTeX can process the entry.

  biburl, interhash, intrahash, added-at and timestamp look like BibSonomy
  bookkeeping fields (biburl points to bibsonomy.org); standard styles ignore
  field names they do not know.
}
@article{durrett2007waiting,
  abstract   = {One possible explanation for the substantial organismal
                differences between humans and chimpanzees is that there have
                been changes in gene regulation. Given what is known about
                transcription factor binding sites, this motivates the
                following probability question: given a 1000 nucleotide region
                in our genome, how long does it take for a specified six to
                nine letter word to appear in that region in some individual?
                Stone and Wray [Mol. Biol. Evol. 18 (2001) 1764--1770] computed
                5,950 years as the answer for six letter words. Here, we will
                show that for words of length 6, the average waiting time is
                100,000 years, while for words of length 8, the waiting time
                has mean 375,000 years when there is a 7 out of 8 letter match
                in the population consensus sequence (an event of probability
                roughly 5/16) and has mean 650 million years when there is not.
                Fortunately, in biological reality, the match to the target
                word does not have to be perfect for binding to occur. If we
                model this by saying that a 7 out of 8 letter match is good
                enough, the mean reduces to about 60,000 years.},
  added-at   = {2013-09-06T00:58:56.000+0200},
  author     = {Durrett, Richard and Schmidt, Deena},
  biburl     = {https://www.bibsonomy.org/bibtex/2b18d520932c9195ef41d9b46ea7fc6bd/peter.ralph},
  doi        = {10.1214/105051606000000619},
  fjournal   = {The Annals of Applied Probability},
  interhash  = {726b5130ff4f65ccdd5c57dfec334e65},
  intrahash  = {b18d520932c9195ef41d9b46ea7fc6bd},
  issn       = {1050-5164},
  journal    = {Ann. Appl. Probab.},
  keywords   = {binding_sites regulatory_sequence_turnover sequence_Markov_chain waiting_times},
  mrclass    = {92D10 (60C05 60F05 92D20)},
  mrnumber   = {2292578 (2007j:92034)},
  mrreviewer = {David J. Aldous},
  number     = {1},
  pages      = {1--32},
  timestamp  = {2013-09-06T00:58:56.000+0200},
  title      = {Waiting for regulatory sequences to appear},
  url        = {http://dx.doi.org/10.1214/105051606000000619},
  volume     = {17},
  year       = {2007}
}