| Authors: |
Guillaume Blin
and Eric Blais
and Danny Hermelin
and Pierre Guillon
and Mathieu Blanchette
and Nadia El-Mabrouk
|
| Tags: |
Algorithms,
Analysis,
Biology,
Computational
DNA,
Genome,
Genomics,
Sequence
Software
|
| Abstract: |
A preliminary step to most comparative genomics studies is the annotation of chromosomes as ordered sequences of genes. Different genetic mapping techniques often give rise to different maps with unequal gene content and sets of unordered neighboring genes. Only partial orders can thus be obtained from combining such maps. However, once a total order O is known for a given genome, it can be used as a reference to order genes of a closely related species characterized by a partial order P. Our goal is to find a linearization of P that is as close as possible to O, in term of a given genomic distance. We first prove NP-completeness complexity results considering the breakpoint and the common interval distances. We then focus on the breakpoint distance and give a dynamic programming algorithm whose running time is exponential for general partial orders, but polynomial when the partial order is derived from a bounded number of genetic maps. A time-efficient greedy heuristic is then given for the general case and is empirically shown to produce solutions within 10% of the optimal solution, on simulated data. Applications to the analysis of grass genomes are presented. |
@article{Blin:2007aa,
title = {Gene maps linearization using genomic rearrangement distances},
author = {Guillaume Blin and Eric Blais and Danny Hermelin and Pierre Guillon and Mathieu Blanchette and Nadia El-Mabrouk},
journal = {J Comput Biol},
month = {May},
number = {4},
pages = {394--407},
volume = {14},
year = {2007},
abstract = {A preliminary step to most comparative genomics studies is the annotation of chromosomes as ordered sequences of genes. Different genetic mapping techniques often give rise to different maps with unequal gene content and sets of unordered neighboring genes. Only partial orders can thus be obtained from combining such maps. However, once a total order O is known for a given genome, it can be used as a reference to order genes of a closely related species characterized by a partial order P. Our goal is to find a linearization of P that is as close as possible to O, in term of a given genomic distance. We first prove NP-completeness complexity results considering the breakpoint and the common interval distances. We then focus on the breakpoint distance and give a dynamic programming algorithm whose running time is exponential for general partial orders, but polynomial when the partial order is derived from a bounded number of genetic maps. A time-efficient greedy heuristic is then given for the general case and is empirically shown to produce solutions within 10% of the optimal solution, on simulated data. Applications to the analysis of grass genomes are presented.},
doi = {10.1089/cmb.2007.A002}, local-url = {file://localhost/Users/danielzerbino/Documents/Papers/2007/Blin/J%20Comput%20Biol%202007%20Blin.pdf}, language = {English}, issue = {4}, pmid = {17572019}, uri = {papers://055852FE-1648-42FE-91D0-8CA474D2B905/Paper/p37}, affiliation = {IGM-LabInfo, UMR CNRS 8049, Universit{\'e} Paris-Est, Marne-la-Vall{\'e}e, France. gblin@univ-mlv.fr},
keywords = {Algorithms, Analysis, Biology, Computational DNA, Genome, Genomics, Sequence Software }
}