% Bibliography entries exported from a reference manager via BibSonomy.
% Conventions used below:
%   - author names are in "Last, First" form, separated by " and "
%   - month uses the predefined three-letter macros (unquoted)
%   - pages give the full first and last page with a double hyphen
%   - number holds the issue number (no separate issue field)
%   - local-url holds the exporter's path to the local PDF; it is not a
%     standard field, so bibliography styles ignore it
%   - pii, pmid, uri, affiliation and biburl are non-standard bookkeeping
%     fields kept from the export
% NOTE(review): the keywords lists look scrambled by the export (terms split
% and recombined); they are kept as exported apart from trailing commas.

% Zhang and Waterman 2005: Eulerian path approach to local multiple alignment.
@article{Zhang:2005aa,
  author      = {Zhang, Yu and Waterman, Michael S.},
  title       = {An {Eulerian} path approach to local multiple alignment for {DNA} sequences},
  journal     = {Proc Natl Acad Sci USA},
  year        = 2005,
  month       = feb,
  volume      = 102,
  number      = 5,
  pages       = {1285--1290},
  doi         = {10.1073/pnas.0409240102},
  pii         = {0409240102},
  pmid        = {15668398},
  affiliation = {Department of Mathematics, University of Southern California, 1042 West 36th Place, DRB289, Los Angeles, CA 90089-1113, USA.},
  language    = {English},
  uri         = {papers://055852FE-1648-42FE-91D0-8CA474D2B905/Paper/p7},
  local-url   = {file://localhost/Users/danielzerbino/Documents/Papers/2005/Zhang/Proc%20Natl%20Acad%20Sci%20USA%202005%20Zhang.pdf},
  abstract    = {Expensive computation in handling a large number of sequences limits the application of local multiple sequence alignment. We present an Eulerian path approach to local multiple alignment for DNA sequences. The computational time and memory usage of this approach is approximately linear to the total size of sequences analyzed; hence, it can handle thousands of sequences or millions of letters simultaneously. By constructing a De Bruijn graph, most of the conserved segments are amplified as heavy Eulerian paths in the graph, and the original patterns distributed in sequences are recovered even if they do not exist in any single sequence. This approach can accurately detect unknown conserved regions, for both short and long, conserved and degenerate patterns. We further present a Poisson heuristic to estimate the significance of a local multiple alignment. The performance of our method is demonstrated by finding Alu repeats in the human genome. We compare the results with Alus marked by repeatmasker, where the two programs are in good agreement. Our method is robust under various conditions and superior to other methods in terms of efficiency and accuracy.},
  biburl      = {http://www.bibsonomy.org/bibtex/2094f2ea8388f71a5fb448d2e3517059b/dzerbino},
  keywords    = {Molecular Alignment, Nucleic Sequence Models, Data, Acid Genetic, Acid, Base Repetitive Sequences, Homology, Sequence, DNA},
}

% Pevzner, Tang and Tesler 2004: repeat classification and fragment assembly.
% NOTE(review): the export listed the first author twice ("Pavel A Pevzner"
% and "Paul A Pevzner"); only one is kept. Confirm against the publisher record.
@article{Pevzner:2004aa,
  author      = {Pevzner, Pavel A. and Tang, Haixu and Tesler, Glenn},
  title       = {De novo repeat classification and fragment assembly},
  journal     = {Genome Res},
  year        = 2004,
  month       = sep,
  volume      = 14,
  number      = 9,
  pages       = {1786--1796},
  doi         = {10.1101/gr.2395204},
  pii         = {14/9/1786},
  pmid        = {15342561},
  affiliation = {Department of Computer Science and Engineering, University of California, San Diego, La Jolla, California 92093, USA.},
  language    = {English},
  uri         = {papers://055852FE-1648-42FE-91D0-8CA474D2B905/Paper/p26},
  local-url   = {file://localhost/Users/danielzerbino/Documents/Papers/2004/Pevzner/Genome%20Res%202004%20Pevzner.pdf},
  abstract    = {Repetitive sequences make up a significant fraction of almost any genome, and an important and still open question in bioinformatics is how to represent all repeats in DNA sequences. We propose a new approach to repeat classification that represents all repeats in a genome as a mosaic of sub-repeats. Our key algorithmic idea also leads to new approaches to multiple alignment and fragment assembly. In particular, we show that our FragmentGluer assembler improves on Phrap and ARACHNE in assembly of BACs and bacterial genomes.},
  biburl      = {http://www.bibsonomy.org/bibtex/251d90db8174d7d1da830b654951e5ea0/dzerbino},
  keywords    = {Analysis, Humans, Cluster Human, Family Mapping, Bacterial, Acid, Sequences, Contig Genome, Repetitive Biology, Nucleic Linkage (Genetics), Algorithms, Artificial, Computational Multigene Chromosomes, Sequence Alignment},
}

% Myers et al. 2000: whole-genome assembly of Drosophila.
% The percent sign in the abstract is escaped so that it cannot start a TeX
% comment when a style or backend writes the abstract out.
@article{Myers:2000aa,
  author      = {Myers, E. W. and Sutton, G. G. and Delcher, A. L. and Dew, I. M. and Fasulo, D. P. and Flanigan, M. J. and Kravitz, S. A. and Mobarry, C. M. and Reinert, K. H. and Remington, K. A. and Anson, E. L. and Bolanos, R. A. and Chou, H. H. and Jordan, C. M. and Halpern, A. L. and Lonardi, S. and Beasley, E. M. and Brandon, R. C. and Chen, L. and Dunn, P. J. and Lai, Z. and Liang, Y. and Nusskern, D. R. and Zhan, M. and Zhang, Q. and Zheng, X. and Rubin, G. M. and Adams, M. D. and Venter, J. C.},
  title       = {A whole-genome assembly of {Drosophila}},
  journal     = {Science},
  year        = 2000,
  month       = mar,
  volume      = 287,
  number      = 5461,
  pages       = {2196--2204},
  pii         = {8395},
  pmid        = {10731133},
  affiliation = {Celera Genomics, Inc., 45 West Gude Drive, Rockville, MD 20850, USA. Gene.Myers@celera.com},
  language    = {English},
  uri         = {papers://055852FE-1648-42FE-91D0-8CA474D2B905/Paper/p20},
  local-url   = {file://localhost/Users/danielzerbino/Documents/Papers/2000/Myers/Science%202000%20Myers.pdf},
  abstract    = {We report on the quality of a whole-genome assembly of Drosophila melanogaster and the nature of the computer algorithms that accomplished it. Three independent external data sources essentially agree with and support the assembly's sequence and ordering of contigs across the euchromatic portion of the genome. In addition, there are isolated contigs that we believe represent nonrepetitive pockets within the heterochromatin of the centromeres. Comparison with a previously sequenced 2.9-megabase region indicates that sequencing accuracy within nonrepetitive segments is greater than 99.99\% without manual curation. As such, this initial reconstruction of the Drosophila sequence should be of substantial value to the scientific community.},
  biburl      = {http://www.bibsonomy.org/bibtex/211f0d08db68b4f73ce33ffab95ffef98/dzerbino},
  keywords    = {Chromatin, Genes, Drosophila Heterochromatin, Genome, Nucleic melanogaster, Sequence Acid, Tagged Euchromatin, Data, Chromosome Mapping, DNA, Physical Analysis, Repetitive Contig Computational Algorithms, Molecular Sites, Insect, Animals, Sequences, Biology},
}

% Salzberg and Yorke 2005: note on mis-assembled genomes (no abstract exported).
@article{Salzberg:2005aa,
  author      = {Salzberg, Steven L. and Yorke, James A.},
  title       = {Beware of mis-assembled genomes},
  journal     = {Bioinformatics},
  year        = 2005,
  month       = dec,
  volume      = 21,
  number      = 24,
  pages       = {4320--4321},
  doi         = {10.1093/bioinformatics/bti769},
  pii         = {21/24/4320},
  pmid        = {16332717},
  language    = {English},
  uri         = {papers://055852FE-1648-42FE-91D0-8CA474D2B905/Paper/p28},
  local-url   = {file://localhost/Users/danielzerbino/Documents/Papers/2005/Salzberg/Bioinformatics%202005%20Salzberg.pdf},
  biburl      = {http://www.bibsonomy.org/bibtex/23b29f08c2272b6be4fe71f8be959c4cc/dzerbino},
  keywords    = {Sequences, Software, Nucleic Biology, Computational Genomics, Acid, Databases, Acid Repetitive},
}