The output of a genome assembler generally comprises a collection of contiguous DNA sequences (contigs) whose relative placement along the genome is not defined. A procedure called scaffolding is commonly used to order and orient these contigs using paired read information. This ordering of contigs is an essential step when finishing and analyzing the data from a whole-genome shotgun project. Most recent assemblers include a scaffolding module; however, users have little control over the scaffolding algorithm or the information produced. We thus developed a general-purpose scaffolder, called Bambus, which affords users significant flexibility in controlling the scaffolding parameters. Bambus was used recently to scaffold the low-coverage draft dog genome data. Most significantly, Bambus enables the use of linking data other than that inferred from mate-pair information. For example, the sequence of a completed genome can be used to guide the scaffolding of a related organism. We present several applications of Bambus: support for finishing, comparative genomics, analysis of the haplotype structure of genomes, and scaffolding of a mammalian genome at low coverage. Bambus is available as an open-source package from our Web site.
%0 Journal Article
%1 Pop:2004aa
%A Pop, Mihai
%A Kosack, Daniel S
%A Salzberg, Steven L
%D 2004
%J Genome Research
%K Algorithms, Animals, Bacillus Bacterial, Benchmarking, Biology, Brucella, Computational Contig Databases, Dogs, Genetic, Genome, Genomics, Haplotypes, Mapping, Shewanella, Software Software, Staphylococcus, Validation, Wolbachia, anthracis
%N 1
%P 149--159
%R 10.1101/gr.1536204
%T Hierarchical scaffolding with Bambus
%V 14
%X The output of a genome assembler generally comprises a collection of contiguous DNA sequences (contigs) whose relative placement along the genome is not defined. A procedure called scaffolding is commonly used to order and orient these contigs using paired read information. This ordering of contigs is an essential step when finishing and analyzing the data from a whole-genome shotgun project. Most recent assemblers include a scaffolding module; however, users have little control over the scaffolding algorithm or the information produced. We thus developed a general-purpose scaffolder, called Bambus, which affords users significant flexibility in controlling the scaffolding parameters. Bambus was used recently to scaffold the low-coverage draft dog genome data. Most significantly, Bambus enables the use of linking data other than that inferred from mate-pair information. For example, the sequence of a completed genome can be used to guide the scaffolding of a related organism. We present several applications of Bambus: support for finishing, comparative genomics, analysis of the haplotype structure of genomes, and scaffolding of a mammalian genome at low coverage. Bambus is available as an open-source package from our Web site.
@article{Pop:2004aa,
  author      = {Pop, Mihai and Kosack, Daniel S. and Salzberg, Steven L.},
  title       = {Hierarchical Scaffolding with {Bambus}},
  journal     = {Genome Research},
  volume      = {14},
  number      = {1},
  pages       = {149--159},
  month       = jan,
  year        = {2004},
  doi         = {10.1101/gr.1536204},
  pmid        = {14707177},
  pii         = {14/1/149},
  language    = {English},
  affiliation = {The Institute for Genomic Research (TIGR), Rockville, Maryland 20850, USA. mpop@tigr.org},
  keywords    = {Algorithms, Animals, Bacillus Bacterial, Benchmarking, Biology, Brucella, Computational Contig Databases, Dogs, Genetic, Genome, Genomics, Haplotypes, Mapping, Shewanella, Software Software, Staphylococcus, Validation, Wolbachia, anthracis},
  abstract    = {The output of a genome assembler generally comprises a collection of contiguous DNA sequences (contigs) whose relative placement along the genome is not defined. A procedure called scaffolding is commonly used to order and orient these contigs using paired read information. This ordering of contigs is an essential step when finishing and analyzing the data from a whole-genome shotgun project. Most recent assemblers include a scaffolding module; however, users have little control over the scaffolding algorithm or the information produced. We thus developed a general-purpose scaffolder, called Bambus, which affords users significant flexibility in controlling the scaffolding parameters. Bambus was used recently to scaffold the low-coverage draft dog genome data. Most significantly, Bambus enables the use of linking data other than that inferred from mate-pair information. For example, the sequence of a completed genome can be used to guide the scaffolding of a related organism. We present several applications of Bambus: support for finishing, comparative genomics, analysis of the haplotype structure of genomes, and scaffolding of a mammalian genome at low coverage. Bambus is available as an open-source package from our Web site.},
  added-at    = {2007-09-17T20:19:41.000+0200},
  timestamp   = {2007-09-17T20:19:41.000+0200},
  biburl      = {https://www.bibsonomy.org/bibtex/2a94162d90a98876b571cb8460decb1c0/dzerbino},
  interhash   = {c7f2e19467f19f38118b87b8392a6069},
  intrahash   = {a94162d90a98876b571cb8460decb1c0},
  local-url   = {file://localhost/Users/danielzerbino/Documents/Papers/2004/Pop/Genome%20Res%202004%20Pop.pdf},
  uri         = {papers://055852FE-1648-42FE-91D0-8CA474D2B905/Paper/p18},
}