We previously described the whole-genome assembly program Arachne, presenting assemblies of simulated data for small to mid-sized genomes. Here we describe algorithmic adaptations to the program, allowing for assembly of mammalian-size genomes, and also improving the assembly of smaller genomes. Three principal changes were simultaneously made and applied to the assembly of the mouse genome, during a six-month period of development: (1) Supercontigs (scaffolds) were iteratively broken and rejoined using several criteria, yielding a 64-fold increase in length (N50), and apparent elimination of all global misjoins; (2) gaps between contigs in supercontigs were filled (partially or completely) by insertion of reads, as suggested by pairing within the supercontig, increasing the N50 contig length by 50%; (3) memory usage was reduced fourfold. The outcome of this mouse assembly and its analysis are described in (Mouse Genome Sequencing Consortium 2002).
%0 Journal Article
%1 Jaffe:2003aa
%A Jaffe, David B
%A Butler, Jonathan
%A Gnerre, Sante
%A Mauceli, Evan
%A Lindblad-Toh, Kerstin
%A Mesirov, Jill P
%A Zody, Michael C
%A Lander, Eric S
%D 2003
%J Genome Research
%K Animals, Biology, Computational Contig Genome, Humans Mapping, Mice, Software
%N 1
%P 91-96
%R 10.1101/gr.828403
%T Whole-genome sequence assembly for mammalian genomes: Arachne 2
%V 13
%X We previously described the whole-genome assembly program Arachne, presenting assemblies of simulated data for small to mid-sized genomes. Here we describe algorithmic adaptations to the program, allowing for assembly of mammalian-size genomes, and also improving the assembly of smaller genomes. Three principal changes were simultaneously made and applied to the assembly of the mouse genome, during a six-month period of development: (1) Supercontigs (scaffolds) were iteratively broken and rejoined using several criteria, yielding a 64-fold increase in length (N50), and apparent elimination of all global misjoins; (2) gaps between contigs in supercontigs were filled (partially or completely) by insertion of reads, as suggested by pairing within the supercontig, increasing the N50 contig length by 50%; (3) memory usage was reduced fourfold. The outcome of this mouse assembly and its analysis are described in (Mouse Genome Sequencing Consortium 2002).
@comment{Jaffe et al. 2003, the Arachne 2 whole-genome assembler paper
(Genome Research 13(1):91--96). Entry exported from BibSonomy: added-at,
biburl, interhash, intrahash, local-url, timestamp and uri are tool
metadata, not bibliographic data. NOTE(review): the keywords look
word-shuffled by the exporter -- confirm against the PubMed record
(pmid 12529310) before relying on them.}
@article{Jaffe:2003aa,
  abstract    = {We previously described the whole-genome assembly program Arachne, presenting assemblies of simulated data for small to mid-sized genomes. Here we describe algorithmic adaptations to the program, allowing for assembly of mammalian-size genomes, and also improving the assembly of smaller genomes. Three principal changes were simultaneously made and applied to the assembly of the mouse genome, during a six-month period of development: (1) Supercontigs (scaffolds) were iteratively broken and rejoined using several criteria, yielding a 64-fold increase in length (N50), and apparent elimination of all global misjoins; (2) gaps between contigs in supercontigs were filled (partially or completely) by insertion of reads, as suggested by pairing within the supercontig, increasing the N50 contig length by 50\%; (3) memory usage was reduced fourfold. The outcome of this mouse assembly and its analysis are described in (Mouse Genome Sequencing Consortium 2002).},
  added-at    = {2007-09-17T20:19:41.000+0200},
  affiliation = {Whitehead Institute/MIT Center for Genome Research, Cambridge, Massachusetts 02141, USA. jaffe@genome.wi.mit.edu},
  author      = {Jaffe, David B and Butler, Jonathan and Gnerre, Sante and Mauceli, Evan and Lindblad-Toh, Kerstin and Mesirov, Jill P and Zody, Michael C and Lander, Eric S},
  biburl      = {https://www.bibsonomy.org/bibtex/2c9787f374c7933c38f7ab5214fb93862/dzerbino},
  doi         = {10.1101/gr.828403},
  interhash   = {a56b449830923e2399817ee015844488},
  intrahash   = {c9787f374c7933c38f7ab5214fb93862},
  journal     = {Genome Research},
  keywords    = {Animals, Biology, Computational Contig Genome, Humans Mapping, Mice, Software},
  language    = {English},
  local-url   = {file://localhost/Users/danielzerbino/Documents/Papers/2003/Jaffe/Genome%20Res%202003%20Jaffe.pdf},
  month       = jan,
  number      = 1,
  pages       = {91--96},
  pmid        = {12529310},
  timestamp   = {2007-09-17T20:19:42.000+0200},
  title       = {Whole-genome sequence assembly for mammalian genomes: {Arachne} 2},
  uri         = {papers://055852FE-1648-42FE-91D0-8CA474D2B905/Paper/p12},
  volume      = 13,
  year        = 2003
}