The de Bruijn graph data structure is widely used in next-generation sequencing (NGS). Many programs, e.g. de novo assemblers, rely on in-memory representation of this graph. However, current techniques for representing the de Bruijn graph of a human genome require a large amount of memory (≥ 30 GB).
We propose a new encoding of the de Bruijn graph, which occupies an order of magnitude less space than current representations. The encoding is based on a Bloom filter, with an additional structure to remove critical false positives. An assembly software implementing this structure, Minia, performed a complete de novo assembly of human genome short reads using 5.7 GB of memory in 23 hours.
%0 Book Section
%1 year=2012
%A Chikhi, Rayan
%A Rizk, Guillaume
%B Algorithms in Bioinformatics
%D 2012
%E Raphael, Ben
%E Tang, Jijun
%I Springer Berlin Heidelberg
%K bloom de-bruijn
%P 236-248
%R 10.1007/978-3-642-33122-0_19
%T Space-Efficient and Exact de Bruijn Graph Representation Based on a Bloom Filter
%V 7534
%X The de Bruijn graph data structure is widely used in next-generation sequencing (NGS). Many programs, e.g. de novo assemblers, rely on in-memory representation of this graph. However, current techniques for representing the de Bruijn graph of a human genome require a large amount of memory (≥ 30 GB).
We propose a new encoding of the de Bruijn graph, which occupies an order of magnitude less space than current representations. The encoding is based on a Bloom filter, with an additional structure to remove critical false positives. An assembly software implementing this structure, Minia, performed a complete de novo assembly of human genome short reads using 5.7 GB of memory in 23 hours.
%@ 978-3-642-33121-3
@incollection{year={2012},
abstract = {The de Bruijn graph data structure is widely used in next-generation sequencing (NGS). Many programs, e.g. de novo assemblers, rely on in-memory representation of this graph. However, current techniques for representing the de Bruijn graph of a human genome require a large amount of memory (≥ 30 GB).
We propose a new encoding of the de Bruijn graph, which occupies an order of magnitude less space than current representations. The encoding is based on a Bloom filter, with an additional structure to remove critical false positives. An assembly software implementing this structure, Minia, performed a complete de novo assembly of human genome short reads using 5.7 GB of memory in 23 hours.},
added-at = {2013-04-08T17:29:02.000+0200},
author = {Chikhi, Rayan and Rizk, Guillaume},
biburl = {https://www.bibsonomy.org/bibtex/2400fca97dd95e15eaccdbcf1315600cc/ytyoun},
booktitle = {Algorithms in Bioinformatics},
doi = {10.1007/978-3-642-33122-0_19},
editor = {Raphael, Ben and Tang, Jijun},
interhash = {b57db0bdb289386c3cb845a94b5bf5d2},
intrahash = {400fca97dd95e15eaccdbcf1315600cc},
isbn = {978-3-642-33121-3},
keywords = {bloom de-bruijn},
pages = {236-248},
publisher = {Springer Berlin Heidelberg},
series = {Lecture Notes in Computer Science},
timestamp = {2013-04-08T17:29:02.000+0200},
title = {Space-Efficient and Exact de Bruijn Graph Representation Based on a Bloom Filter},
volume = 7534,
year = 2012
}