Motivation: The commercial launch of 454 pyrosequencing in 2005 was a milestone in genome sequencing in terms of performance and cost. Throughout the three available releases, average read lengths have increased to ∼500 base pairs and are thus approaching read lengths obtained from traditional Sanger sequencing. Study design of sequencing projects would benefit from being able to simulate experiments. Results: We explore 454 raw data to investigate its characteristics and derive empirical distributions for the flow values generated by pyrosequencing. Based on our findings, we implement Flowsim, a simulator that generates realistic pyrosequencing data files of arbitrary size from a given set of input DNA sequences. We finally use our simulator to examine the impact of sequence lengths on the results of concrete whole-genome assemblies, and we suggest its use in planning of sequencing projects, benchmarking of assembly methods and other fields. Availability: Flowsim is freely available under the General Public License from http://blog.malde.org/index.php/flowsim/ Contact: susanne.balzer@imr.no; ketil.malde@imr.no
%0 Journal Article
%1 Balzer15092010
%A Balzer, Susanne
%A Malde, Ketil
%A Lanzén, Anders
%A Sharma, Animesh
%A Jonassen, Inge
%D 2010
%J Bioinformatics
%K ion
%N 18
%P i420-i425
%R 10.1093/bioinformatics/btq365
%T Characteristics of 454 pyrosequencing data—enabling realistic simulation with flowsim
%V 26
%X Motivation: The commercial launch of 454 pyrosequencing in 2005 was a milestone in genome sequencing in terms of performance and cost. Throughout the three available releases, average read lengths have increased to ∼500 base pairs and are thus approaching read lengths obtained from traditional Sanger sequencing. Study design of sequencing projects would benefit from being able to simulate experiments. Results: We explore 454 raw data to investigate its characteristics and derive empirical distributions for the flow values generated by pyrosequencing. Based on our findings, we implement Flowsim, a simulator that generates realistic pyrosequencing data files of arbitrary size from a given set of input DNA sequences. We finally use our simulator to examine the impact of sequence lengths on the results of concrete whole-genome assemblies, and we suggest its use in planning of sequencing projects, benchmarking of assembly methods and other fields. Availability: Flowsim is freely available under the General Public License from http://blog.malde.org/index.php/flowsim/ Contact: susanne.balzer@imr.no; ketil.malde@imr.no
@comment{
  Journal article: Bioinformatics, volume 26, issue 18, 2010.
  The fields added-at, biburl, interhash, intrahash and timestamp are
  export metadata carried over from the bibsonomy.org record; standard
  bibliography styles ignore them.
}
@article{Balzer15092010,
  author    = {Balzer, Susanne and Malde, Ketil and Lanzén, Anders and Sharma, Animesh and Jonassen, Inge},
  title     = {Characteristics of 454 pyrosequencing data—enabling realistic simulation with flowsim},
  journal   = {Bioinformatics},
  year      = {2010},
  volume    = {26},
  number    = {18},
  pages     = {i420--i425},
  doi       = {10.1093/bioinformatics/btq365},
  keywords  = {ion},
  abstract  = {Motivation: The commercial launch of 454 pyrosequencing in 2005 was a milestone in genome sequencing in terms of performance and cost. Throughout the three available releases, average read lengths have increased to ∼500 base pairs and are thus approaching read lengths obtained from traditional Sanger sequencing. Study design of sequencing projects would benefit from being able to simulate experiments. Results: We explore 454 raw data to investigate its characteristics and derive empirical distributions for the flow values generated by pyrosequencing. Based on our findings, we implement Flowsim, a simulator that generates realistic pyrosequencing data files of arbitrary size from a given set of input DNA sequences. We finally use our simulator to examine the impact of sequence lengths on the results of concrete whole-genome assemblies, and we suggest its use in planning of sequencing projects, benchmarking of assembly methods and other fields. Availability: Flowsim is freely available under the General Public License from http://blog.malde.org/index.php/flowsim/ Contact: susanne.balzer@imr.no; ketil.malde@imr.no},
  added-at  = {2013-03-31T16:31:28.000+0200},
  timestamp = {2013-03-31T16:31:28.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/245880f1f82d9ec660d752ced61f7f087/ytyoun},
  interhash = {4cd38570bbaa5bfdb87d2664ad3cb9ae},
  intrahash = {45880f1f82d9ec660d752ced61f7f087},
}