Motivation: The spectrum of somatic single-nucleotide variants in cancer genomes often reflects the signatures of multiple distinct mutational processes, which can provide clinically actionable insights into cancer etiology. Existing software tools for identifying and evaluating these mutational signatures do not scale to analyze large datasets containing thousands of individuals or millions of variants. Results: We introduce Helmsman, a program designed to rapidly generate mutation spectra matrices from arbitrarily large datasets. Helmsman is up to 300 times faster than existing methods and can provide more than a 100-fold reduction in memory usage, making mutation signature analysis tractable for any collection of single nucleotide variants, no matter how large.
%0 Journal Article
%1 carlson2018helmsman
%A Carlson, Jedidiah
%A Li, Jun
%A Zöllner, Sebastian
%D 2018
%I Cold Spring Harbor Laboratory
%J bioRxiv
%K context-dependent-mutation methods mutation_motif mutation_spectrum software
%R 10.1101/373076
%T Helmsman: fast and efficient generation of input matrices for mutation signature analysis
%U https://www.biorxiv.org/content/early/2018/07/20/373076
%X Motivation: The spectrum of somatic single-nucleotide variants in cancer genomes often reflects the signatures of multiple distinct mutational processes, which can provide clinically actionable insights into cancer etiology. Existing software tools for identifying and evaluating these mutational signatures do not scale to analyze large datasets containing thousands of individuals or millions of variants. Results: We introduce Helmsman, a program designed to rapidly generate mutation spectra matrices from arbitrarily large datasets. Helmsman is up to 300 times faster than existing methods and can provide more than a 100-fold reduction in memory usage, making mutation signature analysis tractable for any collection of single nucleotide variants, no matter how large.
@comment{
  carlson2018helmsman: bioRxiv preprint (publisher Cold Spring Harbor Laboratory).
  - eprint/eprinttype carry the bare bioRxiv identifier (the DOI suffix), not a
    link; the direct PDF link is kept in the unrendered pdf-url field.
  - added-at, biburl, interhash, intrahash and timestamp are bookkeeping fields
    from the bibsonomy.org export; they are not standard BibTeX fields.
}
@article{carlson2018helmsman,
  author     = {Carlson, Jedidiah and Li, Jun and Z{\"o}llner, Sebastian},
  title      = {{Helmsman}: fast and efficient generation of input matrices for mutation signature analysis},
  journal    = {bioRxiv},
  publisher  = {Cold Spring Harbor Laboratory},
  year       = {2018},
  doi        = {10.1101/373076},
  eprint     = {373076},
  eprinttype = {bioRxiv},
  url        = {https://www.biorxiv.org/content/early/2018/07/20/373076},
  pdf-url    = {https://www.biorxiv.org/content/early/2018/07/20/373076.full.pdf},
  keywords   = {context-dependent-mutation methods mutation_motif mutation_spectrum software},
  abstract   = {Motivation: The spectrum of somatic single-nucleotide variants in cancer genomes often reflects the signatures of multiple distinct mutational processes, which can provide clinically actionable insights into cancer etiology. Existing software tools for identifying and evaluating these mutational signatures do not scale to analyze large datasets containing thousands of individuals or millions of variants. Results: We introduce Helmsman, a program designed to rapidly generate mutation spectra matrices from arbitrarily large datasets. Helmsman is up to 300 times faster than existing methods and can provide more than a 100-fold reduction in memory usage, making mutation signature analysis tractable for any collection of single nucleotide variants, no matter how large.},
  added-at   = {2018-07-21T06:11:44.000+0200},
  timestamp  = {2018-07-21T06:11:44.000+0200},
  biburl     = {https://www.bibsonomy.org/bibtex/2b1398752e256d7c69da0f7172bfade11/peter.ralph},
  interhash  = {73eef3cc4c630fdbb243182ceff6536a},
  intrahash  = {b1398752e256d7c69da0f7172bfade11},
}