As computational work becomes more and more integral to many aspects of
scientific research, computational reproducibility has become an issue of
increasing importance to computer systems researchers and domain scientists
alike. Though computational reproducibility seems more straightforward than
replicating physical experiments, the complex and rapidly changing nature of
computer environments makes being able to reproduce and extend such work a
serious challenge. In this paper, I explore common reasons that code developed
for one research project cannot be successfully executed or extended by
subsequent researchers. I review current approaches to these issues, including
virtual machines and workflow systems, and their limitations. I then examine
how the popular emerging technology Docker combines several areas from systems
research - such as operating system virtualization, cross-platform portability,
modular re-usable elements, versioning, and a 'DevOps' philosophy - to address
these challenges. I illustrate this with several examples of Docker use with a
focus on the R statistical environment.
%0 Journal Article
%1 boettiger2014introduction
%A Boettiger, Carl
%C New York, NY, USA
%D 2014
%I ACM
%J ACM SIGOPS Operating Systems Review
%K reproducibility docker
%N 1
%P 71-79
%R 10.1145/2723872.2723882
%T An introduction to Docker for reproducible research, with examples from the R environment
%U http://dx.doi.org/10.1145/2723872.2723882
%V 49
%X As computational work becomes more and more integral to many aspects of
scientific research, computational reproducibility has become an issue of
increasing importance to computer systems researchers and domain scientists
alike. Though computational reproducibility seems more straightforward than
replicating physical experiments, the complex and rapidly changing nature of
computer environments makes being able to reproduce and extend such work a
serious challenge. In this paper, I explore common reasons that code developed
for one research project cannot be successfully executed or extended by
subsequent researchers. I review current approaches to these issues, including
virtual machines and workflow systems, and their limitations. I then examine
how the popular emerging technology Docker combines several areas from systems
research - such as operating system virtualization, cross-platform portability,
modular re-usable elements, versioning, and a 'DevOps' philosophy - to address
these challenges. I illustrate this with several examples of Docker use with a
focus on the R statistical environment.
@comment{
  Journal article by Carl Boettiger in ACM SIGOPS Operating Systems Review,
  volume 49, number 1, pages 71--79; also recorded as arXiv eprint 1410.0846.

  The fields added-at, biburl, citeulike-*, interhash, intrahash, posted-at,
  priority and timestamp are reference-manager export metadata; standard
  bibliography styles do not know them and silently ignore them.

  In the title only the proper nouns Docker and R are brace-protected, so
  styles remain free to apply their own casing to the rest.

  NOTE(review): day/month (2 Oct) look like the arXiv posting date rather
  than the journal issue date -- confirm against the publisher record
  before relying on them.
}
@article{boettiger2014introduction,
  abstract             = {As computational work becomes more and more integral to many aspects of
scientific research, computational reproducibility has become an issue of
increasing importance to computer systems researchers and domain scientists
alike. Though computational reproducibility seems more straightforward than
replicating physical experiments, the complex and rapidly changing nature of
computer environments makes being able to reproduce and extend such work a
serious challenge. In this paper, I explore common reasons that code developed
for one research project cannot be successfully executed or extended by
subsequent researchers. I review current approaches to these issues, including
virtual machines and workflow systems, and their limitations. I then examine
how the popular emerging technology Docker combines several areas from systems
research -- such as operating system virtualization, cross-platform portability,
modular re-usable elements, versioning, and a `DevOps' philosophy -- to address
these challenges. I illustrate this with several examples of Docker use with a
focus on the R statistical environment.},
  added-at             = {2018-12-07T09:10:16.000+0100},
  address              = {New York, NY, USA},
  archiveprefix        = {arXiv},
  author               = {Boettiger, Carl},
  biburl               = {https://www.bibsonomy.org/bibtex/29e87d6d6b005575f864ff14c2400e0a1/jpvaldes},
  citeulike-article-id = {13603356},
  citeulike-linkout-0  = {http://portal.acm.org/citation.cfm?id=2723882},
  citeulike-linkout-1  = {http://arxiv.org/abs/1410.0846},
  citeulike-linkout-2  = {http://arxiv.org/pdf/1410.0846},
  citeulike-linkout-3  = {http://dx.doi.org/10.1145/2723872.2723882},
  day                  = {2},
  doi                  = {10.1145/2723872.2723882},
  eprint               = {1410.0846},
  interhash            = {80c34c5afdfe3d552ba3eecec233f88e},
  intrahash            = {9e87d6d6b005575f864ff14c2400e0a1},
  issn                 = {0163-5980},
  journal              = {ACM SIGOPS Operating Systems Review},
  keywords             = {reproducibility docker},
  month                = oct,
  number               = {1},
  pages                = {71--79},
  posted-at            = {2017-03-17 07:39:11},
  priority             = {2},
  publisher            = {ACM},
  timestamp            = {2018-12-07T09:32:10.000+0100},
  title                = {An introduction to {Docker} for reproducible research, with examples from the {R} environment},
  url                  = {https://doi.org/10.1145/2723872.2723882},
  volume               = {49},
  year                 = {2014},
}