Scientific workflows have gained popularity for modeling and executing
in silico experiments by scientists for problem-solving. These workflows
primarily engage in computation and data transformation tasks to
perform scientific analysis in the Science Cloud. Increasingly workflows
are gaining use in managing the scientific data when they arrive
from external sensors and are prepared for becoming science ready
and available for use in the Cloud. While not directly part of the
scientific analysis, these workflows operating behind the Cloud on
behalf of the "data valets" play an important role in end-to-end
management of scientific data products. They share several features
with traditional scientific workflows: both are data intensive and
use Cloud resources. However, they also differ in significant respects,
for example, in the reliability required, scheduling constraints
and the use of provenance collected. In this article, we investigate
these two classes of workflows - Science Application workflows and
Data Preparation workflows - and use these to drive common and distinct
requirements from workflow systems for eScience in the Cloud. We
use workflow examples from two collaborations, the NEPTUNE oceanography
project and the Pan-STARRS astronomy project, to draw out our comparison.
Our analysis of these workflows classes can guide the evolution of
workflow systems to support emerging applications in the Cloud and
the Trident Scientific Workbench is one such workflow system that
has directly benefitted from this to meet the needs of these two
eScience projects.
%0 Conference Paper
%1 Simmhan:advcomp:2009
%A Simmhan, Yogesh
%A Barga, Roger
%A van Ingen, Catharine
%A Lazowska, Ed
%A Szalay, Alex
%B Conference on Advanced Engineering Computing and Applications in
Sciences (ADVCOMP)
%D 2009
%I IEEE
%K cloud, data management, escience, hpc, msr, panstarrs, peer reviewed, trident, workflows
%P 41-50
%R 10.1109/ADVCOMP.2009.14
%T Building the Trident Scientific Workflow Workbench for Data Management
in the Cloud
%X Scientific workflows have gained popularity for modeling and executing
in silico experiments by scientists for problem-solving. These workflows
primarily engage in computation and data transformation tasks to
perform scientific analysis in the Science Cloud. Increasingly workflows
are gaining use in managing the scientific data when they arrive
from external sensors and are prepared for becoming science ready
and available for use in the Cloud. While not directly part of the
scientific analysis, these workflows operating behind the Cloud on
behalf of the "data valets" play an important role in end-to-end
management of scientific data products. They share several features
with traditional scientific workflows: both are data intensive and
use Cloud resources. However, they also differ in significant respects,
for example, in the reliability required, scheduling constraints
and the use of provenance collected. In this article, we investigate
these two classes of workflows - Science Application workflows and
Data Preparation workflows - and use these to drive common and distinct
requirements from workflow systems for eScience in the Cloud. We
use workflow examples from two collaborations, the NEPTUNE oceanography
project and the Pan-STARRS astronomy project, to draw out our comparison.
Our analysis of these workflows classes can guide the evolution of
workflow systems to support emerging applications in the Cloud and
the Trident Scientific Workbench is one such workflow system that
has directly benefitted from this to meet the needs of these two
eScience projects.
@comment{
  Conference paper on the Trident Scientific Workflow Workbench (ADVCOMP 2009).
  The fields added-at, biburl, interhash, intrahash, owner and timestamp are
  not standard BibTeX fields; standard styles ignore them. They appear to be
  bookkeeping data from the BibSonomy export referenced in biburl.
  NOTE(review): the keywords were reconstructed from a scrambled tag list
  (multi-word tags "data management" and "peer reviewed" had been split
  apart) -- confirm against the original record.
}
@inproceedings{Simmhan:advcomp:2009,
  author    = {Simmhan, Yogesh and Barga, Roger and van Ingen, Catharine and Lazowska, Ed and Szalay, Alex},
  title     = {Building the {Trident} Scientific Workflow Workbench for Data Management
               in the Cloud},
  booktitle = {Conference on Advanced Engineering Computing and Applications in
               Sciences ({ADVCOMP})},
  publisher = {IEEE},
  year      = {2009},
  month     = oct,
  pages     = {41--50},
  doi       = {10.1109/ADVCOMP.2009.14},
  keywords  = {cloud, data management, escience, hpc, msr, panstarrs, peer reviewed, trident, workflows},
  abstract  = {Scientific workflows have gained popularity for modeling and executing
    in silico experiments by scientists for problem-solving. These workflows
    primarily engage in computation and data transformation tasks to
    perform scientific analysis in the Science Cloud. Increasingly workflows
    are gaining use in managing the scientific data when they arrive
    from external sensors and are prepared for becoming science ready
    and available for use in the Cloud. While not directly part of the
    scientific analysis, these workflows operating behind the Cloud on
    behalf of the ``data valets'' play an important role in end-to-end
    management of scientific data products. They share several features
    with traditional scientific workflows: both are data intensive and
    use Cloud resources. However, they also differ in significant respects,
    for example, in the reliability required, scheduling constraints
    and the use of provenance collected. In this article, we investigate
    these two classes of workflows - Science Application workflows and
    Data Preparation workflows - and use these to drive common and distinct
    requirements from workflow systems for eScience in the Cloud. We
    use workflow examples from two collaborations, the NEPTUNE oceanography
    project and the Pan-STARRS astronomy project, to draw out our comparison.
    Our analysis of these workflows classes can guide the evolution of
    workflow systems to support emerging applications in the Cloud and
    the Trident Scientific Workbench is one such workflow system that
    has directly benefitted from this to meet the needs of these two
    eScience projects.},
  added-at  = {2014-08-13T04:08:36.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/25fe28c54a855c5a48848f57207699c25/simmhan},
  interhash = {fff3e50eecc78b005ff681e8967a6efe},
  intrahash = {5fe28c54a855c5a48848f57207699c25},
  owner     = {Simmhan},
  timestamp = {2014-08-13T04:08:36.000+0200},
}