Google's MapReduce programming model serves for processing large data sets in a massively parallel manner. We deliver the first rigorous description of the model including its advancement as Google's domain-specific language Sawzall. To this end, we reverse-engineer the seminal papers on MapReduce and Sawzall, and we capture our findings as an executable specification. We also identify and resolve some obscurities in the informal presentation given in the seminal papers. We use typed functional programming (specifically Haskell) as a tool for design recovery and executable specification. Our development comprises three components: (i) the basic program skeleton that underlies MapReduce computations; (ii) the opportunities for parallelism in executing MapReduce computations; (iii) the fundamental characteristics of Sawzall's aggregators as an advancement of the MapReduce approach. Our development does not formalize the more implementational aspects of an actual, distributed execution of MapReduce computations.
%0 Journal Article
%1 1290812
%A Lämmel, Ralf
%C Amsterdam, The Netherlands
%D 2007
%I Elsevier North-Holland, Inc.
%J Sci. Comput. Program.
%K distributed grid large_scale mapreduce parallel retrieval
%N 3
%P 208--237
%R http://dx.doi.org/10.1016/j.scico.2007.07.001
%T Google's MapReduce programming model — Revisited
%U http://portal.acm.org/citation.cfm?id=1290549.1290812
%V 68
%X Google's MapReduce programming model serves for processing large data sets in a massively parallel manner. We deliver the first rigorous description of the model including its advancement as Google's domain-specific language Sawzall. To this end, we reverse-engineer the seminal papers on MapReduce and Sawzall, and we capture our findings as an executable specification. We also identify and resolve some obscurities in the informal presentation given in the seminal papers. We use typed functional programming (specifically Haskell) as a tool for design recovery and executable specification. Our development comprises three components: (i) the basic program skeleton that underlies MapReduce computations; (ii) the opportunities for parallelism in executing MapReduce computations; (iii) the fundamental characteristics of Sawzall's aggregators as an advancement of the MapReduce approach. Our development does not formalize the more implementational aspects of an actual, distributed execution of MapReduce computations.
@comment{
  Journal article record. The citation key is the numeric id that also
  appears at the end of the url field. The doi field holds the bare DOI
  (no resolver prefix); the em dash in the title is written as LaTeX "---".
  NOTE(review): volume, number, pages and year are kept exactly as exported;
  confirm them against the publisher record for the DOI.
}
@article{1290812,
  abstract    = {Google's MapReduce programming model serves for processing large data sets in a massively parallel manner. We deliver the first rigorous description of the model including its advancement as Google's domain-specific language Sawzall. To this end, we reverse-engineer the seminal papers on MapReduce and Sawzall, and we capture our findings as an executable specification. We also identify and resolve some obscurities in the informal presentation given in the seminal papers. We use typed functional programming (specifically Haskell) as a tool for design recovery and executable specification. Our development comprises three components: (i) the basic program skeleton that underlies MapReduce computations; (ii) the opportunities for parallelism in executing MapReduce computations; (iii) the fundamental characteristics of Sawzall's aggregators as an advancement of the MapReduce approach. Our development does not formalize the more implementational aspects of an actual, distributed execution of MapReduce computations.},
  added-at    = {2007-12-06T03:55:09.000+0100},
  address     = {Amsterdam, The Netherlands},
  author      = {L{\"a}mmel, Ralf},
  biburl      = {https://www.bibsonomy.org/bibtex/2ec28e4fa50ddcb678fa3d505b82571c5/jhammerb},
  description = {Google's MapReduce programming model — Revisited},
  doi         = {10.1016/j.scico.2007.07.001},
  interhash   = {38193b3efc8520c267ec445819b77ccc},
  intrahash   = {ec28e4fa50ddcb678fa3d505b82571c5},
  issn        = {0167-6423},
  journal     = {Science of Computer Programming},
  keywords    = {distributed grid large_scale mapreduce parallel retrieval},
  number      = 3,
  pages       = {208--237},
  publisher   = {Elsevier North-Holland, Inc.},
  timestamp   = {2007-12-06T03:55:09.000+0100},
  title       = {{Google's} {MapReduce} programming model --- {Revisited}},
  url         = {http://portal.acm.org/citation.cfm?id=1290549.1290812},
  volume      = 68,
  year        = 2007
}