% MapReduce journal article (Dean and Ghemawat, CACM 51(1), 2008).
% The owner, timestamp and file fields are reference-manager bookkeeping;
% standard bibliography styles ignore field names they do not know.
@article{DeanGhemawat08CACM,
  author    = {Dean, Jeffrey and Ghemawat, Sanjay},
  title     = {{MapReduce}: Simplified Data Processing on Large Clusters},
  journal   = {Communications of the ACM},
  volume    = {51},
  number    = {1},
  pages     = {107--113},
  year      = {2008},
  issn      = {0001-0782},
  doi       = {10.1145/1327452.1327492},
  url       = {https://doi.org/10.1145/1327452.1327492},
  abstract  = {MapReduce is a programming model and an associated implementation
    for processing and generating large datasets that is amenable to
    a broad variety of real-world tasks. Users specify the computation
    in terms of a map and a reduce function, and the underlying runtime
    system automatically parallelizes the computation across large-scale
    clusters of machines, handles machine failures, and schedules inter-machine
    communication to make efficient use of the network and disks. Programmers
    find the system easy to use: more than ten thousand distinct MapReduce
    programs have been implemented internally at Google over the past
    four years, and an average of one hundred thousand MapReduce jobs
    are executed on Google's clusters every day, processing a total of
    more than twenty petabytes of data per day.},
  keywords  = {mapreduce},
  file      = {ACM Digital Library:2008/DeanGhemawat08CACM.pdf:PDF},
  owner     = {flint},
  timestamp = {2008.02.05},
}