% Bibliography export (BibSonomy) covering MapReduce, Bigtable and two
% works by Pankaj Ghemawat.
%
% Citation keys are kept exactly as exported so existing citations resolve.
%
% NOTE(review): several entries describe the same work under different keys:
%   - CACM MapReduce article: DeanGhemawat08cacm, DeanGhemawat08CACM,
%     journals/cacm/DeanG08
%   - OSDI '04 MapReduce paper: 1251264, dean2004map-reduce
%   - OSDI '06 Bigtable paper: 1267323, chang2006bigtable
% DeanGhemawat08cacm and DeanGhemawat08CACM differ only in letter case.
% Classic BibTeX compares keys case-insensitively and reports the second
% one as a repeated entry; Biber treats them as distinct. Consolidate once
% all citing documents have been checked.

@article{DeanGhemawat08cacm,
  author    = {Dean, Jeffrey and Ghemawat, Sanjay},
  title     = {{MapReduce}: Simplified Data Processing on Large Clusters},
  journal   = {Communications of the ACM},
  volume    = {51},
  number    = {1},
  pages     = {107--113},
  year      = {2008},
  doi       = {10.1145/1327452.1327492},
  url       = {http://dx.doi.org/10.1145/1327452.1327492},
  issn      = {0001-0782},
  abstract  = {MapReduce is a programming model and an associated implementation for processing and generating large datasets that is amenable to a broad variety of real-world tasks. Users specify the computation in terms of a map and a reduce function, and the underlying runtime system automatically parallelizes the computation across large-scale clusters of machines, handles machine failures, and schedules inter-machine communication to make efficient use of the network and disks. Programmers find the system easy to use: more than ten thousand distinct MapReduce programs have been implemented internally at Google over the past four years, and an average of one hundred thousand MapReduce jobs are executed on Google's clusters every day, processing a total of more than twenty petabytes of data per day.},
  keywords  = {acm algorithm data google network paper processing v0805},
  biburl    = {http://www.bibsonomy.org/bibtex/2cfb6379b9a2177c3289f9f2cd84a3ff9/flint63},
  timestamp = {2008.02.05},
  file      = {ACM Digital Library:2008/DeanGhemawat08cacm.pdf:PDF},
  owner     = {flint},
}

@article{journals/tocs/ChangDGHWBCFG08,
  author      = {Chang, Fay and Dean, Jeffrey and Ghemawat, Sanjay and Hsieh, Wilson C. and Wallach, Deborah A. and Burrows, Michael and Chandra, Tushar and Fikes, Andrew and Gruber, Robert E.},
  title       = {{Bigtable}: A Distributed Storage System for Structured Data},
  journal     = {ACM Transactions on Computer Systems},
  volume      = {26},
  number      = {2},
  year        = {2008},
  doi         = {10.1145/1365815.1365816},
  url         = {http://dblp.uni-trier.de/db/journals/tocs/tocs26.html#ChangDGHWBCFG08},
  ee          = {http://doi.acm.org/10.1145/1365815.1365816},
  date        = {2008-06-19},
  keywords    = {dblp},
  biburl      = {http://www.bibsonomy.org/bibtex/2dad7cea5bc24aa82dd4ba9cfe729222c/dblp},
  description = {dblp},
}

% The export had the publisher name in "address" and the city in
% "howpublished"; "2. print" is a printing statement, not an edition.
@book{ghemawat1998,
  author      = {Ghemawat, Pankaj},
  title       = {Games Businesses Play: Cases and Models},
  publisher   = {MIT Press},
  address     = {Cambridge, Mass.},
  year        = {1998},
  note        = {Second printing},
  isbn        = {0-262-07182-7},
  url         = {http://gso.gbv.de/DB=2.1/CMD?ACT=SRCHA&SRT=YOP&IKT=1016&TRM=ppn+303415150&sourceid=fbw_bibsonomy},
  annote      = {XVI, 255 S},
  keywords    = {imported},
  biburl      = {http://www.bibsonomy.org/bibtex/242e1d01d04beca0fd48cbde15ae343d8/fbw},
  description = {imported},
}

% The exported pages "15--15" were a placeholder; the real range is the one
% recorded by chang2006bigtable for the same paper.
@inproceedings{1267323,
  author    = {Chang, Fay and Dean, Jeffrey and Ghemawat, Sanjay and Hsieh, Wilson C. and Wallach, Deborah A. and Burrows, Mike and Chandra, Tushar and Fikes, Andrew and Gruber, Robert E.},
  title     = {{Bigtable}: A Distributed Storage System for Structured Data},
  booktitle = {Proceedings of the 7th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} '06)},
  pages     = {205--218},
  publisher = {USENIX Association},
  address   = {Berkeley, CA, USA},
  venue     = {Seattle, WA},
  year      = {2006},
  keywords  = {google},
  biburl    = {http://www.bibsonomy.org/bibtex/2b42a1e40cc01965650c298d781f44959/chesteve},
}

@article{DeanGhemawat08CACM,
  author    = {Dean, Jeffrey and Ghemawat, Sanjay},
  title     = {{MapReduce}: Simplified Data Processing on Large Clusters},
  journal   = {Communications of the ACM},
  volume    = {51},
  number    = {1},
  pages     = {107--113},
  year      = {2008},
  doi       = {10.1145/1327452.1327492},
  url       = {http://dx.doi.org/10.1145/1327452.1327492},
  issn      = {0001-0782},
  abstract  = {MapReduce is a programming model and an associated implementation for processing and generating large datasets that is amenable to a broad variety of real-world tasks. Users specify the computation in terms of a map and a reduce function, and the underlying runtime system automatically parallelizes the computation across large-scale clusters of machines, handles machine failures, and schedules inter-machine communication to make efficient use of the network and disks. Programmers find the system easy to use: more than ten thousand distinct MapReduce programs have been implemented internally at Google over the past four years, and an average of one hundred thousand MapReduce jobs are executed on Google's clusters every day, processing a total of more than twenty petabytes of data per day.},
  keywords  = {mapreduce},
  biburl    = {http://www.bibsonomy.org/bibtex/2cfb6379b9a2177c3289f9f2cd84a3ff9/castagna},
  timestamp = {2008.02.05},
  file      = {ACM Digital Library:2008/DeanGhemawat08CACM.pdf:PDF},
  owner     = {flint},
}

@article{journals/cacm/DeanG08,
  author      = {Dean, Jeffrey and Ghemawat, Sanjay},
  title       = {{MapReduce}: Simplified Data Processing on Large Clusters},
  journal     = {Communications of the ACM},
  volume      = {51},
  number      = {1},
  pages       = {107--113},
  year        = {2008},
  doi         = {10.1145/1327452.1327492},
  url         = {http://dblp.uni-trier.de/db/journals/cacm/cacm51.html#DeanG08},
  ee          = {http://doi.acm.org/10.1145/1327452.1327492},
  date        = {2008-01-17},
  keywords    = {dblp},
  biburl      = {http://www.bibsonomy.org/bibtex/29c0106df8a93c5707ee86edd539bc3b2/dblp},
  description = {dblp},
}

% Exported with the non-standard entry type "url"; recorded as a report so
% standard styles can format it.
% NOTE(review): institution and type are derived from the exported value
% "Harvard Business School Research Papers" - confirm against the paper.
@techreport{Casadesus-Masanell:2003,
  author        = {Casadesus-Masanell, Ramon and Ghemawat, Pankaj},
  title         = {Dynamic Mixed Duopoly: A Model Motivated by {Linux} vs. {Windows}},
  institution   = {Harvard Business School},
  type          = {Research Paper},
  number        = {D/519},
  year          = {2003},
  url           = {http://opensource.mit.edu/papers/masanellghemawat.pdf},
  urldate       = {2007-10-10},
  keywords      = {OpenSource},
  biburl        = {http://www.bibsonomy.org/bibtex/29e2bcbf5b41eebaa44cc1af712916b37/torstenschuenemann},
  date-modified = {2007-11-03 10:39:05 +0100},
  date-added    = {2007-11-03 10:39:05 +0100},
}

@inproceedings{chang2006bigtable,
  author               = {Chang, Fay and Dean, Jeffrey and Ghemawat, Sanjay and Hsieh, Wilson C. and Wallach, Deborah A. and Burrows, Mike and Chandra, Tushar and Fikes, Andrew and Gruber, Robert E.},
  title                = {{Bigtable}: A Distributed Storage System for Structured Data},
  booktitle            = {Proceedings of the 7th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} '06)},
  pages                = {205--218},
  year                 = {2006},
  url                  = {http://www.usenix.org/events/osdi06/tech/chang.html},
  abstract             = {Bigtable is a distributed storage system for managing structured data that is designed to scale to a very large size: petabytes of data across thousands of commodity servers. Many projects at Google store data in Bigtable, including web indexing, Google Earth, and Google Finance. These applications place very different demands on Bigtable, both in terms of data size (from URLs to web pages to satellite imagery) and latency requirements (from backend bulk processing to real-time data serving). Despite these varied demands, Bigtable has successfully provided a flexible, high-performance solution for all of these Google products. In this paper we describe the simple data model provided by Bigtable, which gives clients dynamic control over data layout and format, and we describe the design and implementation of Bigtable.},
  keywords             = {database distributed systems},
  biburl               = {http://www.bibsonomy.org/bibtex/24d88f45a07096496b594c4685a0a4bca/cameron},
  description          = {Main paper DB},
  date-modified        = {2007-11-15 13:44:49 -0500},
  date-added           = {2007-11-15 13:40:04 -0500},
  local-url            = {chang/chang-2006-bigtable.pdf},
  citeulike-article-id = {936194},
  priority             = {2},
}

% The exported pages "10--10" were a placeholder; the real range is the one
% recorded by dean2004map-reduce for the same paper.
@inproceedings{1251264,
  author      = {Dean, Jeffrey and Ghemawat, Sanjay},
  title       = {{MapReduce}: Simplified Data Processing on Large Clusters},
  booktitle   = {Proceedings of the 6th Symposium on Operating Systems Design and Implementation ({OSDI} '04)},
  pages       = {137--150},
  publisher   = {USENIX Association},
  address     = {Berkeley, CA, USA},
  venue       = {San Francisco, CA},
  year        = {2004},
  url         = {http://portal.acm.org/citation.cfm?id=1251254.1251264},
  keywords    = {distributed grid mapreduce retreival},
  biburl      = {http://www.bibsonomy.org/bibtex/27d0429efb0c9459c456dd7d303a7a5d5/jhammerb},
  description = {MapReduce},
}

@inproceedings{dean2004map-reduce,
  author       = {Dean, Jeffrey and Ghemawat, Sanjay},
  title        = {{MapReduce}: Simplified Data Processing on Large Clusters},
  booktitle    = {Proceedings of the 6th Symposium on Operating Systems Design and Implementation ({OSDI} '04)},
  organization = {google labs},
  pages        = {137--150},
  year         = {2004},
  url          = {http://www.usenix.org/events/osdi04/tech/dean.html},
  keywords     = {computation dataset, graph, large},
  biburl       = {http://www.bibsonomy.org/bibtex/21fc56b2c39922da949ea59c8796dbca8/sbattiston},
  comment      = {explains the methods used at google to make computations that are in principle easy, but in practice difficult because they need to be done on huge data sets. The principle is to decompose any computation in operations of type Map and Reduces},
  signalledby  = {F. Walter},
  folder       = {graphs},
}