MapReduce for information retrieval evaluation: "Let's quickly test this on 12 TB of data"
D. Hiemstra and C. Hauff. Multilingual and Multimodal Information Access Evaluation, volume 6360 of Lecture Notes in Computer Science, pages 64--69. Berlin, Springer Verlag, (2010)
Abstract
We propose to use MapReduce to quickly test new retrieval approaches on a cluster of machines by sequentially scanning all documents. We present a small case study in which we use a cluster of 15 low cost machines to search a web crawl of 0.5 billion pages showing that sequential scanning is a viable approach to running large-scale information retrieval experiments with little effort. The code is available to other researchers at: http://mirex.sourceforge.net.
%0 Conference Paper
%1 so73226
%A Hiemstra, Djoerd
%A Hauff, Claudia
%B Multilingual and Multimodal Information Access Evaluation
%C Berlin
%D 2010
%E Agosti, Maristella
%E Ferro, Nicola
%E Peters, Carol
%E de Rijke, Maarten
%E Smeaton, Alan
%I Springer Verlag
%K informationretrieval ir lucene mapreduce terrier
%P 64--69
%T MapReduce for information retrieval evaluation: "Let's quickly test this on 12 TB of data"
%U http://doc.utwente.nl/73226/1/clef10.pdf
%V 6360
%X We propose to use MapReduce to quickly test new retrieval approaches on a cluster of machines by sequentially scanning all documents. We present a small case study in which we use a cluster of 15 low cost machines to search a web crawl of 0.5 billion pages showing that sequential scanning is a viable approach to running large-scale information retrieval experiments with little effort. The code is available to other researchers at: http://mirex.sourceforge.net.
% Conference paper by Hiemstra and Hauff (LNCS volume 6360, pages 64--69).
% Names use the "von Last, First" form; case-sensitive title words are braced.
@inproceedings{so73226,
  abstract     = {We propose to use MapReduce to quickly test new retrieval approaches on a cluster of machines by sequentially scanning all documents. We present a small case study in which we use a cluster of 15 low cost machines to search a web crawl of 0.5 billion pages showing that sequential scanning is a viable approach to running large-scale information retrieval experiments with little effort. The code is available to other researchers at: http://mirex.sourceforge.net.},
  added-at     = {2011-06-28T11:23:23.000+0200},
  address      = {Berlin},
  author       = {Hiemstra, Djoerd and Hauff, Claudia},
  biburl       = {https://www.bibsonomy.org/bibtex/2f688cd9d4f52ce75ece5e8de64ede9d6/stroeh},
  booktitle    = {Multilingual and Multimodal Information Access Evaluation},
  editor       = {Agosti, Maristella and Ferro, Nicola and Peters, Carol and de Rijke, Maarten and Smeaton, Alan},
  interhash    = {2d2d2340a434dc9fa2dfcd231d43a785},
  intrahash    = {f688cd9d4f52ce75ece5e8de64ede9d6},
  keywords     = {informationretrieval ir lucene mapreduce terrier},
  pages        = {64--69},
  publisher    = {Springer Verlag},
  series       = {Lecture Notes in Computer Science},
  timestamp    = {2011-06-28T11:23:23.000+0200},
  title        = {{MapReduce} for information retrieval evaluation: ``{Let's} quickly test this on 12 {TB} of data''},
  url          = {http://doc.utwente.nl/73226/1/clef10.pdf},
  volume       = {6360},
  year         = {2010}
}