Clustering is one of the most crucial techniques for dealing with the massive amount of information present on the web. Clustering can either be performed once offline, independent of search queries, or performed online on the results of search queries. Our offline approach aims to efficiently cluster similar pages on the web, using the technique of Locality-Sensitive Hashing (LSH), in which web pages are hashed in such a way that similar pages have a much higher probability of collision than...
%0 Conference Paper
%1 Haveliwala00ScalableTechniquesClusteringWeb
%A Haveliwala, Taher H.
%A Gionis, Aristides
%A Indyk, Piotr
%B WebDB (Informal Proceedings)
%D 2000
%K clustering, web
%P 129-134
%T Scalable Techniques for Clustering the Web
%U http://citeseer.ist.psu.edu/haveliwala00scalable.html
%X Clustering is one of the most crucial techniques for dealing with the massive amount of information present on the web. Clustering can either be performed once offline, independent of search queries, or performed online on the results of search queries. Our offline approach aims to efficiently cluster similar pages on the web, using the technique of Locality-Sensitive Hashing (LSH), in which web pages are hashed in such a way that similar pages have a much higher probability of collision than...
@comment{Conference paper by Haveliwala, Gionis and Indyk (year 2000). Capitalised proper nouns in title and booktitle are brace-protected so that sentence-casing styles keep them. The abstract ends in an ellipsis because it is truncated in the source record.}
@inproceedings{Haveliwala00ScalableTechniquesClusteringWeb,
  author               = {Haveliwala, Taher H. and Gionis, Aristides and Indyk, Piotr},
  title                = {Scalable Techniques for Clustering the {Web}},
  booktitle            = {{WebDB} (Informal Proceedings)},
  pages                = {129--134},
  year                 = {2000},
  url                  = {http://citeseer.ist.psu.edu/haveliwala00scalable.html},
  keywords             = {clustering, web},
  abstract             = {Clustering is one of the most crucial techniques for dealing with the massive amount of information present on the web. Clustering can either be performed once offline, independent of search queries, or performed online on the results of search queries. Our offline approach aims to efficiently cluster similar pages on the web, using the technique of Locality-Sensitive Hashing (LSH), in which web pages are hashed in such a way that similar pages have a much higher probability of collision than...},
  added-at             = {2008-05-27T08:15:25.000+0200},
  timestamp            = {2008-05-27T08:15:28.000+0200},
  biburl               = {https://www.bibsonomy.org/bibtex/26a9c1bc0c0c42ad9f5072528f5b7bcff/mgrani},
  citeulike-article-id = {446846},
  interhash            = {2264e6ce7afd435c7c5d43ffbebbe9f1},
  intrahash            = {6a9c1bc0c0c42ad9f5072528f5b7bcff},
  priority             = {2},
}