% Broder et al. (1997), journal article in Computer Networks and ISDN Systems 29(8--13).
% The booktitle field records the conference special issue named in the original record;
% standard styles do not print it for article entries.
@article{keyhere,
  author      = {Broder, Andrei Z. and Glassman, Steven C. and Manasse, Mark S. and Zweig, Geoffrey},
  title       = {Syntactic clustering of the {Web}},
  journal     = {Computer Networks and ISDN Systems},
  booktitle   = {Papers from the Sixth International World Wide Web Conference},
  volume      = {29},
  number      = {8--13},
  pages       = {1157--1166},
  month       = sep,
  year        = {1997},
  url         = {http://www.sciencedirect.com/science/article/B6TYT-3SP60S4-11/2/38f44c816ec8d69b406317de1629e56d},
  biburl      = {http://www.bibsonomy.org/bibtex/293a3440b81c13ec81c17481a97719c71/wnpxrz},
  description = {ScienceDirect - Computer Networks and ISDN Systems : Syntactic clustering of the Web},
  abstract    = {We have developed an efficient way to determine the syntactic similarity of files and have applied it to every document on the World Wide Web. Using this mechanism, we built a clustering of all the documents that are syntactically similar. Possible applications include a "Lost and Found" service, filtering the results of Web searches, updating widely distributed web-pages, and identifying violations of intellectual property rights.},
  keywords    = {clustering syntactic web},
}