One of the most frequent Web surfing tasks is to search for names of persons and organizations. Such names are often not distinctive, commonly occurring, and nonunique. Thus, a single name may be mapped to several entities. We describe a methodology to cluster the Web pages returned by the search engine so that pages belonging to different entities are clustered into different groups. The algorithm uses a combination of named entities, link-based and structure-based information as features to partition the document set into direct and indirect pages using a decision model. It then uses the distinct direct pages as seeds to cluster the document set into different clusters. The algorithm has been found to be effective for Web-based applications.
%0 Conference Proceedings
%1 citeulike:447616
%A Ye, S.
%A Chua, T. S.
%A Kei, J. R.
%D 2003
%B Web Intelligence, 2003. WI 2003. Proceedings. IEEE/WIC International Conference on
%K citeulike clustering, web
%P 344--350
%T Querying and clustering Web pages about persons and organizations
%U http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=1241214
%X One of the most frequent Web surfing tasks is to search for names of persons and organizations. Such names are often not distinctive, commonly occurring, and nonunique. Thus, a single name may be mapped to several entities. We describe a methodology to cluster the Web pages returned by the search engine so that pages belonging to different entities are clustered into different groups. The algorithm uses a combination of named entities, link-based and structure-based information as features to partition the document set into direct and indirect pages using a decision model. It then uses the distinct direct pages as seeds to cluster the document set into different clusters. The algorithm has been found to be effective for Web-based applications.
% Conference paper by Ye, Chua and Kei (2003), pages 344--350 of the WI 2003 proceedings.
% Typed as inproceedings (a paper within a proceedings volume) so that the
% author list is honoured and the venue is read from booktitle, not journal.
% The venue words are those of the IEEE Xplore export, put into natural order.
% Only "Web" and the acronyms are brace-protected; styles may recase the rest.
% Fields such as added-at, biburl, interhash, intrahash, citeulike-*, posted-at,
% priority and timestamp are bookmarking-service metadata kept verbatim;
% standard bibliography styles ignore unknown fields.
@inproceedings{citeulike:447616,
  author               = {Ye, S. and Chua, T. S. and Kei, J. R.},
  title                = {Querying and clustering {Web} pages about persons and organizations},
  booktitle            = {Proceedings of the {IEEE/WIC} International Conference on Web Intelligence ({WI} 2003)},
  pages                = {344--350},
  year                 = {2003},
  url                  = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=1241214},
  abstract             = {One of the most frequent Web surfing tasks is to search for names of persons and organizations. Such names are often not distinctive, commonly occurring, and nonunique. Thus, a single name may be mapped to several entities. We describe a methodology to cluster the Web pages returned by the search engine so that pages belonging to different entities are clustered into different groups. The algorithm uses a combination of named entities, link-based and structure-based information as features to partition the document set into direct and indirect pages using a decision model. It then uses the distinct direct pages as seeds to cluster the document set into different clusters. The algorithm has been found to be effective for Web-based applications.},
  keywords             = {citeulike clustering, web},
  added-at             = {2017-09-08T10:52:59.000+0200},
  biburl               = {https://www.bibsonomy.org/bibtex/2b8b8d937f3c58c834f5fb6f3949d919e/fernand0},
  citeulike-article-id = {447616},
  citeulike-linkout-0  = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=1241214},
  interhash            = {0470c99a9fbc9e79b28f0279ecca4b6e},
  intrahash            = {b8b8d937f3c58c834f5fb6f3949d919e},
  posted-at            = {2005-12-23 10:03:49},
  priority             = {2},
  timestamp            = {2017-09-08T10:53:23.000+0200},
}