@inproceedings{GKB06,
  author    = {Geleijnse, Gijs and Korst, Jan and de Boer, Viktor},
  title     = {Instance Classification using Co-Occurrences on the {Web}},
  booktitle = {Proc.\ of the {ISWC2006} Workshop on Web Content Mining with Human Language Technologies},
  year      = {2006},
  url       = {http://orestes.ii.uam.es/workshop/3.pdf},
  abstract  = {We present a novel unsupervised approach to mapping art-related
    instances (such as music artists and painters) to subjective categories
    like genre and style. We base our approach on co-occurrences of the two
    on the web, found with Google. The co-occurrences are found using three
    methods: by identifying the search engine counts, by analyzing Google
    excerpts found by querying patterns and by scanning full documents. Per
    instance, we use the same co-occurrence-based approach to find its
    nearest neighbors, i.e. the most related instances. These results can be
    combined in order to create a more reliable classification. We tested
    and compared the three methods on two different domains: mapping music
    artists to genres, and painters to art-styles. The results show that the
    use of related instances indeed improves the precision of the
    classification. Moreover, the methods with the lowest Google Complexity
    perform best.},
  keywords  = {2006 classification datamining google occurrences unread web}
}