% GKB06: workshop paper by Geleijnse, Korst and de Boer (2006).
% The added-at, biburl, interhash, intrahash and timestamp fields are export
% bookkeeping (see biburl); standard bibliography styles ignore unknown fields.
@inproceedings{GKB06,
  author    = {Geleijnse, Gijs and Korst, Jan and de Boer, Viktor},
  title     = {Instance Classification using Co-Occurrences on the Web},
  booktitle = {Proc. of the ISWC2006 Workshop on Web Content Mining with Human Language Technologies},
  year      = {2006},
  url       = {http://orestes.ii.uam.es/workshop/3.pdf},
  keywords  = {co-occurence WCMHLT2006 google datamining linguistics},
  abstract  = {We present a novel unsupervised approach to mapping art-related instances (such as music artists and painters) to subjective categories like genre and style. We base our approach on co-occurrences of the two on the web, found with Google. The co-occurrences are found using three methods: by identifying the search engine counts, by analyzing Google excerpts found by querying patterns and by scanning full documents. Per instance, we use the same co-occurrence-based approach to find its nearest neighbors, i.e. the most related instances. These results can be combined in order to create a more reliable classification. We tested and compared the three methods on two different domains: mapping music artists to genres, and painters to art-styles. The results show that the use of related instances indeed improves the precision of the classification. Moreover, the methods with the lowest Google Complexity perform best.},
  added-at  = {2006-11-06T22:05:31.000+0100},
  timestamp = {2006-11-06T22:05:31.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/2027513ba40abf3789d6e8872fbe07287/lysander07},
  interhash = {e26e7cca0055f3042f172a70968aa724},
  intrahash = {027513ba40abf3789d6e8872fbe07287},
}