Social bookmarking systems allow users to organise collections of resources
on the Web in a collaborative fashion. The increasing popularity of these
systems as well as first insights into their emergent semantics have made them
relevant to disciplines like knowledge extraction and ontology learning. The
problem of devising methods to measure the semantic relatedness between tags
and characterizing it semantically is still largely open. Here we analyze three
measures of tag relatedness: tag co-occurrence, cosine similarity of
co-occurrence distributions, and FolkRank, an adaptation of the PageRank
algorithm to folksonomies. Each measure is computed on tags from a large-scale
dataset crawled from the social bookmarking system del.icio.us. To provide a
semantic grounding of our findings, a connection to WordNet (a semantic lexicon
for the English language) is established by mapping tags into synonym sets of
WordNet, and applying there well-known metrics of semantic similarity. Our
results clearly expose different characteristics of the selected measures of
relatedness, making them applicable to different subtasks of knowledge
extraction such as synonym detection or discovery of concept hierarchies.
Comment: 5 pages, 2 figures
Description
[0805.2045] Semantic Analysis of Tag Similarity Measures in Collaborative Tagging Systems
%0 Generic
%1 Cattuto2008
%A Cattuto, Ciro
%A Benz, Dominik
%A Hotho, Andreas
%A Stumme, Gerd
%D 2008
%K analyse collaborative tagging
%T Semantic Analysis of Tag Similarity Measures in Collaborative Tagging Systems
%U http://arxiv.org/abs/0805.2045
%X Social bookmarking systems allow users to organise collections of resources
on the Web in a collaborative fashion. The increasing popularity of these
systems as well as first insights into their emergent semantics have made them
relevant to disciplines like knowledge extraction and ontology learning. The
problem of devising methods to measure the semantic relatedness between tags
and characterizing it semantically is still largely open. Here we analyze three
measures of tag relatedness: tag co-occurrence, cosine similarity of
co-occurrence distributions, and FolkRank, an adaptation of the PageRank
algorithm to folksonomies. Each measure is computed on tags from a large-scale
dataset crawled from the social bookmarking system del.icio.us. To provide a
semantic grounding of our findings, a connection to WordNet (a semantic lexicon
for the English language) is established by mapping tags into synonym sets of
WordNet, and applying there well-known metrics of semantic similarity. Our
results clearly expose different characteristics of the selected measures of
relatedness, making them applicable to different subtasks of knowledge
extraction such as synonym detection or discovery of concept hierarchies.
%Z Comment: 5 pages, 2 figures
@comment{Cattuto2008: arXiv preprint 0805.2045. The arXiv submission note (page and figure count) is stored in the comment field rather than inside the abstract.}
@misc{Cattuto2008,
  author        = {Cattuto, Ciro and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  title         = {Semantic Analysis of Tag Similarity Measures in Collaborative Tagging Systems},
  year          = {2008},
  eprint        = {0805.2045},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/0805.2045},
  abstract      = {Social bookmarking systems allow users to organise collections of resources
on the Web in a collaborative fashion. The increasing popularity of these
systems as well as first insights into their emergent semantics have made them
relevant to disciplines like knowledge extraction and ontology learning. The
problem of devising methods to measure the semantic relatedness between tags
and characterizing it semantically is still largely open. Here we analyze three
measures of tag relatedness: tag co-occurrence, cosine similarity of
co-occurrence distributions, and FolkRank, an adaptation of the PageRank
algorithm to folksonomies. Each measure is computed on tags from a large-scale
dataset crawled from the social bookmarking system del.icio.us. To provide a
semantic grounding of our findings, a connection to WordNet (a semantic lexicon
for the English language) is established by mapping tags into synonym sets of
WordNet, and applying there well-known metrics of semantic similarity. Our
results clearly expose different characteristics of the selected measures of
relatedness, making them applicable to different subtasks of knowledge
extraction such as synonym detection or discovery of concept hierarchies.},
  comment       = {5 pages, 2 figures},
  description   = {[0805.2045] Semantic Analysis of Tag Similarity Measures in Collaborative Tagging Systems},
  keywords      = {analyse collaborative tagging},
  added-at      = {2008-10-23T14:43:55.000+0200},
  timestamp     = {2008-10-23T14:43:55.000+0200},
  biburl        = {https://www.bibsonomy.org/bibtex/278fd64c3db55e6387ebdeb6c40054542/ctprojekt},
  interhash     = {cc62b733f6e0402db966d6dbf1b7711f},
  intrahash     = {78fd64c3db55e6387ebdeb6c40054542},
}