Semantic relatedness between words has been extracted from a variety of sources. In this ongoing work, we explore and compare several options for determining if semantic relatedness can be extracted from navigation structures in Wikipedia. In that direction, we first investigate the potential of representation learning techniques such as DeepWalk in comparison to previously applied methods based on counting co-occurrences. Since both methods are based on (random) paths in the network, we also study different approaches to generate paths from Wikipedia link structure. For this task, we do not only consider the link structure of Wikipedia, but also actual navigation behavior of users. Finally, we analyze if semantics can also be extracted from smaller subsets of the Wikipedia link network. As a result we find that representation learning techniques mostly outperform the investigated co-occurrence counting methods on the Wikipedia network. However, we find that this is not the case for paths sampled from human navigation behavior.
%0 Conference Paper
%1 dallmann2016extracting
%A Dallmann, Alexander
%A Niebler, Thomas
%A Lemmerich, Florian
%A Hotho, Andreas
%B Proceedings of the 10th International Conference on Web and Social Media
%D 2016
%E West, Robert
%E Zia, Leila
%E Taraborelli, Dario
%E Leskovec, Jure
%I AAAI
%K extracting myown published random semantics solvatio walks wikipedia word2vec
%T Extracting Semantics from Random Walks on Wikipedia: Comparing Learning and Counting Methods
%U https://www.aaai.org/ocs/index.php/ICWSM/ICWSM16/paper/view/13199
%X Semantic relatedness between words has been extracted from a variety of sources. In this ongoing work, we explore and compare several options for determining if semantic relatedness can be extracted from navigation structures in Wikipedia. In that direction, we first investigate the potential of representation learning techniques such as DeepWalk in comparison to previously applied methods based on counting co-occurrences. Since both methods are based on (random) paths in the network, we also study different approaches to generate paths from Wikipedia link structure. For this task, we do not only consider the link structure of Wikipedia, but also actual navigation behavior of users. Finally, we analyze if semantics can also be extracted from smaller subsets of the Wikipedia link network. As a result we find that representation learning techniques mostly outperform the investigated co-occurrence counting methods on the Wikipedia network. However, we find that this is not the case for paths sampled from human navigation behavior.
@inproceedings{dallmann2016extracting,
  abstract   = {Semantic relatedness between words has been extracted from a variety of sources. In this ongoing work, we explore and compare several options for determining if semantic relatedness can be extracted from navigation structures in Wikipedia. In that direction, we first investigate the potential of representation learning techniques such as DeepWalk in comparison to previously applied methods based on counting co-occurrences. Since both methods are based on (random) paths in the network, we also study different approaches to generate paths from Wikipedia link structure. For this task, we do not only consider the link structure of Wikipedia, but also actual navigation behavior of users. Finally, we analyze if semantics can also be extracted from smaller subsets of the Wikipedia link network. As a result we find that representation learning techniques mostly outperform the investigated co-occurrence counting methods on the Wikipedia network. However, we find that this is not the case for paths sampled from human navigation behavior.},
  added-at   = {2017-07-11T10:00:09.000+0200},
  author     = {Dallmann, Alexander and Niebler, Thomas and Lemmerich, Florian and Hotho, Andreas},
  author+an  = {2=highlight},
  biburl     = {https://www.bibsonomy.org/bibtex/22f93e5cbd553dad29f5f3213521e53ea/thoni},
  booktitle  = {Proceedings of the 10th International Conference on Web and Social Media},
  conference = {International AAAI Conference on Web and Social Media},
  editor     = {West, Robert and Zia, Leila and Taraborelli, Dario and Leskovec, Jure},
  interhash  = {a8393a6d07a1ef923eb0a7013639c103},
  intrahash  = {2f93e5cbd553dad29f5f3213521e53ea},
  keywords   = {extracting myown published random semantics solvatio walks wikipedia word2vec},
  publisher  = {AAAI},
  timestamp  = {2018-12-29T12:25:07.000+0100},
  title      = {Extracting Semantics from Random Walks on {Wikipedia}: Comparing Learning and Counting Methods},
  url        = {https://www.aaai.org/ocs/index.php/ICWSM/ICWSM16/paper/view/13199},
  year       = 2016
}