Wikipedia provides a semantic network for computing semantic relatedness in a more structured fashion than a search engine and with more coverage than WordNet. We present experiments on using Wikipedia for computing semantic relatedness and compare it to WordNet on various benchmarking datasets. Existing relatedness measures perform better using Wikipedia than a baseline given by Google counts, and we show that Wikipedia outperforms WordNet on some datasets. We also address the question whether and how Wikipedia can be integrated into NLP applications as a knowledge base. Including Wikipedia improves the performance of a machine learning based coreference resolution system, indicating that it represents a valuable resource for NLP applications. Finally, we show that our method can be easily used for languages other than English by computing semantic relatedness for a German dataset.
%0 Journal Article
%1 Ponzetto:2007
%A Ponzetto, Simone P.
%A Strube, Michael
%D 2007
%J Journal of Artificial Intelligence Research
%K WW-MUST knowledge knowledge-extraction relatedness semantic semanticweb wikipedia
%P 181--212
%T Knowledge Derived from Wikipedia for Computing Semantic Relatedness
%V 30
%X Wikipedia provides a semantic network for computing semantic relatedness in a more structured fashion than a search engine and with more coverage than WordNet. We present experiments on using Wikipedia for computing semantic relatedness and compare it to WordNet on various benchmarking datasets. Existing relatedness measures perform better using Wikipedia than a baseline given by Google counts, and we show that Wikipedia outperforms WordNet on some datasets. We also address the question whether and how Wikipedia can be integrated into NLP applications as a knowledge base. Including Wikipedia improves the performance of a machine learning based coreference resolution system, indicating that it represents a valuable resource for NLP applications. Finally, we show that our method can be easily used for languages other than English by computing semantic relatedness for a German dataset.
% BibTeX entry for the journal article by Ponzetto and Strube (2007).
% "Wikipedia" is brace-protected in the title so that sentence-casing
% bibliography styles keep its capital letter.
@article{Ponzetto:2007,
  abstract             = {Wikipedia provides a semantic network for computing semantic relatedness in a more structured fashion than a search engine and with more coverage than WordNet. We present experiments on using Wikipedia for computing semantic relatedness and compare it to WordNet on various benchmarking datasets. Existing relatedness measures perform better using Wikipedia than a baseline given by Google counts, and we show that Wikipedia outperforms WordNet on some datasets. We also address the question whether and how Wikipedia can be integrated into NLP applications as a knowledge base. Including Wikipedia improves the performance of a machine learning based coreference resolution system, indicating that it represents a valuable resource for NLP applications. Finally, we show that our method can be easily used for languages other than English by computing semantic relatedness for a German dataset.},
  added-at             = {2008-02-10T02:19:38.000+0100},
  author               = {Ponzetto, Simone P. and Strube, Michael},
  biburl               = {https://www.bibsonomy.org/bibtex/2623e428e88e8c1af1c1b2cb39a9c55f1/brightbyte},
  citeulike-article-id = {2357713},
  description          = {stuff from citeyoulike},
  interhash            = {33c7f1b328509fc8d59a93af0a7821fa},
  intrahash            = {623e428e88e8c1af1c1b2cb39a9c55f1},
  journal              = {Journal of Artificial Intelligence Research},
  keywords             = {WW-MUST knowledge knowledge-extraction relatedness semantic semanticweb wikipedia},
  pages                = {181--212},
  priority             = {3},
  timestamp            = {2009-01-23T09:58:50.000+0100},
  title                = {Knowledge Derived from {Wikipedia} for Computing Semantic Relatedness},
  volume               = {30},
  year                 = {2007},
}