Mining Domain-Specific Thesauri from Wikipedia: A Case Study
D. Milne, O. Medelyan, and I. Witten. Proceedings of the 2006 IEEE/WIC/ACM International Conference on Web Intelligence, pages 442--448. Washington, DC, USA, IEEE Computer Society (2006)
DOI: 10.1109/WI.2006.119
Abstract
Domain-specific thesauri are high-cost, high-maintenance, high-value knowledge structures. We show how the classic thesaurus structure of terms and links can be mined automatically from Wikipedia. In a comparison with a professional thesaurus for agriculture we find that Wikipedia contains a substantial proportion of its concepts and semantic relations; furthermore it has impressive coverage of contemporary documents in the domain. Thesauri derived using our techniques capitalize on existing public efforts and tend to reflect contemporary language usage better than their costly, painstakingly-constructed manual counterparts.
%0 Conference Paper
%1 milne2006mining
%A Milne, David
%A Medelyan, Olena
%A Witten, Ian H.
%B Proceedings of the 2006 IEEE/WIC/ACM International Conference on Web Intelligence
%C Washington, DC, USA
%D 2006
%I IEEE Computer Society
%K domain gazetteer mining thesaurus wikipedia
%P 442--448
%R 10.1109/WI.2006.119
%T Mining Domain-Specific Thesauri from Wikipedia: A Case Study
%U http://dx.doi.org/10.1109/WI.2006.119
%X Domain-specific thesauri are high-cost, high-maintenance, high-value knowledge structures. We show how the classic thesaurus structure of terms and links can be mined automatically from Wikipedia. In a comparison with a professional thesaurus for agriculture we find that Wikipedia contains a substantial proportion of its concepts and semantic relations; furthermore it has impressive coverage of contemporary documents in the domain. Thesauri derived using our techniques capitalize on existing public efforts and tend to reflect contemporary language usage better than their costly, painstakingly-constructed manual counterparts.
%@ 0-7695-2747-7
@comment{Conference paper. Braces in title/booktitle protect the proper noun and the acronym group from style-driven lowercasing; acmid, added-at, biburl, interhash, intrahash, numpages and timestamp are non-standard fields that standard styles ignore.}
@inproceedings{milne2006mining,
  abstract  = {Domain-specific thesauri are high-cost, high-maintenance, high-value knowledge structures. We show how the classic thesaurus structure of terms and links can be mined automatically from Wikipedia. In a comparison with a professional thesaurus for agriculture we find that Wikipedia contains a substantial proportion of its concepts and semantic relations; furthermore it has impressive coverage of contemporary documents in the domain. Thesauri derived using our techniques capitalize on existing public efforts and tend to reflect contemporary language usage better than their costly, painstakingly-constructed manual counterparts.},
  acmid     = {1249168},
  added-at  = {2015-12-09T12:16:41.000+0100},
  address   = {Washington, DC, USA},
  author    = {Milne, David and Medelyan, Olena and Witten, Ian H.},
  biburl    = {https://www.bibsonomy.org/bibtex/223b361dd68d74b457ec1299b2ecc9079/jaeschke},
  booktitle = {Proceedings of the 2006 {IEEE/WIC/ACM} International Conference on Web Intelligence},
  doi       = {10.1109/WI.2006.119},
  interhash = {ec54edfc924ff1288d6e2791ddb1785b},
  intrahash = {23b361dd68d74b457ec1299b2ecc9079},
  isbn      = {0-7695-2747-7},
  keywords  = {domain gazetteer mining thesaurus wikipedia},
  numpages  = {7},
  pages     = {442--448},
  publisher = {IEEE Computer Society},
  series    = {WI '06},
  timestamp = {2015-12-09T12:24:06.000+0100},
  title     = {Mining Domain-Specific Thesauri from {Wikipedia}: A Case Study},
  url       = {http://dx.doi.org/10.1109/WI.2006.119},
  year      = {2006},
}