F. Wu and D. Weld. CIKM '07: Proceedings of the sixteenth ACM conference on information and knowledge management, pages 41--50. New York, NY, USA, ACM, (2007)
DOI: http://doi.acm.org/10.1145/1321440.1321449
Abstract
Berners-Lee's compelling vision of a Semantic Web is hindered by a chicken-and-egg problem, which can be best solved by a bootstrapping method - creating enough structured data to motivate the development of applications. This paper argues that autonomously "Semantifying Wikipedia" is the best way to solve the problem. We choose Wikipedia as an initial data source, because it is comprehensive, not too large, high-quality, and contains enough manually-derived structure to bootstrap an autonomous, self-supervised process. We identify several types of structures which can be automatically enhanced in Wikipedia (e.g., link structure, taxonomic data, infoboxes, etc.), and we describe a prototype implementation of a self-supervised, machine learning system which realizes our vision. Preliminary experiments demonstrate the high precision of our system's extracted data - in one case equaling that of humans.
%0 Conference Paper
%1 1321449
%A Wu, Fei
%A Weld, Daniel S.
%B CIKM '07: Proceedings of the sixteenth ACM conference on information and knowledge management
%C New York, NY, USA
%D 2007
%I ACM
%K imported information-extraction machine-learning wikipedia
%P 41--50
%R http://doi.acm.org/10.1145/1321440.1321449
%T Autonomously semantifying wikipedia
%U http://portal.acm.org/citation.cfm?id=1321440.1321449
%X Berners-Lee's compelling vision of a Semantic Web is hindered by a chicken-and-egg problem, which can be best solved by a bootstrapping method - creating enough structured data to motivate the development of applications. This paper argues that autonomously "Semantifying Wikipedia" is the best way to solve the problem. We choose Wikipedia as an initial data source, because it is comprehensive, not too large, high-quality, and contains enough manually-derived structure to bootstrap an autonomous, self-supervised process. We identify several types of structures which can be automatically enhanced in Wikipedia (e.g., link structure, taxonomic data, infoboxes, etc.), and we describe a prototype implementation of a self-supervised, machine learning system which realizes our vision. Preliminary experiments demonstrate the high precision of our system's extracted data - in one case equaling that of humans.
%@ 978-1-59593-803-9
@comment{
  Wu and Weld, "Autonomously Semantifying Wikipedia", CIKM 2007 conference paper.
  The citation key is the numeric article id that also appears in the doi and url
  fields; it is kept unchanged so existing citations continue to resolve.
  The doi field holds the bare DOI (no resolver prefix); styles add the resolver.
  added-at, timestamp, biburl, interhash and intrahash are BibSonomy bookkeeping
  fields; they are not standard BibTeX fields.
}
@inproceedings{1321449,
  author      = {Wu, Fei and Weld, Daniel S.},
  title       = {Autonomously Semantifying {Wikipedia}},
  booktitle   = {{CIKM} '07: Proceedings of the Sixteenth {ACM} Conference on Information and Knowledge Management},
  pages       = {41--50},
  publisher   = {ACM},
  address     = {New York, NY, USA},
  location    = {Lisbon, Portugal},
  year        = {2007},
  isbn        = {978-1-59593-803-9},
  doi         = {10.1145/1321440.1321449},
  url         = {http://portal.acm.org/citation.cfm?id=1321440.1321449},
  abstract    = {Berners-Lee's compelling vision of a Semantic Web is hindered by a chicken-and-egg problem, which can be best solved by a bootstrapping method - creating enough structured data to motivate the development of applications. This paper argues that autonomously "Semantifying Wikipedia" is the best way to solve the problem. We choose Wikipedia as an initial data source, because it is comprehensive, not too large, high-quality, and contains enough manually-derived structure to bootstrap an autonomous, self-supervised process. We identify several types of structures which can be automatically enhanced in Wikipedia (e.g., link structure, taxonomic data, infoboxes, etc.), and we describe a prototype implementation of a self-supervised, machine learning system which realizes our vision. Preliminary experiments demonstrate the high precision of our system's extracted data - in one case equaling that of humans.},
  keywords    = {imported information-extraction machine-learning wikipedia},
  description = {Autonomously semantifying wikipedia},
  added-at    = {2009-11-11T14:57:49.000+0100},
  timestamp   = {2009-11-11T14:57:49.000+0100},
  biburl      = {https://www.bibsonomy.org/bibtex/2c168ad255b8e1042e38e1b55893f4677/gromgull},
  interhash   = {b007780b13ba3d7c611c29a73b510f20},
  intrahash   = {c168ad255b8e1042e38e1b55893f4677},
}