Natural language technologies have long been envisioned to play a crucial role in developing a Semantic Web. Textual content's significance on the Web has increased with the rise of Web 2.0 and mass participation in content generation. Yet, natural language technologies face great challenges in dealing with Web content's heterogeneity: key among these is domain and task adaptation. To address this challenge, the authors consider the problem of semantically annotating Wikipedia. Specifically, they investigate a method for dealing with domain and task adaptation of semantic taggers in cases where parallel text and metadata are available. By creating a semantic mapping among vocabularies from two sources: Wikipedia and the original annotated corpus, they improve their tagger on Wikipedia. Moreover, by applying their tagger and mapping between sources, they significantly extend the metadata currently available in the DBpedia collection. This article is part of a special issue on Natural Language Processing and the Web.
%0 Journal Article
%1 MikaCiaramitaEtAl08intelligent
%A Mika, Peter
%A Ciaramita, Massimiliano
%A Zaragoza, Hugo
%A Atserias, Jordi
%D 2008
%J IEEE Intelligent Systems
%K v1205 ieee paper ai language processing web data text information retrieval
%N 5
%P 26-33
%R 10.1109/MIS.2008.85
%T Learning to Tag and Tagging to Learn: A Case Study on Wikipedia
%V 23
%X Natural language technologies have long been envisioned to play a crucial role in developing a Semantic Web. Textual content's significance on the Web has increased with the rise of Web 2.0 and mass participation in content generation. Yet, natural language technologies face great challenges in dealing with Web content's heterogeneity: key among these is domain and task adaptation. To address this challenge, the authors consider the problem of semantically annotating Wikipedia. Specifically, they investigate a method for dealing with domain and task adaptation of semantic taggers in cases where parallel text and metadata are available. By creating a semantic mapping among vocabularies from two sources: Wikipedia and the original annotated corpus, they improve their tagger on Wikipedia. Moreover, by applying their tagger and mapping between sources, they significantly extend the metadata currently available in the DBpedia collection. This article is part of a special issue on Natural Language Processing and the Web.
% Journal article: IEEE Intelligent Systems 23(5), 2008. Page range uses the BibTeX en-dash form (`--`).
@article{MikaCiaramitaEtAl08intelligent,
  abstract  = {Natural language technologies have long been envisioned to play a crucial role in developing a Semantic Web. Textual content's significance on the Web has increased with the rise of Web 2.0 and mass participation in content generation. Yet, natural language technologies face great challenges in dealing with Web content's heterogeneity: key among these is domain and task adaptation. To address this challenge, the authors consider the problem of semantically annotating Wikipedia. Specifically, they investigate a method for dealing with domain and task adaptation of semantic taggers in cases where parallel text and metadata are available. By creating a semantic mapping among vocabularies from two sources: Wikipedia and the original annotated corpus, they improve their tagger on Wikipedia. Moreover, by applying their tagger and mapping between sources, they significantly extend the metadata currently available in the DBpedia collection. This article is part of a special issue on Natural Language Processing and the Web.},
  added-at  = {2012-05-30T10:50:56.000+0200},
  author    = {Mika, Peter and Ciaramita, Massimiliano and Zaragoza, Hugo and Atserias, Jordi},
  biburl    = {https://www.bibsonomy.org/bibtex/2500f619e2472a470ae2e6d6ee32cedba/flint63},
  doi       = {10.1109/MIS.2008.85},
  file      = {IEEE Digital Library:2008/MikaCiaramitaEtAl08intelligent.pdf:PDF},
  groups    = {public},
  interhash = {f61e5b69eb9a6e1e4d5ce27d066e4d99},
  intrahash = {500f619e2472a470ae2e6d6ee32cedba},
  issn      = {1541-1672},
  journal   = {IEEE Intelligent Systems},
  keywords  = {v1205 ieee paper ai language processing web data text information retrieval},
  number    = 5,
  pages     = {26--33},
  timestamp = {2018-04-16T12:07:58.000+0200},
  title     = {Learning to Tag and Tagging to Learn: A Case Study on {Wikipedia}},
  username  = {flint63},
  volume    = 23,
  year      = 2008,
}