The recognition of Proper Nouns (PNs) is considered an important task in the area of Information Retrieval and Extraction. However the high performance of most existing PN classifiers heavily depends upon the availability of large dictionaries of domain-specific Proper Nouns, and a certain amount of manual work for rule writing or manual tagging. Though it is not a heavy requirement to rely on some existing PN dictionary (often these resources are available on the web), its coverage of a domain corpus may be rather low, in absence of manual updating. In this paper we propose a technique for the automatic updating of a PN Dictionary through the cooperation of an inductive and a probabilistic classifier. In our experiments we show that, whenever an existing PN Dictionary allows the identification of 50\% of the proper nouns within a corpus, our technique allows, without additional manual effort, the successful recognition of about 90\% of the remaining 50\%.
%0 Generic
%1 Petasis:2000:AAP:345508.345563
%A Petasis, Georgios
%A Cucchiarelli, Alessandro
%A Velardi, Paola
%A Paliouras, Georgios
%A Karkaletsis, Vangelis
%A Spyropoulos, Constantine D.
%B Proceedings of the 23rd Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR)
%C New York, NY, USA
%D 2000
%I ACM
%K IR, and data extraction, for information language learning machine mining natural processing text
%P 128--135
%R http://doi.acm.org/10.1145/345508.345563
%T Automatic adaptation of proper noun dictionaries through cooperation of machine learning and probabilistic methods
%U http://www.ellogon.org/petasis/bibliography/SIGIR2000/SIGIR-CameraReady.pdf
%X The recognition of Proper Nouns (PNs) is considered an important task in the area of Information Retrieval and Extraction. However the high performance of most existing PN classifiers heavily depends upon the availability of large dictionaries of domain-specific Proper Nouns, and a certain amount of manual work for rule writing or manual tagging. Though it is not a heavy requirement to rely on some existing PN dictionary (often these resources are available on the web), its coverage of a domain corpus may be rather low, in absence of manual updating. In this paper we propose a technique for the automatic updating of a PN Dictionary through the cooperation of an inductive and a probabilistic classifier. In our experiments we show that, whenever an existing PN Dictionary allows the identification of 50\% of the proper nouns within a corpus, our technique allows, without additional manual effort, the successful recognition of about 90\% of the remaining 50\%.
%@ 1-58113-226-3
@comment{
  Conference paper record exported from BibSonomy (see biburl).
  NOTE(review): the keywords value is kept exactly as exported; its tokens
  appear to be in alphabetical order rather than grouped into phrases --
  confirm against the paper before regrouping them.
  added-at, biburl, interhash, intrahash and timestamp are not standard
  BibTeX fields; they are retained as exported.
}
@inproceedings{Petasis:2000:AAP:345508.345563,
  author     = {Petasis, Georgios and Cucchiarelli, Alessandro and Velardi, Paola and Paliouras, Georgios and Karkaletsis, Vangelis and Spyropoulos, Constantine D.},
  title      = {Automatic adaptation of proper noun dictionaries through cooperation of machine learning and probabilistic methods},
  booktitle  = {Proceedings of the 23rd Annual International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval ({SIGIR})},
  series     = {SIGIR '00},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  year       = {2000},
  month      = jul,
  eventdate  = {2000-07-24/2000-07-28},
  pages      = {128--135},
  isbn       = {1-58113-226-3},
  doi        = {10.1145/345508.345563},
  url        = {http://www.ellogon.org/petasis/bibliography/SIGIR2000/SIGIR-CameraReady.pdf},
  keywords   = {IR, and data extraction, for information language learning machine mining natural processing text},
  abstract   = {The recognition of Proper Nouns (PNs) is considered an important task in the area of Information Retrieval and Extraction. However the high performance of most existing PN classifiers heavily depends upon the availability of large dictionaries of domain-specific Proper Nouns, and a certain amount of manual work for rule writing or manual tagging. Though it is not a heavy requirement to rely on some existing PN dictionary (often these resources are available on the web), its coverage of a domain corpus may be rather low, in absence of manual updating. In this paper we propose a technique for the automatic updating of a PN Dictionary through the cooperation of an inductive and a probabilistic classifier. In our experiments we show that, whenever an existing PN Dictionary allows the identification of 50\% of the proper nouns within a corpus, our technique allows, without additional manual effort, the successful recognition of about 90\% of the remaining 50\%.},
  added-at   = {2011-08-10T12:37:26.000+0200},
  timestamp  = {2011-08-10T12:37:26.000+0200},
  biburl     = {https://www.bibsonomy.org/bibtex/2ab8c2c6c4b4446233e5264130334d05d/petasis},
  interhash  = {28098811cc723c9b9fed099bfb6e21e1},
  intrahash  = {ab8c2c6c4b4446233e5264130334d05d},
}