In Korean information retrieval, compound nouns play an important role in improving precision in search experiments. There are two major approaches to compound noun indexing in Korean: statistical and linguistic. Each method, however, has its own shortcomings, such as limitations when indexing diverse types of compound nouns, over-generation of compound nouns, and data sparseness in training. In this paper, we propose a corpus-based learning method, which can index diverse types of compound nouns using rules automatically extracted from a large corpus. The automatic learning method is more portable and requires less human effort, although it exhibits a performance level similar to the manual-linguistic approach. We also present a new filtering method to solve the problems of compound noun over-generation and data sparseness.
%0 Journal Article
%1 Kim:EtAl:01
%A Kim, Jee-Hyub
%A Kwak, Byung-Kwan
%A Lee, Seungwoo
%A Lee, Geunbae
%A Lee, Jong-Hyeok
%D 2001
%J Information Retrieval
%K 2001 compounds ir korean
%N 2
%P 115--132
%T A Corpus-Based Learning Method of Compound Noun Indexing Rules for Korean
%U http://dx.doi.org/10.1023/A:1011466928139
%V 4
%X In Korean information retrieval, compound nouns play an important role in improving precision in search experiments. There are two major approaches to compound noun indexing in Korean: statistical and linguistic. Each method, however, has its own shortcomings, such as limitations when indexing diverse types of compound nouns, over-generation of compound nouns, and data sparseness in training. In this paper, we propose a corpus-based learning method, which can index diverse types of compound nouns using rules automatically extracted from a large corpus. The automatic learning method is more portable and requires less human effort, although it exhibits a performance level similar to the manual-linguistic approach. We also present a new filtering method to solve the problems of compound noun over-generation and data sparseness.
@comment{
  Kim:EtAl:01 -- journal article, Information Retrieval 4(2), 2001.
  The doi field holds the bare DOI taken from the resolver link in the url field.
  {Korean} is braced in the title so sentence-casing styles keep its capital.
  added-at, biburl, interhash, intrahash and timestamp are bookmark-export
  metadata; standard styles ignore unknown fields.
}
@article{Kim:EtAl:01,
  author    = {Kim, Jee-Hyub and Kwak, Byung-Kwan and Lee, Seungwoo and Lee, Geunbae and Lee, Jong-Hyeok},
  title     = {A Corpus-Based Learning Method of Compound Noun Indexing Rules for {Korean}},
  journal   = {Information Retrieval},
  volume    = {4},
  number    = {2},
  pages     = {115--132},
  year      = {2001},
  doi       = {10.1023/A:1011466928139},
  url       = {http://dx.doi.org/10.1023/A:1011466928139},
  keywords  = {2001 compounds ir korean},
  abstract  = {In Korean information retrieval, compound nouns play an important role in improving precision in search experiments. There are two major approaches to compound noun indexing in Korean: statistical and linguistic. Each method, however, has its own shortcomings, such as limitations when indexing diverse types of compound nouns, over-generation of compound nouns, and data sparseness in training. In this paper, we propose a corpus-based learning method, which can index diverse types of compound nouns using rules automatically extracted from a large corpus. The automatic learning method is more portable and requires less human effort, although it exhibits a performance level similar to the manual-linguistic approach. We also present a new filtering method to solve the problems of compound noun over-generation and data sparseness.},
  added-at  = {2007-07-18T14:54:04.000+0200},
  timestamp = {2007-07-18T14:54:04.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/2b00b9f6f47769d44b74a236ee1a2d12e/seandalai},
  interhash = {2b51bb2d7539e59642959ef6b7ee5ead},
  intrahash = {b00b9f6f47769d44b74a236ee1a2d12e},
}