% Kim et al. (2001), journal article in Information Retrieval 4(2), pages 115--132.
% The doi field holds the bare DOI taken from the resolver link in url; url is kept as exported.
% The word Korean is brace-protected in the title so sentence-casing styles keep its capital.
% biburl, abstract and keywords are export metadata; standard styles are expected to ignore them.
@article{Kim:EtAl:01,
  author   = {Kim, Jee-Hyub and Kwak, Byung-Kwan and Lee, Seungwoo and Lee, Geunbae and Lee, Jong-Hyeok},
  title    = {A Corpus-Based Learning Method of Compound Noun Indexing Rules for {Korean}},
  journal  = {Information Retrieval},
  volume   = {4},
  number   = {2},
  pages    = {115--132},
  year     = {2001},
  doi      = {10.1023/A:1011466928139},
  url      = {http://dx.doi.org/10.1023/A:1011466928139},
  biburl   = {http://www.bibsonomy.org/bibtex/2b00b9f6f47769d44b74a236ee1a2d12e/seandalai},
  keywords = {2001 compounds ir korean},
  abstract = {In Korean information retrieval, compound nouns play an important role in improving precision in search experiments. There are two major approaches to compound noun indexing in Korean: statistical and linguistic. Each method, however, has its own shortcomings, such as limitations when indexing diverse types of compound nouns, over-generation of compound nouns, and data sparseness in training. In this paper, we propose a corpus-based learning method, which can index diverse types of compound nouns using rules automatically extracted from a large corpus. The automatic learning method is more portable and requires less human effort, although it exhibits a performance level similar to the manual-linguistic approach. We also present a new filtering method to solve the problems of compound noun over-generation and data sparseness.},
}