The purpose of this paper is to characterize a constituent boundary parsing algorithm, using an information-theoretic measure called generalized mutual information, which serves as an alternative to traditional grammar-based parsing methods. This method is based on the hypothesis that constituent boundaries can be extracted from a given sentence (or word sequence) by analyzing the mutual information values of the part-of-speech n-grams within the sentence. This hypothesis is supported by the performance of an implementation of this parsing algorithm which determines a recursive unlabeled bracketing of unrestricted English text with a relatively low error rate. This paper derives the generalized mutual information statistic, describes the parsing algorithm, and presents results and sample output from the parser. Introduction A standard approach to parsing a natural language is to characterize the language using a set of rules, a grammar. A grammar-based parsing algori...
Description
Parsing a Natural Language Using Mutual Information Statistics
%0 Conference Paper
%1 Magerman90parsinga
%A Magerman, David M.
%A Marcus, Mitchell P.
%D 1990
%K boundary detection grammar induction information joint mutual probability
%P 984--989
%T Parsing a Natural Language Using Mutual Information Statistics
%U http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.29.5592
%X The purpose of this paper is to characterize a constituent boundary parsing algorithm, using an information-theoretic measure called generalized mutual information, which serves as an alternative to traditional grammar-based parsing methods. This method is based on the hypothesis that constituent boundaries can be extracted from a given sentence (or word sequence) by analyzing the mutual information values of the part-of-speech n-grams within the sentence. This hypothesis is supported by the performance of an implementation of this parsing algorithm which determines a recursive unlabeled bracketing of unrestricted English text with a relatively low error rate. This paper derives the generalized mutual information statistic, describes the parsing algorithm, and presents results and sample output from the parser. Introduction A standard approach to parsing a natural language is to characterize the language using a set of rules, a grammar. A grammar-based parsing algori...
@comment{Magerman90parsinga: conference paper. The fields added-at, biburl,
  description, interhash, intrahash, keywords and timestamp are BibSonomy
  bookkeeping; standard bibliography styles ignore them.}
@inproceedings{Magerman90parsinga,
  author      = {Magerman, David M. and Marcus, Mitchell P.},
  title       = {Parsing a Natural Language Using Mutual Information Statistics},
  booktitle   = {Proceedings of the Eighth National Conference on Artificial Intelligence ({AAAI-90})},
  pages       = {984--989},
  year        = {1990},
  url         = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.29.5592},
  abstract    = {The purpose of this paper is to characterize a constituent boundary parsing algorithm, using an information-theoretic measure called generalized mutual information, which serves as an alternative to traditional grammar-based parsing methods. This method is based on the hypothesis that constituent boundaries can be extracted from a given sentence (or word sequence) by analyzing the mutual information values of the part-of-speech n-grams within the sentence. This hypothesis is supported by the performance of an implementation of this parsing algorithm which determines a recursive unlabeled bracketing of unrestricted English text with a relatively low error rate. This paper derives the generalized mutual information statistic, describes the parsing algorithm, and presents results and sample output from the parser.},
  keywords    = {boundary detection grammar induction information joint mutual probability},
  description = {Parsing a Natural Language Using Mutual Information Statistics},
  added-at    = {2011-07-29T21:15:38.000+0200},
  timestamp   = {2013-11-23T20:11:51.000+0100},
  biburl      = {https://www.bibsonomy.org/bibtex/2e2a5d01938c597f896d1f6c6279b315a/jil},
  interhash   = {c6fe384c0b18ce1000ec6f494a2ae54d},
  intrahash   = {e2a5d01938c597f896d1f6c6279b315a},
}