Coherent texts are not just simple sequences of clauses and sentences, but rather complex artifacts that have highly elaborate rhetorical structure. This paper explores the extent to which well-formed rhetorical structures can be automatically derived by means of surface-form-based algorithms. These algorithms identify discourse usages of cue phrases and break sentences into clauses, hypothesize rhetorical relations that hold among textual units, and produce valid rhetorical structure trees for unrestricted natural language texts. The algorithms are empirically grounded in a corpus analysis of cue phrases and rely on a first-order formalization of rhetorical structure trees. The algorithms are evaluated both intrinsically and extrinsically. The intrinsic evaluation assesses the resemblance between automatically and manually constructed rhetorical structure trees. The extrinsic evaluation shows that automatically derived rhetorical structures can be successfully exploited in the context of text summarization.
This is a really interesting paper on how to produce a rhetorical parsing of a text.
An RST structure is hypothesised from dialogue markers, orthographical rules and mathematical constraints on the schema.
%0 Journal Article
%1 Marcu00rhetorical
%A Marcu, Daniel
%C Cambridge, MA, USA
%D 2000
%I MIT Press
%J Comput. Linguist.
%K linguistics, natural-language, parsing, rhetoric, rst
%N 3
%P 395--448
%R 10.1162/089120100561755
%T The rhetorical parsing of unrestricted texts: a surface-based approach
%U http://dx.doi.org/10.1162/089120100561755
%V 26
%X Coherent texts are not just simple sequences of clauses and sentences, but rather complex artifacts that have highly elaborate rhetorical structure. This paper explores the extent to which well-formed rhetorical structures can be automatically derived by means of surface-form-based algorithms. These algorithms identify discourse usages of cue phrases and break sentences into clauses, hypothesize rhetorical relations that hold among textual units, and produce valid rhetorical structure trees for unrestricted natural language texts. The algorithms are empirically grounded in a corpus analysis of cue phrases and rely on a first-order formalization of rhetorical structure trees. The algorithms are evaluated both intrinsically and extrinsically. The intrinsic evaluation assesses the resemblance between automatically and manually constructed rhetorical structure trees. The extrinsic evaluation shows that automatically derived rhetorical structures can be successfully exploited in the context of text summarization.
@comment{
  Journal article by Daniel Marcu, Computational Linguistics 26(3), 2000.
  The title and abstract are single-braced so the bibliography style controls
  casing, the journal name is stored in full, and the month uses the standard
  three-letter macro. Fields such as added-at, biburl, citeulike-*, interhash,
  intrahash, posted-at, priority and timestamp are not standard BibTeX fields;
  standard styles ignore unknown fields.
}
@article{Marcu00rhetorical,
  abstract = {Coherent texts are not just simple sequences of clauses and sentences, but rather complex artifacts that have highly elaborate rhetorical structure. This paper explores the extent to which well-formed rhetorical structures can be automatically derived by means of surface-form-based algorithms. These algorithms identify discourse usages of cue phrases and break sentences into clauses, hypothesize rhetorical relations that hold among textual units, and produce valid rhetorical structure trees for unrestricted natural language texts. The algorithms are empirically grounded in a corpus analysis of cue phrases and rely on a first-order formalization of rhetorical structure trees. The algorithms are evaluated both intrinsically and extrinsically. The intrinsic evaluation assesses the resemblance between automatically and manually constructed rhetorical structure trees. The extrinsic evaluation shows that automatically derived rhetorical structures can be successfully exploited in the context of text summarization.},
  added-at = {2010-12-17T18:47:41.000+0100},
  address = {Cambridge, MA, USA},
  author = {Marcu, Daniel},
  biburl = {https://www.bibsonomy.org/bibtex/2b48b3c98df1af9aea303b8e6d36c9534/mortimer_m8},
  citeulike-article-id = {149502},
  citeulike-linkout-0 = {http://portal.acm.org/citation.cfm?id=971874},
  citeulike-linkout-1 = {http://dx.doi.org/10.1162/089120100561755},
  comment = {This is a really interesting paper on how to produce a rhetorical parsing of a text.
An RST structure is hypothesised from dialogue markers, orthographical rules and mathematical constraints on the schema.},
  doi = {10.1162/089120100561755},
  interhash = {b3c6b192a183fcdf7aa5b75945a1bd2d},
  intrahash = {b48b3c98df1af9aea303b8e6d36c9534},
  issn = {0891-2017},
  journal = {Computational Linguistics},
  keywords = {linguistics, natural-language, parsing, rhetoric, rst},
  month = sep,
  number = 3,
  pages = {395--448},
  posted-at = {2005-04-05 11:09:04},
  priority = {0},
  publisher = {MIT Press},
  timestamp = {2010-12-20T11:11:25.000+0100},
  title = {The Rhetorical Parsing of Unrestricted Texts: A Surface-Based Approach},
  url = {http://dx.doi.org/10.1162/089120100561755},
  volume = 26,
  year = 2000,
}