% Summarisation bibliography (20 entries).
% The biburl / interhash / intrahash / added-at / timestamp fields appear to be
% BibSonomy export metadata; standard styles ignore them, and they are kept verbatim.
%
% Conventions used below:
%   - one field per line, fields in alphabetical order;
%   - authors in "Last, First" form, separated by " and ";
%   - page ranges use "--";
%   - non-ASCII characters are written as LaTeX escapes;
%   - acronyms in titles are brace-protected against style recasing.

% NOTE(review): "pages = 14" looks like a page count rather than a page range -- confirm.
@inproceedings{Ibekwe-Sanjuan:2008,
  abstract     = {We present a methodology combining surface NLP and Machine Learning techniques for ranking abstracts and generating summaries based on annotated corpora. The corpora were annotated with meta-semantic tags indicating the category of information a sentence is bearing (objective, findings, newthing, hypothesis, conclusion, future work, related work). The annotated corpus is fed into an automatic summarizer for query-oriented abstract ranking and multi-abstract summarization. To adapt the summarizer to these two tasks, two novel weighting functions were devised in order to take into account the distribution of the tags in the corpus. Results, although still preliminary, are encouraging us to pursue this line of work and find better ways of building IR systems that can take into account semantic annotations in a corpus.},
  added-at     = {2011-10-28T09:19:50.000+0200},
  address      = {Glasgow},
  author       = {Ibekwe-Sanjuan, Fidelia and Fernandez, Silvia and Sanjuan, Eric and Charton, Eric},
  biburl       = {http://www.bibsonomy.org/bibtex/2b20cf8edf0e3fca373328bc564ce75bd/diego_ma},
  booktitle    = {ECIR'08 Workshop on: Exploiting Semantic Annotations for Information Retrieval},
  interhash    = {c98454a960afc8fa501120904dcc24b8},
  intrahash    = {b20cf8edf0e3fca373328bc564ce75bd},
  keywords     = {text_categorisation summarisation EBM,inf_retrieval biomedical},
  pages        = 14,
  timestamp    = {2011-10-28T09:19:50.000+0200},
  title        = {Annotation of Scientific Summaries for Information Retrieval},
  url          = {http://arxiv.org/abs/1110.5722},
  year         = 2008
}

% The booktitle is expected to be inherited through crossref; the parent entry
% ZZZ-COLINGACL:2006 is not part of this section of the file.
% NOTE(review): the "review" text talks about pattern extraction and seed facts,
% which does not match this paper's abstract -- possibly misfiled; confirm.
@inproceedings{Daume:2006,
  abstract     = {We present BAYESUM (for ``Bayesian summarization''), a model for sentence extraction in query-focused summarization. BAYESUM leverages the common case in which multiple documents are relevant to a single query. Using these documents as reinforcement for query terms, BAYESUM is not afflicted by the paucity of information in short queries. We show that approximate inference in BAYESUM is possible on large data sets and results in a state-of-the-art summarization system. Furthermore, we show how BAYESUM can be understood as a justified query expansion technique in the language modeling for IR framework.},
  added-at     = {2011-08-05T10:08:44.000+0200},
  author       = {Daum{\'e} III, Hal and Marcu, Daniel},
  biburl       = {http://www.bibsonomy.org/bibtex/250b0b54c7a9a2e56b3ef3e95142c753b/diego_ma},
  crossref     = {ZZZ-COLINGACL:2006},
  interhash    = {fba42b320e9d083f1708c36262c30877},
  intrahash    = {50b0b54c7a9a2e56b3ef3e95142c753b},
  keywords     = {summarisation},
  library      = {Mine (October 2006)},
  pages        = {305--312},
  review       = {Key idea: 1. Extract and GENERALISE patterns. The patterns are generalised by creating word classes on the basis of their distributional similarity. 2. Validate the extracted patterns. The patterns are ranked by examining the frequencies of words in their prefix, infix and postfix. Candidate facts are ranked by checking whether they belong to some class as known (seed) facts.},
  timestamp    = {2011-08-05T10:08:44.000+0200},
  title        = {Bayesian Query-Focused Summarization},
  url          = {http://www.isi.edu/\~{}marcu/papers.html},
  year         = 2006
}

@inproceedings{Gillick:2010,
  abstract     = {We provide evidence that intrinsic evaluation of summaries using Amazon's Mechanical Turk is quite difficult. Experiments mirroring evaluation at the Text Analysis Conference's summarization track show that nonexpert judges are not able to recover system rankings derived from experts.},
  added-at     = {2011-08-05T09:25:15.000+0200},
  author       = {Gillick, Dan and Liu, Yang},
  biburl       = {http://www.bibsonomy.org/bibtex/2e349ad3abc82bbbd253bc5d94fdd20e7/diego_ma},
  booktitle    = {Proceedings NAACL HLT 2010 Workshop on Creating Speech and Language Data with Amazon's Mechanical Turk},
  interhash    = {82fe8aa6efa6c742f04c207373ef284e},
  intrahash    = {e349ad3abc82bbbd253bc5d94fdd20e7},
  keywords     = {summarisation mechanical_turk},
  library      = {Bibsonomy, MQRDG2010 (August 2011)},
  pages        = {148--151},
  timestamp    = {2011-08-05T09:25:15.000+0200},
  title        = {Non-Expert Evaluation of Summarization Systems is Risky},
  year         = 2010
}

@inproceedings{Molla:2010,
  abstract     = {In this paper we motivate the need for a corpus for the development and testing of summarisation systems for evidence-based medicine. We describe the corpus which we are currently creating, and show its applicability by evaluating several simple query-based summarisation techniques using a small fragment of the corpus.},
  added-at     = {2011-01-19T05:42:18.000+0100},
  author       = {Moll{\'a}, Diego},
  biburl       = {http://www.bibsonomy.org/bibtex/2f67220713d3a4fa275ded7792d1ff320/diego_ma},
  booktitle    = {Proceedings of the Australasian Language Technology Workshop},
  interhash    = {5d7febb52df09e9aaea0e00a02c93dc0},
  intrahash    = {f67220713d3a4fa275ded7792d1ff320},
  keywords     = {molla_medicalnlp corpora summarisation molla_publication},
  library      = {Webpage (Jan 2011)},
  pages        = {76--80},
  timestamp    = {2011-01-19T05:42:18.000+0100},
  title        = {A Corpus for Evidence Based Medicine Summarisation},
  url          = {http://www.alta.asn.au/events/alta2010/proceedings/index.html},
  volume       = 8,
  year         = 2010
}

@article{Barzilay:2005,
  abstract     = {A system that can produce informative summaries, highlighting common information found in many online documents, will help Web users to pinpoint information that they need without extensive reading. In this article, we introduce sentence fusion, a novel text-to-text generation technique for synthesizing common information across documents. Sentence fusion involves bottom-up local multisequence alignment to identify phrases conveying similar information and statistical generation to combine common phrases into a sentence. Sentence fusion moves the summarization field from the use of purely extractive methods to the generation of abstracts that contain sentences not found in any of the input documents and can synthesize information across sources.},
  added-at     = {2010-05-28T07:51:03.000+0200},
  author       = {Barzilay, Regina and McKeown, Kathleen R.},
  biburl       = {http://www.bibsonomy.org/bibtex/2677204ee6eac41e7b1dea8ca8cbbdd67/diego_ma},
  interhash    = {a656df5b0a2be86abc9c4efb1b9f1972},
  intrahash    = {677204ee6eac41e7b1dea8ca8cbbdd67},
  journal      = {Computational Linguistics},
  keywords     = {summarisation},
  library      = {Mine (March 2010)},
  month        = sep,
  number       = 3,
  pages        = {297--328},
  review       = {This is a seminal paper on multi-document summarisation. The domain is news, and the approach follows the following steps: 1. Cluster the sentences by defining a WordNet-based sentence similarity measure. Each cluster is called a theme. 2. Rank the themes and select the top n. 3. Order the themes based on the timestamp of the earliest sentence of each theme. 4. Apply sentence fusion by following these steps: a. Identify the information based between the sentences in the theme. This is done by aligning the dependency trees and select the tree fragments in common. b. Select the most important sentence and use its tree as the starting point c. Expand the tree by attaching nodes from other sentences in the theme d. Remove the tree fragments that have not been used, always keeping sentence grammaticality e. Generate a sentence by producing all possible sentences and selecting the one with lowest entropy according to a reference corpus Several aspects of the sentence fusion method are very interesting and worth exploring for: - EBM-based summarisation - Fingerprinting of groups of documents (ARC Linkage with Topedia) - Graph-based QA},
  timestamp    = {2010-05-28T07:51:03.000+0200},
  title        = {Sentence Fusion for Multidocument News Summarization},
  volume       = 31,
  year         = 2005
}

@inproceedings{Wan:2009c,
  added-at     = {2010-02-04T04:12:52.000+0100},
  address      = {Athens, Greece},
  author       = {Wan, Stephen and Dras, Mark and Dale, Robert and Paris, C{\'e}cile},
  biburl       = {http://www.bibsonomy.org/bibtex/2c20b8947606623244cd6e574a69af71e/diego_ma},
  booktitle    = {Proceedings of Conference of the European Chapter of the Association for Computational Linguistics (EACL 2009)},
  interhash    = {1bafdb72f2d33ec0a5e95f0310592a55},
  intrahash    = {c20b8947606623244cd6e574a69af71e},
  keywords     = {summarisation},
  timestamp    = {2010-02-04T04:12:52.000+0100},
  title        = {Improving Grammaticality in Statistical Sentence Generation: Introducing a Dependency Spanning Tree Algorithm with an Argument Satisfaction Model},
  year         = 2009
}

% The key is kept as Wan:2009b so existing citations keep working, although the
% first author listed is Paris.
@inproceedings{Wan:2009b,
  abstract     = {The web has become a major source of information to learn about a topic. With the continuous growth of information and its high connectivity, it is hard to follow only the links that are relevant and not to get lost in hyperspace. Our aim is to support people who read documents in a highly connected information space, helping them remain on focus. Our contextually-aware in-browser text summarisation tool, IBES, does this by capturing users' current interests and providing users with contextualised summaries of linked documents, to help them decide whether the link is worth following.},
  added-at     = {2010-02-04T04:07:35.000+0100},
  author       = {Paris, C{\'e}cile and Wan, Stephen},
  biburl       = {http://www.bibsonomy.org/bibtex/2eb94394e92ff00a46204ac735d6adb89/diego_ma},
  booktitle    = {Proceedings of the International Conference on User Modelling, Adaptation and Presentation (UMAP 2009)},
  interhash    = {788251a2cdd46a9a8b549f40c860e3cb},
  intrahash    = {eb94394e92ff00a46204ac735d6adb89},
  keywords     = {summarisation},
  timestamp    = {2010-02-04T04:07:35.000+0100},
  title        = {Capturing the User's Reading Context for Tailoring Summaries},
  url          = {http://www.ict.csiro.au/staff/cecile.paris/distribution/Paris-Wan-Final-UMAP09.pdf},
  year         = 2009
}

@inproceedings{Wan:2009,
  added-at     = {2010-02-04T02:56:27.000+0100},
  address      = {Austin, Texas},
  author       = {Wan, Stephen and Paris, C{\'e}cile and Dale, Robert},
  biburl       = {http://www.bibsonomy.org/bibtex/2d944546bd20383c058a8c2c2a1dd59db/diego_ma},
  booktitle    = {Proceedings of the 2009 Joint Conference on Digital Libraries},
  interhash    = {4b05d6b780f4cb08266b4ba4d93661f3},
  intrahash    = {d944546bd20383c058a8c2c2a1dd59db},
  keywords     = {summarisation},
  pages        = {59--69},
  timestamp    = {2010-02-04T02:56:27.000+0100},
  title        = {Whetting the Appetite of Scientists: Producing Summaries Tailored to the Citation Context},
  year         = 2009
}

@inproceedings{Lawrie:2001,
  added-at     = {2010-02-03T05:39:40.000+0100},
  address      = {New Orleans, Louisiana, USA},
  author       = {Lawrie, Dawn and Croft, W. Bruce and Rosenberg, Arnold},
  biburl       = {http://www.bibsonomy.org/bibtex/2bf433c7d3f8fcbcf50b564c646356c66/diego_ma},
  booktitle    = {Proceedings of SIGIR 2001},
  interhash    = {ae5883523c59daa402eeb89710964bdf},
  intrahash    = {bf433c7d3f8fcbcf50b564c646356c66},
  keywords     = {summarisation},
  pages        = {349--357},
  timestamp    = {2010-02-03T05:39:40.000+0100},
  title        = {Finding Topic Words for Hierarchical Summarization},
  year         = 2001
}

@inproceedings{Barzilay:1997,
  added-at     = {2010-02-03T05:37:58.000+0100},
  author       = {Barzilay, Regina and Elhadad, Michael},
  biburl       = {http://www.bibsonomy.org/bibtex/20be4f21418a1236d4225a54359455331/diego_ma},
  booktitle    = {Proceedings of the ACL/EACL 1997 Workshop on Intelligent Scalable Text Summarization},
  interhash    = {11f9897a59e14e17ac4ffb091679c07a},
  intrahash    = {0be4f21418a1236d4225a54359455331},
  keywords     = {summarisation},
  pages        = {10--17},
  timestamp    = {2010-02-03T05:37:58.000+0100},
  title        = {Using Lexical Chains for Text Summarization},
  year         = 1997
}

% The surname is written with a plain apostrophe: a backslash-apostrophe before
% the A would be typeset as an acute accent on that letter.
@inproceedings{DAvanzo:2005,
  added-at     = {2010-02-03T05:33:46.000+0100},
  author       = {D'Avanzo, Ernesto and Magnini, Bernardo},
  biburl       = {http://www.bibsonomy.org/bibtex/23d8c8f82f1f12bf80f4096f8d867c50c/diego_ma},
  booktitle    = {Proceedings of Document Understanding Conferences},
  interhash    = {3e6494c263857ed3d157d8a188321311},
  intrahash    = {3d8c8f82f1f12bf80f4096f8d867c50c},
  keywords     = {summarisation},
  pages        = {6--8},
  timestamp    = {2010-02-03T05:33:46.000+0100},
  title        = {A Keyphrase-Based Approach to Summarization: the {LAKE} System},
  year         = 2005
}

% NOTE(review): "pages = {10 pages}" looks like a page count, not a range -- confirm
% and move to note (or pagetotal under biblatex) if so.
@inproceedings{Tratz:2008,
  abstract     = {This paper describes BEwT-E (Basic Elements with Transformations for Evaluation), an automatic system for summarization evaluation. BEwT-E is a new, more sophisticated implementation of the BE framework that uses transformations to match BEs (minimal-length syntactically well-formed units) that are lexically different yet semantically similar. We demonstrate the effectiveness of BEwT-E using DUC and TAC datasets.},
  added-at     = {2009-10-21T23:12:15.000+0200},
  author       = {Tratz, Stephen and Hovy, Eduard},
  biburl       = {http://www.bibsonomy.org/bibtex/2ba193d28464708238482c6a4d22940b5/diego_ma},
  booktitle    = {Proceedings TAC 2008},
  interhash    = {207e8bec56b853e4105cc25c9e0ed5d9},
  intrahash    = {ba193d28464708238482c6a4d22940b5},
  keywords     = {TAC summarisation},
  pages        = {10 pages},
  publisher    = {NIST},
  timestamp    = {2009-10-21T23:12:15.000+0200},
  title        = {Summarisation Evaluation Using Transformed Basic Elements},
  url          = {http://www.nist.gov/tac/publications/2008/additional.papers/ISI.proceedings.pdf},
  year         = 2008
}

@inproceedings{Tatar:2008,
  added-at     = {2009-10-21T23:09:17.000+0200},
  author       = {Tatar, Doina and Tamaianu-Morita, Emma and Mihis, Andrea and Lupsa, Dana},
  biburl       = {http://www.bibsonomy.org/bibtex/2b5c8a085ff03f87be225f07183576d03/diego_ma},
  booktitle    = {Proceedings CICLING 2008},
  interhash    = {7b7aa4a6f746aed3ac12c2f66746e31e},
  intrahash    = {b5c8a085ff03f87be225f07183576d03},
  keywords     = {summarisation entailment},
  pages        = {15--26},
  timestamp    = {2009-10-21T23:09:17.000+0200},
  title        = {Summarization by Logic Segmentation and Text Entailment},
  url          = {http://www.cicling.org/2008/RCS-vol-33/02-Tatar.pdf},
  volume       = 33,
  year         = 2008
}

@article{Harabagiu:2007,
  abstract     = {Generating summaries that meet the information needs of a user relies on (1) several forms of question decomposition; (2) different summarization approaches; and (3) textual inference for combining the summarization strategies. This novel framework for summarization has the advantage of producing highly responsive summaries, as indicated by the evaluation results.},
  added-at     = {2009-10-21T23:09:17.000+0200},
  author       = {Harabagiu, Sandra and Hickl, Andrew and Lacatusu, Finley},
  biburl       = {http://www.bibsonomy.org/bibtex/2dffae72016582208b1c20bb067790f93/diego_ma},
  interhash    = {30f97c12497058459d13feb3784fb993},
  intrahash    = {dffae72016582208b1c20bb067790f93},
  journal      = {Information Processing \& Management},
  keywords     = {summarisation question_answering},
  pages        = {1619--1642},
  timestamp    = {2009-10-21T23:09:17.000+0200},
  title        = {Satisfying Information Needs with Multi-document Summaries},
  url          = {http://portal.acm.org/citation.cfm?id=1285158},
  volume       = 43,
  year         = 2007
}

% No trailing period in the title: bibliography styles add their own punctuation.
@inproceedings{Demner-Fushman:2006,
  added-at     = {2009-08-28T21:47:37.000+0200},
  author       = {Demner-Fushman, Dina and Lin, Jimmy},
  biburl       = {http://www.bibsonomy.org/bibtex/29b9991ab29887cfb95d1e3aa30921a5a/diego_ma},
  booktitle    = {Proceedings ACL},
  date         = {2006-11-03},
  ee           = {http://acl.ldc.upenn.edu/P/P06/P06-1106.pdf},
  interhash    = {547a66cdb2c9f3ff1781443740b1dd85},
  intrahash    = {9b9991ab29887cfb95d1e3aa30921a5a},
  keywords     = {clinical clustering question_answering summarisation},
  publisher    = {Association for Computational Linguistics},
  timestamp    = {2009-08-28T21:47:37.000+0200},
  title        = {Answer Extraction, Semantic Clustering, and Extractive Summarization for Clinical Question Answering},
  url          = {http://dblp.uni-trier.de/db/conf/acl/acl2006.html\#Demner-FushmanL06},
  year         = 2006
}

@inproceedings{Dang:2008b,
  added-at     = {2009-08-17T09:24:28.000+0200},
  author       = {Dang, Hoa Tran},
  biburl       = {http://www.bibsonomy.org/bibtex/21be0ee9a7553672c1912fdb3d2496121/diego_ma},
  booktitle    = {Proceedings TAC 2008},
  interhash    = {d2441a48504ae8b04119ecc7f6a13398},
  intrahash    = {1be0ee9a7553672c1912fdb3d2496121},
  keywords     = {summarisation question_answering},
  library      = {Unknown (August 2009)},
  timestamp    = {2009-08-17T09:24:28.000+0200},
  title        = {Overview of the {TAC} 2008 Opinion Question Answering and Summarization Tasks},
  year         = 2008
}

@unpublished{Teufel:2001,
  abstract     = {In this article we propose a strategy for the summarization of scientific articles that concentrates on the rhetorical status of statements in an article: Material for summaries is selected in such a way that summaries can highlight the new contribution of the source article and situate it with respect to earlier work. We provide a gold standard for summaries of this kind consisting of a substantial corpus of conference articles in computational linguistics annotated with human judgements of the rhetorical status and relevance of each sentence in the articles. We present several experiments measuring our judges' agreement on these annotations. We also present an algorithm that, on the basis of the annotated training material, selects content from unseen articles and classifies it into a fixed set of seven rhetorical categories. The output of this extraction and classification system can be viewed as a single-document summary in its own right; alternatively, it provides starting material for the generation of task-oriented and user-tailored summaries designed to give users an overview of a scientific field.},
  added-at     = {2009-07-06T09:40:30.000+0200},
  author       = {Teufel, Simone and Moens, Marc},
  biburl       = {http://www.bibsonomy.org/bibtex/2d65e400ab44e2720d1045c945dccbea9/diego_ma},
  interhash    = {bb76cf761cc2e2c342b001968b5b9d02},
  intrahash    = {d65e400ab44e2720d1045c945dccbea9},
  keywords     = {rhetorical_theory RTS summarisation},
  library      = {Mine (from Robert Dale), Mar'01},
  note         = {Draft},
  timestamp    = {2009-07-06T09:40:30.000+0200},
  title        = {Summarizing Scientific Articles --- Experiments with Relevance and Rhetorical Status},
  year         = 2001
}

@article{Lee:2009,
  abstract     = {In existing supervised methods, Latent Semantic Analysis (LSA) is used for sentence selection. However, the obtained results are less meaningful, because singular vectors are used as the bases for sentence selection from given documents, and singular vector components can have negative values. We propose a new unsupervised method using Non-negative Matrix Factorization (NMF) to select sentences for automatic generic document summarization. The proposed method uses non-negative constraints, which are more similar to the human cognition process. As a result, the method selects more meaningful sentences for generic document summarization than those selected using LSA.},
  added-at     = {2009-07-06T09:33:52.000+0200},
  author       = {Lee, Ju-Hong and Park, Sun and Ahn, Chan-Min and Kim, Daeho},
  biburl       = {http://www.bibsonomy.org/bibtex/2e698e93ee643defc688e13c34eb9d7c1/diego_ma},
  interhash    = {2dd338667e30cc6ac0f9e6b79f827bb9},
  intrahash    = {e698e93ee643defc688e13c34eb9d7c1},
  journal      = {Information Processing \& Management},
  keywords     = {document_summarisation summarisation},
  library      = {Mine (July 2009)},
  pages        = {20--34},
  timestamp    = {2009-07-06T09:33:52.000+0200},
  title        = {Automatic Generic Document Summarization Based on Non-negative Matrix Factorization},
  volume       = 45,
  year         = 2009
}

% The key is kept as Tran:2006 so existing citations keep working, although the
% author field parses with Dang as the surname.
% NOTE(review): "pages = {10 pages}" looks like a page count, not a range -- confirm.
@inproceedings{Tran:2006,
  abstract     = {The DUC 2006 summarization task was to synthesize from a set of 25 documents a well-organized, fluent answer to a complex question. The task and evaluation measures were basically the same as in DUC 2005, except that an additional "overall" responsiveness measure was added which took into account both content and readability of the summary. The average performance of systems in 2006 was noticeably better than in 2005; systems achieved better focus on average, and many attempted to provide greater coherence to their summaries. The overall responsiveness metric showed that readability plays an important role in the perceived quality of the summaries.},
  added-at     = {2009-03-27T08:55:30.000+0100},
  author       = {Dang, Hoa Tran},
  biburl       = {http://www.bibsonomy.org/bibtex/20e4331ab0017a9e8d88e11a37d6f40a5/diego_ma},
  booktitle    = {Proceedings of the Document Understanding Workshop},
  interhash    = {937ab9aecbfff257efb089a00e11e54d},
  intrahash    = {0e4331ab0017a9e8d88e11a37d6f40a5},
  keywords     = {summarisation},
  library      = {Web (March 2009)},
  organization = {NIST},
  pages        = {10 pages},
  timestamp    = {2009-03-27T08:55:30.000+0100},
  title        = {Overview of {DUC} 2006},
  url          = {http://duc.nist.gov/pubs.html#2006},
  year         = 2006
}

% The doi field repeats the identifier found in the resolver URL inside "misc",
% where styles cannot see it; "misc" itself is kept verbatim.
@article{Zhao:2009,
  abstract     = {This paper presents a novel query expansion method, which is combined in the graph-based algorithm for query-focused multi-document summarization, so as to resolve the problem of information limit in the original query. Our approach makes use of both the sentence-to-sentence relations and the sentence-to-word relations to select the query biased informative words from the document set and use them as query expansions to improve the sentence ranking result. Compared to previous query expansion approaches, our approach can capture more relevant information with less noise. We performed experiments on the data of document understanding conference (DUC) 2005 and DUC 2006, and the evaluation results show that the proposed query expansion method can significantly improve the system performance and make our system comparable to the state-of-the-art systems.},
  added-at     = {2009-02-12T00:28:46.000+0100},
  author       = {Zhao, Lin and Wu, Lide and Huang, Xuanjing},
  biburl       = {http://www.bibsonomy.org/bibtex/2de1dc83f8e8cd4ef34ab24d1892ce125/diego_ma},
  doi          = {10.1016/j.ipm.2008.07.001},
  interhash    = {512497c53926924a79fc0b459da7be29},
  intrahash    = {de1dc83f8e8cd4ef34ab24d1892ce125},
  journal      = {Information Processing \& Management},
  keywords     = {summarisation question_answering graphs},
  library      = {Mine (Feb 2009)},
  misc         = {date = {2009-01-15}, ee = {http://dx.doi.org/10.1016/j.ipm.2008.07.001}},
  number       = 1,
  pages        = {35--41},
  timestamp    = {2009-02-12T00:28:46.000+0100},
  title        = {Using Query Expansion in Graph-Based Approach for Query-Focused Multi-Document Summarization},
  url          = {http://dblp.uni-trier.de/db/journals/ipm/ipm45.html#ZhaoWH09},
  volume       = 45,
  year         = 2009
}