We present a methodology combining surface NLP and Machine Learning techniques for ranking abstracts and generating summaries based on annotated corpora. The corpora were annotated with meta-semantic tags indicating the category of information a sentence is bearing (objective, findings, newthing, hypothesis, conclusion, future work, related work). The annotated corpus is fed into an automatic summarizer for query-oriented abstract ranking and multi-abstract summarization. To adapt the summarizer to these two tasks, two novel weighting functions were devised in order to take into account the distribution of the tags in the corpus. Results, although still preliminary, are encouraging us to pursue this line of work and find better ways of building IR systems that can take into account semantic annotations in a corpus.
%0 Conference Paper
%1 Ibekwe-Sanjuan:2008
%A Ibekwe-Sanjuan, Fidelia
%A Fernandez, Silvia
%A Sanjuan, Eric
%A Charton, Eric
%B ECIR'08 Workshop on: Exploiting Semantic Annotations for Information Retrieval
%C Glasgow
%D 2008
%K text_categorisation summarisation EBM,inf_retrieval biomedical
%P 14
%T Annotation of Scientific Summaries for Information Retrieval
%U http://arxiv.org/abs/1110.5722
%X We present a methodology combining surface NLP and Machine Learning techniques for ranking abstracts and generating summaries based on annotated corpora. The corpora were annotated with meta-semantic tags indicating the category of information a sentence is bearing (objective, findings, newthing, hypothesis, conclusion, future work, related work). The annotated corpus is fed into an automatic summarizer for query-oriented abstract ranking and multi-abstract summarization. To adapt the summarizer to these two tasks, two novel weighting functions were devised in order to take into account the distribution of the tags in the corpus. Results, although still preliminary, are encouraging us to pursue this line of work and find better ways of building IR systems that can take into account semantic annotations in a corpus.
@inproceedings{Ibekwe-Sanjuan:2008,
  author        = {Ibekwe-Sanjuan, Fidelia and Fernandez, Silvia and Sanjuan, Eric and Charton, Eric},
  title         = {Annotation of Scientific Summaries for Information Retrieval},
  booktitle     = {ECIR'08 Workshop on: Exploiting Semantic Annotations for Information Retrieval},
  address       = {Glasgow},
  year          = {2008},
  pages         = {14},
  url           = {http://arxiv.org/abs/1110.5722},
  eprint        = {1110.5722},
  archiveprefix = {arXiv},
  abstract      = {We present a methodology combining surface NLP and Machine Learning techniques for ranking abstracts and generating summaries based on annotated corpora. The corpora were annotated with meta-semantic tags indicating the category of information a sentence is bearing (objective, findings, newthing, hypothesis, conclusion, future work, related work). The annotated corpus is fed into an automatic summarizer for query-oriented abstract ranking and multi-abstract summarization. To adapt the summarizer to these two tasks, two novel weighting functions were devised in order to take into account the distribution of the tags in the corpus. Results, although still preliminary, are encouraging us to pursue this line of work and find better ways of building IR systems that can take into account semantic annotations in a corpus.},
  keywords      = {text_categorisation summarisation EBM,inf_retrieval biomedical},
  added-at      = {2011-10-28T09:19:50.000+0200},
  timestamp     = {2011-10-28T09:19:50.000+0200},
  biburl        = {https://www.bibsonomy.org/bibtex/2b20cf8edf0e3fca373328bc564ce75bd/diego_ma},
  interhash     = {c98454a960afc8fa501120904dcc24b8},
  intrahash     = {b20cf8edf0e3fca373328bc564ce75bd},
}