% Bibliography on speech processing: question answering over speech
% transcripts, named-entity recognition in speech, spoken dialogue systems,
% speech summarisation and audio/video retrieval.
%
% Conventions used in this file:
%   - One field per line, values delimited by braces, page ranges written
%     with a double hyphen.
%   - The fields added-at, biburl, interhash, intrahash, timestamp and library
%     are BibSonomy export metadata; standard BibTeX styles do not know these
%     field names and ignore them.
%   - Citation keys are kept exactly as exported, because documents cite them.
%   - Comments live outside entries only (BibTeX has no in-entry comments).

% NOTE(review): the exported volume value was "5152/2008", which suggests the
% LNCS volume appeared in 2008 while the year field says 2007 -- confirm which
% year should be cited. The doi looks like the DOI of the whole volume rather
% than of this chapter -- confirm.
@inproceedings{Turmo:2007,
  abstract     = {This paper describes QAST, a pilot track of CLEF 2007 aimed at evaluating the task of Question Answering in Speech Transcripts. The paper summarizes the evaluation framework, the systems that participated and the results achieved. These results have shown that question answering technology can be useful to deal with spontaneous speech transcripts, so for manually transcribed speech as for automatically recognized speech. The loss in accuracy from dealing with manual transcripts to dealing with automatic ones implies that there is room for future research in this area.},
  added-at     = {2009-03-27T09:05:20.000+0100},
  address      = {Berlin / Heidelberg},
  author       = {Turmo, Jordi and Comas, Pere R. and Ayache, Christelle and Mostefa, Djamel and Rosset, Sophie and Lamel, Lori},
  biburl       = {http://www.bibsonomy.org/bibtex/27a93e5480645ac89d78ec7bb158b35b1/diego_ma},
  booktitle    = {Advances in Multilingual and Multimodal Information Retrieval},
  doi          = {10.1007/978-3-540-85760-0},
  interhash    = {3236a27ba9183e4421ee05eba183d532},
  intrahash    = {7a93e5480645ac89d78ec7bb158b35b1},
  keywords     = {speech question_answering},
  library      = {Web (March 2009)},
  pages        = {249--256},
  publisher    = {Springer},
  series       = {Lecture Notes in Computer Science},
  timestamp    = {2009-03-27T09:05:20.000+0100},
  title        = {Overview of {QAST 2007}},
  url          = {http://www.springerlink.com/content/d511437177k776q0/},
  volume       = {5152},
  year         = {2007}
}

% NOTE(review): the key below contains an apostrophe, which some tools reject;
% it is kept unchanged because existing documents may cite it.
% NOTE(review): "NeWS" in the abstract looks like a corruption of "NEs" --
% confirm against the paper before changing it.
@inproceedings{Moll'a:2007,
  abstract     = {Question answering on speech transcripts (QAst) is a pilot track of the CLEF competition. In this paper we present our contribution to QAst, which is centred on a study of Named Entity (NE) recognition on speech transcripts, and how it impacts on the accuracy of the final question answering system. We have ported AFNER, the NE recogniser of the AnswerFinder question-answering project, to the set of answer types expected in the QAst track. AFNER uses a combination of regular expressions, lists of names (gazetteers) and machine learning to find NeWS in the data. The machine learning component was trained on a development set of the AMI corpus. In the process we identified various problems with scalability of the system and the existence of errors of the extracted annotation, which lead to relatively poor performance in general. Performance was yet comparable with state of the art, and the system was second (out of three participants) in one of the QAst subtasks.},
  added-at     = {2008-01-29T07:21:37.000+0100},
  author       = {Moll{\'a}, Diego and van Zaanen, Menno and Cassidy, Steve},
  biburl       = {http://www.bibsonomy.org/bibtex/2202b97875a0ca06dba67da3f7febfc86/diego_ma},
  booktitle    = {Proc. ALTW 2007},
  editor       = {Colineau, Nathalie and Dras, Mark},
  interhash    = {2606e7117807640622c8c6ff801b2cf2},
  intrahash    = {202b97875a0ca06dba67da3f7febfc86},
  keywords     = {AnswerFinder named_entities speech molla_publication},
  pages        = {57--65},
  timestamp    = {2008-01-29T07:21:37.000+0100},
  title        = {Named Entity Recognition in Question Answering of Speech Data},
  url          = {http://www.alta.asn.au/events/altw2007/cdrom/index.html},
  volume       = {5},
  year         = {2007}
}

% NOTE(review): the second author was exported as "Ben, Michael"; corrected to
% "Bett, Michael" -- confirm against the ICASSP 2001 proceedings.
@inproceedings{Waibel:2001,
  added-at     = {2007-12-14T02:47:57.000+0100},
  author       = {Waibel, Alex and Bett, Michael and Metze, Florian and Ries, Klaus and Schaaf, Thomas and Schultz, Tanja and Soltau, Hagen and Yu, Hua and Zechner, Klaus},
  biburl       = {http://www.bibsonomy.org/bibtex/242ac1cbf4626a8dea0a229f643aafed4/diego_ma},
  booktitle    = {Proc. ICASSP 2001},
  interhash    = {66695b0593066e34a2b55c7b38daaf75},
  intrahash    = {42ac1cbf4626a8dea0a229f643aafed4},
  keywords     = {speech},
  timestamp    = {2007-12-14T02:47:57.000+0100},
  title        = {Advances in Automatic Meeting Record Creation and Access},
  year         = {2001}
}

@inproceedings{Surdeanu:2005,
  abstract     = {This paper presents an analysis of named entity recognition and classification in spontaneous speech transcripts. We annotated a significant fraction of the Switchboard corpus with six named entity classes and investigated a battery of machine learning models that include lexical, syntactic, and semantic attributes. The best recognition and classification model obtains promising results, approaching within 5\% a system evaluated on clean textual data.},
  added-at     = {2007-12-14T02:47:08.000+0100},
  address      = {Lisbon},
  author       = {Surdeanu, Mihai and Turmo, Jordi and Comelles, Eli},
  biburl       = {http://www.bibsonomy.org/bibtex/22b0790ca9fa1c810363f5df441fc1254/diego_ma},
  booktitle    = {Proceedings Interspeech-05},
  interhash    = {5c1e7cca18b3a3a40a8456f8bbf44519},
  intrahash    = {2b0790ca9fa1c810363f5df441fc1254},
  keywords     = {named_entities speech},
  timestamp    = {2007-12-14T02:47:08.000+0100},
  title        = {Named Entity Recognition from Spontaneous Open-Domain Speech},
  url          = {http://www.lsi.upc.edu/~comelles/},
  year         = {2005}
}

@incollection{Smeaton:2001b,
  abstract     = {In this chapter we examine various techniques for providing content access to information stored in a continuous medium, namely digital audio and digital video. Our coverage of audio is centered around post-processing the output of automatic recognition of speech or phones and we describe the various approaches that have been taken in this area. In order to give reasonable coverage of the possibilities and limitations of content-based access to digital video information we sketch out at a high level, the approaches taken in various video compression algorithms, principally the MPEG family. We then address approaches to shot and scene boundary detection, choosing representative frames for browsing and for search, and various browsing interfaces that have been developed. We finish with an overview of the likely developments in this area in the future.},
  added-at     = {2007-12-14T02:46:43.000+0100},
  author       = {Smeaton, Alan F.},
  biburl       = {http://www.bibsonomy.org/bibtex/2367f6005012e81d06a91489496bf0263/diego_ma},
  booktitle    = {Lectures on Information Retrieval},
  interhash    = {40421c511b2df08c03290e4f049cd97e},
  intrahash    = {367f6005012e81d06a91489496bf0263},
  keywords     = {inf_retrieval speech video},
  pages        = {93--110},
  publisher    = {Springer-Verlag},
  series       = {Lecture Notes in Computer Science},
  timestamp    = {2007-12-14T02:46:43.000+0100},
  title        = {Indexing, Browsing and Searching of Digital Video and Digital Audio Information},
  year         = {2001}
}

@article{Rudnicky:1994,
  abstract     = {Speech recognition and speech synthesis are technologies of particular interest for their support of direct communication between humans and computers through a communications mode humans commonly use among themselves and at which they are highly skilled. Both manipulate speech in terms of its information content; recognition transforms human speech into text to be used literally (e.g., for dictation) or interpreted as commands to control applications, and synthesis allows the generation of spoken utterances from text.},
  added-at     = {2007-12-14T02:46:02.000+0100},
  author       = {Rudnicky, Alexander I. and Hauptmann, Alexander G. and Lee, Kai-Fu},
  biburl       = {http://www.bibsonomy.org/bibtex/24ed799d4431231eb901d366520b7b231/diego_ma},
  interhash    = {f0007f22ab5c7f9398482d12121ba894},
  intrahash    = {4ed799d4431231eb901d366520b7b231},
  journal      = {Communications of the ACM},
  keywords     = {speech},
  number       = {3},
  pages        = {52--57},
  timestamp    = {2007-12-14T02:46:02.000+0100},
  title        = {Survey of Current Speech Technology},
  volume       = {37},
  year         = {1994}
}

% NOTE(review): the author list was exported with a bare "Jihong" (no surname)
% and with "Shrihari"; corrected to "Ding, Jihong" and to "Srihari", the
% spelling used by the Li:2003 entry -- confirm against the journal record.
@article{Niu:2004,
  abstract     = {One challenge in text processing is the treatment of case insensitive documents such as speech recognition results. The traditional approach is to re-train a language model excluding case-related features. This paper presents an alternative two-step approach whereby a preprocessing module (Step 1) is designed to restore case-sensitive form which is subsequently processed by the original system (Step 2). Step 1 is mainly implemented as a Hidden Markov Model trained on a large raw corpus of case sensitive documents. It is demonstrated that this approach (i) outperforms the feature exclusion approach for named entity tagging, (ii) leads to limited degradation for parsing, relationship extraction and case insensitive question answering, (iii) reduces system complexity, and (iv) has wide applicability: the restored text can be used in both statistical model and rule-based systems.},
  added-at     = {2007-12-14T02:44:29.000+0100},
  author       = {Niu, Cheng and Li, Wei and Ding, Jihong and Srihari, Rohini},
  biburl       = {http://www.bibsonomy.org/bibtex/222f9837dac4a70418136ddfff25b241d/diego_ma},
  interhash    = {88217469f7bff9d742fcbcf5d3309b1d},
  intrahash    = {22f9837dac4a70418136ddfff25b241d},
  journal      = {International Journal on Artificial Intelligence Tools},
  keywords     = {named_entities question_answering speech},
  number       = {1},
  pages        = {141--156},
  timestamp    = {2007-12-14T02:44:29.000+0100},
  title        = {Orthographic Case Restoration Using Supervised Learning without Manual Annotation},
  url          = {http://homepage.mac.com/liwei999/WeiLi/Publications.html},
  volume       = {13},
  year         = {2004}
}

% NOTE(review): this entry has no pages field -- add the page range once it
% has been checked against the journal issue.
@article{Moreno:2002,
  abstract     = {The authors suggest ways in which speech-based multimedia information retrieval technologies can evolve into full-fledged knowledge management systems in which audio, video, and images contribute as much as textual sources},
  added-at     = {2007-12-14T02:44:14.000+0100},
  author       = {Moreno, Pedro J. and Van Thong, J-M and Logan, Beth and Jones, Gareth J. F.},
  biburl       = {http://www.bibsonomy.org/bibtex/2853da4a16ae749b8256e19ac458685c6/diego_ma},
  interhash    = {077d1c097c8cdec0663c2ded3d023560},
  intrahash    = {853da4a16ae749b8256e19ac458685c6},
  journal      = {IEEE Computer},
  keywords     = {inf_retrieval speech multimedia},
  number       = {4},
  timestamp    = {2007-12-14T02:44:14.000+0100},
  title        = {From Multimedia Retrieval to Knowledge Management},
  volume       = {35},
  year         = {2002}
}

@article{McTear:2002,
  abstract     = {Spoken dialogue systems allow users to interact with computer-based applications such as databases and expert systems by using natural spoken language. The origins of spoken dialogue systems can be traced back to Artificial Intelligence research in the 1950s concerned with developing conversational interfaces. However, it is only within the last decade or so, with major advances in speech technology, that large-scale working systems have been developed and, in some cases, introduced into commercial environments. As a result many major telecommunications and software companies have become aware of the potential for spoken dialogue technology to provide solutions in newly developing areas such as computer-telephony integration. Voice portals, which provide a speech-based interface between a telephone user and Web-based services, are the most recent application of spoken dialogue technology. This article describes the main components of the technology---speech recognition, language understanding, dialogue management, communication with an external source such as a database, language generation, speech synthesis---and shows how these component technologies can be integrated into a spoken dialogue system. The article describes in detail the methods that have been adopted in some well-known dialogue systems, explores different system architectures, considers issues of specification, design, and evaluation, reviews some currently available dialogue development toolkits, and outlines prospects for future development.},
  added-at     = {2007-12-14T02:43:14.000+0100},
  author       = {McTear, Michael F.},
  biburl       = {http://www.bibsonomy.org/bibtex/29d0c0e63dfcb696cf90b0981309a0b69/diego_ma},
  interhash    = {e808979df60268c15d5684ca9398c479},
  intrahash    = {9d0c0e63dfcb696cf90b0981309a0b69},
  journal      = {ACM Computing Surveys},
  keywords     = {speech dialogue_system},
  number       = {1},
  pages        = {90--169},
  timestamp    = {2007-12-14T02:43:14.000+0100},
  title        = {Spoken Dialogue Technology: Enabling the Conversational User Interface},
  url          = {http://portal.acm.org/citation.cfm?id=505285},
  volume       = {34},
  year         = {2002}
}

@inproceedings{Li:2003,
  abstract     = {Most question answering (QA) systems rely on both keyword index and Named Entity (NE) tagging. The corpus from which the QA systems attempt to retrieve answers is usually mixed case text. However, there are numerous corpora that consist of case insensitive documents, e.g. speech recognition results. This paper presents a successful approach to QA on a case insensitive corpus, whereby a preprocessing module is designed to restore the case-sensitive form. The document pool with the restored case then feeds the QA system, which remains unchanged. The case restoration preprocessing is implemented as a Hidden Markov Model trained on a large raw corpus of case sensitive documents. It is demonstrated that this approach leads to very limited degradation in QA benchmarking (2.8\%), mainly due to the limited degradation in the underlying information extraction support.},
  added-at     = {2007-12-14T02:42:23.000+0100},
  author       = {Li, Wei and Srihari, Rohini and Niu, Cheng and Li, Xiaoge},
  biburl       = {http://www.bibsonomy.org/bibtex/27f5c7977cedc4baf876d55e19e9e99b1/diego_ma},
  booktitle    = {Proc. ACL 2003 Workshop on Multilingual Summarization and Question Answering},
  interhash    = {dc531e7db11697f674851f5fe300015d},
  intrahash    = {7f5c7977cedc4baf876d55e19e9e99b1},
  keywords     = {question_answering speech},
  pages        = {84--93},
  timestamp    = {2007-12-14T02:42:23.000+0100},
  title        = {Question Answering on a Case Insensitive Corpus},
  year         = {2003}
}

@article{Leavitt:2002,
  added-at     = {2007-12-14T02:42:12.000+0100},
  author       = {Leavitt, Neal},
  biburl       = {http://www.bibsonomy.org/bibtex/21f732440eb7a503828db1c6ae4b8eecf/diego_ma},
  interhash    = {fe3395e0b221e329ba0bc51c9b32a10d},
  intrahash    = {1f732440eb7a503828db1c6ae4b8eecf},
  journal      = {IEEE Computer},
  keywords     = {speech},
  number       = {10},
  pages        = {23--25},
  timestamp    = {2007-12-14T02:42:12.000+0100},
  title        = {Let's Hear it for Audio Mining},
  volume       = {35},
  year         = {2002}
}

% The howpublished value repeats the address from the url field so that styles
% which do not print url still show it; the \url command needs the url or
% hyperref package to be loaded by the citing document.
@misc{Kramer:2002,
  added-at     = {2007-12-14T02:41:47.000+0100},
  author       = {Kramer, Pamela},
  biburl       = {http://www.bibsonomy.org/bibtex/290f103cb00fcd31d7ec0268544a895a1/diego_ma},
  howpublished = {\url{http://www.research.ibm.com/thinkresearch/pages/2002/20020411_meetingminer.shtml}},
  interhash    = {b5ef5502c9a369dd9e4caa2da3ab9ac2},
  intrahash    = {90f103cb00fcd31d7ec0268544a895a1},
  keywords     = {speech inf_retrieval},
  note         = {Last visited 6 February 2004},
  timestamp    = {2007-12-14T02:41:47.000+0100},
  title        = {Getting the Answers},
  url          = {http://www.research.ibm.com/thinkresearch/pages/2002/20020411_meetingminer.shtml},
  year         = {2002}
}

% NOTE(review): volume 2 with pages 1--12 looks like it may come from a
% preprint rather than the journal issue -- confirm.
@article{Hori:2003,
  abstract     = {This paper proposes a statistical approach to automatic speech summarization. In our method, a set of words maximizing a summarization score indicating the appropriateness of summarization is extracted from automatically transcribed speech and then concatenated to create a summary. The extraction process is performed using a dynamic programming (DP) technique based on a target compression ratio. In this paper, we demonstrate how an English news broadcast transcribed by a speech recognizer is automatically summarized. We adapted our method, which was originally proposed for Japanese, to English by modifying the model for estimating word concatenation probabilities based on a dependency structure in the original speech given by a stochastic dependency context free grammar (SDCFG). We also propose a method of summarizing multiple utterances using a two-level DP technique. The automatically summarized sentences are evaluated by summarization accuracy based on a comparison with a manual summary of speech that has been correctly transcribed by human subjects. Our experimental results indicate that the method we propose can effectively extract relatively important information and remove redundant and irrelevant information from English news broadcasts.},
  added-at     = {2007-12-14T02:40:45.000+0100},
  author       = {Hori, Chiori and Furui, Sadaoki and Malkin, Rob and Yu, Hua and Waibel, Alex},
  biburl       = {http://www.bibsonomy.org/bibtex/29bb0d14d65d2b1982f99d20c0eda83bb/diego_ma},
  interhash    = {b5e70c6734bff3925e4bf61230ee6a0f},
  intrahash    = {9bb0d14d65d2b1982f99d20c0eda83bb},
  journal      = {EURASIP Journal on Applied Signal Processing},
  keywords     = {speech summarisation},
  pages        = {1--12},
  timestamp    = {2007-12-14T02:40:45.000+0100},
  title        = {A Statistical Approach to Automatic Speech Summarization},
  url          = {http://asp.hindawi.com/volume-2003/S1110865703211112.html},
  volume       = {2},
  year         = {2003}
}

@article{Gorin:2002,
  abstract     = {The next generation of voice-based interface technology will enable easy-to-use automation of new and existing communication services, making human-machine interaction more natural.},
  added-at     = {2007-12-14T02:39:27.000+0100},
  author       = {Gorin, Allen L. and Abella, Alicia and Alonso, Tirso and Riccardi, Giuseppe and Wright, Jeremy H.},
  biburl       = {http://www.bibsonomy.org/bibtex/2a43fbdd1cd0f2de278eda8fbdb4886ee/diego_ma},
  interhash    = {89e6100d573df5a18574fa342ddc3fb3},
  intrahash    = {a43fbdd1cd0f2de278eda8fbdb4886ee},
  journal      = {IEEE Computer},
  keywords     = {speech dialogue_system},
  number       = {4},
  pages        = {51--56},
  timestamp    = {2007-12-14T02:39:27.000+0100},
  title        = {Automated Natural Spoken Dialog},
  volume       = {35},
  year         = {2002}
}

% month uses the predefined three-letter macro, so it is deliberately unbraced.
@article{Brown:2001,
  abstract     = {Speech is a tantalizing mode of human communication. On the one hand, humans understand speech with ease and use speech to express complex ideas, information, and knowledge. On the other hand, automatic speech recognition with computers is very hard, and extracting knowledge from speech is even harder. Nevertheless, the potential reward for solving this problem drives us to pursue it. Before we can exploit speech as a knowledge resource, however, we must understand the current state of the art in speech recognition and the relevant, successful applications of speech recognition in the related areas of multimedia indexing and search. In this paper we advocate the study of speech as a knowledge resource, provide a brief introduction to the state of the art in speech recognition, describe a number of systems that use speech recognition to enable multimedia analysis, indexing, and search, and present a number of exploratory applications of speech recognition that move toward the goal of exploiting speech as a knowledge resource...},
  added-at     = {2007-12-14T02:36:42.000+0100},
  author       = {Brown, E. W. and Srinivasan, S. and Coden, A. and Ponceleon, D. and Cooper, J. W. and Amir, A.},
  biburl       = {http://www.bibsonomy.org/bibtex/241937fd5a5b3382411f8f8292800cc20/diego_ma},
  interhash    = {35e91a26af4ffff6f819adb0948f43d0},
  intrahash    = {41937fd5a5b3382411f8f8292800cc20},
  journal      = {IBM Systems Journal},
  keywords     = {speech inf_retrieval inf_extraction},
  month        = dec,
  number       = {4},
  pages        = {985--1001},
  timestamp    = {2007-12-14T02:36:42.000+0100},
  title        = {Towards Speech as a Knowledge Resource},
  url          = {http://www.findarticles.com/cf_dls/m0ISJ/4_40/82373866/p1/article.jhtml},
  volume       = {40},
  year         = {2001}
}

@misc{Brainhat:2001,
  added-at     = {2007-12-14T02:36:28.000+0100},
  author       = {Brainhat},
  biburl       = {http://www.bibsonomy.org/bibtex/2d9ea38b39a6a70d9a1dc981dbc5b767b/diego_ma},
  howpublished = {On-line publication},
  interhash    = {e4f23babceb33255cbd4b1563dd53604},
  intrahash    = {d9ea38b39a6a70d9a1dc981dbc5b767b},
  keywords     = {NLP speech},
  timestamp    = {2007-12-14T02:36:28.000+0100},
  title        = {Natural Language Processing to Improve Speech Recognition},
  url          = {http://www.brainhat.com/White\%20paper.pdf},
  year         = {2001}
}

% NOTE(review): the exported year was 2003, contradicting the citation key;
% set to 2001 to match the key and volume 33 -- confirm against the journal.
@article{Bird:2001,
  abstract     = {`Linguistic annotation' covers any descriptive or analytic notations applied to raw language data. The basic data may be in the form of time functions - audio, video and/or physiological recordings - or it may be textual. The added notations may include transcriptions of all sorts (from phonetic features to discourse structures), part-of-speech and sense tagging, syntactic analysis, `named entity' identification, co-reference annotation, and so on. While there are several ongoing efforts to provide formats and tools for such annotations and to publish annotated linguistic databases, the lack of widely accepted standards is becoming a critical problem. Proposed standards, to the extent they exist, have focused on file formats. This paper focuses instead on the logical structure of linguistic annotations. We survey a wide variety of existing annotation formats and demonstrate a common conceptual core, the annotation graph. This provides a formal framework for constructing, maintaining and searching linguistic annotations, while remaining consistent with many alternative data structures and file formats.},
  added-at     = {2007-12-14T02:36:05.000+0100},
  author       = {Bird, Steven and Liberman, Mark},
  biburl       = {http://www.bibsonomy.org/bibtex/2b6e21bef2c26443ad9baab6bd97fac24/diego_ma},
  interhash    = {22d110917b15427ac6de604fcaa6fd18},
  intrahash    = {b6e21bef2c26443ad9baab6bd97fac24},
  journal      = {Speech Communication},
  keywords     = {annotation speech},
  number       = {1--2},
  pages        = {23--60},
  timestamp    = {2007-12-14T02:36:05.000+0100},
  title        = {A Formal Framework for Linguistic Annotation},
  url          = {http://www.ldc.upenn.edu/sb/home/publications.html},
  volume       = {33},
  year         = {2001}
}

@inproceedings{Appelt:1999,
  abstract     = {This paper describes the application of the TextPro system to the task of recognition of named entities in speech. TextPro is a lightweight engine for interpreting cascaded finite-state transducers. Although originally intended for processing text, the experience of this evaluation demonstrates the system can easily be adapted to processing transcripts generated by a speech recognizer as well.},
  added-at     = {2007-12-14T02:35:35.000+0100},
  author       = {Appelt, Douglas E. and Martin, David},
  biburl       = {http://www.bibsonomy.org/bibtex/2a1cda268226866da44d71e407d59cd09/diego_ma},
  booktitle    = {Proc. DARPA Broadcast News Workshop},
  interhash    = {56f8ebc4d30e77de1f906879acd444a9},
  intrahash    = {a1cda268226866da44d71e407d59cd09},
  keywords     = {speech inf_extraction},
  pages        = {51--54},
  timestamp    = {2007-12-14T02:35:35.000+0100},
  title        = {Named Entity Recognition in Speech: Approach and Results Using the {TextPro} System},
  url          = {http://www.speech.sri.com/projects/sieve/publications.html},
  year         = {1999}
}