% Bibliography on information extraction and related retrieval / question
% answering work. The biburl fields point at bibsonomy.org, so the
% added-at, biburl, interhash, intrahash, keywords and timestamp fields are
% presumably BibSonomy export metadata; standard styles ignore them.
%
% Conventions used below:
%   - one field per line, values brace-delimited, fields in alphabetical order
%   - page ranges use an en-dash (--), months use the standard three-letter macros
%   - url fields hold the raw URL (no LaTeX escapes)
%   - corporate authors/editors are double-braced so they are never split as
%     personal names

% Two note fields below use \myurl, which is not defined in this file.
% \providecommand supplies a fallback and is a no-op when the document
% already defines the macro.
@preamble{ "\providecommand{\myurl}[1]{\texttt{#1}}" }

@book{Jacobs:1992,
  added-at = {2007-12-14T02:49:40.000+0100},
  address = {Hillsdale (New Jersey), Hove and London},
  biburl = {http://www.bibsonomy.org/bibtex/2054cc1a1012f52eded8daadb49f3fa8d/diego_ma},
  editor = {Jacobs, Paul S.},
  interhash = {e2bca8ba91aacabf78b9dbf16550ef98},
  intrahash = {054cc1a1012f52eded8daadb49f3fa8d},
  keywords = {inf_extraction inf_retrieval},
  publisher = {Lawrence Erlbaum},
  timestamp = {2007-12-14T02:49:40.000+0100},
  title = {Text-based Intelligent Systems: current research and practice in information extraction and retrieval},
  year = 1992
}

@inproceedings{Shrihari:2000,
  abstract = {This paper discusses the use of our information extraction (IE) system, Textract, in the question answering (QA) track of the recently held TREC-8 tests. One of our major objectives is to examine how IE can help IR (Information Retrieval) in applications like QA. Our study shows: (i) IE can provide solid support for QA; (ii) low-level IE like Named Entity tagging is often a necessary component in handling most types of questions; (iii) a robust natural language shallow parser provides a structural basis for handling questions; (iv) high-level domain independent IE, i.e. extraction of multiple relationships and general events, is expected to bring about a breakthrough in QA.},
  added-at = {2007-12-14T02:46:54.000+0100},
  author = {Srihari, Rohini and Li, Wei},
  biburl = {http://www.bibsonomy.org/bibtex/29df0d224595fff1f2ef1f62c4f9dd43f/diego_ma},
  booktitle = {Proceedings {TREC} 8 (1999)},
  interhash = {3fc98ec61b8ff27af5bfdf32eaf3926f},
  intrahash = {9df0d224595fff1f2ef1f62c4f9dd43f},
  keywords = {question_answering inf_extraction},
  timestamp = {2007-12-14T02:46:54.000+0100},
  title = {Information Extraction Supported Question Answering},
  url = {http://trec.nist.gov/pubs/trec8/t8_proceedings.html},
  year = 2000
}

% The author is the conference itself, hence the double braces.
@misc{ZZZ-MUC7,
  added-at = {2007-12-14T02:44:16.000+0100},
  address = {San Diego, CA},
  author = {{MUC-7}},
  biburl = {http://www.bibsonomy.org/bibtex/28f07b5ee8e9cd1fbaa65cee01466f81a/diego_ma},
  booktitle = {Proc. {MUC}-7},
  interhash = {c0b390d7f8c8e07419f50242ec78cac1},
  intrahash = {8f07b5ee8e9cd1fbaa65cee01466f81a},
  keywords = {inf_extraction},
  note = {\myurl{http://www.muc.saic.com}},
  organization = {SAIC},
  timestamp = {2007-12-14T02:44:16.000+0100},
  title = {Proc. of the Seventh Message Understanding Conference ({MUC}-7)},
  url = {http://www.muc.saic.com},
  year = 1998
}

@incollection{Morgan:1996,
  added-at = {2007-12-14T02:44:15.000+0100},
  address = {Los Altos, CA},
  author = {Morgan, Richard and Garigliano, Roberto and Gallaghan, Paul and Poria, Sanjay and Smith, Mark and Urbanowicz, Agnieszka and Collingham, Russell and Costantino, Marco and Cooper, Chris},
  biburl = {http://www.bibsonomy.org/bibtex/2b22909c40d24125b7ebe8d685f2f347d/diego_ma},
  booktitle = {Proceedings of the 6th {ARPA} Message Understanding Conference},
  editor = {{ARPA}},
  interhash = {3a3e7b4c0754fa732380b1be5a734dea},
  intrahash = {b22909c40d24125b7ebe8d685f2f347d},
  keywords = {inf_extraction},
  publisher = {Morgan Kaufmann},
  timestamp = {2007-12-14T02:44:15.000+0100},
  title = {University of {Durham}: description of the {LOLITA} system as used in {MUC}-6},
  year = 1996
}

@article{Mikheev:1999,
  abstract = {This paper reports on the development of a Named Entity recognition system developed fully within the xml paradigm.},
  added-at = {2007-12-14T02:43:22.000+0100},
  author = {Mikheev, Andrei and Grover, Claire and Moens, Marc},
  biburl = {http://www.bibsonomy.org/bibtex/2405df565d9049db319b921f2467a2a8d/diego_ma},
  interhash = {4cd9aee7a25e0efde51421d9fa21734e},
  intrahash = {405df565d9049db319b921f2467a2a8d},
  journal = {Markup Languages},
  keywords = {inf_extraction XML},
  number = 3,
  pages = {89--113},
  timestamp = {2007-12-14T02:43:22.000+0100},
  title = {{XML} Tools and Architecture for Named Entity Recognition},
  url = {http://citeseer.ist.psu.edu/288299.html},
  volume = 1,
  year = 1999
}

@incollection{Lin:1996,
  added-at = {2007-12-14T02:42:29.000+0100},
  address = {Los Altos, CA},
  author = {Lin, Dekang},
  biburl = {http://www.bibsonomy.org/bibtex/2139994ba19dfd8a24e4e88de592c7879/diego_ma},
  booktitle = {Proceedings of the 6th {ARPA} Message Understanding Conference},
  editor = {{ARPA}},
  interhash = {a58170a6be5169a3e4f030f296b4a496},
  intrahash = {139994ba19dfd8a24e4e88de592c7879},
  keywords = {inf_extraction},
  publisher = {Morgan Kaufmann},
  timestamp = {2007-12-14T02:42:29.000+0100},
  title = {University of {Manitoba}: description of the {PIE} system used for {MUC}-6},
  year = 1996
}

% NOTE(review): the first author is recorded only as "L.", which looks like a
% truncated name -- verify the author list against the paper before citing.
% The url had no scheme; http:// was added to match the other citeseer link
% in this file.
@misc{--98satisfying,
  added-at = {2007-12-14T02:41:52.000+0100},
  author = {L. and Malouf, B. and Sag, I.},
  biburl = {http://www.bibsonomy.org/bibtex/2aac6796bb2d0bf2ab5abc2d206770be4/diego_ma},
  interhash = {7118abd947dd5544a81122c6978409d0},
  intrahash = {aac6796bb2d0bf2ab5abc2d206770be4},
  keywords = {inf_extraction},
  timestamp = {2007-12-14T02:41:52.000+0100},
  title = {Satisfying constraints on extraction and adjunction},
  url = {http://citeseer.nj.nec.com/362807.html},
  year = 1998
}

@inproceedings{Kushmerick:1997,
  abstract = {Wrapper Induction for Information Extraction by Nicholas Kushmerick Chairperson of Supervisory Committee: Professor Daniel S. Weld Department of Computer Science and Engineering The Internet presents numerous sources of useful information---telephone directories, product catalogs, stock quotes, weather forecasts, etc. Recently, many systems have been built that automatically gather and manipulate such information on a user's behalf. However, these resources are usually formatted for use by people (e.g., the relevant content is embedded in HTML pages), so extracting their content is difficult. Wrappers are often used for this purpose. A wrapper is a procedure for extracting a particular ...},
  added-at = {2007-12-14T02:41:50.000+0100},
  author = {Kushmerick, Nicholas and Weld, Daniel S. and Doorenbos, Robert},
  biburl = {http://www.bibsonomy.org/bibtex/2103a6927010dadecd61449ced6256a2d/diego_ma},
  booktitle = {Proc. {IJCAI}-97},
  interhash = {68cdf9d86fe93a7450d95f1403815d18},
  intrahash = {103a6927010dadecd61449ced6256a2d},
  keywords = {inf_extraction},
  timestamp = {2007-12-14T02:41:50.000+0100},
  title = {Wrapper Induction for Information Extraction},
  url = {http://citeseer.nj.nec.com/kushmerick97wrapper.html},
  year = 1997
}

% The url and note previously read "http:\\www.muc.saic.com/"; the doubled
% backslash is a LaTeX line break, not part of a URL, so it is now "//".
% The title read "System and Used for"; corrected to "as Used for".
@inproceedings{Humphreys:1998,
  abstract = {The University of Sheffield NLP group took part in MUC-7 using the LaSIE-II system, an evolution of the LaSIE (Large Scale Information Extraction) system first created for participation in MUC-6 and part of a larger research effort into information extraction underway in our group. LaSIE-II was used to carry out all five of the MUC-7 tasks and was, in fact, the only system to take part in all of the MUC-7 tasks. While LaSIE-II is significantly different from the earlier version (differences are detailed below) there are no radical changes in the basic philosophy of the approach. This could be described as seeking a pragmatic middle way in the shallow vs deep analysis debate which has characterised the last several MUCs. That is, while aware that information extraction tasks may not require full text understanding, and hence that systems should be optimised to make use of shallow techniques where appropriate, we have not wanted to preclude the application of arbitrarily sophisticated linguistic analysis techniques where these may prove useful. The result is an eclectic mixture of techniques including finite state recognition of domain-specific lexical patterns, partial parsing using a restricted context-free grammar, simplified semantic representation of each sentence in the text and a formal representation of the whole discourse from which all of the IE task results and the coreference task results are derived. From our perspective, LaSIE-II should not be viewed as the expression of a theory about how to do IE, but as a laboratory in which ongoing experiments with different component NL processing techniques, and most importantly, their interaction are being carried out. Seen this way, one of the most important developments in LaSIE-II is its modularised architecture and integration into the GATE platform (see below) which has enabled us to gain much deeper insights into strengths and weaknesses of components of the system and the ways in which these interact.},
  added-at = {2007-12-14T02:40:56.000+0100},
  author = {Humphreys, Kevin and Gaizauskas, Rob and Huyck, C. and Mitchell, B. and Cunningham, Hamish and Wilks, Yorick},
  biburl = {http://www.bibsonomy.org/bibtex/2631ac100ba28312f13bb12fb06efb7e1/diego_ma},
  booktitle = {Proc. {MUC}-7},
  interhash = {66b006c4b4644ec0a37393eeb4bdcc4c},
  intrahash = {631ac100ba28312f13bb12fb06efb7e1},
  keywords = {inf_extraction GATE},
  note = {On-line proceedings, \myurl{http://www.muc.saic.com/}},
  organization = {SAIC},
  timestamp = {2007-12-14T02:40:56.000+0100},
  title = {University of {Sheffield}: Description of the {LaSIE-II} System as Used for {MUC-7}},
  url = {http://www.muc.saic.com/},
  year = 1998
}

% "Honolulu, Hawaii" was stored in the organization field; it is a place,
% so it now lives in address.
@inproceedings{Humphreys:2000,
  abstract = {Information extraction technology, as defined and developed through the U.S. DARPA Message Understanding Conferences (MUCs), has proved successful at extracting information primarily from newswire texts and primarily in domains concerned with human activity. In this paper we consider the application of this technology to the extraction of information from scientific journal papers in the area of molecular biology. In particular, we describe how an information extraction system designed to participate in the MUC exercises has been modified for two bioinformatics applications: EMPathIE, concerned with enzyme and metabolic pathways; and PASTA, concerned with protein structure. Progress to date provides convincing grounds for believing that IE techniques will deliver novel and effective ways for scientists to make use of the core literature which defines their disciplines.},
  added-at = {2007-12-14T02:40:53.000+0100},
  address = {Honolulu, Hawaii},
  author = {Humphreys, Kevin and Demetriou, George and Gaizauskas, Robert},
  biburl = {http://www.bibsonomy.org/bibtex/2f9d2c45fe747b42f22f5da88cf71fd41/diego_ma},
  booktitle = {Proceedings of the Pacific Symposium on Biocomputing '00 ({PSB}'00)},
  interhash = {e43000b020376c4dc3129c5fe02b8654},
  intrahash = {f9d2c45fe747b42f22f5da88cf71fd41},
  keywords = {inf_extraction named_entities},
  pages = {502--513},
  timestamp = {2007-12-14T02:40:53.000+0100},
  title = {Two Applications of Information Extraction to Biological Science Journal Articles: Enzyme Interactions and Protein Structures},
  url = {http://www.bionlp.org/psb2000/humphreys.pdf},
  year = 2000
}

@incollection{Hobbs:1996:2,
  added-at = {2007-12-14T02:40:39.000+0100},
  address = {Cambridge, MA},
  author = {Hobbs, Jerry R. and Appelt, Douglas E. and Bear, John and Israel, David and Kameyama, Megumi and Stickel, Mark and Tyson, Mabry},
  biburl = {http://www.bibsonomy.org/bibtex/285029401b889776fead6d310eeb57c20/diego_ma},
  booktitle = {Finite State Devices for Natural Language Processing},
  editor = {Roche, E. and Schabes, Y.},
  interhash = {8723bab8ef46eef8d8a15a380e9720ee},
  intrahash = {85029401b889776fead6d310eeb57c20},
  keywords = {inf_extraction},
  publisher = {MIT Press},
  timestamp = {2007-12-14T02:40:39.000+0100},
  title = {{FASTUS}: A Cascaded Finite-state Transducer for Extracting Information from Natural-language Text},
  year = 1996
}

@techreport{Hobbs:1991,
  added-at = {2007-12-14T02:40:28.000+0100},
  address = {Menlo Park, CA},
  author = {Hobbs, Jerry and Appelt, Douglas E. and Bear, John S. and Tyson, Mabry and Magerman, David},
  biburl = {http://www.bibsonomy.org/bibtex/22f6800fd78ebaf880c0b967f0c69c756/diego_ma},
  institution = {AI Center, SRI International},
  interhash = {77b194d89b7f7f74b234a61682824b96},
  intrahash = {2f6800fd78ebaf880c0b967f0c69c756},
  keywords = {inf_extraction},
  timestamp = {2007-12-14T02:40:28.000+0100},
  title = {The {TACITUS} system: The {MUC}-3 experience},
  year = 1991
}

% The url holds a raw tilde; the former LaTeX escape was not a valid URL.
@inproceedings{Harabagiu:2000,
  abstract = {In this paper we present a new method of automatic acquisition of linguistic patterns for Information Extraction, as implemented in the CICERO system. Our approach combines lexico-semantic information available from the WordNet database with collocating data extracted from training corpora. Due to the open-domain nature of the WordNet information and the immediate availability of large collections of texts, our method can be easily ported to open-domain Information Extraction.},
  added-at = {2007-12-14T02:39:57.000+0100},
  author = {Harabagiu, Sanda M. and Maiorano, Steven J.},
  biburl = {http://www.bibsonomy.org/bibtex/28d0e1f520803bed0df16f9b2b7d4279f/diego_ma},
  booktitle = {Proc. {LREC}-2000},
  interhash = {b6e86701aa642b8a38417a69621199d8},
  intrahash = {8d0e1f520803bed0df16f9b2b7d4279f},
  keywords = {inf_extraction WordNet},
  timestamp = {2007-12-14T02:39:57.000+0100},
  title = {Acquisition of Linguistic Patterns for Knowledge-Based Information Extraction},
  url = {http://www.seas.smu.edu/~sanda/papers.html},
  year = 2000
}

@inproceedings{Estival:1997,
  added-at = {2007-12-14T02:38:38.000+0100},
  address = {Gold Coast},
  author = {Estival, Dominique},
  biburl = {http://www.bibsonomy.org/bibtex/23835dcbe71fd4eaf8caa9bbd0a0f2928/diego_ma},
  booktitle = {Proceedings of IPMM'97},
  interhash = {0a89ae48c87efd8f20996c8ccc19ae06},
  intrahash = {3835dcbe71fd4eaf8caa9bbd0a0f2928},
  keywords = {inf_extraction},
  month = jul,
  timestamp = {2007-12-14T02:38:38.000+0100},
  title = {Can Syntactic Processing Help Extracting Information from Texts?},
  url = {http://www.arts.unimelb.edu.au/Dept/LALX/people/estival.html},
  year = 1997
}

@article{Cowie:1996,
  added-at = {2007-12-14T02:37:49.000+0100},
  author = {Cowie, Jim and Lehnert, Wendy},
  biburl = {http://www.bibsonomy.org/bibtex/2b12d7770e1cc93b15642225f050bf1db/diego_ma},
  interhash = {9e94ed74de991c8a9acce1b63fddaedf},
  intrahash = {b12d7770e1cc93b15642225f050bf1db},
  journal = {Communications of the {ACM}},
  keywords = {inf_extraction},
  month = jan,
  number = 1,
  pages = {80--91},
  timestamp = {2007-12-14T02:37:49.000+0100},
  title = {Information Extraction},
  volume = 39,
  year = 1996
}

@article{Brown:2001,
  abstract = {Speech is a tantalizing mode of human communication. On the one hand, humans understand speech with ease and use speech to express complex ideas, information, and knowledge. On the other hand, automatic speech recognition with computers is very hard, and extracting knowledge from speech is even harder. Nevertheless, the potential reward for solving this problem drives us to pursue it. Before we can exploit speech as a knowledge resource, however, we must understand the current state of the art in speech recognition and the relevant, successful applications of speech recognition in the related areas of multimedia indexing and search. In this paper we advocate the study of speech as a knowledge resource, provide a brief introduction to the state of the art in speech recognition, describe a number of systems that use speech recognition to enable multimedia analysis, indexing, and search, and present a number of exploratory applications of speech recognition that move toward the goal of exploiting speech as a knowledge resource...},
  added-at = {2007-12-14T02:36:42.000+0100},
  author = {Brown, E. W. and Srinivasan, S. and Coden, A. and Ponceleon, D. and Cooper, J. W. and Amir, A.},
  biburl = {http://www.bibsonomy.org/bibtex/241937fd5a5b3382411f8f8292800cc20/diego_ma},
  interhash = {35e91a26af4ffff6f819adb0948f43d0},
  intrahash = {41937fd5a5b3382411f8f8292800cc20},
  journal = {IBM Systems Journal},
  keywords = {speech inf_retrieval inf_extraction},
  month = dec,
  number = 4,
  pages = {985--1001},
  timestamp = {2007-12-14T02:36:42.000+0100},
  title = {Towards Speech as a Knowledge Resource},
  url = {http://www.findarticles.com/cf_dls/m0ISJ/4_40/82373866/p1/article.jhtml},
  volume = 40,
  year = 2001
}

% The url holds a raw underscore, matching the TREC-8 link in Shrihari:2000.
@inproceedings{Bear:1997,
  added-at = {2007-12-14T02:35:54.000+0100},
  author = {Bear, John and Israel, David and Petit, Jeff and Martin, David},
  biburl = {http://www.bibsonomy.org/bibtex/2e2b31470e55a95619d8b9ccd1de86319/diego_ma},
  booktitle = {Proceedings of the Sixth {Text} {REtrieval} {Conference}},
  interhash = {5b5020190d397d7fad51e99101c6cd95},
  intrahash = {e2b31470e55a95619d8b9ccd1de86319},
  keywords = {inf_retrieval inf_extraction},
  timestamp = {2007-12-14T02:35:54.000+0100},
  title = {Using Information Extraction to Improve Document Retrieval},
  url = {http://trec.nist.gov/pubs/trec6/t6_proceedings.html},
  year = 1997
}

% NOTE(review): number = 63 with volume = 16 and no pages field looks
% suspicious (63 may be a page number) -- TODO confirm against the journal.
@article{vanBakel:1997,
  added-at = {2007-12-14T02:35:42.000+0100},
  author = {van Bakel, Bas and Postma, Geert},
  biburl = {http://www.bibsonomy.org/bibtex/2f031dd33d862ac55fa8df99022fc3c59/diego_ma},
  interhash = {f45f528c59b3493f35b2006efabe7680},
  intrahash = {f031dd33d862ac55fa8df99022fc3c59},
  journal = {Trends in Analytical Chemistry},
  keywords = {inf_extraction},
  number = 63,
  timestamp = {2007-12-14T02:35:42.000+0100},
  title = {Challenges for chemical information extraction and text retrieval},
  url = {http://www.elsevier.nl:80/homepage/saa/trac/cheminfo.htm},
  volume = 16,
  year = 1997
}

@inproceedings{Appelt:1999,
  abstract = {This paper describes the application of the TextPro system to the task of recognition of named entities in speech. TextPro is a lightweight engine for interpreting cascaded finite-state transducers. Although originally intended for processing text, the experience of this evaluation demonstrates the system can easily be adapted to processing transcripts generated by a speech recognizer as well.},
  added-at = {2007-12-14T02:35:35.000+0100},
  author = {Appelt, Douglas E. and Martin, David},
  biburl = {http://www.bibsonomy.org/bibtex/2a1cda268226866da44d71e407d59cd09/diego_ma},
  booktitle = {Proc. {DARPA} Broadcast News Workshop},
  interhash = {56f8ebc4d30e77de1f906879acd444a9},
  intrahash = {a1cda268226866da44d71e407d59cd09},
  keywords = {speech inf_extraction},
  pages = {51--54},
  timestamp = {2007-12-14T02:35:35.000+0100},
  title = {Named Entity Recognition in Speech: Approach and Results Using the {TextPro} System},
  url = {http://www.speech.sri.com/projects/sieve/publications.html},
  year = 1999
}

@inproceedings{Appelt:1993,
  added-at = {2007-12-14T02:35:34.000+0100},
  address = {Los Altos, CA},
  author = {Appelt, Douglas E. and Hobbs, Jerry R. and Bear, John and Israel, David and Kameyama, Megumi and Tyson, Mabry},
  biburl = {http://www.bibsonomy.org/bibtex/254b3b65647010abdb5bd5f098ab5b491/diego_ma},
  booktitle = {Proc. {MUC-5}},
  interhash = {e5640c8d895a21a2504035a80682b1ef},
  intrahash = {54b3b65647010abdb5bd5f098ab5b491},
  keywords = {inf_extraction},
  pages = {221--235},
  publisher = {Morgan Kaufmann},
  timestamp = {2007-12-14T02:35:34.000+0100},
  title = {{SRI}: description of the {JV}-{FASTUS} system used for {MUC-5}},
  year = 1993
}