% Named-entity recognition bibliography (BibSonomy export, cleaned up).
%
% Conventions used in this file:
%   - one field per line, fields in alphabetical order, values always braced;
%   - page ranges use an en-dash (double hyphen);
%   - LaTeX specials inside text fields are escaped (percent signs in abstracts);
%   - added-at, biburl, interhash, intrahash, library and timestamp are BibSonomy
%     bookkeeping fields; standard styles ignore them.
% Citation keys are kept exactly as exported so existing citations keep working.

% NOTE(review): page range is kept as recorded (it looks like the pagination of
% the preprint behind the url) -- TODO confirm against the published issue.
@article{Nadeau:2007,
  added-at  = {2009-07-06T10:21:03.000+0200},
  author    = {Nadeau, David and Sekine, Satoshi},
  biburl    = {http://www.bibsonomy.org/bibtex/23f48214f33ff712b7bcd070d6408fee5/diego_ma},
  interhash = {2d9a1a5440885a8741a1686f344a9494},
  intrahash = {3f48214f33ff712b7bcd070d6408fee5},
  journal   = {Lingvisticae Investigationes},
  keywords  = {named_entities survey},
  library   = {Mine (July 2009)},
  number    = {1},
  pages     = {1--20},
  timestamp = {2009-07-06T10:21:03.000+0200},
  title     = {A Survey of Named Entity Recognition and Classification},
  url       = {http://nlp.cs.nyu.edu/sekine/papers/li07.pdf},
  volume    = {30},
  year      = {2007},
}

@inproceedings{Molla:2006a,
  abstract  = {Current text-based question answering (QA) systems usually contain a named entity recogniser (NER) as a core component. Named entity recognition has traditionally been developed as a component for information extraction systems, and current techniques are focused on this end use. However, no formal assessment has been done on the characteristics of a NER within the task of question answering. In this paper we present a NER that aims at higher recall by allowing multiple entity labels to strings. The NER is embedded in a question answering system and the overall QA system performance is compared to that of one with a traditional variation of the NER that only allows single entity labels. It is shown that the added noise introduced by the additional labels is offset by the higher recall gained, therefore enabling the QA system to have a better chance to find the answer.},
  added-at  = {2008-10-10T05:18:00.000+0200},
  author    = {Moll{\'a}, Diego and van Zaanen, Menno and Smith, Daniel},
  biburl    = {http://www.bibsonomy.org/bibtex/264e2ff61c23df0f5f02914ea091dc158/diego_ma},
  booktitle = {Proceedings ALTW 2006},
  interhash = {6759916d3748647a40af77f818a52130},
  intrahash = {64e2ff61c23df0f5f02914ea091dc158},
  keywords  = {named_entities AnswerFinder molla_publication},
  pages     = {51--58},
  timestamp = {2008-10-10T05:18:00.000+0200},
  title     = {Named Entity Recognition for Question Answering},
  year      = {2006},
}

% NOTE(review): the key below contains an apostrophe. It is kept because
% documents may already cite it, but some tools may reject such keys; consider
% migrating citations to an ASCII-only key.
@inproceedings{Moll'a:2007,
  abstract  = {Question answering on speech transcripts (QAst) is a pilot track of the CLEF competition. In this paper we present our contribution to QAst, which is centred on a study of Named Entity (NE) recognition on speech transcripts, and how it impacts on the accuracy of the final question answering system. We have ported AFNER, the NE recogniser of the AnswerFinder question-answering project, to the set of answer types expected in the QAst track. AFNER uses a combination of regular expressions, lists of names (gazetteers) and machine learning to find NEs in the data. The machine learning component was trained on a development set of the AMI corpus. In the process we identified various problems with scalability of the system and the existence of errors of the extracted annotation, which lead to relatively poor performance in general. Performance was yet comparable with state of the art, and the system was second (out of three participants) in one of the QAst subtasks.},
  added-at  = {2008-01-29T07:21:37.000+0100},
  author    = {Moll{\'a}, Diego and van Zaanen, Menno and Cassidy, Steve},
  biburl    = {http://www.bibsonomy.org/bibtex/2202b97875a0ca06dba67da3f7febfc86/diego_ma},
  booktitle = {Proceedings ALTW 2007},
  editor    = {Colineau, Nathalie and Dras, Mark},
  interhash = {2606e7117807640622c8c6ff801b2cf2},
  intrahash = {202b97875a0ca06dba67da3f7febfc86},
  keywords  = {AnswerFinder named_entities speech molla_publication},
  pages     = {57--65},
  timestamp = {2008-01-29T07:21:37.000+0100},
  title     = {Named Entity Recognition in Question Answering of Speech Data},
  url       = {http://www.alta.asn.au/events/altw2007/cdrom/index.html},
  volume    = {5},
  year      = {2007},
}

@inproceedings{Zhou:2002,
  abstract  = {This paper proposes a Hidden Markov Model (HMM) and an HMM-based chunk tagger, from which a named entity (NE) recognition (NER) system is built to recognize and classify names, times and numerical quantities. Through the HMM, our system is able to apply and integrate four types of internal and external evidences: 1) simple deterministic internal feature of the words, such as capitalization and digitalization; 2) internal semantic feature of important triggers; 3) internal gazetteer feature; 4) external macro context feature. In this way, the NER problem can be resolved effectively. Evaluation of our system on MUC-6 and MUC-7 English NE tasks achieves F-measures of 96.6\% and 94.1\% respectively. It shows that the performance is significantly better than reported by any other machine-learning system. Moreover, the performance is even consistently better than those based on handcrafted rules.},
  added-at  = {2007-12-14T02:48:42.000+0100},
  author    = {Zhou, GuoDong and Su, Jian},
  biburl    = {http://www.bibsonomy.org/bibtex/2753c74267baabe14fe6d420b78043998/diego_ma},
  booktitle = {Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics (ACL 2002)},
  interhash = {9c075941b088aa183059d9db2a6746c9},
  intrahash = {753c74267baabe14fe6d420b78043998},
  keywords  = {named_entities machine_learning},
  timestamp = {2007-12-14T02:48:42.000+0100},
  title     = {Named Entity Recognition using an {HMM}-based Chunk Tagger},
  year      = {2002},
}

@inproceedings{Surdeanu:2005,
  abstract  = {This paper presents an analysis of named entity recognition and classification in spontaneous speech transcripts. We annotated a significant fraction of the Switchboard corpus with six named entity classes and investigated a battery of machine learning models that include lexical, syntactic, and semantic attributes. The best recognition and classification model obtains promising results, approaching within 5\% a system evaluated on clean textual data.},
  added-at  = {2007-12-14T02:47:08.000+0100},
  address   = {Lisbon},
  author    = {Surdeanu, Mihai and Turmo, Jordi and Comelles, Eli},
  biburl    = {http://www.bibsonomy.org/bibtex/22b0790ca9fa1c810363f5df441fc1254/diego_ma},
  booktitle = {Proceedings Interspeech-05},
  interhash = {5c1e7cca18b3a3a40a8456f8bbf44519},
  intrahash = {2b0790ca9fa1c810363f5df441fc1254},
  keywords  = {named_entities speech},
  timestamp = {2007-12-14T02:47:08.000+0100},
  title     = {Named Entity Recognition from Spontaneous Open-Domain Speech},
  url       = {http://www.lsi.upc.edu/~comelles/},
  year      = {2005},
}

@inproceedings{Noguera:2005,
  abstract  = {In a previous paper we proved that Named Entity Recognition plays an important role to improve Question Answering by both increasing the quality of the data and by reducing its quantity. Here we present a more in-depth discussion, studying several ways in which NER can be applied in order to produce a maximum data reduction. We achieve a 60\% reduction without significant data loss and a 92.5\% with a reasonable implication in data quality.},
  added-at  = {2007-12-14T02:44:32.000+0100},
  author    = {Noguera, Elisa and Toral, Antonio and Llopis, Fernando and Mu{\~n}oz, Rafael},
  biburl    = {http://www.bibsonomy.org/bibtex/28a56675983754175b10280cc944d8e92/diego_ma},
  booktitle = {Proceedings of the 8th International Conference on Text, Speech \& Dialogue},
  doi       = {10.1007/11551874_55},
  interhash = {cc98195db681ce8d82bb6d6572e77618},
  intrahash = {8a56675983754175b10280cc944d8e92},
  keywords  = {question_answering named_entities},
  pages     = {428--434},
  timestamp = {2007-12-14T02:44:32.000+0100},
  title     = {Reducing Question Answering Input Data Using Named Entity Recognition},
  url       = {http://dx.doi.org/10.1007/11551874_55},
  year      = {2005},
}

% NOTE(review): the exported author list had a bare given name ("Jihong") and
% the spelling "Shrihari"; corrected to the names on the published paper --
% TODO confirm against the journal record.
@article{Niu:2004,
  abstract  = {One challenge in text processing is the treatment of case insensitive documents such as speech recognition results. The traditional approach is to re-train a language model excluding case-related features. This paper presents an alternative two-step approach whereby a preprocessing module (Step 1) is designed to restore case-sensitive form which is subsequently processed by the original system (Step 2). Step 1 is mainly implemented as a Hidden Markov Model trained on a large raw corpus of case sensitive documents. It is demonstrated that this approach (i) outperforms the feature exclusion approach for named entity tagging, (ii) leads to limited degradation for parsing, relationship extraction and case insensitive question answering, (iii) reduces system complexity, and (iv) has wide applicability: the restored text can be used in both statistical model and rule-based systems.},
  added-at  = {2007-12-14T02:44:29.000+0100},
  author    = {Niu, Cheng and Li, Wei and Ding, Jihong and Srihari, Rohini K.},
  biburl    = {http://www.bibsonomy.org/bibtex/222f9837dac4a70418136ddfff25b241d/diego_ma},
  interhash = {88217469f7bff9d742fcbcf5d3309b1d},
  intrahash = {22f9837dac4a70418136ddfff25b241d},
  journal   = {International Journal on Artificial Intelligence Tools},
  keywords  = {named_entities question_answering speech},
  number    = {1},
  pages     = {141--156},
  timestamp = {2007-12-14T02:44:29.000+0100},
  title     = {Orthographic Case Restoration Using Supervised Learning without Manual Annotation},
  url       = {http://homepage.mac.com/liwei999/WeiLi/Publications.html},
  volume    = {13},
  year      = {2004},
}

@inproceedings{Mikheev:1999:2,
  added-at  = {2007-12-14T02:43:23.000+0100},
  author    = {Mikheev, Andrei and Moens, Marc and Grover, Claire},
  biburl    = {http://www.bibsonomy.org/bibtex/2cf5005cce6fad851c7227451f85c4fcb/diego_ma},
  booktitle = {Proceedings of the Association for Computational Linguistics},
  interhash = {32311ef1b26fe7bf6c6e63cfd3da73bf},
  intrahash = {cf5005cce6fad851c7227451f85c4fcb},
  keywords  = {named_entities},
  pages     = {1--8},
  publisher = {Association for Computational Linguistics},
  timestamp = {2007-12-14T02:43:23.000+0100},
  title     = {Named Entity recognition without gazetteers},
  year      = {1999},
}

% The conference location was exported in the organization field; it is stored
% in address here, like the other entries in this file.
@inproceedings{Humphreys:2000,
  abstract  = {Information extraction technology, as defined and developed through the U.S. DARPA Message Understanding Conferences (MUCs), has proved successful at extracting information primarily from newswire texts and primarily in domains concerned with human activity. In this paper we consider the application of this technology to the extraction of information from scientific journal papers in the area of molecular biology. In particular, we describe how an information extraction system designed to participate in the MUC exercises has been modified for two bioinformatics applications: EMPathIE, concerned with enzyme and metabolic pathways; and PASTA, concerned with protein structure. Progress to date provides convincing grounds for believing that IE techniques will deliver novel and effective ways for scientists to make use of the core literature which defines their disciplines.},
  added-at  = {2007-12-14T02:40:53.000+0100},
  address   = {Honolulu, Hawaii},
  author    = {Humphreys, Kevin and Demetriou, George and Gaizauskas, Robert},
  biburl    = {http://www.bibsonomy.org/bibtex/2f9d2c45fe747b42f22f5da88cf71fd41/diego_ma},
  booktitle = {Proceedings of the Pacific Symposium on Biocomputing '00 (PSB'00)},
  interhash = {e43000b020376c4dc3129c5fe02b8654},
  intrahash = {f9d2c45fe747b42f22f5da88cf71fd41},
  keywords  = {inf_extraction named_entities},
  pages     = {502--513},
  timestamp = {2007-12-14T02:40:53.000+0100},
  title     = {Two Applications of Information Extraction to Biological Science Journal Articles: Enzyme Interactions and Protein Structures},
  url       = {http://www.bionlp.org/psb2000/humphreys.pdf},
  year      = {2000},
}

@inproceedings{Gallippi:1996,
  abstract  = {The development of natural language processing (NLP) systems that perform machine translation (MT) and information retrieval (IR) has highlighted the need for the automatic recognition of proper names. While various name recognizers have been developed, they suffer from being too limited; some only recognize one name class, and all are language specific. This work develops an approach to multilingual name recognition that allows a system optimized for one language to be ported to another with little additional effort and resources. An initial core set of linguistic features, useful for name recognition in most languages, is identified. When porting to a new language, these features need to be converted (partly by hand, partly by on-line lists), after which point machine learning (ML) techniques build decision trees that map features to name classes. A system initially optimized for English has been successfully ported to Spanish and Japanese. Only a few days of human effort for each new language results in performance levels comparable to that of the best current English systems.},
  added-at  = {2007-12-14T02:39:12.000+0100},
  author    = {Gallippi, Anthony F.},
  biburl    = {http://www.bibsonomy.org/bibtex/2fe587667402ffc32b6d0d635db627a7d/diego_ma},
  booktitle = {Proceedings COLING 1996},
  interhash = {cad65f94e1e4629c51d9f357c2cc18bd},
  intrahash = {fe587667402ffc32b6d0d635db627a7d},
  keywords  = {named_entities},
  pages     = {424--429},
  timestamp = {2007-12-14T02:39:12.000+0100},
  title     = {Learning to Recognize Names Across Languages},
  url       = {http://acl.ldc.upenn.edu/C/C96/C96-1072.pdf},
  year      = {1996},
}

@inproceedings{Fleischman:2002,
  abstract  = {While Named Entity extraction is useful in many natural language applications, the coarse categories that most NE extractors work with prove insufficient for complex applications such as Question Answering and Ontology generation. We examine one coarse category of named entities, persons, and describe a method for automatically classifying person instances into eight finer-grained subcategories. We present a supervised learning method that considers the local context surrounding the entity as well as more global semantic information derived from topic signatures and WordNet. We reinforce this method with an algorithm that takes advantage of the presence of entities in multiple contexts.},
  added-at  = {2007-12-14T02:38:51.000+0100},
  address   = {Taipei, Taiwan},
  author    = {Fleischman, Michael and Hovy, Eduard},
  biburl    = {http://www.bibsonomy.org/bibtex/2a97c64b39d5d713400082f785d734d5b/diego_ma},
  booktitle = {Proceedings of Coling 2002},
  interhash = {7ad1118a2b8da63ff1089efa456cbd14},
  intrahash = {a97c64b39d5d713400082f785d734d5b},
  keywords  = {machine_learning named_entities},
  timestamp = {2007-12-14T02:38:51.000+0100},
  title     = {Fine Grained Classification of Named Entities},
  url       = {http://www.mit.edu/~mbf/COLING_02.pdf},
  year      = {2002},
}

% NOTE(review): first author's given name corrected from "Haoi Leong" to
% "Hai Leong" -- TODO confirm against the proceedings record.
@inproceedings{Chieu:2002,
  abstract  = {This paper presents a maximum entropy-based named entity recognizer (NER). It differs from previous machine learning-based NERs in that it uses information from the whole document to classify each word, with just one classifier. Previous work that involves the gathering of information from the whole document often uses a secondary classifier, which corrects the mistakes of a primary sentence-based classifier. In this paper, we show that the maximum entropy framework is able to make use of global information directly, and achieves performance that is comparable to the best previous machine learning-based NERs on MUC-6 and MUC-7 test data.},
  added-at  = {2007-12-14T02:37:20.000+0100},
  author    = {Chieu, Hai Leong and Ng, Hwee Tou},
  biburl    = {http://www.bibsonomy.org/bibtex/2d4f233fd44172dfcf0d98dd03bc781e1/diego_ma},
  booktitle = {Proceedings COLING 2002},
  interhash = {3cf03ab32dea86078e392abb70b62066},
  intrahash = {d4f233fd44172dfcf0d98dd03bc781e1},
  keywords  = {named_entities machine_learning},
  timestamp = {2007-12-14T02:37:20.000+0100},
  title     = {Named Entity Recognition: A Maximum Entropy Approach Using Global Information},
  year      = {2002},
}

@inproceedings{Armour:2005,
  abstract  = {Named entities are typically associated with names of people, places and organizations and constitute a group of textual elements present in almost any type of document. The general techniques used to extract them and their variable-length property also makes them an attractive type of attribute to study in text classification. In this paper, several data sets are characterized as being either dependent or independent of named entities with a Naive Bayes based ranking technique. Using this characterization, results are presented which find named entities to be in fact useful in classification tasks, and that accuracy can be improved by considering them as a special type of attribute. Namely, the inclusion of regular terms, named entity representation and the frequency with which a classifier is retrained all have an impact on the classification of documents where named entities are important.},
  added-at  = {2007-12-14T02:35:36.000+0100},
  address   = {Gatineau, Canada},
  author    = {Armour, Quintin and Japkowicz, Nathalie and Matwin, Stan},
  biburl    = {http://www.bibsonomy.org/bibtex/2ce8cc5153963e37fc34d9b71c2bbcc93/diego_ma},
  booktitle = {Proceedings CLiNE 2005},
  interhash = {f1f8e4bfc10afd7c14acee90b305a590},
  intrahash = {ce8cc5153963e37fc34d9b71c2bbcc93},
  keywords  = {named_entities text_categorisation},
  timestamp = {2007-12-14T02:35:36.000+0100},
  title     = {The Role of Named Entities in Text Classification},
  url       = {http://www.crtl.ca/cline05/cline05_papers/ArmourJapkowiczMatwin.pdf},
  year      = {2005},
}