@comment{
  Machine-learning and question-answering references exported from BibSonomy.
  Layout: one field per line, bibliographic fields first, then the BibSonomy
  bookkeeping fields (keywords, library, added-at, timestamp, biburl,
  interhash, intrahash), which are not standard BibTeX fields.
  Page ranges use a double hyphen; month uses the bare three-letter macro.
  Pizzato:2004 appeared twice in the export under the same key; only one copy
  is kept because a repeated citation key is a BibTeX error.
  Zaanen:2005 and vanZaanen:2005:2 describe the same paper under two keys;
  both are kept so that existing citations of either key still resolve.
}

@inproceedings{Yang:1997,
  author    = {Yang, Yiming and Pedersen, Jan O.},
  title     = {A comparative study on feature selection in text categorization},
  booktitle = {Proceedings of {ICML}-97, 14th International Conference on Machine Learning},
  editor    = {Fisher, Douglas H.},
  publisher = {Morgan Kaufmann Publishers, San Francisco, US},
  address   = {Nashville, US},
  pages     = {412--420},
  year      = 1997,
  url       = {http://citeseer.ist.psu.edu/yang97comparative.html},
  keywords  = {classification machine_learning feature_selection},
  added-at  = {2009-11-12T21:36:09.000+0100},
  timestamp = {2009-11-12T21:36:09.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/261e9c9275679c8c5021603eb6920d033/diego_ma},
  interhash = {016cecde345f4e36cfcac1acf6552a65},
  intrahash = {61e9c9275679c8c5021603eb6920d033},
}

@incollection{Shen:2005,
  author    = {Shen, Dan and Kruijff, Geert-Jan M. and Klakow, Dietrich},
  title     = {Exploring Syntactic Relation Patterns for Question Answering},
  booktitle = {Natural Language Processing -- {IJCNLP} 2005: Second International Joint Conference, Jeju Island, Korea, October 11--13, 2005. Proceedings.},
  editor    = {Dale, Robert and Wong, Kam-Fai and Su, Jian and Kwong, Oi Yee},
  publisher = {Springer-Verlag},
  year      = 2005,
  url       = {http://www.aclweb.org/anthology-new/I/I05/I05-1045.pdf},
  abstract  = {In this paper, we explore the syntactic relation patterns for open domain factoid question answering. We propose a pattern extraction method to extract the various relations between the proper answers and different types of question words, including target words, head words, subject words and verbs, from syntactic trees. We further propose a QA-specific tree kernel to partially match the syntactic relation patterns. It makes the more tolerant matching between two patterns and helps to solve the data sparseness problem. Lastly, we incorporate the patterns into a Maximum Entropy Model to rank the answer candidates. The experiment on TREC questions shows that the syntactic relation patterns help to improve the performance by 6.91 MRR based on the common features.},
  keywords  = {question_answering machine_learning},
  added-at  = {2009-11-11T22:33:15.000+0100},
  timestamp = {2009-11-11T22:33:15.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/2a36b72928b449ac746792a05d0ec2d7c/diego_ma},
  interhash = {cb28273b2a1837858d716919cca0c5ed},
  intrahash = {a36b72928b449ac746792a05d0ec2d7c},
}

@inproceedings{Shen:2006,
  author    = {Shen, Dan and Klakow, Dietrich},
  title     = {Exploring Correlation of Dependency Relation Paths for Answer Extraction},
  booktitle = {Proceedings COLING/ACL 2006},
  address   = {Sydney},
  pages     = {889--896},
  year      = 2006,
  url       = {http://acl.ldc.upenn.edu/P/P06/P06-1112.pdf},
  abstract  = {In this paper, we explore correlation of dependency relation paths to rank candidate answers in answer extraction. Using the correlation measure, we compare dependency relations of a candidate answer and mapped question phrases in sentence with the corresponding relations in question. Different from previous studies, we propose an approximate phrase mapping algorithm and incorporate the mapping score into the correlation measure. The correlations are further incorporated into a Maximum Entropy-based ranking model which estimates path weights from training. Experimental results show that our method significantly outperforms state-of-the-art syntactic relation-based methods by up to 20\% in MRR.},
  keywords  = {question_answering machine_learning dependencies DG},
  added-at  = {2009-11-11T22:33:09.000+0100},
  timestamp = {2009-11-11T22:33:09.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/2b385d2d62a1cec0bcaa6f01019112f65/diego_ma},
  interhash = {6966be5911653238e919c7e841c639d8},
  intrahash = {b385d2d62a1cec0bcaa6f01019112f65},
}

@article{Li:2005,
  author    = {Li, Xin and Roth, Dan},
  title     = {Learning Question Classifiers: The Role of Semantic Information},
  journal   = {Natural Language Engineering},
  volume    = 12,
  number    = 3,
  pages     = {229--249},
  year      = 2006,
  url       = {http://l2r.cs.uiuc.edu/~danr/Papers/LiRo05a.pdf},
  keywords  = {question_classification machine_learning semantic_information},
  added-at  = {2009-09-15T17:44:45.000+0200},
  timestamp = {2009-09-15T17:44:45.000+0200},
  biburl    = {http://www.bibsonomy.org/bibtex/25bd3953af92ba73c62bf74126011d01b/diego_ma},
  interhash = {e62cbfe2badff0e80a6817dae3479f70},
  intrahash = {5bd3953af92ba73c62bf74126011d01b},
}

@book{Manning:2008,
  author    = {Manning, Christopher D. and Raghavan, Prabhakar and Sch{\"u}tze, Hinrich},
  title     = {Introduction to Information Retrieval},
  publisher = {Cambridge University Press},
  address   = {New York},
  year      = 2008,
  abstract  = {TOC 1 - Boolean Retrieval 2 - The term vocabulary and posting lists 3 - Dictionaries and tolerant retrieval 4 - Index construction 5 - Index compression 6 - Scoring, term weighting and the vector space model 7 - Computing scores in a complete search system 8 - Evaluation in information retrieval 9 - Relevance feedback and query expansion 10 - XML retrieval 11 - Probabilistic information retrieval 12 - Language models for information retrieval 13 - Text classification and Naive Bayes 14 - Vector space classification 15 - Support vector machines and machine learning on documents 16 - Flat clustering 17 - Hierarchical clustering 18 - Matrix decompositions and latent semantic indexing 19 - Web search basics 20 - Web crawling and indexes 21 - Link analysis},
  keywords  = {search machine_learning text_categorisation inf_retr web_search clustering},
  library   = {Mine (April 2009)},
  added-at  = {2009-04-20T05:42:23.000+0200},
  timestamp = {2009-04-20T05:42:23.000+0200},
  biburl    = {http://www.bibsonomy.org/bibtex/28516d94c1f7aa1e391ddd3ace4caa23b/diego_ma},
  interhash = {b6954037b1d444f4afe4cad883b4d80c},
  intrahash = {8516d94c1f7aa1e391ddd3ace4caa23b},
}

@book{Chakrabarti:2003,
  author    = {Chakrabarti, Soumen},
  title     = {Mining the Web: Discovering Knowledge from Hypertext Data},
  publisher = {Morgan Kaufmann},
  address   = {Amsterdam},
  year      = 2003,
  abstract  = {TOC: 1 - Introduction 2 - Crawling the Web 3 - Web search and information retrieval 4 - Similarity and clustering 5 - Supervised learning 6 - Semisupervised learning 7 - Social network analysis 8 - Resource discovery 9 - The future of web mining},
  keywords  = {search machine_learning similarity text_mining nlp inf_retr link_analysis clustering},
  library   = {Mine (April 2009)},
  added-at  = {2009-04-20T05:34:59.000+0200},
  timestamp = {2009-04-20T05:34:59.000+0200},
  biburl    = {http://www.bibsonomy.org/bibtex/2722a822b6752526ee0e05c369f3cd6d4/diego_ma},
  interhash = {88bbf9b6bc5b9bd829085ae4bfa03a22},
  intrahash = {722a822b6752526ee0e05c369f3cd6d4},
}

@inproceedings{Pizzato:2004,
  author       = {Pizzato, Luiz A. S.},
  title        = {Using a Trie-based Structure for Question Analysis},
  booktitle    = {Proc. ALTW 2004},
  editor       = {Asudeh, Ash and Paris, C{\'e}cile and Wan, Stephen},
  organization = {Macquarie University},
  publisher    = {ASSTA},
  address      = {Sydney, Australia},
  pages        = {25--31},
  year         = 2004,
  url          = {http://www.ics.mq.edu.au/~pizzato/Papers/index.html},
  abstract     = {This paper presents an approach for question analysis that defines the question subject and its required answer type by building a trie-based structure from a set of question patterns. The question analysis consists of comparing the question tokens with the path of nodes in the trie. A look-ahead process solve the mismatches of unknown words by assigning a entity-type or semantically linking them with other question words. The developed approach is evaluated using different datasets showing that its performance is comparable with state-of-the-art systems.},
  keywords     = {questions machine_learning},
  added-at     = {2008-03-04T07:47:30.000+0100},
  timestamp    = {2008-03-04T07:47:30.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/23cb7b8d122f144ccf7957fac478a3ed2/diego_ma},
  interhash    = {318e434ce2c0d97480e4d2c1a9457ca2},
  intrahash    = {3cb7b8d122f144ccf7957fac478a3ed2},
}

@phdthesis{vanZaanen:2002,
  author    = {van Zaanen, Menno},
  title     = {Bootstrapping Structure into Language: {A}lignment-{B}ased {L}earning},
  school    = {University of Leeds},
  address   = {Leeds, UK},
  year      = 2002,
  url       = {http://www.ics.mq.edu.au/~menno/docs/t_leeds.pdf},
  keywords  = {machine_learning},
  added-at  = {2008-01-29T08:06:06.000+0100},
  timestamp = {2008-01-29T08:06:06.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/238e2b0e94fe8709dae88304f30873865/diego_ma},
  interhash = {f7137c162e15861f7f4e091bfae2816f},
  intrahash = {38e2b0e94fe8709dae88304f30873865},
}

@incollection{Zaanen:2005,
  author    = {van Zaanen, Menno and Pizzato, Luiz Augusto and Moll{\'a}, Diego},
  title     = {Classifying Sentences Using Induced Structure},
  booktitle = {String Processing and Information Retrieval: 12th International Conference, SPIRE 2005.},
  editor    = {Consens, Mariano and Navarro, Gonzalo},
  publisher = {Springer-Verlag},
  address   = {Heidelberg, Germany},
  pages     = {139--150},
  year      = 2005,
  abstract  = {In this article we will introduce a new approach (and several implementations) to the task of sentence classification, where pre-defined classes are assigned to sentences. This approach concentrates on structural information that is present in the sentences. This information is extracted using machine learning techniques and the patterns found are used to classify the sentences. The approach fits in between the existing machine learning and hand-crafting of regular expressions approaches, and it combines the best of both. The sequential information present in the sentences is used directly, classifiers can be generated automatically and the output and intermediate representations can be investigated and manually optimised if needed.},
  keywords  = {question_answering machine_learning molla_publication},
  added-at  = {2008-01-29T08:01:43.000+0100},
  timestamp = {2008-01-29T08:01:43.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/249b95e3eb33c35b0e237cc9fcb3962ba/diego_ma},
  interhash = {fc20ac0a4dd9ceba412dab0137f1d8ac},
  intrahash = {49b95e3eb33c35b0e237cc9fcb3962ba},
}

@inproceedings{vanZaanen:2005:2,
  author    = {van Zaanen, Menno and Pizzato, Luiz A. S. and Moll{\'a}, Diego},
  title     = {Classifying Sentences Using Induced Structure},
  booktitle = {Proc. Twelfth Edition of the Symposium on String Processing and Information Retrieval (SPIRE2005)},
  address   = {Buenos Aires},
  year      = 2005,
  keywords  = {machine_learning question_answering molla_publication},
  added-at  = {2008-01-29T08:01:27.000+0100},
  timestamp = {2008-01-29T08:01:27.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/233a161454c4867f97377379545d4ad3d/diego_ma},
  interhash = {fc20ac0a4dd9ceba412dab0137f1d8ac},
  intrahash = {33a161454c4867f97377379545d4ad3d},
}

@inproceedings{Zhou:2002,
  author    = {Zhou, GuoDong and Su, Jian},
  title     = {Named Entity Recognition using an {HMM}-based Chunk Tagger},
  booktitle = {Proc. 40th Annual Meeting of the Association for Computational Linguistics (ACL 2002)},
  year      = 2002,
  abstract  = {This paper proposes a Hidden Markov Model (HMM) and an HMM-based chunk tagger, from which a named entity (NE) recognition (NER) system is built to recognize and classify names, times and numerical quantities. Through the HMM, our system is able to apply and integrate four types of internal and external evidences: 1) simple deterministic internal feature of the words, such as capitalization and digitalization; 2) internal semantic feature of important triggers; 3) internal gazetteer feature; 4) external macro context feature. In this way, the NER problem can be resolved effectively. Evaluation of our system on MUC-6 and MUC-7 English NE tasks achieves F-measures of 96.6\% and 94.1\% respectively. It shows that the performance is significantly better than reported by any other machine-learning system. Moreover, the performance is even consistently better than those based on handcrafted rules.},
  keywords  = {named_entities machine_learning},
  added-at  = {2007-12-14T02:48:42.000+0100},
  timestamp = {2007-12-14T02:48:42.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/2753c74267baabe14fe6d420b78043998/diego_ma},
  interhash = {9c075941b088aa183059d9db2a6746c9},
  intrahash = {753c74267baabe14fe6d420b78043998},
}

@inproceedings{vanZaanen:2000:2,
  author    = {van Zaanen, Menno},
  title     = {{ABL}: Alignment-Based Learning},
  booktitle = {COLING 2000 - Proceedings of the 18th International Conference on Computational Linguistics},
  pages     = {961--967},
  year      = 2000,
  url       = {http://www.ics.mq.edu.au/~menno/docs/p_coling00.pdf},
  keywords  = {machine_learning grammar},
  added-at  = {2007-12-14T02:48:33.000+0100},
  timestamp = {2007-12-14T02:48:33.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/27f0138f56192ab8b6ee9a58bc8ccd636/diego_ma},
  interhash = {58ae93cf3db3cf692410f8e527361f8f},
  intrahash = {7f0138f56192ab8b6ee9a58bc8ccd636},
}

@inproceedings{vanZaanen:2000,
  author       = {van Zaanen, Menno},
  title        = {Bootstrapping Syntax and Recursion using Alignment-Based Learning},
  booktitle    = {Proceedings of the Seventeenth International Conference on Machine Learning},
  editor       = {Langley, Pat},
  organization = {Stanford University},
  publisher    = {Morgan Kaufmann Publishers},
  pages        = {1063--1070},
  month        = jul,
  year         = 2000,
  url          = {http://www.ics.mq.edu.au/~menno/docs/p_icml00.pdf},
  keywords     = {machine_learning grammar},
  added-at     = {2007-12-14T02:48:32.000+0100},
  timestamp    = {2007-12-14T02:48:32.000+0100},
  biburl       = {http://www.bibsonomy.org/bibtex/2cb4a3198fa15460df762c602a2fc24fa/diego_ma},
  interhash    = {0904bda139eb443b11e4a1147db54e5f},
  intrahash    = {cb4a3198fa15460df762c602a2fc24fa},
}

@inproceedings{Ravichandran:2002,
  author    = {Ravichandran, Deepak and Hovy, Eduard},
  title     = {Learning Surface Text Patterns for a Question Answering System},
  booktitle = {Proc. ACL2002},
  year      = 2002,
  url       = {http://acl.ldc.upenn.edu/P/P02/P02-1006.pdf},
  abstract  = {In this paper we explore the power of surface text patterns for open-domain question answering systems. In order to obtain an optimal set of patterns, we have developed a method for learning such patterns automatically. A tagged corpus is built from the Internet in a bootstrapping process by providing a few hand-crafted examples of each question type to Altavista. Patterns are then automatically extracted from the returned documents and standardized. We calculate the precision of each pattern, and the average precision for each question type. These patterns are then applied to find answers to new questions. Using the TREC-10 question set, we report results for two cases: answers determined from the TREC-10 corpus and from the web.},
  keywords  = {question_answering machine_learning},
  added-at  = {2007-12-14T02:45:31.000+0100},
  timestamp = {2007-12-14T02:45:31.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/2931e40b89a930ecb93229555ebf62a8c/diego_ma},
  interhash = {e523f02cfc750ce0140e222fe006aaa8},
  intrahash = {931e40b89a930ecb93229555ebf62a8c},
}

@article{Quinlan:1990,
  author      = {Quinlan, J. Ross},
  title       = {Learning Logical Definitions from Examples},
  journal     = {Machine Learning},
  volume      = 5,
  number      = 3,
  year        = 1990,
  keywords    = {machine_learning},
  review-note = {NOTE(review): the Quinlan article in Machine Learning 5(3) is usually cited as Learning Logical Definitions from Relations, pp. 239--266; confirm the title against the journal and add the pages field},
  added-at    = {2007-12-14T02:45:27.000+0100},
  timestamp   = {2007-12-14T02:45:27.000+0100},
  biburl      = {http://www.bibsonomy.org/bibtex/2df8b6d0b3bcaa21e52b3d7156013d22c/diego_ma},
  interhash   = {9b34c8009c53a28d14c69e0e42d60f91},
  intrahash   = {df8b6d0b3bcaa21e52b3d7156013d22c},
}

@inproceedings{Plas:2005,
  author    = {van der Plas, Lonneke and Bouma, Gosse},
  title     = {Automatic Acquisition of Lexico-semantic Knowledge for {QA}},
  booktitle = {Proceedings Ontolex 2005},
  year      = 2005,
  note      = {9 pages},
  url       = {http://odur.let.rug.nl/~gosse/Imix/},
  abstract  = {We present an experiment for finding semantically similar words on the basis of a parsed corpus of Dutch text and show that the acquired information correlates with relations found in Dutch EuroWordNet. Next, we demonstrate how the acquired knowledge can be used to boost the performance of an open-domain question answering system for Dutch. Automatically acquired lexico-semantic information is used to improve the recall of a method for extracting function relations (such as Wim Kok is the prime minister of the Netherlands) from corpora, and to improve the precision of our QA system on general WH-questions and definition questions.},
  keywords  = {question_answering machine_learning},
  added-at  = {2007-12-14T02:45:05.000+0100},
  timestamp = {2007-12-14T02:45:05.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/2f43ae7517a973e4fcf0ca0dab65248a2/diego_ma},
  interhash = {745bad9e9c5c59cac655d4e7334cbedf},
  intrahash = {f43ae7517a973e4fcf0ca0dab65248a2},
}

@incollection{Pereira:1999,
  author    = {Pereira, Fernando and Singer, Yoram and Tishby, Naftali},
  title     = {Beyond Word N-Grams},
  booktitle = {Natural Language Processing Using Very Large Corpora},
  editor    = {Armstrong, Susan and Church, Kenneth and Isabelle, Pierre and Manzi, Sandra and Tzoukermann, Evelyne and Yarowsky, David},
  series    = {Text, Speech and Language Technology},
  volume    = 11,
  publisher = {Kluwer},
  pages     = {121--136},
  year      = 1999,
  abstract  = {We describe, analyze, and evaluate experimentally a new probabilistic model for word-sequence prediction in natural language based on prediction suffix trees (PSTs). ...},
  keywords  = {machine_learning},
  added-at  = {2007-12-14T02:44:56.000+0100},
  timestamp = {2007-12-14T02:44:56.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/2e5f9422854abecc6ca1b2727985caaf7/diego_ma},
  interhash = {58536b69bdb6dba6c68f66185cbe724b},
  intrahash = {e5f9422854abecc6ca1b2727985caaf7},
}

@book{Pearl:1992,
  author    = {Pearl, J.},
  title     = {Probabilistic Reasoning in Intelligent Systems: Networks of Plausible Inference},
  publisher = {Morgan Kaufmann},
  year      = 1992,
  keywords  = {machine_learning},
  added-at  = {2007-12-14T02:44:55.000+0100},
  timestamp = {2007-12-14T02:44:55.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/2cec6bc800af4e9678663237f88d3f02a/diego_ma},
  interhash = {d443aff06e63511a48cf18613b870a1c},
  intrahash = {cec6bc800af4e9678663237f88d3f02a},
}

@incollection{Muggleton:1992,
  author    = {Muggleton, S. and Feng, C.},
  title     = {Efficient Induction of Logic Programs},
  booktitle = {Inductive Logic Programming},
  publisher = {Academic Press},
  year      = 1992,
  keywords  = {machine_learning},
  added-at  = {2007-12-14T02:44:19.000+0100},
  timestamp = {2007-12-14T02:44:19.000+0100},
  biburl    = {http://www.bibsonomy.org/bibtex/20a07f14d220a8a7646087114c0f49958/diego_ma},
  interhash = {45f3c948ff873d78347986e852ef6349},
  intrahash = {0a07f14d220a8a7646087114c0f49958},
}