% Joachims (1997), ICML-97: probabilistic analysis of the Rocchio algorithm with TFIDF.
% Proper nouns and acronyms in the title are braced so sentence-casing styles keep their capitals.
@inproceedings{citeulike:1711972,
  author    = {Joachims, Thorsten},
  title     = {A probabilistic analysis of the {Rocchio} algorithm with {TFIDF} for text categorization},
  booktitle = {Proceedings of ICML-97, 14th International Conference on Machine Learning},
  editor    = {Fisher, Douglas H.},
  pages     = {143--151},
  year      = {1997},
  publisher = {Morgan Kaufmann Publishers, San Francisco, US},
  address   = {Nashville, US},
  url       = {http://citeseer.ist.psu.edu/54920.html},
  id        = {1711972},
  priority  = {0},
  at        = {2007-09-30 19:02:18},
  abstract  = {The Rocchio relevance feedback algorithm is one of the most popular and widely applied learning methods from information retrieval. Here, a probabilistic analysis of this algorithm is presented in a text categorization framework. The analysis gives theoretical insight into the heuristics used in the Rocchio algorithm, particularly the word weighting scheme and the similarity metric. It also suggests improvements which lead to a probabilistic variant of the Rocchio classifier. The Rocchio...},
  biburl    = {http://www.bibsonomy.org/bibtex/2a57078d6bd4695f6831bedcf09b5ed89/pprett},
  keywords  = {text relevance, machine, categorization, feedback, rocchio, learning,}
}
% Boyan, Freitag and Joachims (1996), AAAI workshop: machine learning architecture for Web search engines.
% "Web" is braced so sentence-casing styles keep the capital; authors are given with full first names.
@inproceedings{citeulike:1711976,
  author    = {Boyan, Justin and Freitag, Dayne and Joachims, Thorsten},
  title     = {A Machine Learning Architecture for Optimizing {Web} Search Engines},
  booktitle = {Proceedings of the AAAI Workshop on Internet-Based Information Systems},
  year      = {1996},
  url       = {http://citeseer.ist.psu.edu/boyan96machine.html},
  id        = {1711976},
  priority  = {2},
  at        = {2007-09-30 19:03:22},
  abstract  = {Indexing systems for the World Wide Web, such as Lycos and Alta Vista, play an essential role in making the Web useful and usable. These systems are based on Information Retrieval methods for indexing plain text documents, but also include heuristics for adjusting their document rankings based on the special HTML structure of Web documents. In this paper, we describe a wide range of such heuristics---including a novel one inspired by reinforcement learning techniques for propagating rewards...},
  biburl    = {http://www.bibsonomy.org/bibtex/2f626405f45454001c24cd027470b9d62/pprett},
  keywords  = {search, relevance, machine, web reinforcement, feedback, learning,}
}
% Schuetze, Hull and Pedersen (1995): classifiers and document representations for the routing problem.
% The umlaut is written as a brace-delimited BibTeX special character so sorting and labels treat it as one letter.
@inproceedings{citeulike:1711978,
  author    = {Sch{\"u}tze, Hinrich and Hull, David A. and Pedersen, Jan O.},
  title     = {A Comparison of Classifiers and Document Representations for the Routing Problem},
  booktitle = {Research and Development in Information Retrieval},
  pages     = {229--237},
  year      = {1995},
  url       = {http://citeseer.ist.psu.edu/schutze95comparison.html},
  id        = {1711978},
  priority  = {2},
  at        = {2007-09-30 19:04:16},
  comment   = {(private-note)the experiments conducted show that linear ann (with no hidden unit) are favourable to non-linear activation functions for the task of text filtering},
  abstract  = {In this paper, we compare learning techniques based on statistical classification to traditional methods of relevance feedback for the document routing problem. We consider three classification techniques which have decision rules that are derived via explicit error minimization: linear discriminant analysis, logistic regression, and neural networks. We demonstrate that the classifiers perform 10--15\% better than relevance feedback via Rocchio expansion for the TREC-2 and TREC-3 routing tasks....},
  biburl    = {http://www.bibsonomy.org/bibtex/28a4cb4eb513259de610474cdb12402f7/pprett},
  keywords  = {relevance, machine, routing connectionist, feedback, learning, ann,}
}
% Richardson, Prakash and Brill (2006), WWW '06: machine learning for static ranking.
% "PageRank" is braced so sentence-casing styles keep its internal capitals.
@inproceedings{richardson2006beyond,
  author    = {Richardson, Matthew and Prakash, Amit and Brill, Eric},
  title     = {Beyond {PageRank}: machine learning for static ranking},
  booktitle = {WWW '06: Proceedings of the 15th international conference on World Wide Web},
  pages     = {707--715},
  year      = {2006},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  isbn      = {1595933239},
  doi       = {10.1145/1135777.1135881},
  url       = {http://portal.acm.org/citation.cfm?id=1135777.1135881},
  id        = {1370090},
  priority  = {0},
  at        = {2007-09-30 20:35:12},
  biburl    = {http://www.bibsonomy.org/bibtex/22704fc1c28065f9c2771a3eb94af0f1d/pprett},
  keywords  = {ranknet msr, machine, pagerank, ranking, learning,}
}
% Radlinski and Joachims (2007), KDD '07: active exploration for learning rankings from clickthrough data.
@inproceedings{citeulike:1659403,
  author    = {Radlinski, Filip and Joachims, Thorsten},
  title     = {Active exploration for learning rankings from clickthrough data},
  booktitle = {KDD '07: Proceedings of the 13th ACM SIGKDD international conference on Knowledge discovery and data mining},
  pages     = {570--579},
  year      = {2007},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  isbn      = {9781595936097},
  doi       = {10.1145/1281192.1281254},
  url       = {http://portal.acm.org/citation.cfm?id=1281254},
  id        = {1659403},
  priority  = {5},
  at        = {2007-10-01 10:18:20},
  comment   = {review on geeking with greg: http://glinden.blogspot.com/2007/09/actively-learning-to-rank.html},
  biburl    = {http://www.bibsonomy.org/bibtex/216c3ba9bc64910390702af25c6e36aa3/pprett},
  keywords  = {clickthrough, relevance machine, feedback, learning,}
}
% Burges et al. (2005), ICML '05: learning to rank using gradient descent.
@inproceedings{citeulike:1714577,
  author    = {Burges, Chris and Shaked, Tal and Renshaw, Erin and Lazier, Ari and Deeds, Matt and Hamilton, Nicole and Hullender, Greg},
  title     = {Learning to rank using gradient descent},
  booktitle = {ICML '05: Proceedings of the 22nd international conference on Machine learning},
  pages     = {89--96},
  year      = {2005},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  isbn      = {1595931805},
  doi       = {10.1145/1102351.1102363},
  url       = {http://portal.acm.org/citation.cfm?id=1102351.1102363},
  id        = {1714577},
  priority  = {0},
  at        = {2007-10-01 10:27:11},
  biburl    = {http://www.bibsonomy.org/bibtex/2a00e5dd434027770eae311854b0bc88a/pprett},
  keywords  = {search, msr, neural, machine, web ranking, networks, ranknet, learning,}
}
% Agichtein, Brill and Dumais (2006), SIGIR '06: web search ranking with user behavior information.
@inproceedings{citeulike:1284298,
  author    = {Agichtein, Eugene and Brill, Eric and Dumais, Susan},
  title     = {Improving web search ranking by incorporating user behavior information},
  booktitle = {SIGIR '06: Proceedings of the 29th annual international ACM SIGIR conference on Research and development in information retrieval},
  pages     = {19--26},
  year      = {2006},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  isbn      = {1595933697},
  doi       = {10.1145/1148170.1148177},
  url       = {http://portal.acm.org/citation.cfm?id=1148170.1148177},
  id        = {1284298},
  priority  = {0},
  at        = {2007-10-01 13:46:16},
  biburl    = {http://www.bibsonomy.org/bibtex/25c7a51a4c45d5647bde6cd22dbe82b0b/pprett},
  keywords  = {relevance, retrieval, web behavior, feedback, ranknet, learning, user, search, msr, re-rank, evaluation, machine, information,}
}
% Chen and Zhu (2002), Information Retrieval 5(1): formal analysis of Rocchio's relevance feedback algorithm.
% month uses the predefined three-letter macro so styles can localize or abbreviate it;
% the journal is stored under its full name and "Rocchio's" is braced against sentence-casing.
@article{citeulike:1773173,
  author    = {Chen, Zhixiang and Zhu, Binhai},
  title     = {Some Formal Analysis of {Rocchio's} Similarity-Based Relevance Feedback Algorithm},
  journal   = {Information Retrieval},
  volume    = {5},
  number    = {1},
  pages     = {61--86},
  month     = jan,
  year      = {2002},
  publisher = {Kluwer Academic Publishers},
  address   = {Hingham, MA, USA},
  issn      = {1386-4564},
  doi       = {10.1023/A:1012730924277},
  url       = {http://portal.acm.org/citation.cfm?id=594035},
  id        = {1773173},
  priority  = {3},
  at        = {2007-10-16 09:12:16},
  biburl    = {http://www.bibsonomy.org/bibtex/20650620782b9dc8b2709cfc56a1258d8/pprett},
  keywords  = {supervised, relevance, algorithm, machine, theoretical feedback, rocchio, learning,}
}
% Yang, Yoo, Zhang and Kisiel (2005), SIGIR '05: robustness of adaptive filtering methods.
@inproceedings{citeulike:1815062,
  author    = {Yang, Yiming and Yoo, Shinjae and Zhang, Jian and Kisiel, Bryan},
  title     = {Robustness of adaptive filtering methods in a cross-benchmark evaluation},
  booktitle = {SIGIR '05: Proceedings of the 28th annual international ACM SIGIR conference on Research and development in information retrieval},
  pages     = {98--105},
  year      = {2005},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  isbn      = {1595930345},
  doi       = {10.1145/1076034.1076054},
  url       = {http://portal.acm.org/citation.cfm?id=1076034.1076054},
  id        = {1815062},
  priority  = {2},
  at        = {2007-10-24 12:13:22},
  biburl    = {http://www.bibsonomy.org/bibtex/2af719875009e748a430a5ca70fbffc68/pprett},
  keywords  = {relevance, machine, filtering, ir, information, retrieval adaptive, feedback, learning, classification,}
}
% Zheng, Chen, Sun and Zha (2007), SIGIR '07: regression framework for learning ranking functions.
@inproceedings{citeulike:1542210,
  author    = {Zheng, Zhaohui and Chen, Keke and Sun, Gordon and Zha, Hongyuan},
  title     = {A regression framework for learning ranking functions using relative relevance judgments},
  booktitle = {SIGIR '07: Proceedings of the 30th annual international ACM SIGIR conference on Research and development in information retrieval},
  pages     = {287--294},
  year      = {2007},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  isbn      = {9781595935977},
  doi       = {10.1145/1277741.1277792},
  url       = {http://portal.acm.org/citation.cfm?id=1277741.1277792},
  id        = {1542210},
  priority  = {2},
  at        = {2007-10-24 13:20:47},
  biburl    = {http://www.bibsonomy.org/bibtex/22c95e8c38b1069060872530845582463/pprett},
  keywords  = {relevance, machine, judgements, information, retrieval ranking, feedback, learning,}
}
% Witten and Frank (2005), second edition: Data Mining, Practical Machine Learning Tools and Techniques.
% month uses the predefined three-letter macro; the abstract keeps its original line breaks.
@book{citeulike:340715,
  author       = {Witten, Ian H. and Frank, Eibe},
  title        = {Data Mining: Practical Machine Learning Tools and Techniques},
  edition      = {Second},
  series       = {Morgan Kaufmann Series in Data Management Systems},
  publisher    = {Morgan Kaufmann},
  month        = jun,
  year         = {2005},
  howpublished = {Paperback},
  isbn         = {0120884070},
  url          = {http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20\&path=ASIN/0120884070},
  id           = {340715},
  priority     = {0},
  at           = {2007-10-25 12:31:16},
  abstract     = {As with any burgeoning technology that enjoys commercial attention, the use of data mining is surrounded by a great deal of hype. Exaggerated reports tell of secrets that can be uncovered by setting algorithms loose on oceans of data. But there is no magic in machine learning, no hidden power, no alchemy. Instead there is an identifiable body of practical techniques that can extract useful information from raw data. This book describes these techniques and shows how they work.
The book is a major revision of the first edition that appeared in 1999. While the basic core remains the same, it has been updated to reflect the changes that have taken place over five years, and now has nearly double the references. The highlights for the new edition include thirty new technique sections; an enhanced Weka machine learning workbench, which now features an interactive interface; comprehensive information on neural networks; a new section on Bayesian networks; plus much more.
+ Authors, Ian Witten and Eibe Frank, recipients of the 2005 ACM SIGKDD Service Award.
+ Algorithmic methods at the heart of successful data mining---including tried and true techniques as well as leading edge methods;
+ Performance improvement techniques that work by transforming the input or output;
+ Downloadable Weka, a collection of machine learning algorithms for data mining tasks, including tools for data pre-processing, classification, regression, clustering, association rules, and visualization---in a new, interactive interface.},
  biburl       = {http://www.bibsonomy.org/bibtex/257ade2d873735d4c54d44365dafa7605/pprett},
  keywords     = {data, clustering, weka machine, mining, learning, classification,}
}
% Radlinski and Joachims (2005), KDD '05: query chains, learning to rank from implicit feedback.
@inproceedings{citeulike:405459,
  author    = {Radlinski, Filip and Joachims, Thorsten},
  title     = {Query chains: learning to rank from implicit feedback},
  booktitle = {KDD '05: Proceeding of the eleventh ACM SIGKDD international conference on Knowledge discovery in data mining},
  pages     = {239--248},
  year      = {2005},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  isbn      = {159593135X},
  doi       = {10.1145/1081870.1081899},
  url       = {http://portal.acm.org/citation.cfm?id=1081870.1081899},
  id        = {405459},
  priority  = {0},
  at        = {2007-10-30 15:53:12},
  biburl    = {http://www.bibsonomy.org/bibtex/21dd9e335795fc4af50d27fda749f687c/pprett},
  keywords  = {query, learning2rank, machine, implicit, ml, ranking feedback, learning,}
}
% Beitzel et al. (2005), SIGIR '05: web query classification with labeled and unlabeled training data.
% Publisher is spelled "ACM Press", matching the other ACM proceedings entries in this file.
@inproceedings{citeulike:1856472,
  author    = {Beitzel, Steven M. and Jensen, Eric C. and Frieder, Ophir and Grossman, David and Lewis, David D. and Chowdhury, Abdur and Kolcz, Aleksandr},
  title     = {Automatic web query classification using labeled and unlabeled training data},
  booktitle = {SIGIR '05: Proceedings of the 28th annual international ACM SIGIR conference on Research and development in information retrieval},
  pages     = {581--582},
  year      = {2005},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  isbn      = {1595930345},
  doi       = {10.1145/1076034.1076138},
  url       = {http://portal.acm.org/citation.cfm?id=1076138},
  id        = {1856472},
  priority  = {0},
  at        = {2007-11-02 14:59:17},
  biburl    = {http://www.bibsonomy.org/bibtex/27139a8099874e32e2a636c930388f037/pprett},
  keywords  = {query, machine, mining, aol, learning, supervised log,}
}
% Grobelnik and Mladenic (1998): efficient text categorization.
% The accented letter is written as a brace-delimited BibTeX special character so sorting and labels treat it as one letter.
@misc{citeulike:1930553,
  author   = {Grobelnik, Marko and Mladeni{\'c}, Dunja},
  title    = {Efficient text categorization},
  year     = {1998},
  url      = {http://citeseer.ist.psu.edu/grobelnik98efficient.html},
  id       = {1930553},
  priority = {2},
  at       = {2007-11-17 13:42:45},
  abstract = {We present an approach to text categorization using machine learning techniques. The approach is developed and tested on large text hierarchy named Yahoo that is available on the Web. We handle the large number of features and training examples by taking into account hierarchical structure of examples and using feature subset selection for large text data. The large number of categories is handled separately for each testing example by pruning unpromising categories. In this way, the number of...},
  biburl   = {http://www.bibsonomy.org/bibtex/27fb066e201b2b5bf8ee139e5a9ec9b89/pprett},
  keywords = {n-gram, web, machine, categorization, text, learning, classification, yahoo}
}
% Sebastiani (2002), ACM Computing Surveys 34(1): machine learning in automated text categorization.
@article{citeulike:944913,
  author   = {Sebastiani, Fabrizio},
  title    = {Machine learning in automated text categorization},
  journal  = {ACM Computing Surveys},
  volume   = {34},
  number   = {1},
  pages    = {1--47},
  year     = {2002},
  url      = {http://citeseer.ist.psu.edu/518620.html},
  id       = {944913},
  priority = {0},
  at       = {2007-11-17 14:47:10},
  abstract = {The automated categorization (or classification) of texts into predefined categories has witnessed a booming interest in the last ten years, due to the increased availability of documents in digital form and the ensuing need to organize them. In the research community the dominant approach to this problem is based on machine learning techniques: a general inductive process automatically builds a classifier by learning, from a set of preclassified documents, the characteristics of the...},
  biburl   = {http://www.bibsonomy.org/bibtex/26f20e9c88ff79f653566789fce940c98/pprett},
  keywords = {text survey, nbc, machine, categorization, rocchio, learning, classification, svm, ann,}
}
% Domingos and Pazzani (1997), Machine Learning 29(2-3): optimality of the simple Bayesian classifier.
% The issue range uses a double hyphen and "Bayesian" is braced against sentence-casing.
@article{citeulike:1930646,
  author   = {Domingos, Pedro and Pazzani, Michael J.},
  title    = {On the Optimality of the Simple {Bayesian} Classifier under Zero-One Loss},
  journal  = {Machine Learning},
  volume   = {29},
  number   = {2--3},
  pages    = {103--130},
  year     = {1997},
  url      = {http://citeseer.ist.psu.edu/domingos97optimality.html},
  id       = {1930646},
  priority = {3},
  at       = {2007-11-17 14:54:31},
  abstract = {The simple Bayesian classifier is known to be optimal when attributes are independent given the class, but the question of whether other sufficient conditions for its optimality exist has so far not been explored. Empirical results showing that it performs surprisingly well in many domains containing clear attribute dependences suggest that the answer to this question may be positive. This article shows that, although the Bayesian classifier's probability estimates are only optimal under...},
  biburl   = {http://www.bibsonomy.org/bibtex/2b5e354bc45af3b7f03f5de97582226bd/pprett},
  keywords = {text nbc, machine, mining, learning,}
}
% Cavnar and Trenkle (1994), SDAIR-94: n-gram-based text categorization.
% The abstract keeps the hard line breaks of the original record.
@inproceedings{citeulike:1430,
  author    = {Cavnar, William B. and Trenkle, John M.},
  title     = {N-Gram-Based Text Categorization},
  booktitle = {Proceedings of SDAIR-94, 3rd Annual Symposium on Document Analysis and Information Retrieval},
  pages     = {161--175},
  year      = {1994},
  address   = {Las Vegas, US},
  url       = {http://citeseer.ist.psu.edu/68861.html},
  id        = {1430},
  priority  = {2},
  at        = {2007-11-17 15:39:57},
  abstract  = {Text categorization is a fundamental task in document
processing, allowing the automated handling
of enormous streams of documents in
electronic form. One difficulty in handling some
classes of documents is the presence of different
kinds of textual errors, such as spelling and
grammatical errors in email, and character recognition
errors in documents that come through
OCR. Text categorization must work reliably on
all input, and thus must tolerate some level of
these kinds of problems.
We...},
  biburl    = {http://www.bibsonomy.org/bibtex/2b2f4de70229df66d0ecb9b2e25844a61/pprett},
  keywords  = {n-gram, text machine, categorization, learning,}
}
% Heckerman (1995), Microsoft Research technical report: tutorial on learning with Bayesian networks.
% year holds the four-digit year only; "Bayesian" is capitalized and braced against sentence-casing.
@techreport{citeulike:142938,
  author      = {Heckerman, David},
  title       = {A tutorial on learning with {Bayesian} networks},
  institution = {Microsoft Research},
  address     = {Redmond, Washington},
  year        = {1995},
  url         = {http://citeseer.ist.psu.edu/41127.html},
  id          = {142938},
  priority    = {0},
  at          = {2007-11-18 14:31:53},
  comment     = {at least partially...},
  abstract    = {A Bayesian network is a graphical model that encodes probabilistic relationships among variables of interest. When used in conjunction with statistical techniques, the graphical model has several advantages for data analysis. One, because the model encodes dependencies among all variables, it readily handles situations where some data entries are missing. Two, a Bayesian network can be used to learn causal relationships, and hence can be used to gain understanding about a problem domain and to...},
  biburl      = {http://www.bibsonomy.org/bibtex/2b9b2b0573b14988138ce39d6f829ba2f/pprett},
  keywords    = {bayesian, machine, networks, probabilistic learning,}
}
% Fuernkranz (1998): study of n-gram features for text categorization.
% NOTE(review): the exported author was "F. Johannes", apparently the surname truncated at the umlaut
% with given name and surname swapped; restored here as Johannes Fuernkranz -- confirm against the report.
% The abstract keeps the hard line breaks of the original record.
@misc{citeulike:1952805,
  author   = {F{\"u}rnkranz, Johannes},
  title    = {A study using n-gram features for text categorization},
  year     = {1998},
  url      = {http://citeseer.ist.psu.edu/176994.html},
  id       = {1952805},
  priority = {0},
  at       = {2007-11-21 16:18:49},
  abstract = {In this paper, we study the effect of using n-grams (sequences of words of length n) for
text categorization. We use an efficient algorithm for generating such n-gram features in two
benchmark domains, the 20 newsgroups data set and 21,578 REUTERS newswire articles.
Our results with the rule learning algorithm RIPPER indicate that, after the removal of stop
words, word sequences of length 2 or 3 are most useful. Using longer sequences reduces
classification performance.
1 Introduction
After...},
  biburl   = {http://www.bibsonomy.org/bibtex/25ba0a2a2c2343196a59fe853a7b2675c/pprett},
  keywords = {n-gram, text machine, categorization, learning,}
}
% Guyon and Elisseeff (2003), Journal of Machine Learning Research 3: introduction to variable and feature selection.
% The journal is stored under its full name; the accent in the given name is a brace-delimited special character.
@article{citeulike:139944,
  author    = {Guyon, Isabelle and Elisseeff, Andr{\'e}},
  title     = {An introduction to variable and feature selection},
  journal   = {Journal of Machine Learning Research},
  volume    = {3},
  pages     = {1157--1182},
  year      = {2003},
  publisher = {MIT Press},
  issn      = {1533-7928},
  url       = {http://portal.acm.org/citation.cfm?id=944919.944968},
  id        = {139944},
  priority  = {0},
  at        = {2008-02-14 11:29:35},
  comment   = {The reference for feature selection. Discusses filter and wrapper methods. Based on a NIPS workshop. },
  biburl    = {http://www.bibsonomy.org/bibtex/25e95a6259355b17e7a0f266f68062304/pprett},
  keywords  = {feature, machine, reduction, dimensionality, selection learning,}
}