@comment{
  References on automatic email classification.
  Conventions used below: one field per line, names in Last, First form,
  only case-sensitive words braced in titles, LaTeX escapes instead of
  non-ASCII characters. The fields abstract, biburl, keywords, pdf and ee
  are annotations carried over from the original exports.
}

@comment{
  Shared proceedings title, so that both keys for the Cohen, Carvalho and
  Mitchell paper render the venue identically.
}
@string{emnlp2004proc = {Proceedings of the 2004 Conference on Empirical Methods in Natural Language Processing}}

@comment{
  Technical report, previously filed as an article with the report series
  in the journal field and the report number in the volume field.
}
@techreport{bekkerman2004ace,
  author      = {Bekkerman, R. and McCallum, A. and Huang, G.},
  title       = {Automatic Categorization of Email into Folders: Benchmark Experiments on {Enron} and {SRI} Corpora},
  institution = {Center for Intelligent Information Retrieval},
  number      = {IR-418},
  year        = {2004},
  abstract    = {Office workers everywhere are drowning in email---not only spam, but also large quantities of legitimate email to be read and organized for browsing. Although there have been extensive investigations of automatic document categorization, email gives rise to a number of unique challenges, and there has been relatively little study of classifying email into folders. This paper presents an extensive benchmark study of email foldering using two large corpora of real-world email messages and foldering schemes: one from former Enron employees, another from participants in an SRI research project. We discuss the challenges that arise from differences between email foldering and traditional document classification. We show experimental results from an array of automated classification methods and evaluation methodologies, including a new evaluation method of foldering results based on the email timeline, and including enhancements to the exponential gradient method Winnow, providing top-tier accuracy with a fraction the training time of alternative methods. We also establish that classification accuracy in many cases is relatively low, confirming the challenges of email data, and pointing toward email foldering as an important area for further research.},
  biburl      = {http://www.bibsonomy.org/bibtex/2c66fa858b58398f469558c5d85cf8a7a/cschenk},
  keywords    = {benchmark retrieval enron bayes automatic algorithms paper information sri email classification categorization folders winnow svm read:2008 ir},
}

@inproceedings{Cohen04,
  author    = {Cohen, William W. and Carvalho, Vitor R. and Mitchell, Tom M.},
  title     = {Learning to Classify Email into ``Speech Acts''},
  booktitle = emnlp2004proc,
  editor    = {Lin, Dekang and Wu, Dekai},
  pages     = {309--316},
  publisher = {Association for Computational Linguistics},
  address   = {Barcelona, Spain},
  month     = jul,
  year      = {2004},
  pdf       = {http://www.cs.cmu.edu/~vitor/publications/papers/cohen04emnlp.pdf},
  biburl    = {http://www.bibsonomy.org/bibtex/29e8556fddc7d34fc0438d5fb3e5d1373/mkroell},
  keywords  = {email classification application},
}

@comment{
  NOTE(review): the crossref target conf/ecir/2006 is not defined in this
  part of the file. Confirm that the parent proceedings entry exists and,
  for classic BibTeX, that it appears after this entry.
}
@inproceedings{conf/ecir/KeBO06,
  author    = {Ke, Shih-Wen and Bowerman, Chris and Oakes, Michael P.},
  title     = {{PERC}: A Personal Email Classifier},
  booktitle = {ECIR},
  editor    = {Lalmas, Mounia and MacFarlane, Andy and R{\"u}ger, Stefan M. and Tombros, Anastasios and Tsikrika, Theodora and Yavlinsky, Alexei},
  series    = {Lecture Notes in Computer Science},
  volume    = {3936},
  pages     = {460--463},
  publisher = {Springer},
  year      = {2006},
  date      = {2006-04-03},
  doi       = {10.1007/11735106_41},
  url       = {http://dblp.uni-trier.de/db/conf/ecir/ecir2006.html#KeBO06},
  ee        = {http://dx.doi.org/10.1007/11735106_41},
  isbn      = {3-540-33347-9},
  crossref  = {conf/ecir/2006},
  biburl    = {http://www.bibsonomy.org/bibtex/2a55c3582ecaeb41a28ac6ea499873163/renew},
  keywords  = {email classification},
}

@comment{
  A second entry with the key bekkerman2004ace used to follow here. It was a
  repeated key carrying a subset of the data in the entry above (same title,
  authors and year, keywords email, enron and classification), so it has been
  merged into that entry. Its BibSonomy record was
  http://www.bibsonomy.org/bibtex/2c66fa858b58398f469558c5d85cf8a7a/renew
}

@comment{
  Same paper as Cohen04. The key is retained so that existing citations of
  cohen2004 still resolve; the bibliographic fields mirror Cohen04. Prefer
  citing one key consistently within a document.
}
@inproceedings{cohen2004,
  author    = {Cohen, William W. and Carvalho, Vitor R. and Mitchell, Tom M.},
  title     = {Learning to Classify Email into ``Speech Acts''},
  booktitle = emnlp2004proc,
  editor    = {Lin, Dekang and Wu, Dekai},
  pages     = {309--316},
  publisher = {Association for Computational Linguistics},
  address   = {Barcelona, Spain},
  month     = jul,
  year      = {2004},
  biburl    = {http://www.bibsonomy.org/bibtex/2d70cce73894d55c11615ed4369a966ac/wnpxrz},
  keywords  = {email classification},
}