% Bibliography entries on topic models, topic detection and related text mining.
% The added-at, biburl, description, interhash, intrahash, keywords and timestamp
% fields are export bookkeeping; standard styles ignore unknown fields.
% Layout: one field per line, all values braced, names in "von Last, First" form.

% arXiv preprint. The arXiv identifier is stored in eprint/archiveprefix
% (it was previously free text in a note field).
@misc{Rubin2011,
  abstract      = {Machine learning approaches to multi-label document classification have (to date) largely relied on discriminative modeling techniques such as support vector machines. A drawback of these approaches is that performance rapidly drops off as the total number of labels and the number of labels per document increase. This problem is amplified when the label frequencies exhibit the type of highly skewed distributions that are often observed in real-world datasets. In this paper we investigate a class of generative statistical topic models for multi-label documents that associate individual word tokens with different labels. We investigate the advantages of this approach relative to discriminative models, particularly with respect to classification problems involving large numbers of relatively rare labels. We compare the performance of generative and discriminative approaches on document labeling tasks ranging from datasets with several thousand labels to datasets with tens of labels. The experimental results indicate that generative models can achieve competitive multi-label classification performance compared to discriminative methods, and have advantages for datasets with many labels and skewed label frequencies.},
  added-at      = {2011-09-14T08:20:38.000+0200},
  archiveprefix = {arXiv},
  author        = {Rubin, Timothy N. and Chambers, America and Smyth, Padhraic and Steyvers, Mark},
  biburl        = {http://www.bibsonomy.org/bibtex/2f8a5a3958ae264d19c7f5415eb7f0bce/hotho},
  description   = {Statistical Topic Models for Multi-Label Document Classification},
  eprint        = {1107.2462},
  interhash     = {e09d5d8587756d460a5d834025e75aac},
  intrahash     = {f8a5a3958ae264d19c7f5415eb7f0bce},
  keywords      = {mining model text tm topic toread},
  timestamp     = {2011-09-14T08:20:38.000+0200},
  title         = {Statistical Topic Models for Multi-Label Document Classification},
  url           = {http://arxiv.org/abs/1107.2462},
  year          = {2011}
}

% Second author is written "Leung, Ho-fung" so that "fung" is not parsed as a
% lowercase von-particle of the surname. Booktitle spelling corrected
% ("Joint", "Artificial").
@inproceedings{1661779,
  abstract    = {A folksonomy refers to a collection of user-defined tags with which users describe contents published on the Web. With the flourish of Web 2.0, folksonomies have become an important mean to develop the Semantic Web. Because tags in folksonomies are authored freely, there is a need to understand the structure and semantics of these tags in various applications. In this paper, we propose a learning approach to create an ontology that captures the hierarchical semantic structure of folksonomies. Our experimental results on two different genres of real world data sets show that our method can effectively learn the ontology structure from the folksonomies.},
  added-at    = {2009-12-23T18:06:56.000+0100},
  address     = {San Francisco, CA, USA},
  author      = {Tang, Jie and Leung, Ho-fung and Luo, Qiong and Chen, Dewei and Gong, Jibin},
  biburl      = {http://www.bibsonomy.org/bibtex/27b335f08a288a79eb70eff89f1ec7630/hotho},
  booktitle   = {IJCAI'09: Proceedings of the 21st International Joint Conference on Artificial Intelligence},
  description = {Towards ontology learning from folksonomies},
  interhash   = {17f95a6ba585888cf45443926d8b7e98},
  intrahash   = {7b335f08a288a79eb70eff89f1ec7630},
  keywords    = {folksonomy learning model ol tagging taggingsurvey topic toread},
  location    = {Pasadena, California, USA},
  pages       = {2089--2094},
  publisher   = {Morgan Kaufmann Publishers Inc.},
  timestamp   = {2009-12-23T18:06:56.000+0100},
  title       = {Towards ontology learning from folksonomies},
  url         = {http://ijcai.org/papers09/Papers/IJCAI09-344.pdf},
  year        = {2009}
}

% The accent is written as a BibTeX special character ({\'a}) so sorting and
% label generation treat it as a single letter. pages repeats the article
% number (eid) and is braced to keep the leading zero.
@article{carpena:035102,
  added-at    = {2009-04-10T19:01:21.000+0200},
  author      = {Carpena, P. and Bernaola-Galv{\'a}n, P. and Hackenberg, M. and Coronado, A. V. and Oliver, J. L.},
  biburl      = {http://www.bibsonomy.org/bibtex/234dcb1eee3ffa31ff4eb77087343c146/hotho},
  description = {Level statistics of words: Finding keywords in literary texts and symbolic sequences},
  doi         = {10.1103/PhysRevE.79.035102},
  eid         = {035102},
  interhash   = {3444159872c65ea89d007d1838686acc},
  intrahash   = {34dcb1eee3ffa31ff4eb77087343c146},
  journal     = {Physical Review E (Statistical, Nonlinear, and Soft Matter Physics)},
  keywords    = {analysis extraction keyword statistical text tm topic toread},
  number      = {3},
  numpages    = {4},
  pages       = {035102},
  publisher   = {APS},
  timestamp   = {2009-04-10T19:01:21.000+0200},
  title       = {Level statistics of words: Finding keywords in literary texts and symbolic sequences},
  url         = {http://bioinfo2.ugr.es/TextKeywords/},
  volume      = {79},
  year        = {2009}
}

% First editor is written "De Raedt, Luc" so the particle stays with the surname.
@inproceedings{IfrimTW-ICML2005,
  added-at    = {2008-07-01T15:19:39.000+0200},
  address     = {Bonn, Germany},
  author      = {Ifrim, Georgiana and Theobald, Martin and Weikum, Gerhard},
  biburl      = {http://www.bibsonomy.org/bibtex/257f8241941ed979455c3dbb90893020f/hotho},
  booktitle   = {Proceedings of the 22nd International Conference on Machine Learning - Learning in Web Search (LWS 2005)},
  description = {D5 MPI-INF Publications: Proceedings Article: Learning Word-to-Concept Mappings for Automatic Text Classification},
  editor      = {De Raedt, Luc and Wrobel, Stefan},
  interhash   = {a54c4070e0fb55f5a084a0f088230a65},
  intrahash   = {57f8241941ed979455c3dbb90893020f},
  isbn        = {1-59593-180-5},
  keywords    = {classification concept model tc text topic wordnet},
  pages       = {18--26},
  timestamp   = {2008-07-01T15:19:39.000+0200},
  title       = {Learning Word-to-Concept Mappings for Automatic Text Classification},
  url         = {http://www.mpi-inf.mpg.de/~ifrim/publications/icml-lws05.pdf},
  year        = {2005}
}

% month uses the predefined lowercase macro; PageRank is brace-protected so
% sentence-casing styles keep its capitalisation.
@inproceedings{haveliwala02topicsensitive,
  added-at    = {2006-11-02T10:06:26.000+0100},
  address     = {Honolulu, Hawaii},
  author      = {Haveliwala, Taher H.},
  biburl      = {http://www.bibsonomy.org/bibtex/2c056611effc0d18aae71a6d535ff6c5a/hotho},
  booktitle   = {Proceedings of the Eleventh International World Wide Web Conference},
  description = {Topic-Sensitive PageRank - Haveliwala (ResearchIndex)},
  interhash   = {29a20afd5026732686509987f603d33d},
  intrahash   = {c056611effc0d18aae71a6d535ff6c5a},
  keywords    = {pagerank toread topic},
  month       = may,
  timestamp   = {2006-11-02T10:06:26.000+0100},
  title       = {Topic-sensitive {PageRank}},
  url         = {http://citeseer.csail.mit.edu/haveliwala02topicsensitive.html},
  year        = {2002}
}

% The url previously had no scheme; http:// was added so it is a well-formed URL.
% NOTE(review): the entry has no issue number, and the isbn on an article looks
% like an export artefact -- confirm against the publisher record.
@article{Chakrabartietal99,
  added-at  = {2006-09-12T09:28:58.000+0200},
  author    = {Chakrabarti, S. and van den Berg, M. and Dom, B.},
  biburl    = {http://www.bibsonomy.org/bibtex/2004dd97a2b2e71fa2cfe6820c74c9701/hotho},
  interhash = {e35ac8e9c02ab2a5075b9c1692ac7a2d},
  intrahash = {004dd97a2b2e71fa2cfe6820c74c9701},
  isbn      = {90-74821-43-X},
  journal   = {Computer Networks},
  keywords  = {crawling focused topic},
  pages     = {1623--1640},
  timestamp = {2006-09-12T09:28:58.000+0200},
  title     = {Focused Crawling: A New Approach to Topic-Specific Web Resource Discovery},
  url       = {http://citeseer.nj.nec.com/chakrabarti99focused.html},
  volume    = {31},
  year      = {1999}
}

% A chapter with its own author inside an edited book (title + booktitle +
% editor), so the entry type is incollection rather than inbook.
@incollection{kleinberg2006temporal,
  added-at  = {2006-02-11T13:42:47.000+0100},
  author    = {Kleinberg, J.},
  biburl    = {http://www.bibsonomy.org/bibtex/29c57003d80b81eab2f66b2faf02acb27/hotho},
  booktitle = {Data Stream Management: Processing High-Speed Data Streams},
  editor    = {Garofalakis, M. and Gehrke, J. and Rastogi, R.},
  interhash = {85abe180184277c0396745c7ce050c98},
  intrahash = {9c57003d80b81eab2f66b2faf02acb27},
  isbn      = {3540286071},
  keywords  = {techniques topic survey data detection stream **** analysis temporal trend},
  publisher = {Springer},
  timestamp = {2006-02-11T13:42:47.000+0100},
  title     = {Temporal Dynamics of On-Line Information Streams},
  url       = {http://www.cs.cornell.edu/home/kleinber/stream-survey04.pdf},
  year      = {2006}
}

% journal, volume, number, pages and doi were missing from this article entry.
% Volume 101, Suppl 1 and first page 5228 match the url below.
% NOTE(review): journal name, end page and doi were filled in from the
% published record -- verify before relying on them.
@article{griffiths2004finding,
  added-at  = {2006-02-09T13:03:23.000+0100},
  author    = {Griffiths, Thomas L. and Steyvers, Mark},
  biburl    = {http://www.bibsonomy.org/bibtex/2cbfda2e50bd63357890b9181d8883826/hotho},
  doi       = {10.1073/pnas.0307752101},
  interhash = {387a5060792d52ea73b02dd68e52559e},
  intrahash = {cbfda2e50bd63357890b9181d8883826},
  journal   = {Proceedings of the National Academy of Sciences},
  keywords  = {topic time detection trend series ml},
  number    = {Suppl 1},
  pages     = {5228--5235},
  timestamp = {2006-02-09T13:03:23.000+0100},
  title     = {Finding scientific topics},
  url       = {http://www.pnas.org/cgi/content/abstract/101/suppl_1/5228},
  volume    = {101},
  year      = {2004}
}

% First author is written "Le Grand, B." so the surname is not split.
% NOTE(review): booktitle is a LaTeX citation of the proceedings entry
% WS_SHB01, which is not visible in this part of the file. It is kept as is;
% switch to a crossref field only after confirming that the parent entry exists.
@inproceedings{WS_gs01,
  added-at  = {2005-12-20T20:21:42.000+0100},
  author    = {Le Grand, B. and Soto, M.},
  biburl    = {http://www.bibsonomy.org/bibtex/2cc72df61f4c0de369a4018ec02edffcb/hotho},
  booktitle = {\cite{WS_SHB01}},
  interhash = {8f0a73989e17b35dff4928d498475841},
  intrahash = {cc72df61f4c0de369a4018ec02edffcb},
  keywords  = {web maps topic semantic mining xml},
  location  = {Aix-en-Provence, France},
  pages     = {67--83},
  timestamp = {2005-12-20T20:21:42.000+0100},
  title     = {{XML} Topic Maps and Semantic Web Mining},
  year      = {2001}
}