J. Kang, J. Ma, and Y. Liu. (2013). arXiv:1301.5686. Comment: 2012 SIAM International Conference on Data Mining (SDM12), pages 564--575.
Abstract
The increasing volume of short texts generated on social media sites, such as
Twitter or Facebook, creates a great demand for effective and efficient topic
modeling approaches. While latent Dirichlet allocation (LDA) can be applied, it
is not optimal due to its weakness in handling short texts with fast-changing
topics and scalability concerns. In this paper, we propose a transfer learning
approach that utilizes abundant labeled documents from other domains (such as
Yahoo! News or Wikipedia) to improve topic modeling, with better model fitting
and result interpretation. Specifically, we develop Transfer Hierarchical LDA
(thLDA) model, which incorporates the label information from other domains via
informative priors. In addition, we develop a parallel implementation of our
model for large-scale applications. We demonstrate the effectiveness of our
thLDA model on both a microblogging dataset and standard text collections
including AP and RCV1 datasets.
%0 Generic
%1 kang2013transfer
%A Kang, Jeon-Hyung
%A Ma, Jun
%A Liu, Yan
%D 2013
%K CRP Hierarchical LDA thLDA topicModel
%T Transfer Topic Modeling with Ease and Scalability
%U http://arxiv.org/abs/1301.5686
%X The increasing volume of short texts generated on social media sites, such as
Twitter or Facebook, creates a great demand for effective and efficient topic
modeling approaches. While latent Dirichlet allocation (LDA) can be applied, it
is not optimal due to its weakness in handling short texts with fast-changing
topics and scalability concerns. In this paper, we propose a transfer learning
approach that utilizes abundant labeled documents from other domains (such as
Yahoo! News or Wikipedia) to improve topic modeling, with better model fitting
and result interpretation. Specifically, we develop Transfer Hierarchical LDA
(thLDA) model, which incorporates the label information from other domains via
informative priors. In addition, we develop a parallel implementation of our
model for large-scale applications. We demonstrate the effectiveness of our
thLDA model on both a microblogging dataset and standard text collections
including AP and RCV1 datasets.
@misc{kang2013transfer,
  abstract      = {The increasing volume of short texts generated on social media sites, such as
Twitter or Facebook, creates a great demand for effective and efficient topic
modeling approaches. While latent Dirichlet allocation (LDA) can be applied, it
is not optimal due to its weakness in handling short texts with fast-changing
topics and scalability concerns. In this paper, we propose a transfer learning
approach that utilizes abundant labeled documents from other domains (such as
Yahoo! News or Wikipedia) to improve topic modeling, with better model fitting
and result interpretation. Specifically, we develop Transfer Hierarchical LDA
(thLDA) model, which incorporates the label information from other domains via
informative priors. In addition, we develop a parallel implementation of our
model for large-scale applications. We demonstrate the effectiveness of our
thLDA model on both a microblogging dataset and standard text collections
including AP and RCV1 datasets.},
  added-at      = {2013-01-31T19:55:00.000+0100},
  archiveprefix = {arXiv},
  author        = {Kang, Jeon-Hyung and Ma, Jun and Liu, Yan},
  biburl        = {https://www.bibsonomy.org/bibtex/2feec2161786a17fbad8c98bf45f42fc4/jeonhyuk},
  description   = {Transfer Topic Modeling with Ease and Scalability},
  eprint        = {1301.5686},
  interhash     = {fdf4c076fd5dde66a7224cd4a221e865},
  intrahash     = {feec2161786a17fbad8c98bf45f42fc4},
  keywords      = {CRP Hierarchical LDA thLDA topicModel},
  note          = {Also appeared in: Proceedings of the 2012 SIAM International Conference on Data Mining (SDM12), pages 564--575},
  timestamp     = {2013-01-31T19:55:00.000+0100},
  title         = {Transfer Topic Modeling with Ease and Scalability},
  url           = {http://arxiv.org/abs/1301.5686},
  year          = {2013},
}