The hierarchical Dirichlet process (HDP) has become an important Bayesian nonparametric model for grouped data, such as document collections. The HDP is used to construct a flexible mixed-membership model where the number of components is determined by the data. As for most Bayesian nonparametric models, exact posterior inference is intractable---practitioners use Markov chain Monte Carlo (MCMC) or variational inference. Inspired by the split-merge MCMC algorithm for the Dirichlet process (DP) mixture model, we describe a novel split-merge MCMC sampling algorithm for posterior inference in the HDP. We study its properties on both synthetic data and text corpora. We find that split-merge MCMC for the HDP can provide significant improvements over traditional Gibbs sampling, and we give some understanding of the data properties that give rise to larger improvements.
%0 Journal Article
%1 wang_split-merge_2012
%A Wang, Chong
%A Blei, David M.
%D 2012
%J arXiv:1201.1657
%K Bayesian inference, Machine Learning algorithms, Markov chain Monte Carlo, hierarchical dirichlet model
%T A Split-Merge MCMC Algorithm for the Hierarchical Dirichlet Process
%U http://arxiv.org/abs/1201.1657
%X The hierarchical Dirichlet process (HDP) has become an important Bayesian nonparametric model for grouped data, such as document collections. The HDP is used to construct a flexible mixed-membership model where the number of components is determined by the data. As for most Bayesian nonparametric models, exact posterior inference is intractable---practitioners use Markov chain Monte Carlo (MCMC) or variational inference. Inspired by the split-merge MCMC algorithm for the Dirichlet process (DP) mixture model, we describe a novel split-merge MCMC sampling algorithm for posterior inference in the HDP. We study its properties on both synthetic data and text corpora. We find that split-merge MCMC for the HDP can provide significant improvements over traditional Gibbs sampling, and we give some understanding of the data properties that give rise to larger improvements.
@article{wang_split-merge_2012,
  abstract      = {The hierarchical Dirichlet process (HDP) has become an important Bayesian nonparametric model for grouped data, such as document collections. The HDP is used to construct a flexible mixed-membership model where the number of components is determined by the data. As for most Bayesian nonparametric models, exact posterior inference is intractable---practitioners use Markov chain Monte Carlo (MCMC) or variational inference. Inspired by the split-merge MCMC algorithm for the Dirichlet process (DP) mixture model, we describe a novel split-merge MCMC sampling algorithm for posterior inference in the HDP. We study its properties on both synthetic data and text corpora. We find that split-merge MCMC for the HDP can provide significant improvements over traditional Gibbs sampling, and we give some understanding of the data properties that give rise to larger improvements.},
  added-at      = {2017-01-09T13:57:26.000+0100},
  archiveprefix = {arXiv},
  author        = {Wang, Chong and Blei, David M.},
  biburl        = {https://www.bibsonomy.org/bibtex/2fe0e92bb0916fe706a102cd7469c532a/yourwelcome},
  eprint        = {1201.1657},
  interhash     = {3a6cf6c00e8c07faaac28867e64e53ec},
  intrahash     = {fe0e92bb0916fe706a102cd7469c532a},
  journal       = {arXiv:1201.1657},
  keywords      = {Bayesian inference, Machine Learning algorithms, Markov chain Monte Carlo, hierarchical dirichlet model},
  month         = jan,
  timestamp     = {2017-01-09T14:01:11.000+0100},
  title         = {A Split-Merge {MCMC} Algorithm for the Hierarchical {Dirichlet} Process},
  url           = {http://arxiv.org/abs/1201.1657},
  urldate       = {2012-06-11},
  year          = {2012}
}