D. Newman, J. Lau, K. Grieser, and T. Baldwin. Human Language Technologies: The 2010 Annual Conference of the North American Chapter of the Association for Computational Linguistics, pages 100--108. Stroudsburg, PA, USA, Association for Computational Linguistics, (2010)
Abstract
This paper introduces the novel task of topic coherence evaluation, whereby a set of words, as generated by a topic model, is rated for coherence or interpretability. We apply a range of topic scoring models to the evaluation task, drawing on WordNet, Wikipedia and the Google search engine, and existing research on lexical similarity/relatedness. In comparison with human scores for a set of learned topics over two distinct datasets, we show a simple co-occurrence measure based on pointwise mutual information over Wikipedia data is able to achieve results for the task at or nearing the level of inter-annotator correlation, and that other Wikipedia-based lexical relatedness methods also achieve strong results. Google produces strong, if less consistent, results, while our results over WordNet are patchy at best.
%0 Conference Paper
%1 NewmanD2010a
%A Newman, David
%A Lau, Jey Han
%A Grieser, Karl
%A Baldwin, Timothy
%B Human Language Technologies: The 2010 Annual Conference of the North American Chapter of the Association for Computational Linguistics
%C Stroudsburg, PA, USA
%D 2010
%I Association for Computational Linguistics
%K LDA
%P 100--108
%T Automatic Evaluation of Topic Coherence
%U http://dl.acm.org/citation.cfm?id=1857999.1858011
%X This paper introduces the novel task of topic coherence evaluation, whereby a set of words, as generated by a topic model, is rated for coherence or interpretability. We apply a range of topic scoring models to the evaluation task, drawing on WordNet, Wikipedia and the Google search engine, and existing research on lexical similarity/relatedness. In comparison with human scores for a set of learned topics over two distinct datasets, we show a simple co-occurrence measure based on pointwise mutual information over Wikipedia data is able to achieve results for the task at or nearing the level of inter-annotator correlation, and that other Wikipedia-based lexical relatedness methods also achieve strong results. Google produces strong, if less consistent, results, while our results over WordNet are patchy at best.
%@ 1-932432-65-5
@inproceedings{NewmanD2010a,
  author      = {Newman, David and Lau, Jey Han and Grieser, Karl and Baldwin, Timothy},
  title       = {Automatic Evaluation of Topic Coherence},
  booktitle   = {Human Language Technologies: The 2010 Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  series      = {HLT '10},
  pages       = {100--108},
  numpages    = {9},
  year        = {2010},
  location    = {Los Angeles, California},
  address     = {Stroudsburg, PA, USA},
  publisher   = {Association for Computational Linguistics},
  isbn        = {1-932432-65-5},
  url         = {http://dl.acm.org/citation.cfm?id=1857999.1858011},
  acmid       = {1858011},
  keywords    = {LDA},
  abstract    = {This paper introduces the novel task of topic coherence evaluation, whereby a set of words, as generated by a topic model, is rated for coherence or interpretability. We apply a range of topic scoring models to the evaluation task, drawing on WordNet, Wikipedia and the Google search engine, and existing research on lexical similarity/relatedness. In comparison with human scores for a set of learned topics over two distinct datasets, we show a simple co-occurrence measure based on pointwise mutual information over Wikipedia data is able to achieve results for the task at or nearing the level of inter-annotator correlation, and that other Wikipedia-based lexical relatedness methods also achieve strong results. Google produces strong, if less consistent, results, while our results over WordNet are patchy at best.},
  description = {Automatic evaluation of topic coherence},
  added-at    = {2014-02-14T18:29:17.000+0100},
  timestamp   = {2014-02-14T18:29:17.000+0100},
  biburl      = {https://www.bibsonomy.org/bibtex/27d20bf815d145f07a72a637ac47de24f/lopusz_kdd},
  interhash   = {97d4db3b77fac4173beedcd9fa45605f},
  intrahash   = {7d20bf815d145f07a72a637ac47de24f},
}