| Authors: |
M. Steyvers
and T. Griffiths
|
| Editors: |
T. Landauer
and D. McNamara
and S. Dennis
and W. Kintsch
|
| URL: |
http://psiexp.ss.uci.edu/research/papers/SteyversGriffithsLSABookFormatted.pdf |
| Tags: |
generative
modeling
statistics
|
| Abstract: |
Many chapters in this book illustrate that applying a statistical method such as Latent Semantic Analysis (LSA;
Landauer \& Dumais, 1997; Landauer, Foltz, \& Laham, 1998) to large databases can yield insight into human
cognition. The LSA approach makes three claims: that semantic information can be derived from a word-document
co-occurrence matrix; that dimensionality reduction is an essential part of this derivation; and that words and
documents can be represented as points in Euclidean space. In this chapter, we pursue an approach that is consistent
with the first two of these claims, but differs in the third, describing a class of statistical models in which the
semantic properties of words and documents are expressed in terms of probabilistic topics. |
% Book chapter on probabilistic topic models (CiteULike export, article id 383010).
@incollection{citeulike:383010,
  author               = {Steyvers, M. and Griffiths, T.},
  title                = {Probabilistic Topic Models},
  booktitle            = {Latent Semantic Analysis: A Road to Meaning},
  editor               = {Landauer, T. and McNamara, D. and Dennis, S. and Kintsch, W.},
  publisher            = {Lawrence Erlbaum},
  year                 = {2005},
  url                  = {http://psiexp.ss.uci.edu/research/papers/SteyversGriffithsLSABookFormatted.pdf},
  abstract             = {Many chapters in this book illustrate that applying a statistical method such as Latent Semantic Analysis (LSA;
Landauer \& Dumais, 1997; Landauer, Foltz, \& Laham, 1998) to large databases can yield insight into human
cognition. The LSA approach makes three claims: that semantic information can be derived from a word-document
co-occurrence matrix; that dimensionality reduction is an essential part of this derivation; and that words and
documents can be represented as points in Euclidean space. In this chapter, we pursue an approach that is consistent
with the first two of these claims, but differs in the third, describing a class of statistical models in which the
semantic properties of words and documents are expressed in terms of probabilistic topics.},
  comment              = {Overview about topic/author inference
---
More info on http://oz.ss.uci.edu/237/
e.g. slides http://oz.ss.uci.edu/237/lectures/topics.pdf
slides on LSA
http://lsa.colorado.edu/~quesadaj/pdf/LSATutorial.pdf},
  keywords             = {generative, modeling, statistics},
  priority             = {0},
  citeulike-article-id = {383010},
}