Both document clustering and word clustering are important and well-studied problems. By using the vector space model, a document collection may be represented as a word-document matrix. In this paper, we present the novel idea of modeling the document collection as a bipartite graph between documents and words. Using this model, we pose the clustering probliem as a graph partitioning problem and give a new spectral algorithm that simultaneously yields a clustering of documents and words. This co-clustrering algorithm uses the second left and right singular vectors of an appropriately scaled word-document matrix to yield good bipartitionings. In fact, it can be shown that these singular vectors give a real relaxation to the optimal solution of the graph bipartitioning problem. We present several experimental results to verify that the resulting co-clustering algoirhm works well in practice and is robust in the presence of noise.
Description
Co-clustering documents and words using Bipartite Spectral Graph Partitioning
%0 Conference Paper
%1 Dhillon01co-clusteringdocuments
%A Dhillon, Inderjit S.
%D 2001
%K 2009 Co-Clustering bipartite clustering graph partitioning seminar
%P 269--274
%T Co-clustering documents and words using Bipartite Spectral Graph Partitioning
%U http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.16.7826
%X Both document clustering and word clustering are important and well-studied problems. By using the vector space model, a document collection may be represented as a word-document matrix. In this paper, we present the novel idea of modeling the document collection as a bipartite graph between documents and words. Using this model, we pose the clustering probliem as a graph partitioning problem and give a new spectral algorithm that simultaneously yields a clustering of documents and words. This co-clustrering algorithm uses the second left and right singular vectors of an appropriately scaled word-document matrix to yield good bipartitionings. In fact, it can be shown that these singular vectors give a real relaxation to the optimal solution of the graph bipartitioning problem. We present several experimental results to verify that the resulting co-clustering algoirhm works well in practice and is robust in the presence of noise.
@inproceedings{Dhillon01co-clusteringdocuments,
abstract = {Both document clustering and word clustering are important and well-studied problems. By using the vector space model, a document collection may be represented as a word-document matrix. In this paper, we present the novel idea of modeling the document collection as a bipartite graph between documents and words. Using this model, we pose the clustering probliem as a graph partitioning problem and give a new spectral algorithm that simultaneously yields a clustering of documents and words. This co-clustrering algorithm uses the second left and right singular vectors of an appropriately scaled word-document matrix to yield good bipartitionings. In fact, it can be shown that these singular vectors give a real relaxation to the optimal solution of the graph bipartitioning problem. We present several experimental results to verify that the resulting co-clustering algoirhm works well in practice and is robust in the presence of noise.},
added-at = {2009-12-14T01:11:04.000+0100},
author = {Dhillon, Inderjit S.},
biburl = {https://www.bibsonomy.org/bibtex/2384b88529265956f21f1e7498d68066c/r.b.},
description = {Co-clustering documents and words using Bipartite Spectral Graph Partitioning},
interhash = {d112b6041dab4b4677368c7bf101f1ee},
intrahash = {384b88529265956f21f1e7498d68066c},
keywords = {2009 Co-Clustering bipartite clustering graph partitioning seminar},
pages = {269--274},
timestamp = {2009-12-14T01:11:04.000+0100},
title = {Co-clustering documents and words using Bipartite Spectral Graph Partitioning},
url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.16.7826},
year = 2001
}