We show that the singular value decomposition of a term similarity matrix induces a term hierarchy. This decomposition, used in Latent Semantic Analysis and Principal Component Analysis for text, aims at identifying "concepts" that can be used in place of the terms appearing in the documents. Unlike terms, concepts are by construction uncorrelated and hence are less sensitive to the particular vocabulary used in documents. In this work, we explore the relation between terms and concepts and show that for each term there exists a latent subspace dimension for which the term coincides with a concept. By varying the number of dimensions, terms similar but more specific than the concept can be identified, leading to a term hierarchy.
%0 Journal Article
%1 dupret2006principal
%A Dupret, Georges
%A Piwowarski, Benjamin
%D 2006
%J String Processing and Information Retrieval
%K imported ol_web2.0 methods_concepthierarchy
%P 37--48
%T Principal Components for Automatic Term Hierarchy Building
%U http://dx.doi.org/10.1007/11880561_4
%X We show that the singular value decomposition of a term similarity matrix induces a term hierarchy. This decomposition, used in Latent Semantic Analysis and Principal Component Analysis for text, aims at identifying "concepts" that can be used in place of the terms appearing in the documents. Unlike terms, concepts are by construction uncorrelated and hence are less sensitive to the particular vocabulary used in documents. In this work, we explore the relation between terms and concepts and show that for each term there exists a latent subspace dimension for which the term coincides with a concept. By varying the number of dimensions, terms similar but more specific than the concept can be identified, leading to a term hierarchy.
@comment{dupret2006principal: the DOI suffix "_4" and the description
  "SpringerLink - Buchkapitel" (book chapter) indicate a paper inside a
  proceedings volume, so the venue is recorded in booktitle rather than journal.
  NOTE(review): series/volume of the proceedings are not visible here; add them
  once confirmed against the publisher page.}
@inproceedings{dupret2006principal,
  author      = {Dupret, Georges and Piwowarski, Benjamin},
  title       = {Principal Components for Automatic Term Hierarchy Building},
  booktitle   = {String Processing and Information Retrieval},
  pages       = {37--48},
  year        = {2006},
  doi         = {10.1007/11880561_4},
  url         = {http://dx.doi.org/10.1007/11880561_4},
  abstract    = {We show that the singular value decomposition of a term similarity matrix induces a term hierarchy. This decomposition, used in Latent Semantic Analysis and Principal Component Analysis for text, aims at identifying ``concepts'' that can be used in place of the terms appearing in the documents. Unlike terms, concepts are by construction uncorrelated and hence are less sensitive to the particular vocabulary used in documents. In this work, we explore the relation between terms and concepts and show that for each term there exists a latent subspace dimension for which the term coincides with a concept. By varying the number of dimensions, terms similar but more specific than the concept can be identified, leading to a term hierarchy.},
  keywords    = {imported ol_web2.0 methods_concepthierarchy},
  description = {SpringerLink - Buchkapitel},
  file        = {:bast06-principal.pdf:PDF;dupret2006principal.pdf:dupret2006principal.pdf:PDF},
  groups      = {public},
  journalpub  = {1},
  added-at    = {2011-02-17T17:42:03.000+0100},
  timestamp   = {2013-07-31T15:39:42.000+0200},
  biburl      = {https://www.bibsonomy.org/bibtex/2b1f3cfc1d060423e224db9a0b0cdbbe6/dbenz},
  interhash   = {c1a309fb28731d35121b505f60e89ef1},
  intrahash   = {b1f3cfc1d060423e224db9a0b0cdbbe6},
  username    = {dbenz},
}