We present a novel approach to the automatic acquisition of taxonomies or concept hierarchies
from a text corpus. The approach is based on Formal Concept Analysis (FCA), a method mainly
used for the analysis of data, i.e. for investigating and processing explicitly given information. We
follow Harris' distributional hypothesis and model the context of a certain term as a vector representing
syntactic dependencies which are automatically acquired from the text corpus with a linguistic
parser. On the basis of this context information, FCA produces a lattice that we convert into
a special kind of partial order constituting a concept hierarchy. The approach is evaluated by comparing
the resulting concept hierarchies with hand-crafted taxonomies for two domains: tourism
and finance. We also directly compare our approach with hierarchical agglomerative clustering as
well as with Bi-Section-KMeans as an instance of a divisive clustering algorithm. Furthermore, we
investigate the impact of using different measures weighting the contribution of each attribute as
well as of applying a particular smoothing technique to cope with data sparseness.
%0 Journal Article
%1 paper:cimiano:2005
%A Cimiano, Philipp
%A Hotho, Andreas
%A Staab, Steffen
%D 2005
%I AAAI Press
%J Journal of Artificial Intelligence Research (JAIR)
%K 2005 analysis folksonomy graphs networks to-read
%P 305-339
%T Learning Concept Hierarchies from Text Corpora using Formal Concept Analysis
%U http://www.jair.org/media/1648/live-1648-2403-jair.pdf
%V 24
%X We present a novel approach to the automatic acquisition of taxonomies or concept hierarchies
from a text corpus. The approach is based on Formal Concept Analysis (FCA), a method mainly
used for the analysis of data, i.e. for investigating and processing explicitly given information. We
follow Harris' distributional hypothesis and model the context of a certain term as a vector representing
syntactic dependencies which are automatically acquired from the text corpus with a linguistic
parser. On the basis of this context information, FCA produces a lattice that we convert into
a special kind of partial order constituting a concept hierarchy. The approach is evaluated by comparing
the resulting concept hierarchies with hand-crafted taxonomies for two domains: tourism
and finance. We also directly compare our approach with hierarchical agglomerative clustering as
well as with Bi-Section-KMeans as an instance of a divisive clustering algorithm. Furthermore, we
investigate the impact of using different measures weighting the contribution of each attribute as
well as of applying a particular smoothing technique to cope with data sparseness.
@article{paper:cimiano:2005,
  abstract  = {We present a novel approach to the automatic acquisition of taxonomies or concept hierarchies
from a text corpus. The approach is based on Formal Concept Analysis (FCA), a method mainly
used for the analysis of data, i.e. for investigating and processing explicitly given information. We
follow Harris' distributional hypothesis and model the context of a certain term as a vector representing
syntactic dependencies which are automatically acquired from the text corpus with a linguistic
parser. On the basis of this context information, FCA produces a lattice that we convert into
a special kind of partial order constituting a concept hierarchy. The approach is evaluated by comparing
the resulting concept hierarchies with hand-crafted taxonomies for two domains: tourism
and finance. We also directly compare our approach with hierarchical agglomerative clustering as
well as with Bi-Section-KMeans as an instance of a divisive clustering algorithm. Furthermore, we
investigate the impact of using different measures weighting the contribution of each attribute as
well as of applying a particular smoothing technique to cope with data sparseness.},
  added-at  = {2008-08-11T15:16:25.000+0200},
  author    = {Cimiano, Philipp and Hotho, Andreas and Staab, Steffen},
  biburl    = {https://www.bibsonomy.org/bibtex/28ed9da229b56643ae16fc83f158b334f/mschuber},
  interhash = {4c09568cff62babd362aab03095f4589},
  intrahash = {8ed9da229b56643ae16fc83f158b334f},
  issn      = {1076-9757},
  journal   = {Journal of Artificial Intelligence Research (JAIR)},
  keywords  = {2005 analysis folksonomy graphs networks to-read},
  pages     = {305--339},
  publisher = {AAAI Press},
  timestamp = {2008-09-09T12:27:57.000+0200},
  title     = {Learning Concept Hierarchies from Text Corpora using {Formal Concept Analysis}},
  url       = {http://www.jair.org/media/1648/live-1648-2403-jair.pdf},
  vgwort    = {54},
  volume    = {24},
  year      = {2005},
}