A. Hotho, S. Staab, and G. Stumme. Third IEEE International Conference on Data Mining (ICDM 2003), November 2003.
Abstract
Text document clustering plays an important role in providing
intuitive navigation and browsing mechanisms by organizing large sets of
documents into a small number of meaningful clusters. The bag of words
representation used for these clustering methods is often unsatisfactory
as it ignores relationships between important terms that do not cooccur
literally. In order to deal with the problem, we integrate core
ontologies as background knowledge into the process of clustering text
documents. Our experimental evaluations compare clustering techniques
based on pre-categorizations of texts from Reuters newsfeeds and on a
smaller domain of an eLearning course about Java. In the experiments,
improvements of results by background knowledge compared to a baseline
without background knowledge can be shown in many interesting
combinations.
%0 Conference Proceedings
%1 1250972
%A Hotho, A.
%A Staab, S.
%A Stumme, G.
%B Third IEEE International Conference on Data Mining (ICDM 2003)
%D 2003
%K clustering document entropy inex08paper
%P 541-544
%T Ontologies improve text document clustering
%X Text document clustering plays an important role in providing
intuitive navigation and browsing mechanisms by organizing large sets of
documents into a small number of meaningful clusters. The bag of words
representation used for these clustering methods is often unsatisfactory
as it ignores relationships between important terms that do not cooccur
literally. In order to deal with the problem, we integrate core
ontologies as background knowledge into the process of clustering text
documents. Our experimental evaluations compare clustering techniques
based on pre-categorizations of texts from Reuters newsfeeds and on a
smaller domain of an eLearning course about Java. In the experiments,
improvements of results by background knowledge compared to a baseline
without background knowledge can be shown in many interesting
combinations.
@comment{Conference paper (ICDM 2003): typed as inproceedings with the
venue in booktitle. The citation key and the BibSonomy bookkeeping fields
(added-at, biburl, interhash, intrahash, timestamp) are kept as exported.}
@inproceedings{1250972,
  abstract  = {Text document clustering plays an important role in providing
intuitive navigation and browsing mechanisms by organizing large sets of
documents into a small number of meaningful clusters. The bag of words
representation used for these clustering methods is often unsatisfactory
as it ignores relationships between important terms that do not cooccur
literally. In order to deal with the problem, we integrate core
ontologies as background knowledge into the process of clustering text
documents. Our experimental evaluations compare clustering techniques
based on pre-categorizations of texts from Reuters newsfeeds and on a
smaller domain of an eLearning course about Java. In the experiments,
improvements of results by background knowledge compared to a baseline
without background knowledge can be shown in many interesting
combinations.},
  added-at  = {2008-12-02T03:57:18.000+0100},
  author    = {Hotho, A. and Staab, S. and Stumme, G.},
  biburl    = {https://www.bibsonomy.org/bibtex/202920ab2622af316c940aca6b7afb101/cdevries},
  booktitle = {Third {IEEE} International Conference on Data Mining ({ICDM} 2003)},
  interhash = {b56c36d6d9c9ca9e6bd236a0f92415a5},
  intrahash = {02920ab2622af316c940aca6b7afb101},
  keywords  = {clustering document entropy inex08paper},
  month     = nov,
  pages     = {541--544},
  timestamp = {2009-03-23T09:10:10.000+0100},
  title     = {Ontologies improve text document clustering},
  year      = {2003},
}