% Hotho, Staab and Stumme (2003): workshop paper on using WordNet as background
% knowledge for text document clustering. Record originally exported from BibSonomy
% (see biburl). Text outside an entry is ignored by BibTeX, so these lines are comments.
@inproceedings{Hotho2003,
  author    = {Hotho, Andreas and Staab, Steffen and Stumme, Gerd},
  title     = {{WordNet} improves text document clustering},
  booktitle = {Semantic Web Workshop at the Annual International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval},
  year      = {2003},
  url       = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/hotho2003wordnet.pdf},
  biburl    = {http://www.bibsonomy.org/bibtex/2b8c511f87f1f59dcb8aa3ee03ddd7bec/marcoalvarez},
  keywords  = {Clustering WordNet},
  abstract  = {Text document clustering plays an important role in providing intuitive navigation and browsing mechanisms by organizing large amounts of information into a small number of meaningful clusters. The bag of words representation used for these clustering methods is often unsatisfactory as it ignores relationships between important terms that do not co-occur literally. In order to deal with the problem, we integrate background knowledge --- in our application Wordnet --- into the process of clustering text documents. We cluster the documents by a standard partitional algorithm. Our experimental evaluation on Reuters newsfeeds compares clustering results with pre-categorizations of news. In the experiments, improvements of results by background knowledge compared to the baseline can be shown for many interesting tasks.},
}