Subspace clustering is an extension of traditional clustering that seeks
to find clusters in different subspaces within a dataset. Often in high
dimensional data, many dimensions are irrelevant and can mask existing
clusters in noisy data. Feature selection removes irrelevant and redundant
dimensions by analyzing the entire dataset. Subspace clustering algorithms
localize the search for relevant dimensions allowing them to find clusters
that exist in multiple, possibly overlapping subspaces. There are two major
branches of subspace clustering based on their search strategy. Top-down
algorithms find an initial clustering in the full set of dimensions and
evaluate the subspaces of each cluster, iteratively improving the results.
Bottom-up approaches find dense regions in low dimensional spaces and
combine them to form clusters. This paper presents a survey of the various
subspace clustering algorithms along with a hierarchy organizing the
algorithms by their defining characteristics. We then compare the two main
approaches to subspace clustering using empirical scalability and accuracy
tests and discuss some potential applications where subspace clustering
could be particularly useful.
%0 Journal Article
%1 text.clustering.review.2004
%A Parsons, Lance
%A Haque, Ehtesham
%A Liu, Huan
%D 2004
%J SIGKDD Explorations
%K clustering review subspace text
%N 1
%P 90-105
%T Subspace Clustering for High Dimensional Data: A Review
%U http://www.sigkdd.org/explorations/issues/6-1-2004-06/parsons.pdf
%V 6
%X Subspace clustering is an extension of traditional clustering that seeks
to find clusters in different subspaces within a dataset. Often in high
dimensional data, many dimensions are irrelevant and can mask existing
clusters in noisy data. Feature selection removes irrelevant and redundant
dimensions by analyzing the entire dataset. Subspace clustering algorithms
localize the search for relevant dimensions allowing them to find clusters
that exist in multiple, possibly overlapping subspaces. There are two major
branches of subspace clustering based on their search strategy. Top-down
algorithms find an initial clustering in the full set of dimensions and
evaluate the subspaces of each cluster, iteratively improving the results.
Bottom-up approaches find dense regions in low dimensional spaces and
combine them to form clusters. This paper presents a survey of the various
subspace clustering algorithms along with a hierarchy organizing the
algorithms by their defining characteristics. We then compare the two main
approaches to subspace clustering using empirical scalability and accuracy
tests and discuss some potential applications where subspace clustering
could be particularly useful.
@article{text.clustering.review.2004,
  abstract  = {Subspace clustering is an extension of traditional clustering
               that seeks to find clusters in different subspaces within a
               dataset. Often in high dimensional data, many dimensions are
               irrelevant and can mask existing clusters in noisy data.
               Feature selection removes irrelevant and redundant dimensions
               by analyzing the entire dataset. Subspace clustering
               algorithms localize the search for relevant dimensions
               allowing them to find clusters that exist in multiple,
               possibly overlapping subspaces. There are two major branches
               of subspace clustering based on their search strategy.
               Top-down algorithms find an initial clustering in the full
               set of dimensions and evaluate the subspaces of each cluster,
               iteratively improving the results. Bottom-up approaches find
               dense regions in low dimensional spaces and combine them to
               form clusters. This paper presents a survey of the various
               subspace clustering algorithms along with a hierarchy
               organizing the algorithms by their defining characteristics.
               We then compare the two main approaches to subspace
               clustering using empirical scalability and accuracy tests and
               discuss some potential applications where subspace clustering
               could be particularly useful.},
  added-at  = {2009-03-07T02:17:59.000+0100},
  author    = {Parsons, Lance and Haque, Ehtesham and Liu, Huan},
  biburl    = {https://www.bibsonomy.org/bibtex/26e630a6a5b8175aa1b5f3f647b24a684/huiyangsfsu},
  interhash = {fb6aa3c035c99b66778c30520585b73f},
  intrahash = {6e630a6a5b8175aa1b5f3f647b24a684},
  journal   = {SIGKDD Explorations},
  keywords  = {clustering review subspace text},
  number    = {1},
  pages     = {90--105},
  timestamp = {2009-03-07T02:17:59.000+0100},
  title     = {Subspace Clustering for High Dimensional Data: A Review},
  url       = {http://www.sigkdd.org/explorations/issues/6-1-2004-06/parsons.pdf},
  volume    = {6},
  year      = {2004}
}