As a prolific research area in data mining, subspace clustering and related problems induced a vast quantity of proposed solutions. However, many publications compare a new proposition—if at all—with one or two competitors, or even with a so-called “naïve” ad hoc solution, but fail to clarify the exact problem definition. As a consequence, even if two solutions are thoroughly compared experimentally, it will often remain unclear whether both solutions tackle the same problem or, if they do, whether they agree in certain tacit assumptions and how such assumptions may influence the outcome of an algorithm. In this survey, we try to clarify: (i) the different problem definitions related to subspace clustering in general; (ii) the specific difficulties encountered in this field of research; (iii) the varying assumptions, heuristics, and intuitions forming the basis of different approaches; and (iv) how several prominent solutions tackle different problems.
%0 Journal Article
%1 1497578
%A Kriegel, Hans-Peter
%A Kröger, Peer
%A Zimek, Arthur
%C New York, NY, USA
%D 2009
%I ACM
%J ACM Trans. Knowl. Discov. Data
%K clustering data seminar survey
%N 1
%P 1--58
%R 10.1145/1497577.1497578
%T Clustering high-dimensional data: A survey on subspace clustering, pattern-based clustering, and correlation clustering
%U http://portal.acm.org/citation.cfm?id=1497578&dl=GUIDE&coll=GUIDE&CFID=59049143&CFTOKEN=34529635
%V 3
%X As a prolific research area in data mining, subspace clustering and related problems induced a vast quantity of proposed solutions. However, many publications compare a new proposition—if at all—with one or two competitors, or even with a so-called “naïve” ad hoc solution, but fail to clarify the exact problem definition. As a consequence, even if two solutions are thoroughly compared experimentally, it will often remain unclear whether both solutions tackle the same problem or, if they do, whether they agree in certain tacit assumptions and how such assumptions may influence the outcome of an algorithm. In this survey, we try to clarify: (i) the different problem definitions related to subspace clustering in general; (ii) the specific difficulties encountered in this field of research; (iii) the varying assumptions, heuristics, and intuitions forming the basis of different approaches; and (iv) how several prominent solutions tackle different problems.
@comment{Kriegel, Kroeger and Zimek (2009): survey article in ACM TKDD, volume 3, number 1. The doi field holds the bare DOI (no resolver prefix); accented letters are written as BibTeX special characters so that sorting and label generation work under classic BibTeX.}
@article{1497578,
  abstract    = {As a prolific research area in data mining, subspace clustering and related problems induced a vast quantity of proposed solutions. However, many publications compare a new proposition---if at all---with one or two competitors, or even with a so-called ``na{\"\i}ve'' ad hoc solution, but fail to clarify the exact problem definition. As a consequence, even if two solutions are thoroughly compared experimentally, it will often remain unclear whether both solutions tackle the same problem or, if they do, whether they agree in certain tacit assumptions and how such assumptions may influence the outcome of an algorithm. In this survey, we try to clarify: (i) the different problem definitions related to subspace clustering in general; (ii) the specific difficulties encountered in this field of research; (iii) the varying assumptions, heuristics, and intuitions forming the basis of different approaches; and (iv) how several prominent solutions tackle different problems.},
  added-at    = {2009-10-22T14:56:44.000+0200},
  address     = {New York, NY, USA},
  author      = {Kriegel, Hans-Peter and Kr{\"o}ger, Peer and Zimek, Arthur},
  biburl      = {https://www.bibsonomy.org/bibtex/234df6e1452156d7ef06b246817e8fc31/beate},
  description = {Clustering high-dimensional data},
  doi         = {10.1145/1497577.1497578},
  interhash   = {6a15c9a3120af24695d27a9065cd8ed4},
  intrahash   = {34df6e1452156d7ef06b246817e8fc31},
  issn        = {1556-4681},
  journal     = {{ACM} Transactions on Knowledge Discovery from Data},
  keywords    = {clustering data seminar survey},
  number      = {1},
  pages       = {1--58},
  publisher   = {ACM},
  timestamp   = {2009-10-22T14:56:45.000+0200},
  title       = {Clustering high-dimensional data: A survey on subspace clustering, pattern-based clustering, and correlation clustering},
  url         = {http://portal.acm.org/citation.cfm?id=1497578&dl=GUIDE&coll=GUIDE&CFID=59049143&CFTOKEN=34529635},
  volume      = {3},
  year        = {2009},
}