Cluster analysis is a primary method for database mining. It is either
used as a stand-alone tool to get insight into the distribution of
a data set, e.g. to focus further analysis and data processing, or
as a preprocessing step for other algorithms operating on the detected
clusters. Almost all of the well-known clustering algorithms require
input parameters which are hard to determine but have a significant
influence on the clustering result. Furthermore, for many real-data
sets there does not even exist a global parameter setting for which
the result of the clustering algorithm describes the intrinsic clustering
structure accurately. We introduce a new algorithm for the purpose
of cluster analysis which does not produce a clustering of a data
set explicitly; but instead creates an augmented ordering of the
database representing its density-based clustering structure. This
cluster-ordering contains information which is equivalent to the
density-based clusterings corresponding to a broad range of parameter
settings. It is a versatile basis for both automatic and interactive
cluster analysis. We show how to automatically and efficiently extract
not only 'traditional' clustering information (e.g. representative
points, arbitrary shaped clusters), but also the intrinsic clustering
structure. For medium sized data sets, the cluster-ordering can be
represented graphically and for very large data sets, we introduce
an appropriate visualization technique. Both are suitable for interactive
exploration of the intrinsic clustering structure offering additional
insights into the distribution and correlation of the data.
%0 Journal Article
%1 AnkEtAl99
%A Ankerst, Mihael
%A Breunig, Markus M.
%A Kriegel, Hans-Peter
%A Sander, Jörg
%C New York, NY, USA
%D 1999
%I ACM
%J ACM SIGMOD Record
%K density_clustering
%N 2
%P 49--60
%R 10.1145/304181.304187
%T OPTICS: Ordering Points to Identify the Clustering Structure
%U http://portal.acm.org/citation.cfm?id=304187
%V 28
%X Cluster analysis is a primary method for database mining. It is either
used as a stand-alone tool to get insight into the distribution of
a data set, e.g. to focus further analysis and data processing, or
as a preprocessing step for other algorithms operating on the detected
clusters. Almost all of the well-known clustering algorithms require
input parameters which are hard to determine but have a significant
influence on the clustering result. Furthermore, for many real-data
sets there does not even exist a global parameter setting for which
the result of the clustering algorithm describes the intrinsic clustering
structure accurately. We introduce a new algorithm for the purpose
of cluster analysis which does not produce a clustering of a data
set explicitly; but instead creates an augmented ordering of the
database representing its density-based clustering structure. This
cluster-ordering contains information which is equivalent to the
density-based clusterings corresponding to a broad range of parameter
settings. It is a versatile basis for both automatic and interactive
cluster analysis. We show how to automatically and efficiently extract
not only 'traditional' clustering information (e.g. representative
points, arbitrary shaped clusters), but also the intrinsic clustering
structure. For medium sized data sets, the cluster-ordering can be
represented graphically and for very large data sets, we introduce
an appropriate visualization technique. Both are suitable for interactive
exploration of the intrinsic clustering structure offering additional
insights into the distribution and correlation of the data.
@article{AnkEtAl99,
  author    = {Ankerst, Mihael and Breunig, Markus M. and Kriegel, Hans-Peter and Sander, J{\"o}rg},
  title     = {{OPTICS}: Ordering Points to Identify the Clustering Structure},
  journal   = {ACM SIGMOD Record},
  volume    = {28},
  number    = {2},
  pages     = {49--60},
  year      = {1999},
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0163-5808},
  doi       = {10.1145/304181.304187},
  url       = {http://portal.acm.org/citation.cfm?id=304187},
  keywords  = {density_clustering},
  abstract  = {Cluster analysis is a primary method for database mining. It is either
used as a stand-alone tool to get insight into the distribution of
a data set, e.g. to focus further analysis and data processing, or
as a preprocessing step for other algorithms operating on the detected
clusters. Almost all of the well-known clustering algorithms require
input parameters which are hard to determine but have a significant
influence on the clustering result. Furthermore, for many real-data
sets there does not even exist a global parameter setting for which
the result of the clustering algorithm describes the intrinsic clustering
structure accurately. We introduce a new algorithm for the purpose
of cluster analysis which does not produce a clustering of a data
set explicitly; but instead creates an augmented ordering of the
database representing its density-based clustering structure. This
cluster-ordering contains information which is equivalent to the
density-based clusterings corresponding to a broad range of parameter
settings. It is a versatile basis for both automatic and interactive
cluster analysis. We show how to automatically and efficiently extract
not only 'traditional' clustering information (e.g. representative
points, arbitrary shaped clusters), but also the intrinsic clustering
structure. For medium sized data sets, the cluster-ordering can be
represented graphically and for very large data sets, we introduce
an appropriate visualization technique. Both are suitable for interactive
exploration of the intrinsic clustering structure offering additional
insights into the distribution and correlation of the data.},
  added-at  = {2009-02-28T21:01:39.000+0100},
  timestamp = {2009-02-28T21:01:39.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/286b1a51b501c882f9a4f1cdacca3f7ed/tfalk},
  interhash = {7417e17c0e8eec9f1a9f2bc57a476b15},
  intrahash = {86b1a51b501c882f9a4f1cdacca3f7ed}
}