I. Davidson, S. Ravi, and M. Ester. KDD '07: Proceedings of the 13th ACM SIGKDD international conference on Knowledge discovery and data mining, page 240--249. New York, NY, USA, ACM, (2007)
DOI: 10.1145/1281192.1281221
Abstract
Clustering with constraints is an emerging area of data mining research. However, most work assumes that the constraints are given as one large batch. In this paper we explore the situation where the constraints are incrementally given. In this way the user after seeing a clustering can provide positive and negative feedback via constraints to critique a clustering solution. We consider the problem of efficiently updating a clustering to satisfy the new and old constraints rather than reclustering the entire data set. We show that the problem of incremental clustering under constraints is NP-hard in general, but identify several sufficient conditions which lead to efficiently solvable versions. These translate into a set of rules on the types of constraints thatcan be added and constraint set properties that must be maintained. We demonstrate that this approach is more efficient than re-clustering the entire data set and has several other advantages.
%0 Conference Paper
%1 1281221
%A Davidson, Ian
%A Ravi, S. S.
%A Ester, Martin
%B KDD '07: Proceedings of the 13th ACM SIGKDD international conference on Knowledge discovery and data mining
%C New York, NY, USA
%D 2007
%I ACM
%K clustering incremental
%P 240--249
%R 10.1145/1281192.1281221
%T Efficient incremental constrained clustering
%U http://portal.acm.org/citation.cfm?id=1281221
%X Clustering with constraints is an emerging area of data mining research. However, most work assumes that the constraints are given as one large batch. In this paper we explore the situation where the constraints are incrementally given. In this way the user after seeing a clustering can provide positive and negative feedback via constraints to critique a clustering solution. We consider the problem of efficiently updating a clustering to satisfy the new and old constraints rather than reclustering the entire data set. We show that the problem of incremental clustering under constraints is NP-hard in general, but identify several sufficient conditions which lead to efficiently solvable versions. These translate into a set of rules on the types of constraints thatcan be added and constraint set properties that must be maintained. We demonstrate that this approach is more efficient than re-clustering the entire data set and has several other advantages.
%@ 978-1-59593-609-7
@inproceedings{1281221,
abstract = {Clustering with constraints is an emerging area of data mining research. However, most work assumes that the constraints are given as one large batch. In this paper we explore the situation where the constraints are incrementally given. In this way the user after seeing a clustering can provide positive and negative feedback via constraints to critique a clustering solution. We consider the problem of efficiently updating a clustering to satisfy the new and old constraints rather than reclustering the entire data set. We show that the problem of incremental clustering under constraints is NP-hard in general, but identify several sufficient conditions which lead to efficiently solvable versions. These translate into a set of rules on the types of constraints thatcan be added and constraint set properties that must be maintained. We demonstrate that this approach is more efficient than re-clustering the entire data set and has several other advantages.},
added-at = {2010-05-04T15:32:02.000+0200},
address = {New York, NY, USA},
author = {Davidson, Ian and Ravi, S. S. and Ester, Martin},
biburl = {https://www.bibsonomy.org/bibtex/21fb9a9f4d40d5c42bd5dffc66d3c32c3/utahell},
booktitle = {KDD '07: Proceedings of the 13th ACM SIGKDD international conference on Knowledge discovery and data mining},
description = {Efficient incremental constrained clustering},
doi = {10.1145/1281192.1281221},
interhash = {041d3604de26c4650df373b135914928},
intrahash = {1fb9a9f4d40d5c42bd5dffc66d3c32c3},
isbn = {978-1-59593-609-7},
keywords = {clustering incremental},
location = {San Jose, California, USA},
pages = {240--249},
publisher = {ACM},
timestamp = {2010-05-04T15:32:02.000+0200},
title = {Efficient incremental constrained clustering},
url = {http://portal.acm.org/citation.cfm?id=1281221},
year = 2007
}