@inproceedings{Babcock2003,
title = {Maintaining variance and k-medians over data stream windows},
author = {Brian Babcock and Mayur Datar and Rajeev Motwani and Liadan O'Callaghan},
booktitle = {ACM SIGMOD-SIGACT-SIGART Symposium on Principles of Database Systems},
pages = {234--243},
url = {http://acm.org/sigmod/pods/proc03/online/131-datar.pdf},
year = {2003},
abstract = {The sliding window model is useful for discounting stale data in data
stream applications. In this model, data elements arrive continually
and only the most recent N elements are used when answering queries.
We present a novel technique for solving two important and related
problems in the sliding window model---maintaining variance and maintaining
a k--median clustering. Our solution to the problem of maintaining
variance provides a continually updated estimate of the variance
of the last N values in a data stream with relative error of at most
e using O(1/e2 log N) memory. We present a constant-factor approximation
algorithm which maintains an approximate k--median solution for the
last N data points using O(k/t4 N2t log2 N) memory, where t < 1/2
is a parameter which trades off the space bound with the approximation
factor of O(2O(1/t)).},
keywords = {Clustering DataStream }
}