@inproceedings{Babcock2003, title = {Maintaining variance and k-medians over data stream windows}, author = {Brian Babcock and Mayur Datar and Rajeev Motwani and Liadan O'Callaghan}, booktitle = {ACM SIGMOD-SIGACT-SIGART Symposium on Principles of Database Systems}, pages = {234--243}, url = {http://acm.org/sigmod/pods/proc03/online/131-datar.pdf}, year = {2003}, biburl = {http://www.bibsonomy.org/bibtex/2c4aaf2b4ec790359f14535d0a07f474e/marcoalvarez}, abstract = {The sliding window model is useful for discounting stale data in data stream applications. In this model, data elements arrive continually and only the most recent N elements are used when answering queries. We present a novel technique for solving two important and related problems in the sliding window model---maintaining variance and maintaining a k--median clustering. Our solution to the problem of maintaining variance provides a continually updated estimate of the variance of the last N values in a data stream with relative error of at most e using O(1/e2 log N) memory. We present a constant-factor approximation algorithm which maintains an approximate k--median solution for the last N data points using O(k/t4 N2t log2 N) memory, where t < 1/2 is a parameter which trades off the space bound with the approximation factor of O(2O(1/t)).}, keywords = {Clustering DataStream } }