% McCallum, Nigam, Ungar: KDD 2000 conference paper.
% address is the publisher's city; the conference city is kept in venue so it
% cannot be mistaken for the publisher location. doi holds the bare DOI only.
@inproceedings{mccallum00efficient,
  author      = {McCallum, Andrew and Nigam, Kamal and Ungar, Lyle H.},
  title       = {Efficient clustering of high-dimensional data sets with application to reference matching},
  booktitle   = {{KDD} '00: Proceedings of the sixth {ACM} {SIGKDD} international conference on Knowledge discovery and data mining},
  pages       = {169--178},
  publisher   = {ACM Press},
  address     = {New York, NY, USA},
  venue       = {Boston, Massachusetts, United States},
  year        = {2000},
  isbn        = {1-58113-233-6},
  doi         = {10.1145/347090.347123},
  description = {Efficient clustering of high-dimensional data sets with application to reference matching},
  biburl      = {http://www.bibsonomy.org/bibtex/2346d1db87c3bda5fcf4ec5f92a75e16a/sb3000},
  keywords    = {dataset efficiency clustering},
}

% Chang et al.: OSDI 2006 conference paper. The citation key is kept as-is so
% existing citations keep resolving.
@inproceedings{burrows06googlebigtable,
  author    = {Chang, Fay and Dean, Jeffrey and Ghemawat, Sanjay and Hsieh, Wilson C. and Wallach, Deborah A. and Burrows, Mike and Chandra, Tushar and Fikes, Andrew and Gruber, Robert E.},
  title     = {{Bigtable}: A Distributed Storage System for Structured Data},
  booktitle = {{OSDI}'06: Seventh Symposium on Operating System Design and Implementation, Seattle, {WA}, November, 2006},
  pages     = {205--218},
  year      = {2006},
  url       = {http://labs.google.com/papers/bigtable-osdi06.pdf},
  biburl    = {http://www.bibsonomy.org/bibtex/2913bfd5965ad54bb09847d66765bcc04/sb3000},
  keywords  = {scalability google fault-tolerance performance distributed efficiency database algorithm},
}