In this paper we study the privacy preservation properties of a specific technique for query log anonymization: token-based hashing. In this approach, each query is tokenized, and then a secure hash function is applied to each token. We show that statistical techniques may be applied to partially compromise the anonymization. We then analyze the specific risks that arise from these partial compromises, focused on revelation of identity from unambiguous names, addresses, and so forth, and the revelation of facts associated with an identity that are deemed to be highly sensitive. Our goal in this work is twofold: to show that token-based hashing is unsuitable for anonymization, and to present a concrete analysis of specific techniques that may be effective in breaching privacy, against which other anonymization schemes should be measured.
%0 Conference Paper
%1 kumar2007anonymizing
%A Kumar, Ravi
%A Novak, Jasmine
%A Pang, Bo
%A Tomkins, Andrew
%B Proceedings of the 16th international conference on World Wide Web
%C New York, NY, USA
%D 2007
%I ACM
%K anonymisation implicit-feedback privacy query-log social-search
%P 629--638
%R 10.1145/1242572.1242657
%T On anonymizing query logs via token-based hashing
%U http://doi.acm.org/10.1145/1242572.1242657
%X In this paper we study the privacy preservation properties of a specific technique for query log anonymization: token-based hashing. In this approach, each query is tokenized, and then a secure hash function is applied to each token. We show that statistical techniques may be applied to partially compromise the anonymization. We then analyze the specific risks that arise from these partial compromises, focused on revelation of identity from unambiguous names, addresses, and so forth, and the revelation of facts associated with an identity that are deemed to be highly sensitive. Our goal in this work is twofold: to show that token-based hashing is unsuitable for anonymization, and to present a concrete analysis of specific techniques that may be effective in breaching privacy, against which other anonymization schemes should be measured.
%@ 978-1-59593-654-7
@comment{
  kumar2007anonymizing: conference paper in the WWW '07 proceedings (ACM).
  The fields acmid, added-at, biburl, description, interhash, intrahash,
  keywords, numpages and timestamp are export metadata; standard
  bibliography styles silently ignore field names they do not know.
  "World Wide Web" in booktitle is brace-protected so that styles which
  recase titles keep its capitalisation.
}
@inproceedings{kumar2007anonymizing,
  abstract    = {In this paper we study the privacy preservation properties of a specific technique for query log anonymization: token-based hashing. In this approach, each query is tokenized, and then a secure hash function is applied to each token. We show that statistical techniques may be applied to partially compromise the anonymization. We then analyze the specific risks that arise from these partial compromises, focused on revelation of identity from unambiguous names, addresses, and so forth, and the revelation of facts associated with an identity that are deemed to be highly sensitive. Our goal in this work is twofold: to show that token-based hashing is unsuitable for anonymization, and to present a concrete analysis of specific techniques that may be effective in breaching privacy, against which other anonymization schemes should be measured.},
  acmid       = {1242657},
  added-at    = {2011-07-25T15:42:25.000+0200},
  address     = {New York, NY, USA},
  author      = {Kumar, Ravi and Novak, Jasmine and Pang, Bo and Tomkins, Andrew},
  biburl      = {https://www.bibsonomy.org/bibtex/2b59cbfbfdb40636d68e5bfd30ec1ac95/beate},
  booktitle   = {Proceedings of the 16th international conference on {World Wide Web}},
  description = {On anonymizing query logs via token-based hashing},
  doi         = {10.1145/1242572.1242657},
  interhash   = {c4673af61745108df090458d7eee693b},
  intrahash   = {b59cbfbfdb40636d68e5bfd30ec1ac95},
  isbn        = {978-1-59593-654-7},
  keywords    = {anonymisation implicit-feedback privacy query-log social-search},
  location    = {Banff, Alberta, Canada},
  numpages    = {10},
  pages       = {629--638},
  publisher   = {ACM},
  series      = {WWW '07},
  timestamp   = {2011-07-25T15:42:25.000+0200},
  title       = {On anonymizing query logs via token-based hashing},
  url         = {http://doi.acm.org/10.1145/1242572.1242657},
  year        = {2007},
}