% Sawyer and Gervasi (2008): special-section item in IET Software, vol. 2, pp. 1--2.
@article{Sawyer:2008,
  author      = {Sawyer, P. and Gervasi, V.},
  title       = {Special Section on Natural Language in Software Engineering},
  journal     = {{IET} Software},
  volume      = {2},
  pages       = {1--2},
  year        = {2008},
  issn        = {1751-8814},
  doi         = {10.1049/iet-sen:20089034},
  url         = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?tp=&arnumber=4460889&isnumber=4460888},
  description = {Welcome to IEEE Xplore 2.0: Special Section on Natural Language in Software Engineering},
  abstract    = {Not Available},
  biburl      = {http://www.bibsonomy.org/bibtex/2d55ef372846dae639344c4932e252ff2/renew},
  keywords    = {software engineering nlp},
}

% Versley (2007): EMNLP/CoNLL proceedings paper on antecedent selection for coreference resolution.
@inproceedings{versley2007ast,
  author    = {Versley, Yannick},
  title     = {Antecedent selection techniques for high-recall coreference resolution},
  booktitle = {Proceedings of {EMNLP/CoNLL}},
  year      = {2007},
  biburl    = {http://www.bibsonomy.org/bibtex/21d836f74cdadf0a84e2c5ae794de1da5/renew},
  keywords  = {coreference},
}

% Zesch et al. (2007): Wikipedia as a lexical semantic resource; introduces a Java-based Wikipedia API.
@inproceedings{citeulike:2348620,
  author      = {Zesch, Torsten and Gurevych, Iryna and M{\"u}hlh{\"a}user, Max},
  title       = {Analyzing and Accessing {Wikipedia} as a Lexical Semantic Resource},
  booktitle   = {Biannual Conference of the Society for Computational Linguistics and Language Technology},
  school      = {Darmstadt University of Technology},
  year        = {2007},
  id          = {2348620},
  priority    = {4},
  description = {stuff from citeyoulike},
  abstract    = {We analyze Wikipedia as a lexical semantic resource and compare it with conventional resources, such as dictionaries, thesauri, semantic wordnets, etc. Different parts of Wikipedia reflect different aspects of these resources. We show that Wikipedia contains a vast amount of knowledge about, e.g., named entities, domain specific terms, and rare word senses. If Wikipedia is to be used as a lexical semantic resource in large-scale NLP tasks, efficient programmatic access to the knowledge therein is required. We review existing access mechanisms and show that they are limited with respect to performance and the provided access functions. Therefore, we introduce a general purpose, high performance Java-based Wikipedia API that overcomes these limitations. It is available for research purposes at http://www.ukp.tu-darmstadt.de/software/WikipediaAPI.},
  biburl      = {http://www.bibsonomy.org/bibtex/22d8f740fe023824a89405eaaddc4bfce/renew},
  keywords    = {nlp semantics wikipedia lexical},
}

% Frankowski et al. (2006): SIGIR '06 paper on re-identifying users from their public mentions.
% address holds the publisher city; venue holds the conference city.
@inproceedings{1148267,
  author      = {Frankowski, Dan and Cosley, Dan and Sen, Shilad and Terveen, Loren and Riedl, John},
  title       = {You are what you say: privacy risks of public mentions},
  booktitle   = {{SIGIR} '06: Proceedings of the 29th annual international {ACM} {SIGIR} conference on Research and development in information retrieval},
  pages       = {565--572},
  publisher   = {ACM},
  address     = {New York, NY, USA},
  venue       = {Seattle, Washington, USA},
  year        = {2006},
  isbn        = {1-59593-369-7},
  doi         = {10.1145/1148170.1148267},
  url         = {http://portal.acm.org/citation.cfm?doid=1148170.1148267},
  description = {You are what you say},
  abstract    = {In today's data-rich networked world, people express many aspects of their lives online. It is common to segregate different aspects in different places: you might write opinionated rants about movies in your blog under a pseudonym while participating in a forum or web site for scholarly discussion of medical ethics under your real name. However, it may be possible to link these separate identities, because the movies, journal articles, or authors you mention are from a sparse relation space whose properties (e.g., many items related to by only a few users) allow re-identification. This re-identification violates people's intentions to separate aspects of their life and can have negative consequences; it also may allow other privacy violations, such as obtaining a stronger identifier like name and address. This paper examines this general problem in a specific setting: re-identification of users from a public web movie forum in a private movie ratings dataset. We present three major results. First, we develop algorithms that can re-identify a large proportion of public users in a sparse relation space. Second, we evaluate whether private dataset owners can protect user privacy by hiding data; we show that this requires extensive and undesirable changes to the dataset, making it impractical. Third, we evaluate two methods for users in a public forum to protect their own privacy, suppression and misdirection. Suppression doesn't work here either. However, we show that a simple misdirection strategy works well: mention a few popular items that you haven't rated.},
  biburl      = {http://www.bibsonomy.org/bibtex/297d07bfe67a320c6f764ca4b361985b3/renew},
  keywords    = {nlp privacy},
}