% Bibliography database (BibSonomy export), one field per line.
% Text outside of entries is ignored by BibTeX, so these percent-prefixed
% lines act as comments.
%
% Conventions used below:
%   * doi holds the bare DOI, without a resolver prefix.
%   * Page ranges use a double hyphen.
%   * Accented letters are written as BibTeX special characters, e.g. {\"a}.
%   * Acronyms and proper nouns in titles are braced so sentence-casing
%     styles do not lowercase them.
%   * A crossref parent must appear after every entry that refers to it.

@book{RayPerl2002,
  title       = {{Perl} and {XML}},
  address     = {Beijing [u.a.]},
  author      = {Erik T. Ray and Jason MacIntosh},
  edition     = {First},
  publisher   = {O'Reilly},
  year        = 2002,
  isbn        = {0-596-00205-X},
  description = {KUG Recherche-Portal},
  biburl      = {http://www.bibsonomy.org/bibtex/26816bb1d0f4a97876fd516b7e207d255/beate},
  keywords    = {Perl XML}
}

@inproceedings{song2008sensemaking,
  title       = {{CiteSense}: supporting sensemaking of research literature},
  address     = {New York, NY, USA},
  author      = {Xiaolong Zhang and Yan Qu and C. Lee Giles and Piyou Song},
  booktitle   = {CHI '08: Proceeding of the twenty-sixth annual SIGCHI conference on Human factors in computing systems},
  pages       = {677--680},
  publisher   = {ACM},
  year        = 2008,
  url         = {http://portal.acm.org/citation.cfm?id=1357054.1357161&coll=GUIDE&dl=ACM&type=series&idx=SERIES260&part=series&WantType=Proceedings&title=CHI},
  location    = {Florence, Italy},
  isbn        = {978-1-60558-011-1},
  doi         = {10.1145/1357054.1357161},
  description = {CHI: CHI '08, CiteSense: supporting sensemaking of ...},
  biburl      = {http://www.bibsonomy.org/bibtex/2b69f467ca81c753d0192f02c574ad931/beate},
  keywords    = {tools citation research analysis}
}

% NOTE(review): the page range 1397--1648 looks like a whole-issue range
% rather than this article's pages -- confirm against the publisher record.
@article{jason2008social,
  title    = {Tagging and searching: Search retrieval effectiveness of folksonomies on the {World Wide Web}},
  author   = {P. Jason Morrison},
  journal  = {Information Processing \& Management},
  pages    = {1397--1648},
  volume   = 44,
  year     = 2008,
  url      = {http://dx.doi.org/10.1016/j.ipm.2007.12.010},
  id       = {2473655},
  priority = {0},
  at       = {2008-03-05 16:37:39},
  doi      = {10.1016/j.ipm.2007.12.010},
  abstract = {Many Web sites have begun allowing users to submit items to a
              collection and tag them with keywords. The folksonomies built
              from these tags are an interesting topic that has seen little
              empirical research. This study compared the search information
              retrieval (IR) performance of folksonomies from social
              bookmarking Web sites against search engines and subject
              directories. Thirty-four participants created 103 queries for
              various information needs. Results from each IR system were
              collected and participants judged relevance. Folksonomy search
              results overlapped with those from the other systems, and
              documents found by both search engines and folksonomies were
              significantly more likely to be judged relevant than those
              returned by any single IR system type. The search engines in
              the study had the highest precision and recall, but the
              folksonomies fared surprisingly well. Del.icio.us was
              statistically indistinguishable from the directories in many
              cases. Overall the directories were more precise than the
              folksonomies but they had similar recall scores. Better query
              handling may enhance folksonomy IR performance further. The
              folksonomies studied were promising, and may be able to improve
              Web search performance.},
  biburl   = {http://www.bibsonomy.org/bibtex/262ac28c3d46df728a5d577f84d927be3/beate},
  keywords = {analysis folksonomy dis ir social-search study}
}

@misc{fawcett04roc,
  title       = {{ROC} Graphs: Notes and Practical Considerations for Researchers},
  author      = {T. Fawcett},
  year        = 2004,
  url         = {http://citeseer.ist.psu.edu/fawcett04roc.html},
  description = {ROC Graphs: Notes and Practical Considerations for Researchers},
  biburl      = {http://www.bibsonomy.org/bibtex/2819643be78dd30c066ebf6a97c5480df/beate},
  keywords    = {data-mining roc evaluation tutorial}
}

@inproceedings{jaeschke07organizing,
  title     = {Organizing Publications and Bookmarks in {BibSonomy}},
  address   = {Banff, Canada},
  author    = {Robert J{\"a}schke and Miranda Grahl and Andreas Hotho and Beate Krause and Christoph Schmitz and Gerd Stumme},
  booktitle = {Workshop on Social and Collaborative Construction of Structured Knowledge (CKC 2007) at WWW 2007},
  editor    = {Harith Alani and Natasha Noy and Gerd Stumme and Peter Mika and York Sure and Denny Vrandecic},
  year      = 2007,
  url       = {http://www2007.org/workshops/paper_25.pdf},
  biburl    = {http://www.bibsonomy.org/bibtex/2b3a5e9851647ca0a7dfb62f041872504/beate},
  keywords  = {system folksonomy 2007 myown social bookmarking}
}

@inproceedings{Jaeschke2008logsonomy,
  title     = {Logsonomy --- A Search Engine Folksonomy},
  author    = {Robert J{\"a}schke and Beate Krause and Andreas Hotho and Gerd Stumme},
  booktitle = {Proceedings of the Second International Conference on Weblogs and Social Media (ICWSM 2008)},
  publisher = {AAAI Press},
  year      = 2008,
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/2008/Krause2008logsonomy_short.pdf},
  abstract  = {In social bookmarking systems users describe bookmarks by
               keywords called tags. The structure behind these social
               systems, called folksonomies, can be viewed as a tripartite
               hypergraph of user, tag and resource nodes. This underlying
               network shows specific structural properties that explain its
               growth and the possibility of serendipitous exploration.
               Search engines filter the vast information of the web. Queries
               describe a user's information need. In response to the
               displayed results of the search engine, users click on the
               links of the result page as they expect the answer to be of
               relevance. The clickdata can be represented as a folksonomy in
               which queries are descriptions of clicked URLs. This poster
               analyzes the topological characteristics of the resulting
               tripartite hypergraph of queries, users and bookmarks of two
               query logs and compares it to a snapshot of the folksonomy
               del.icio.us.},
  biburl    = {http://www.bibsonomy.org/bibtex/2359e1eccdc524334d4a2ad51330f76ae/beate},
  keywords  = {logsonomy search analysis folksonomy myown 2008}
}

@inproceedings{anti2008krause,
  title     = {The Anti-Social Tagger - Detecting Spam in Social Bookmarking Systems},
  author    = {Beate Krause and Andreas Hotho and Gerd Stumme},
  booktitle = {Proc. of the Fourth International Workshop on Adversarial Information Retrieval on the Web},
  year      = 2008,
  url       = {http://airweb.cse.lehigh.edu/2008/submissions/krause_2008_anti_social_tagger.pdf},
  biburl    = {http://www.bibsonomy.org/bibtex/203d349d70b578ca9ac3155f661151868/beate},
  keywords  = {myown spam 2008 folksonomy}
}

% NOTE(review): the description field calls this a SpringerLink book chapter,
% so an incollection entry may fit better than article -- confirm before
% changing the entry type.
@article{yanbe2007social,
  title       = {Towards Improving Web Search by Utilizing Social Bookmarks},
  author      = {Yusuke Yanbe and Adam Jatowt and Satoshi Nakamura and Katsumi Tanaka},
  journal     = {Web Engineering},
  pages       = {343--357},
  year        = 2007,
  url         = {http://dx.doi.org/10.1007/978-3-540-73597-7_28},
  description = {SpringerLink - Book Chapter},
  abstract    = {Social bookmarking services have become recently popular in
                 the Web. Along with the rapid increase in the amount of
                 social bookmarks, future applications could leverage this
                 data for enhancing search in the Web. This paper
                 investigates the possibility and potential benefits of a
                 hybrid page ranking approach that would combine the ranking
                 criteria of PageRank with the one based on social bookmarks
                 in order to improve the search in the Web. We demonstrate
                 and discuss the results of analytical study made in order to
                 compare both popularity estimates. In addition, we propose a
                 simple hybrid search method that combines both ranking
                 metrics and we show some preliminary experiments using this
                 approach. We hope that this study will shed new light on the
                 character of data in social bookmarking systems and foster
                 development of new, effective search applications for the
                 Web.},
  biburl      = {http://www.bibsonomy.org/bibtex/24462cf1a0c1364eed97392196402735c/beate},
  keywords    = {web2.0 folksonomy social-search}
}

% The fourth author was exported as "Giles Lee", which BibTeX parses with the
% surname "Lee". It is written here as in song2008sensemaking.
% NOTE(review): assumes this is the same person -- confirm against the paper.
@inproceedings{zhou2008social,
  title     = {Exploring Social Annotations for Information Retrieval},
  address   = {Beijing, Peking},
  author    = {Ding Zhou and Jiang Bian and Shuyi Zheng and C. Lee Giles and Hongyuan Zha},
  booktitle = {Proceedings of the 17th International World Wide Web Conference},
  year      = 2008,
  biburl    = {http://www.bibsonomy.org/bibtex/252c7a6a81d20a8adf3e942d7b1fefc25/beate},
  keywords  = {web2.0 ir folksonomy social-search annotation}
}

@inproceedings{jin2008networks,
  title     = {Ranking Entities on the Web using Social Network Mining and Ranking Learning},
  address   = {Beijing, Peking},
  author    = {Yingzi Jin and Yutaka Matsuo and Mitsuru Ishizuka},
  booktitle = {Workshop on Social Web Search and Mining},
  year      = 2008,
  biburl    = {http://www.bibsonomy.org/bibtex/22b5a7060510192d41e0c18aa7ab4fdc7/beate},
  keywords  = {social-networks social-search}
}

@inproceedings{ntoulas2006spam,
  title       = {Detecting spam web pages through content analysis},
  address     = {New York, NY, USA},
  author      = {Alexandros Ntoulas and Marc Najork and Mark Manasse and Dennis Fetterly},
  booktitle   = {WWW '06: Proceedings of the 15th international conference on World Wide Web},
  pages       = {83--92},
  publisher   = {ACM},
  year        = 2006,
  url         = {http://portal.acm.org/citation.cfm?id=1135794},
  location    = {Edinburgh, Scotland},
  isbn        = {1-59593-323-9},
  doi         = {10.1145/1135777.1135794},
  description = {Detecting spam web pages through content analysis},
  abstract    = {In this paper, we continue our investigations of ``web
                 spam'': the injection of artificially-created pages into the
                 web in order to influence the results from search engines,
                 to drive traffic to certain pages for fun or profit. This
                 paper considers some previously-undescribed techniques for
                 automatically detecting spam pages, examines the
                 effectiveness of these techniques in isolation and when
                 aggregated using classification algorithms. When combined,
                 our heuristics correctly identify 2,037 (86.2\%) of the
                 2,364 spam pages (13.8\%) in our judged collection of 17,168
                 pages, while misidentifying 526 spam and non-spam pages
                 (3.1\%).},
  biburl      = {http://www.bibsonomy.org/bibtex/2c93f4228fd8552bede071569cdaa1ad9/beate},
  keywords    = {spam features web}
}

% The crossref value must equal the parent's citation key exactly; the parent
% proceedings entry is DBLP:conf/airweb/2005, defined further down.
@inproceedings{mishne2005blogspam,
  title       = {Blocking Blog Spam with Language Model Disagreement},
  author      = {Gilad Mishne and David Carmel and Ronny Lempel},
  booktitle   = {AIRWeb},
  crossref    = {DBLP:conf/airweb/2005},
  pages       = {1--6},
  year        = 2005,
  url         = {http://dblp.uni-trier.de/db/conf/airweb/airweb2005.html#MishneCL05},
  ee          = {http://airweb.cse.lehigh.edu/2005/mishne.pdf},
  date        = {2007-09-12},
  description = {dblp},
  biburl      = {http://www.bibsonomy.org/bibtex/2285dfdeb8409e59a0c516dc632e508cf/beate},
  keywords    = {spam language blog}
}

@inproceedings{wu2006semantic,
  title       = {Detecting semantic cloaking on the web},
  address     = {New York, NY, USA},
  author      = {Baoning Wu and Brian D. Davison},
  booktitle   = {WWW '06: Proceedings of the 15th international conference on World Wide Web},
  pages       = {819--828},
  publisher   = {ACM},
  year        = 2006,
  url         = {http://portal.acm.org/citation.cfm?id=1135777.1135901},
  location    = {Edinburgh, Scotland},
  isbn        = {1-59593-323-9},
  doi         = {10.1145/1135777.1135901},
  description = {Detecting semantic cloaking on the web},
  abstract    = {By supplying different versions of a web page to search
                 engines and to browsers, a content provider attempts to
                 cloak the real content from the view of the search engine.
                 Semantic cloaking refers to differences in meaning between
                 pages which have the effect of deceiving search engine
                 ranking algorithms. In this paper, we propose an automated
                 two-step method to detect semantic cloaking pages based on
                 different copies of the same page downloaded by a web
                 crawler and a web browser. The first step is a filtering
                 step, which generates a candidate list of semantic cloaking
                 pages. In the second step, a classifier is used to detect
                 semantic cloaking pages from the candidates generated by the
                 filtering step. Experiments on manually labeled data sets
                 show that we can generate a classifier with a precision of
                 93\% and a recall of 85\%. We apply our approach to links
                 from the dmoz Open Directory Project and estimate that more
                 than 50,000 of these pages employ semantic cloaking.},
  biburl      = {http://www.bibsonomy.org/bibtex/2696889b2eeda8dd8391c9c1ec156634b/beate},
  keywords    = {spam web cloaking}
}

% NOTE(review): the crossref target DBLP:conf/airweb/2007 is not defined in
% the part of the file reviewed here -- confirm it exists later in the file,
% otherwise BibTeX reports a missing cross-reference.
@inproceedings{gan2007spam,
  title       = {Improving Web Spam Classifiers Using Link Structure (S)},
  author      = {Qingqing Gan and Torsten Suel},
  booktitle   = {AIRWeb},
  crossref    = {DBLP:conf/airweb/2007},
  year        = 2007,
  ee          = {http://airweb.cse.lehigh.edu/2007/papers/paper_124.pdf},
  bibsource   = {DBLP, http://dblp.uni-trier.de},
  description = {DBLP Record 'conf/airweb/GanS07'},
  biburl      = {http://www.bibsonomy.org/bibtex/24c12cfc5fe2fb8f5499847b025c5b4f6/beate},
  keywords    = {web spam}
}

% Parent proceedings for mishne2005blogspam. Keep it after that entry:
% classic BibTeX requires a crossref parent to follow its children.
@proceedings{DBLP:conf/airweb/2005,
  title     = {{AIRWeb} 2005, First International Workshop on Adversarial Information Retrieval on the Web, co-located with the {WWW} conference, {Chiba}, {Japan}, {May} 2005},
  booktitle = {AIRWeb},
  year      = 2005,
  bibsource = {DBLP, http://dblp.uni-trier.de},
  biburl    = {http://www.bibsonomy.org/bibtex/22a091b9f9abc9d77906e84ff2bae7af7/beate},
  keywords  = {imported}
}

@inproceedings{koutrika2007spam,
  title       = {Combating spam in tagging systems},
  address     = {New York, NY, USA},
  author      = {Georgia Koutrika and Frans Adjie Effendi and Zolt{\'a}n Gy{\"o}ngyi and Paul Heymann and Hector Garcia-Molina},
  booktitle   = {AIRWeb '07: Proceedings of the 3rd international workshop on Adversarial information retrieval on the web},
  pages       = {57--64},
  publisher   = {ACM},
  year        = 2007,
  url         = {http://portal.acm.org/citation.cfm?id=1244408.1244420},
  location    = {Banff, Alberta, Canada},
  isbn        = {978-1-59593-732-2},
  doi         = {10.1145/1244408.1244420},
  description = {Combating spam in tagging systems},
  biburl      = {http://www.bibsonomy.org/bibtex/2776b76b33d469e438b0e5f74fc7ec7f0/beate},
  keywords    = {tagging spam social-systems}
}

@book{han2000dm,
  title        = {Data Mining: Concepts and Techniques},
  author       = {Jiawei Han and Micheline Kamber},
  howpublished = {Hardcover},
  month        = sep,
  publisher    = {Morgan Kaufmann},
  year         = 2000,
  url          = {http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20&path=ASIN/1558604898},
  biburl       = {http://www.bibsonomy.org/bibtex/23b0fb591e098cfe8e2e7313173fbe406/beate},
  keywords     = {data-mining book}
}

@book{quinlan1993c45,
  title       = {{C4.5}: programs for machine learning},
  address     = {San Francisco, CA, USA},
  author      = {J. Ross Quinlan},
  publisher   = {Morgan Kaufmann Publishers Inc.},
  year        = 1993,
  url         = {http://portal.acm.org/citation.cfm?id=152181},
  isbn        = {1-55860-238-0},
  description = {C4.5: programs for machine learning},
  biburl      = {http://www.bibsonomy.org/bibtex/2c0e213c325fbfc0d8ee914255cc4ee24/beate},
  keywords    = {c4.5 machine-learning}
}

@inproceedings{androutsopoulos2000evaluation,
  title       = {An Evaluation of Naive {Bayesian} Anti-Spam Filtering},
  author      = {I. Androutsopoulos and J. Koutsias and K. Chandrinos and G. Paliouras and C. Spyropoulos},
  booktitle   = {Proc. of the workshop on Machine Learning in the New Information Age},
  year        = 2000,
  url         = {http://citeseer.ist.psu.edu/androutsopoulos00evaluation.html},
  description = {An Evaluation of Naive Bayesian Anti-Spam Filtering - Androutsopoulos, Koutsias, Chandrinos, Paliouras, Spyropoulos (ResearchIndex)},
  biburl      = {http://www.bibsonomy.org/bibtex/29ccd40141ba53aa810c9c5e6292da586/beate},
  keywords    = {survey spam bayes}
}

@article{heymann2007spam,
  title       = {Fighting Spam on Social Web Sites: A Survey of Approaches and Future Challenges},
  address     = {Piscataway, NJ, USA},
  author      = {Paul Heymann and Georgia Koutrika and Hector Garcia-Molina},
  journal     = {IEEE Internet Computing},
  number      = 6,
  pages       = {36--45},
  publisher   = {IEEE Educational Activities Department},
  volume      = 11,
  year        = 2007,
  url         = {http://portal.acm.org/citation.cfm?id=1304062.1304547&coll=GUIDE&dl=},
  issn        = {1089-7801},
  doi         = {10.1109/MIC.2007.125},
  description = {Fighting Spam on Social Web Sites},
  abstract    = {In recent years, social Web sites have become important
                 components of the Web. With their success, however, has come
                 an increasing flux of spam. If left unchecked, spam
                 threatens to undermine resource sharing, interactivity, and
                 openness. The authors survey three categories of potential
                 countermeasures: those based on detection, demotion, and
                 prevention. Although many of these countermeasures have been
                 proposed before for email and Web spam, the authors find
                 that their applicability to social Web sites differs. How
                 should we evaluate spam countermeasures for social Web
                 sites, and what future challenges might we face?},
  biburl      = {http://www.bibsonomy.org/bibtex/23e0a8be9fb6fff102e6aed13d1db22f7/beate},
  keywords    = {spam social-networks survey}
}