% Bibliography entries exported from BibSonomy (see the biburl fields).
% Layout: one entry per block, one field per line, names in "von Last, First" form.
% Non-ASCII characters are written as LaTeX escapes so classic BibTeX can sort them.

@inproceedings{Wu2008,
  author      = {Wu, Fei and Weld, Daniel S.},
  title       = {Automatically Refining the {Wikipedia} Infobox Ontology},
  booktitle   = {17th International World Wide Web Conference},
  address     = {Beijing, China},
  month       = apr,
  year        = 2008,
  url         = {http://www.cs.washington.edu/ai/iwp/publications/www08.pdf},
  description = {suggested by Joel Nothman },
  biburl      = {http://www.bibsonomy.org/bibtex/205420af272aac792039aa1b18540f9b4/brightbyte},
  keywords    = {ontology wikipedia knowledge-extraction}
}

% The doi field holds the bare DOI (no resolver prefix); the paper is a single page.
@inproceedings{Weaver2006,
  author      = {Weaver, Gabriel and Strickland, Barbara and Crane, Gregory},
  title       = {Quantifying the accuracy of relational statements in {Wikipedia}: a methodology},
  booktitle   = {JCDL '06: Proceedings of the 6th ACM/IEEE-CS joint conference on Digital libraries},
  pages       = {358},
  publisher   = {ACM},
  address     = {New York, NY, USA},
  year        = 2006,
  location    = {Chapel Hill, NC, USA},
  isbn        = {1-59593-354-9},
  doi         = {10.1145/1141753.1141853},
  description = {suggested by Joel Nothman },
  biburl      = {http://www.bibsonomy.org/bibtex/2f2307487990e4616af6af70ccb457968/brightbyte},
  keywords    = {knowledge-extraction wikipedia text-mining}
}

@inproceedings{Picca2007,
  author      = {Picca, Davide and Popescu, Adrian},
  title       = {Using {Wikipedia} and supersense tagging for semi-automatic complex taxonomy construction},
  booktitle   = {RANLP 2007, CALP workshop},
  address     = {Borovets, Bulgaria},
  year        = 2007,
  url         = {http://moromete.net/articles/picca_et_al_calp07_cr.pdf},
  description = {suggested by Joel Nothman },
  biburl      = {http://www.bibsonomy.org/bibtex/23f9d976f3ae6ded8dc32205e1aa7b87c/brightbyte},
  keywords    = {taxonomy wikipedia tagging}
}

@inproceedings{Minier2007,
  author      = {Minier, Zsolt and Bodo, Zalan and Csato, Lehel},
  title       = {{Wikipedia}-Based Kernels for Text Categorization},
  booktitle   = {International Symposium on Symbolic and Numeric Algorithms for Scientific Computing},
  pages       = {157--164},
  address     = {Timisoara, Romania},
  year        = 2007,
  url         = {http://ieeexplore.ieee.org/iel5/4438060/4438061/04438094.pdf?tp=&isnumber=&arnumber=4438094},
  description = {suggested by Joel Nothman },
  biburl      = {http://www.bibsonomy.org/bibtex/2ee237ff2c889d7d2fbe41d52f23edd66/brightbyte},
  keywords    = {categorization wikipedia}
}

@article{voss2007tfc,
  author   = {Voss, J.},
  title    = {{Tagging, Folksonomy \& Co-Renaissance of Manual Indexing?}},
  journal  = {Arxiv preprint cs/0701072},
  year     = 2007,
  biburl   = {http://www.bibsonomy.org/bibtex/2a36ce098a2d6488b0c35ec794737bf74/brightbyte},
  keywords = {indexing tagging wikipedia folksonomy}
}

% Conference paper, so typed as inproceedings with the venue in booktitle.
% NOTE(review): the export carried volume 16 / number 18, which appear to be the
% symposium dates (16-18) rather than a real volume and issue; dropped here.
@inproceedings{desilets2005wu,
  author    = {Desilets, A. and Paquet, S. and Vinson, N. G.},
  title     = {{Are wikis usable?}},
  booktitle = {International Symposium on Wikis: Proceedings of the 2005 international symposium on Wikis},
  pages     = {3--15},
  year      = 2005,
  url       = {http://iit-iti.nrc-cnrc.gc.ca/iit-publications-iti/docs/NRC-48272.pdf},
  biburl    = {http://www.bibsonomy.org/bibtex/2021c0c8356fa40d4aca7244318a2c265/brightbyte},
  keywords  = {usability education wikipedia}
}

% Conference paper, so typed as inproceedings with the venue in booktitle.
% NOTE(review): the export carried volume 21 / number 23, which appear to be the
% symposium dates (21-23) rather than a real volume and issue; dropped here.
@inproceedings{desilets2006tww,
  author    = {Desilets, A. and Gonzalez, L. and Paquet, S. and Stojanovic, M.},
  title     = {{Translation the Wiki way}},
  booktitle = {International Symposium on Wikis: Proceedings of the 2006 international symposium on Wikis},
  pages     = {19--32},
  year      = 2006,
  url       = {http://iit-iti.nrc-cnrc.gc.ca/iit-publications-iti/docs/NRC-48736.pdf},
  biburl    = {http://www.bibsonomy.org/bibtex/27f769ba2f167feccc0ec3d33f582661e/brightbyte},
  keywords  = {wikipedia translation}
}

% German title: nouns are brace-protected so sentence-casing styles keep their capitals.
@mastersthesis{kinzler2008th,
  author   = {Kinzler, Daniel},
  title    = {Automatischer {Aufbau} eines multilingualen {Thesaurus} durch {Extraktion} semantischer und lexikalischer {Relationen} aus der {Wikipedia}},
  school   = {Universit{\"a}t Leipzig},
  year     = 2008,
  note     = {also available at http://lips.informatik.uni-leipzig.de/pub/2008-4},
  url      = {http://brightbyte.de/DA/WikiWord.pdf},
  biburl   = {http://www.bibsonomy.org/bibtex/2e945505ed598b97b38ef8efe4da04994/brightbyte},
  keywords = {wikipedia thesis translation thesaurus relatedness named-entities}
}

@techreport{Alfaro2007atw,
  author      = {Adler, B. T. and Benterou, J. and Chatterjee, K. and de Alfaro, L. and Pye, I. and Raman, V.},
  title       = {Assigning Trust To {Wikipedia} Content},
  institution = {School of Engineering, University of California},
  number      = {UCSC-CRL-07-09},
  address     = {Santa Cruz, CA, USA},
  year        = 2007,
  url         = {http://www.soe.ucsc.edu/~luca/papers/07/trust-techrep.pdf},
  abstract    = {The Wikipedia is a collaborative encyclopedia: anyone can contribute to its articles simply by clicking on an ``edit'' button.
    The open nature of the Wikipedia has been key to its success, but has also created a challenge: how can readers form an informed opinion on its reliability?
    We propose a system that computes quantitative values of trust for the text in Wikipedia articles; these trust values provide an indication of text reliability.
    The system uses as input the revision history of each article, as well as information about the reputation of the contributing authors, as provided by a reputation system.
    The trust of a word in an article is computed on the basis of the reputation of the original author of the word, as well as the reputation of all authors who edited the text in proximity of the word.
    The algorithm computes word trust values that vary smoothly across the text; the trust values can be visualized using varying text-background colors.
    The algorithm ensures that all changes to an article text are reflected in the trust values, preventing surreptitious content changes.
    We have implemented the proposed system, and we have used it to compute and display the trust of the text of thousands of articles of the English Wikipedia.
    To validate our trust-computation algorithms, we show that text labeled as low-trust has a significantly higher probability of being edited in the future than text labeled as high-trust.
    Anecdotal evidence seems to corroborate this validation: in practice, readers find the trust information valuable. },
  biburl      = {http://www.bibsonomy.org/bibtex/2f0a6f5f09f02276c6090e894cba1779b/brightbyte},
  keywords    = {reputation annotation quality wikipedia}
}

@techreport{Alfaro2008rcdr,
  author      = {Chatterjee, K. and de Alfaro, L. and Pye, I.},
  title       = {Robust Content-Driven Reputation},
  institution = {School of Engineering, University of California},
  number      = {UCSC-SOE-08-09},
  address     = {Santa Cruz, CA, USA},
  year        = 2008,
  url         = {http://www.soe.ucsc.edu/~luca/papers/08/ucsc-soe-08-09.pdf},
  abstract    = {In content-driven reputation systems for collaborative content, users gain or lose reputation according to how their contributions fare: authors of long-lived contributions gain reputation, while authors of reverted contributions lose reputation.
    Existing content-driven systems are prone to Sybil attacks, in which multiple identities, controlled by the same person, perform coordinated actions to increase their reputation.
    We show that content-driven reputation systems can be made resistent to such attacks by taking advantage of the fact that the reputation increments and decrements depend on content modifications, which are visible to all.
    We present an algorithm for content-driven reputation that prevents a set of identities from increasing their maximum reputation without doing any useful work.
    A variation of the algorithm ensures that the reputation of each identity which performs only non-useful work decreases.
    Here, work is considered useful if it causes content to evolve in a direction that is consistent with the actions of high-reputation users.
    We argue that the content modifications that require no effort, such as the insertion or deletion of arbitrary text, are invariably non-useful.
    We prove a truthfullness result for the resulting system, stating that users who wish to perform a contribution do not gain by employing complex contribution schemes, compared to simply performing the contribution at once.
    In particular, splitting the contribution in multiple portions, or employing the coordinated actions of multiple identities, do not yield additional reputation.
    Taken together, these results indicate that content-driven systems can be made robust with respect to Sybil attacks. },
  biburl      = {http://www.bibsonomy.org/bibtex/2c1c9eb9efc1aa2297bfe214b3931d876/brightbyte},
  keywords    = {wikipedia reputation quality annotation}
}

@techreport{Alfaro2008mac,
  author      = {Adler, B. T. and de Alfaro, L. and Pye, I. and Raman, V.},
  title       = {Measuring Author Contributions to the {Wikipedia}},
  institution = {School of Engineering, University of California},
  number      = {UCSC-SOE-08-08},
  address     = {Santa Cruz, CA, USA},
  year        = 2008,
  url         = {http://www.soe.ucsc.edu/~luca/papers/08/ucsc-soe-08-08.pdf},
  abstract    = {We consider the problem of measuring user contributions to versioned, collaborative bodies of information, such as wikis.
    Measuring the contributions of individual authors can be used to divide revenue, to recognize merit, to award status promotions, and to choose the order of authors when citing the content.
    In the context of the Wikipedia, previous works on author contribution estimation have focused on two criteria: the total text created, and the total number of edits performed.
    We show that neither of these criteria work well: both techniques are vulnerable to manipulation, and the total-text criterion fails to reward people who polish or re-arrange the content.
    We consider and compare various alternative criteria that take into account the quality of a contribution, in addition to the quantity, and we analyze how the criteria differ in the way they rank authors according to their contributions.
    As an outcome of this study, we propose to adopt total edit longevity as a measure of author contribution.
    Edit longevity is resistant to simple attacks, since edits are counted towards an author's contribution only if other authors accept the contribution.
    Edit longevity equally rewards people who create content, and people who rearrange or polish the content.
    Finally, edit longevity distinguishes the people who contribute little (who have contribution close to zero) from spammers or vandals, whose contribution quickly grows negative. },
  biburl      = {http://www.bibsonomy.org/bibtex/2597bd830925a7ceb1e6dc6035ea4a276/brightbyte},
  keywords    = {reputation quality wikipedia annotation}
}

% NOTE(review): a mastersthesis entry requires a school field, which is not recorded
% here; the abstract calls the work a "report" - confirm the entry type and add
% school or institution accordingly.
@mastersthesis{miles06,
  author   = {Miles, Alistair},
  title    = {A Theory of Retrieval Using Structured Vocabularies},
  year     = 2006,
  url      = {http://isegserv.itd.rl.ac.uk/retrieval/},
  id       = {2295482},
  priority = {3},
  abstract = {A primary motivation for the development of the Semantic Web has been the need for effective information retrieval systems which may be realised through vocabulary control and the use of structured metadata.
    The technological framework of the Web (URI, HTTP, XML) and of the Semantic Web (RDF, OWL, SPARQL) provides a platform upon which distributed data and metadata applications may be constructed, but does not in itself provide any direct support for information retrieval applications per se.
    Widely applicable Semantic Web languages that extend this basic layer and provide generic support for retrieval applications, in addition to good practice guidelines and design patterns for developing such applications, are required.
    The ultimate purpose of this report is to develop a formal theory of retrieval using controlled vocabularies that have a simple and intuitive structure, to provide the necessary theoretical foundations for the development of Semantic Web languages and design patterns for distributed retrieval applications.
    The main body of this report is devoted to the articulation of such a theory.
    The theory is expressed formally through the use of mathematical notation, with the intention that this level of formality will provide the bridge between informal requirements specifications and the implementation of effective retrieval applications in computer systems.
    Specifically, a theory is developed to describe the ways in which a structured vocabulary may be used to construct an index over a collection of objects and then used to express queries which may be evaluated against an index to obtain a set of results.
    This theory is extended to consider ways in which both the precision and recall of retrieval strategies may be improved, through the use of expansion and ranking techniques and through ``coordination''.
    The problem of translating between controlled vocabularies is also considered.
    The theory attempts to formalise, unify and extend the traditional wisdom of the library sciences regarding the use of thesauri, classification schemes, subject heading systems, taxonomies and other types of structured vocabulary, so that proven techniques and methodologies may be transferred to a Semantic Web context.
    The recently chartered W3C Semantic Web Deployment Working Group has been charged with the development of the Simple Knowledge Organisation System (SKOS) to W3C Recommendation status.
    SKOS is a Semantic Web language specifically intended to support information retrieval applications using controlled vocabularies that have a relatively simple structure.
    A formal requirements specification is the first planned deliverable in the standardisation of SKOS.
    An immediate goal of this report is to provide a level of abstraction that can be used to perform a comparative analysis of use cases involving information retrieval systems that operate with structured vocabularies, so that the requirements of these systems with respect to Semantic Web languages such as SKOS may be clearly determined.
    Also, this report suggests ways in which the theory may be mapped to concrete language constructs and representation patterns in Semantic Web languages.
    In so doing it is hoped that the development of SKOS and similar languages may be grounded with sufficient rigour to ensure their wide applicability and consistent use.},
  biburl   = {http://www.bibsonomy.org/bibtex/2e157ee6010babd7cfd826aee0e25914d/brightbyte},
  keywords = {knowledge SKOS ontology information-retrieval vocabulary}
}

@inproceedings{biemann2004lim,
  author    = {Biemann, C. and Bordag, S. and Heyer, G. and Quasthoff, U. and Wolff, C.},
  title     = {Language-independent Methods for Compiling Monolingual Lexical Data},
  booktitle = {Proceedings of CicLING},
  pages     = {215--228},
  publisher = {Springer},
  year      = 2004,
  biburl    = {http://www.bibsonomy.org/bibtex/262e7f963642107022d388debcfd72c0e/brightbyte},
  keywords  = {wortschatz cooccurence}
}

% NOTE(review): article entries require a journal field and none is recorded here;
% presumably this is a conference paper - verify the venue and retype if so.
@article{craswell:esf,
  author   = {Craswell, N. and Hawking, D. and Robertson, S.},
  title    = {Effective Site Finding using Link Anchor Information},
  year     = 2001,
  url      = {http://terral.lsi.uned.es/WebMining/Tema3.B%FAsqueda/craswell2001.pdf},
  id       = {2157311},
  priority = {3},
  biburl   = {http://www.bibsonomy.org/bibtex/27c4a2ad4256210d164d140471a5b3154/brightbyte},
  keywords = {link-lining web}
}

% The abstract's apostrophes were mis-encoded in the export and are restored as ASCII.
@inproceedings{Jarmasz2003,
  author    = {Jarmasz, Mario and Szpakowicz, Stan},
  title     = {Roget's thesaurus and semantic similarity},
  booktitle = {Conference on Recent Advances in Natural Language Processing},
  pages     = {212--219},
  year      = 2003,
  url       = {http://www.site.uottawa.ca/~mjarmasz/pubs/jarmasz_roget_sim.pdf},
  timestamp = {2007.05.18},
  owner     = {Marco},
  abstract  = {We have implemented a system that measures semantic similarity using a computerized 1987 Roget's Thesaurus, and evaluated it by performing a few typical tests.
    We compare the results of these tests with those produced by WordNet-based similarity measures.
    One of the benchmarks is Miller and Charles' list of 30 noun pairs to which human judges had assigned similarity measures.
    We correlate these measures with those computed by several NLP systems.
    The 30 pairs can be traced back to Rubenstein and Goodenough's 65 pairs, which we have also studied.
    Our Roget's-based system gets correlations of .878 for the smaller and .818 for the larger list of noun pairs; this is quite close to the .885 that Resnik obtained when he employed humans to replicate the Miller and Charles experiment.
    We further evaluate our measure by using Roget's and WordNet to answer 80 TOEFL, 50 ESL and 300 Reader's Digest questions: the correct synonym must be selected amongst a group of four words.
    Our system gets 78.75\%, 82.00\% and 74.33\% of the questions respectively.},
  biburl    = {http://www.bibsonomy.org/bibtex/2acde39a427ef0e7501f07e8b067a88f0/brightbyte},
  keywords  = {nlp thesaurus knowledge ontology}
}

% NOTE(review): an inbook entry requires publisher and chapter or pages, none of which
% are recorded; the series field looks like the (truncated) title of the containing
% volume - confirm and move it to booktitle if so.
@inbook{jarmasz2001rst,
  author   = {Jarmasz, M. and Szpakowicz, S.},
  title    = {{Roget's Thesaurus as an Electronic Lexical Knowledge Base}},
  editor   = {Gruszczynski, W. and Kopcinska, D.},
  series   = {NIE BEZ ZNACZENIA. Prace ofiarowane Profesorowi Zygmuntowi Saloniemu z okazji},
  year     = 2001,
  url      = {http://www.site.uottawa.ca/~mjarmasz/pubs/TR-2000-02.pdf},
  biburl   = {http://www.bibsonomy.org/bibtex/205c46f0c52ab405ffdd48a2d7ec7a734/brightbyte},
  keywords = {thesaurus nlp knowledge}
}

% The exported location value was cut off after its first two letters with an unclosed
% brace, which made the parser swallow the following fields and entries.
% NOTE(review): completed as the publisher's city - confirm.
@book{stock2007,
  author    = {Stock, Wolfgang G.},
  title     = {Information Retrieval},
  publisher = {Oldenbourg},
  year      = 2007,
  location  = {M{\"u}nchen},
  isbn      = {3-486-58172-4, 978-3-486-58172-0},
  language  = {ger},
  biburl    = {http://www.bibsonomy.org/bibtex/2f36a2d208aa4bf661ac0a826f271762f/brightbyte},
  keywords  = {thesaurus search information-retrieval}
}

@inproceedings{vanmulligen2006oow,
  author    = {van Mulligen, E. M. and M{\"o}ller, E. and Roes, P. J. and Weeber, M. and Meijssen, G. and Chichester, C. and Mons, B.},
  title     = {{An Online Ontology: WiktionaryZ}},
  booktitle = {Proceedings of Knowledge Representation in Medicine (KR-MED) 2006},
  year      = 2006,
  biburl    = {http://www.bibsonomy.org/bibtex/2c42fee8fe3ee95753596a75534ce0c45/brightbyte},
  keywords  = {thesaurus dictionary wikidata omegawiki wiki}
}

% The editor is a committee, so the name is double-braced to stop BibTeX from
% splitting it into first/last name parts.
@book{asb2003,
  editor    = {{Ausschuss f{\"u}r Systematik beim Verband der Bibliotheken des Landes Nordrhein-Westfalen}},
  title     = {Allgemeine Systematik f{\"u}r {\"o}ffentliche Bibliotheken (ASB), Gliederung und Alphabetisches Schlagwortregister},
  publisher = {Bock und Herchen},
  year      = 2003,
  biburl    = {http://www.bibsonomy.org/bibtex/2d2e47233bc13c35187450136920834ae/brightbyte},
  keywords  = {classification standard indexing taxonomy}
}

% The exported author list ended in a dangling "and", i.e. it was truncated; the
% remaining authors are represented by "and others".
@article{coulter1998ccs,
  author   = {Coulter, N. and French, J. and Glinert, E. and Horton, T. and Mead, N. and Rada, R. and Ralston, A. and Rodkin, C. and Rous, B. and Tucker, A. and others},
  title    = {{Computing Classification System 1998: Current Status and Future Maintenance Report of the CCS Update Committee}},
  journal  = {Computing Reviews},
  pages    = 1,
  year     = 1998,
  biburl   = {http://www.bibsonomy.org/bibtex/26d48779b2af30bb8f619d1ffe9c65409/brightbyte},
  keywords = {classification taxonomy standard indexing}
}