<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:burst="http://xmlns.com/burst/0.1/" xmlns:xsd="http://www.w3.org/2001/XMLSchema#" xmlns="http://purl.org/rss/1.0/" xmlns:admin="http://webns.net/mvcb/" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:swrc="http://swrc.ontoware.org/ontology#" xmlns:cc="http://web.resource.org/cc/"><channel rdf:about="http://www.bibsonomy.org/user/diego_ma/web"><title>BibSonomy publications for /user/diego_ma/web</title><link>http://www.bibsonomy.org/burst/user/diego_ma/web</link><description>BibSonomy RSS feed for /user/diego_ma/web</description><dc:date>2010-03-22T16:48:57+01:00</dc:date><items><rdf:Seq><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/22d2ef43fa11d022366e74f02c64fff47/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/2c3eaffbd902a79f8f6d9d650b8b9be79/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/24db931ca8fe9d9c95bfd281d04c57a98/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/200ba496f53767b92d5965db71eeea8bf/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/2e9031eff1c301c559385707d6b4d8218/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/2b1a68eb0d57f9a5d31c547fe085e078b/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/20ac6c3623a4de9e4d7f00d1b7277c82f/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/23f5fcd345dcf7f048f8be4f31d476300/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/2ed9551b7e709321bca9ef721bbb1f7f8/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/2c522d6982d34510925f7abbccfb29e14/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/2717277c9cc94071c949bd308e3140300/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/2c08fc13dc5d0b352f0e2c542f2556688/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/2db4a780c41c5d024c5e4aa7978c12256/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/2e8a3cc2f0ae646c5b40fff78e5178c94/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/24f4f5fdc33c1a9ea9bf44140c460e04b/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/2fe424bf3422feb1aefbe2ea9444a12e2/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/2dce5528d84d977afeba965b58738f219/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/2c056611effc0d18aae71a6d535ff6c5a/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/28669140e43a903fa1038b3105509d36a/diego_ma"/><rdf:li rdf:resource="http://www.bibsonomy.org/bibtex/2605a293303beef0c307dc8da107bcb98/diego_ma"/></rdf:Seq></items></channel><item rdf:about="http://www.bibsonomy.org/bibtex/22d2ef43fa11d022366e74f02c64fff47/diego_ma"><title>Googleology is Bad Science</title><link>http://www.bibsonomy.org/bibtex/22d2ef43fa11d022366e74f02c64fff47/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2009-07-01T05:50:30+02:00</dc:date><dc:subject>corpora googleology web web_data_extraction </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;A. &lt;a href=&#034;http://www.bibsonomy.org/author/Kilgarriff&#034;&gt;Kilgarriff&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Computational Linguistics&lt;/em&gt;&lt;em&gt;33(1):147-151&lt;/em&gt;(&lt;em&gt;2007&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/corpora"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/googleology"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web_data_extraction"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/22d2ef43fa11d022366e74f02c64fff47/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/22d2ef43fa11d022366e74f02c64fff47/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#Article"/><owl:sameAs rdf:resource="http://www.kilgarriff.co.uk/Publications/2007-K-CL-Googleology.pdf"/><swrc:date>Wed Jul 01 05:50:30 CEST 2009</swrc:date><swrc:journal>Computational Linguistics</swrc:journal><swrc:number>1</swrc:number><swrc:pages>147-151</swrc:pages><swrc:title>Googleology is Bad Science</swrc:title><swrc:volume>33</swrc:volume><swrc:year>2007</swrc:year><swrc:keywords>corpora googleology web web_data_extraction </swrc:keywords><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="A. Kilgarriff"/></rdf:_1></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/2c3eaffbd902a79f8f6d9d650b8b9be79/diego_ma"><title>Using Web Data for Linguistic Purposes</title><link>http://www.bibsonomy.org/bibtex/2c3eaffbd902a79f8f6d9d650b8b9be79/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2009-06-18T11:39:22+02:00</dc:date><dc:subject>corpora web </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Anke &lt;a href=&#034;http://www.bibsonomy.org/author/L{\&amp;#034;udeling}&#034;&gt;L&amp;#252;deling&lt;/a&gt;  and Stefan &lt;a href=&#034;http://www.bibsonomy.org/author/Evert&#034;&gt;Evert&lt;/a&gt;  and Baroni, &lt;a href=&#034;http://www.bibsonomy.org/author/Marco&#034;&gt;Marco&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Corpus Linguistics and the Web, &lt;/em&gt;&lt;em&gt;Rodopi, &lt;/em&gt;&lt;em&gt;Amsterdam, &lt;/em&gt;(&lt;em&gt;2007&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/corpora"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/2c3eaffbd902a79f8f6d9d650b8b9be79/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/2c3eaffbd902a79f8f6d9d650b8b9be79/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#InCollection"/><owl:sameAs rdf:resource="http://clic.cimec.unitn.it/marco/research.html"/><swrc:date>Thu Jun 18 11:39:22 CEST 2009</swrc:date><swrc:address>Amsterdam</swrc:address><swrc:booktitle>Corpus Linguistics and the {Web}</swrc:booktitle><swrc:pages>7-24</swrc:pages><swrc:publisher><swrc:Organization swrc:name="Rodopi"/></swrc:publisher><swrc:title>Using Web Data for Linguistic Purposes</swrc:title><swrc:year>2007</swrc:year><swrc:keywords>corpora web </swrc:keywords><swrc:abstract>The  world  wide  web  is  a  mine  of  language  data  of  unprecedented  richness  and  ease  of access  (Kilgarriff  and  Grefenstette  2003).  A  growing  body  of  studies  has  shown  that simple algorithms using web-based evidence are successful at many linguistic tasks, often outperforming sophisticated methods based on smaller but more controlled data sources (cf. Turney 2001; Keller and Lapata 2003).   Most current internet-based linguistic studies access the web through a commercial search engine. For example, some researchers rely on frequency estimates (number of hits) reported by engines (e.g. Turney 2001). Others use a search engine to find relevant pages,  and  then  retrieve  the  pages  to  build  a  corpus  (e.g.  Ghani  and  Mladenic  2001; Baroni and Bernardini 2004).   In  this  study,  we  first  survey  the  state  of  the  art,  discussing  the  advantages  and limits of various approaches, and in particular the inherent limitations of depending on a commercial search engine as a data source. We then focus on what we believe to be some of  the  core  issues  of  using  the  web  to  do  linguistics.  Some  of  these  issues  concern  the quality and nature of data we can obtain from the internet (What languages, genres and styles  are  represented  on  the  web?),  others  pertain  to  data  extraction,  encoding  and preservation  (How  can  we  ensure  data  stability?  How  can  web  data  be  marked  up  and categorized? How can we identify duplicate pages and near duplicates?), and others yet concern  quantitative  aspects  (Which  statistical quantities  can  be  reliably  estimated  from web data, and how much web data do we need? What are the possible pitfalls due to the massive presence of duplicates, mixed-language pages?). All points are illustrated through concrete examples from English, German and Italian web corpora.</swrc:abstract><swrc:hasExtraField><swrc:Field swrc:value="Bibsonomy (June 2009)" swrc:key="library"/></swrc:hasExtraField><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Anke L{\&#034;udeling}"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Stefan Evert"/></rdf:_2><rdf:_3><swrc:Person swrc:name="Baroni, Marco"/></rdf:_3></rdf:Seq></swrc:author><swrc:editor><rdf:Seq><rdf:_1><swrc:Person swrc:name="Marianne Hundt"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Nadjia Nesselhauf"/></rdf:_2><rdf:_3><swrc:Person swrc:name="Caroline Biewer"/></rdf:_3></rdf:Seq></swrc:editor></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/24db931ca8fe9d9c95bfd281d04c57a98/diego_ma"><title>RDF Storage and Retrieval Systems</title><link>http://www.bibsonomy.org/bibtex/24db931ca8fe9d9c95bfd281d04c57a98/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2008-09-02T03:52:42+02:00</dc:date><dc:subject>semantic_web inf_retrieval RDF web </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Alice &lt;a href=&#034;http://www.bibsonomy.org/author/Hertel&#034;&gt;Hertel&lt;/a&gt;  and Jeen &lt;a href=&#034;http://www.bibsonomy.org/author/Broekstra&#034;&gt;Broekstra&lt;/a&gt;  and Heiner &lt;a href=&#034;http://www.bibsonomy.org/author/Stuckenschmidt&#034;&gt;Stuckenschmidt&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;On-line, &lt;/em&gt;(&lt;em&gt;2008&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/semantic_web"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/inf_retrieval"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/RDF"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/24db931ca8fe9d9c95bfd281d04c57a98/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/24db931ca8fe9d9c95bfd281d04c57a98/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#Misc"/><owl:sameAs rdf:resource="http://ki.informatik.uni-mannheim.de/fileadmin/publication/Hertel08RDFStorage.pdf"/><swrc:date>Tue Sep 02 03:52:42 CEST 2008</swrc:date><swrc:howpublished>On-line</swrc:howpublished><swrc:title>{RDF} Storage and Retrieval Systems</swrc:title><swrc:year>2008</swrc:year><swrc:keywords>semantic_web inf_retrieval RDF web </swrc:keywords><swrc:abstract>Ontologies are often used to improve data access. For this purpose, existing data has to be linked to an ontology and appropriate access mechanisms have to be provided. In this chapter, we review RDF storage and retrieval technologies as a common approach for accessing ontology-based data. We discuss different storage models, typical functionalities of RDF middleware such as data model support and reasoning capabilities and RDF query languages with a special focus on SPARQL as an emerging standard. We also discuss some trends such as support for expressive ontology and rule languages.</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Alice Hertel"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Jeen Broekstra"/></rdf:_2><rdf:_3><swrc:Person swrc:name="Heiner Stuckenschmidt"/></rdf:_3></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/200ba496f53767b92d5965db71eeea8bf/diego_ma"><title>The Google Similarity Distance</title><link>http://www.bibsonomy.org/bibtex/200ba496f53767b92d5965db71eeea8bf/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2008-02-14T07:15:55+01:00</dc:date><dc:subject>semantic_closeness web </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Rudi &lt;a href=&#034;http://www.bibsonomy.org/author/Cilibrasi&#034;&gt;Cilibrasi&lt;/a&gt;  and Paul M. B. &lt;a href=&#034;http://www.bibsonomy.org/author/Vitanyi&#034;&gt;Vitanyi&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;IEEE Transactions on Knowledge and Data Engineering&lt;/em&gt;(&lt;em&gt;2007&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/semantic_closeness"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/200ba496f53767b92d5965db71eeea8bf/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/200ba496f53767b92d5965db71eeea8bf/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#Article"/><owl:sameAs rdf:resource="http://www.citebase.org/abstract?id=oai:arXiv.org:cs/0412098"/><swrc:date>Thu Feb 14 07:15:55 CET 2008</swrc:date><swrc:journal>IEEE Transactions on Knowledge and Data Engineering</swrc:journal><swrc:pages>370</swrc:pages><swrc:title>The Google Similarity Distance</swrc:title><swrc:volume>19</swrc:volume><swrc:year>2007</swrc:year><swrc:keywords>semantic_closeness web </swrc:keywords><swrc:abstract> Words and phrases acquire meaning from the way they are used in society, from their relative semantics to other words and phrases. For computers the equivalent of `society&#039; is `database,&#039; and the equivalent of `use&#039; is `way to search the database.&#039; We present a new theory of similarity between words and phrases based on information distance and Kolmogorov complexity. To fix thoughts we use the world-wide-web as database, and Google as search engine. The method is also applicable to other search engines and databases. This theory is then applied to construct a method to automatically extract similarity, the Google similarity distance, of words and phrases from the world-wide-web using Google page counts. The world-wide-web is the largest database on earth, and the context information entered by millions of independent users averages out to provide automatic semantics of useful quality. We give applications in hierarchical clustering, classification, and language translation. We give examples to distinguish between colors and numbers, cluster names of paintings by 17th century Dutch masters and names of books by English novelists, the ability to understand emergencies, and primes, and we demonstrate the ability to do a simple automatic English-Spanish translation. Finally, we use the WordNet database as an objective baseline against which to judge the performance of our method. We conduct a massive randomized trial in binary classification using support vector machines to learn categories based on our Google distance, resulting in an a mean agreement of 87% with the expert crafted WordNet categories.</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Rudi Cilibrasi"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Paul M. B. Vitanyi"/></rdf:_2></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/2e9031eff1c301c559385707d6b4d8218/diego_ma"><title>A Web-based Question Answering System</title><link>http://www.bibsonomy.org/bibtex/2e9031eff1c301c559385707d6b4d8218/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:48:41+01:00</dc:date><dc:subject>web question_answering </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Dell &lt;a href=&#034;http://www.bibsonomy.org/author/Zhang&#034;&gt;Zhang&lt;/a&gt;  and Wee Sun &lt;a href=&#034;http://www.bibsonomy.org/author/Lee&#034;&gt;Lee&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Proc. SMA Annual Symposium 2003, &lt;/em&gt;&lt;em&gt;Singapore, &lt;/em&gt;(&lt;em&gt;2003&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/question_answering"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/2e9031eff1c301c559385707d6b4d8218/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/2e9031eff1c301c559385707d6b4d8218/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#InProceedings"/><owl:sameAs rdf:resource="http://www.comp.nus.edu.sg/~smadellz/publications/publications.html"/><swrc:date>Fri Dec 14 02:48:41 CET 2007</swrc:date><swrc:address>Singapore</swrc:address><swrc:booktitle>Proc. SMA Annual Symposium 2003</swrc:booktitle><swrc:title>A Web-based Question Answering System</swrc:title><swrc:year>2003</swrc:year><swrc:keywords>web question_answering </swrc:keywords><swrc:abstract>The Web is apparently an ideal source of answers to a large variety of questions, due to the tremendous amount of information available online. This paper describes a Web-based question answering system LAMP, which is publicly accessible. A particular characteristic of this system is that it only takes advantage of the snippets in the search results returned by a search engine like Google. We think such ``snippet-tolerant&#039;&#039; property is important for an online question answering system to be practical, because it is time-consuming to download and analyze the original web documents. The performance of LAMP is comparable to the best state-of-the-art question answering systems.</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Dell Zhang"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Wee Sun Lee"/></rdf:_2></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/2b1a68eb0d57f9a5d31c547fe085e078b/diego_ma"><title>wEBMT: Developing and Validating an Example-Based Machine Translation System Using the World Wide Web</title><link>http://www.bibsonomy.org/bibtex/2b1a68eb0d57f9a5d31c547fe085e078b/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:48:00+01:00</dc:date><dc:subject>machine_translation web </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Andy &lt;a href=&#034;http://www.bibsonomy.org/author/Way&#034;&gt;Way&lt;/a&gt;  and Nano &lt;a href=&#034;http://www.bibsonomy.org/author/Gough&#034;&gt;Gough&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Computational Linguistics&lt;/em&gt;&lt;em&gt;29(3):421-457&lt;/em&gt;(&lt;em&gt;2003&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/machine_translation"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/2b1a68eb0d57f9a5d31c547fe085e078b/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/2b1a68eb0d57f9a5d31c547fe085e078b/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#Article"/><swrc:date>Fri Dec 14 02:48:00 CET 2007</swrc:date><swrc:journal>Computational Linguistics</swrc:journal><swrc:number>3</swrc:number><swrc:pages>421-457</swrc:pages><swrc:title>\emph{wEBMT}: Developing and Validating an Example-Based Machine Translation System Using the World Wide Web</swrc:title><swrc:volume>29</swrc:volume><swrc:year>2003</swrc:year><swrc:keywords>machine_translation web </swrc:keywords><swrc:abstract>We have developed an example-based machine translation (EBMT) system that uses the World Wide Web for two different purposes: First, we populate the system&#039;s memory with translations gathered from rule-based MT systems located on the Web. The source strings input to these systems were extracted automatically from an extremely small subset of the rule types in the Penn-II Treebank. In subsequent stages, the &lt;source,target&gt; translation pairs obtained are automatically transformed into a series of resources that render the translation more successful...</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Andy Way"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Nano Gough"/></rdf:_2></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/20ac6c3623a4de9e4d7f00d1b7277c82f/diego_ma"><title>Scaling Web-based Acquisition of Entailment Relations</title><link>http://www.bibsonomy.org/bibtex/20ac6c3623a4de9e4d7f00d1b7277c82f/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:47:13+01:00</dc:date><dc:subject>entailment web resources </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Idan &lt;a href=&#034;http://www.bibsonomy.org/author/Szpektor&#034;&gt;Szpektor&lt;/a&gt;  and Hristo &lt;a href=&#034;http://www.bibsonomy.org/author/Tanev&#034;&gt;Tanev&lt;/a&gt;  and Ido &lt;a href=&#034;http://www.bibsonomy.org/author/Dagan&#034;&gt;Dagan&lt;/a&gt;  and Bonaventura &lt;a href=&#034;http://www.bibsonomy.org/author/Coppola&#034;&gt;Coppola&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Proc. Empirical Methods in Natural Language Processing EMNLP, &lt;/em&gt;&lt;em&gt;Barcelona, &lt;/em&gt;(&lt;em&gt;2004&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/entailment"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/resources"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/20ac6c3623a4de9e4d7f00d1b7277c82f/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/20ac6c3623a4de9e4d7f00d1b7277c82f/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#InProceedings"/><owl:sameAs rdf:resource="http://eprints.pascal-network.org/archive/00000797/"/><swrc:date>Fri Dec 14 02:47:13 CET 2007</swrc:date><swrc:address>Barcelona</swrc:address><swrc:booktitle>Proc. Empirical Methods in Natural Language Processing (EMNLP)</swrc:booktitle><swrc:title>Scaling Web-based Acquisition of Entailment Relations</swrc:title><swrc:year>2004</swrc:year><swrc:keywords>entailment web resources </swrc:keywords><swrc:abstract>Paraphrase recognition is a critical step for natural language interpretation. Accordingly, many NLP applications would benefit from high coverage knowledge bases of paraphrases. However, the scalability of state-of-the-art paraphrase acquisition approaches is still limited. We present a fully unsupervised learning algorithm for Web-based extraction of entailment relations, an extended model of paraphrases. We focus on increased scalability and generality with respect to prior work, eventually aiming at a full scale knowledge base. Our current implementation of the algorithm takes as its input a verb lexicon and for each verb searches the Web for related syntactic entailment templates. Experiments show promising results with respect to the ultimate goal, achieving much better scalability than prior Web-based methods.</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Idan Szpektor"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Hristo Tanev"/></rdf:_2><rdf:_3><swrc:Person swrc:name="Ido Dagan"/></rdf:_3><rdf:_4><swrc:Person swrc:name="Bonaventura Coppola"/></rdf:_4></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/23f5fcd345dcf7f048f8be4f31d476300/diego_ma"><title>The Intelligent Surfer: Probabilistic Combination of Link and Content Information in PageRank</title><link>http://www.bibsonomy.org/bibtex/23f5fcd345dcf7f048f8be4f31d476300/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:45:44+01:00</dc:date><dc:subject>inf_retrieval web </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Matthew &lt;a href=&#034;http://www.bibsonomy.org/author/Richardson&#034;&gt;Richardson&lt;/a&gt;  and Pedro &lt;a href=&#034;http://www.bibsonomy.org/author/Domingos&#034;&gt;Domingos&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Advances in Neural Information Processing Systems&lt;/em&gt;(&lt;em&gt;2002&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/inf_retrieval"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/23f5fcd345dcf7f048f8be4f31d476300/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/23f5fcd345dcf7f048f8be4f31d476300/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#Article"/><swrc:date>Fri Dec 14 02:45:44 CET 2007</swrc:date><swrc:journal>Advances in Neural Information Processing Systems</swrc:journal><swrc:pages>1441-1448</swrc:pages><swrc:title>The Intelligent Surfer: Probabilistic Combination of Link and Content Information in PageRank</swrc:title><swrc:volume>14</swrc:volume><swrc:year>2002</swrc:year><swrc:keywords>inf_retrieval web </swrc:keywords><swrc:abstract>The PageRank algorithm, used in the Google search engine, greatly improves the results of Web search by taking into account the link structure of the Web. PageRank assigns to a page a score proportional to the number of times a random surfer would visit that page, if it surfed indefinitely from page to page, following all outlinks from a page with equal probability. We propose to improve PageRank by using a more intelligent surfer, one that is guided by a probabilistic model of the relevance of a page to a query. Efficient execution of our algorithm at query time is made possible by precomputing at crawl time (and thus once for all queries) the necessary terms. Experiments on two large subsets of the Web indicate that our algorithm significantly outperforms PageRank in the (human-rated) quality of the pages returned, while remaining efficient enough to be used in today’s large search engines.</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Matthew Richardson"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Pedro Domingos"/></rdf:_2></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/2ed9551b7e709321bca9ef721bbb1f7f8/diego_ma"><title>The Web as a Parallel Corpus</title><link>http://www.bibsonomy.org/bibtex/2ed9551b7e709321bca9ef721bbb1f7f8/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:45:40+01:00</dc:date><dc:subject>web machine_translation </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Philip &lt;a href=&#034;http://www.bibsonomy.org/author/Resnik&#034;&gt;Resnik&lt;/a&gt;  and Noah A. &lt;a href=&#034;http://www.bibsonomy.org/author/Smith&#034;&gt;Smith&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Computational Linguistics&lt;/em&gt;&lt;em&gt;29(3):349-380&lt;/em&gt;(&lt;em&gt;2003&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/machine_translation"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/2ed9551b7e709321bca9ef721bbb1f7f8/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/2ed9551b7e709321bca9ef721bbb1f7f8/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#Article"/><swrc:date>Fri Dec 14 02:45:40 CET 2007</swrc:date><swrc:journal>Computational Linguistics</swrc:journal><swrc:number>3</swrc:number><swrc:pages>349-380</swrc:pages><swrc:title>The Web as a Parallel Corpus</swrc:title><swrc:volume>29</swrc:volume><swrc:year>2003</swrc:year><swrc:keywords>web machine_translation </swrc:keywords><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Philip Resnik"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Noah A. Smith"/></rdf:_2></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/2c522d6982d34510925f7abbccfb29e14/diego_ma"><title>HT06, tagging paper, taxonomy, Flickr, academic article, to read</title><link>http://www.bibsonomy.org/bibtex/2c522d6982d34510925f7abbccfb29e14/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:43:00+01:00</dc:date><dc:subject>web ontology folksonomy </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Cameron &lt;a href=&#034;http://www.bibsonomy.org/author/Marlow&#034;&gt;Marlow&lt;/a&gt;  and Mor &lt;a href=&#034;http://www.bibsonomy.org/author/Naaman&#034;&gt;Naaman&lt;/a&gt;  and Danah &lt;a href=&#034;http://www.bibsonomy.org/author/Boyd&#034;&gt;Boyd&lt;/a&gt;  and Marc &lt;a href=&#034;http://www.bibsonomy.org/author/Davis&#034;&gt;Davis&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;HYPERTEXT &#039;06: Proceedings of the seventeenth conference on Hypertext and hypermedia, &lt;/em&gt;&lt;em&gt;page 31--40. &lt;/em&gt;&lt;em&gt;New York, NY, USA, &lt;/em&gt;&lt;em&gt;ACM Press, &lt;/em&gt;(&lt;em&gt;2006&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/ontology"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/folksonomy"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/2c522d6982d34510925f7abbccfb29e14/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/2c522d6982d34510925f7abbccfb29e14/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#InProceedings"/><owl:sameAs rdf:resource="http://portal.acm.org/citation.cfm?id=1149949"/><swrc:date>Fri Dec 14 02:43:00 CET 2007</swrc:date><swrc:address>New York, NY, USA</swrc:address><swrc:booktitle>HYPERTEXT &#039;06: Proceedings of the seventeenth conference on Hypertext and hypermedia</swrc:booktitle><swrc:pages>31--40</swrc:pages><swrc:publisher><swrc:Organization swrc:name="ACM Press"/></swrc:publisher><swrc:title>HT06, tagging paper, taxonomy, Flickr, academic article, to read</swrc:title><swrc:year>2006</swrc:year><swrc:keywords>web ontology folksonomy </swrc:keywords><swrc:abstract>In recent years, tagging systems have become increasingly popular. These systems enable users to add keywords (i.e., &#034;tags&#034;) to Internet resources (e.g., web pages, images, videos) without relying on a controlled vocabulary. Tagging systems have the potential to improve search, spam detection, reputation systems, and personal organization while introducing new modalities of social communication and opportunities for data mining. This potential is largely due to the social structure that underlies many of the current systems.Despite the rapid expansion of applications that support tagging of resources, tagging systems are still not well studied or understood. In this paper, we provide a short description of the academic related work to date. We offer a model of tagging systems, specifically in the context of web-based systems, to help us illustrate the possible benefits of these tools. Since many such systems already exist, we provide a taxonomy of tagging systems to help inform their analysis and design, and thus enable researchers to frame and compare evidence for the sustainability of such systems. We also provide a simple taxonomy of incentives and contribution models to inform potential evaluative frameworks. While this work does not present comprehensive empirical results, we present a preliminary study of the photo-sharing and tagging system Flickr to demonstrate our model and explore some of the issues in one sample system. This analysis helps us outline and motivate possible future directions of research in tagging systems.</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Cameron Marlow"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Mor Naaman"/></rdf:_2><rdf:_3><swrc:Person swrc:name="Danah Boyd"/></rdf:_3><rdf:_4><swrc:Person swrc:name="Marc Davis"/></rdf:_4></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/2717277c9cc94071c949bd308e3140300/diego_ma"><title>Using the Web for Nominal Anaphora Resolution</title><link>http://www.bibsonomy.org/bibtex/2717277c9cc94071c949bd308e3140300/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:42:59+01:00</dc:date><dc:subject>anaphora web </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Katja &lt;a href=&#034;http://www.bibsonomy.org/author/Markert&#034;&gt;Markert&lt;/a&gt;  and Malvina &lt;a href=&#034;http://www.bibsonomy.org/author/Nissim&#034;&gt;Nissim&lt;/a&gt;  and Natalia N. &lt;a href=&#034;http://www.bibsonomy.org/author/Modjeska&#034;&gt;Modjeska&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Proc. EACL Workshop on the Computational Treatment of Anaphora, &lt;/em&gt;&lt;em&gt;Budapest, Hungary, &lt;/em&gt;(&lt;em&gt;2003&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/anaphora"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/2717277c9cc94071c949bd308e3140300/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/2717277c9cc94071c949bd308e3140300/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#InProceedings"/><owl:sameAs rdf:resource="http://www.comp.leeds.ac.uk/markert/Papers/"/><swrc:date>Fri Dec 14 02:42:59 CET 2007</swrc:date><swrc:address>Budapest, Hungary</swrc:address><swrc:booktitle>Proc. EACL Workshop on the Computational Treatment of Anaphora</swrc:booktitle><swrc:title>Using the Web for Nominal Anaphora Resolution</swrc:title><swrc:year>2003</swrc:year><swrc:keywords>anaphora web </swrc:keywords><swrc:abstract>We present a novel method for resolving non-pronominal anaphora. Instead of using handcrafted lexical resources, we search the Web with shallow patterns which can be predetermined for the type of anaphoric phenomenon. In experiments for other-anaphora and bridging, our shallow, almost knowledge-free and unsupervised method achieves state-ofthe-art results.</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Katja Markert"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Malvina Nissim"/></rdf:_2><rdf:_3><swrc:Person swrc:name="Natalia N. Modjeska"/></rdf:_3></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/2c08fc13dc5d0b352f0e2c542f2556688/diego_ma"><title>Is is the Right Answer? Exploiting web redundancy for answer validation</title><link>http://www.bibsonomy.org/bibtex/2c08fc13dc5d0b352f0e2c542f2556688/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:42:46+01:00</dc:date><dc:subject>web question_answering </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Bernardo &lt;a href=&#034;http://www.bibsonomy.org/author/Magnini&#034;&gt;Magnini&lt;/a&gt;  and Matteo &lt;a href=&#034;http://www.bibsonomy.org/author/Negri&#034;&gt;Negri&lt;/a&gt;  and Roberto &lt;a href=&#034;http://www.bibsonomy.org/author/Prevete&#034;&gt;Prevete&lt;/a&gt;  and Hristo &lt;a href=&#034;http://www.bibsonomy.org/author/Tanev&#034;&gt;Tanev&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Proceedings ACL 2002, &lt;/em&gt;&lt;em&gt;page 425-432. &lt;/em&gt;(&lt;em&gt;2002&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/question_answering"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/2c08fc13dc5d0b352f0e2c542f2556688/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/2c08fc13dc5d0b352f0e2c542f2556688/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#InProceedings"/><owl:sameAs rdf:resource="http://www.aclweb.org/anthology/P02-1054.pdf"/><swrc:date>Fri Dec 14 02:42:46 CET 2007</swrc:date><swrc:booktitle>Proceedings ACL 2002</swrc:booktitle><swrc:pages>425-432</swrc:pages><swrc:title>Is is the Right Answer? Exploiting web redundancy for answer validation</swrc:title><swrc:year>2002</swrc:year><swrc:keywords>web question_answering </swrc:keywords><swrc:abstract>Answer Validation is an emerging topic in Question Answering, where open domain systems are often required to rank huge amounts of candidate answers. We present a novel approach to answer validation based on the intuition that the amount of implicit knowledge which connects an answer to a question can be quantitatively estimated by exploiting the redundancy of Web information. Experiments carried out on the TREC-2001 judged-answer collection show that the approach achieves a high level of performance (i.e. 81% success rate). The simplicity and the efficiency of this approach make it suitable to be used as a module in Question Answering systems.</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Bernardo Magnini"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Matteo Negri"/></rdf:_2><rdf:_3><swrc:Person swrc:name="Roberto Prevete"/></rdf:_3><rdf:_4><swrc:Person swrc:name="Hristo Tanev"/></rdf:_4></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/2db4a780c41c5d024c5e4aa7978c12256/diego_ma"><title>Intelligent Internet Systems</title><link>http://www.bibsonomy.org/bibtex/2db4a780c41c5d024c5e4aa7978c12256/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:42:21+01:00</dc:date><dc:subject>web AI </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Alon Y. &lt;a href=&#034;http://www.bibsonomy.org/author/Levy&#034;&gt;Levy&lt;/a&gt;  and Daniel S. &lt;a href=&#034;http://www.bibsonomy.org/author/Weld&#034;&gt;Weld&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Artificial Intelligence&lt;/em&gt;(&lt;em&gt;2000&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/AI"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/2db4a780c41c5d024c5e4aa7978c12256/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/2db4a780c41c5d024c5e4aa7978c12256/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#Article"/><owl:sameAs rdf:resource="http://citeseer.nj.nec.com/levy00intelligent.html"/><swrc:date>Fri Dec 14 02:42:21 CET 2007</swrc:date><swrc:journal>Artificial Intelligence</swrc:journal><swrc:title>Intelligent Internet Systems</swrc:title><swrc:year>2000</swrc:year><swrc:keywords>web AI </swrc:keywords><swrc:abstract> The astonishing growth of the Internet is the first sign that every aspect of our economy and society are likely to change. Yet for people to realize the vast promise of networked computing, Internet applications must become dramatically more powerful and easier to use. Artificial Intelligence (AI) technology holds the key to these futuristic applications with the promise of advanced features, adaptive functionality and intuitive interfaces. We group Internet applications into four categories: 1) user modeling, 2) discovery and analysis of remote information sources, 3) information integration, and 4) web-site management. The seven papers in this special issue represent some of the latest and most exciting research in three of the four categories. This introduction attempts to place the special-issue papers in context, but we caution readers that the field is too young and moving too quickly for a comprehensive survey article.</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Alon Y. Levy"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Daniel S. Weld"/></rdf:_2></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/2e8a3cc2f0ae646c5b40fff78e5178c94/diego_ma"><title>Scaling Question Answering to the Web</title><link>http://www.bibsonomy.org/bibtex/2e8a3cc2f0ae646c5b40fff78e5178c94/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:41:51+01:00</dc:date><dc:subject>question_answering web </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Cody C.T &lt;a href=&#034;http://www.bibsonomy.org/author/Kwok&#034;&gt;Kwok&lt;/a&gt;  and Oren &lt;a href=&#034;http://www.bibsonomy.org/author/Etzioni&#034;&gt;Etzioni&lt;/a&gt;  and Daniel S. &lt;a href=&#034;http://www.bibsonomy.org/author/Weld&#034;&gt;Weld&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Proc. WWW10, &lt;/em&gt;&lt;em&gt;page 150-161. &lt;/em&gt;(&lt;em&gt;2001&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/question_answering"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/2e8a3cc2f0ae646c5b40fff78e5178c94/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/2e8a3cc2f0ae646c5b40fff78e5178c94/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#InProceedings"/><owl:sameAs rdf:resource="http://www.cs.washington.edu/homes/ctkwok/"/><swrc:date>Fri Dec 14 02:41:51 CET 2007</swrc:date><swrc:booktitle>Proc. WWW10</swrc:booktitle><swrc:pages>150-161</swrc:pages><swrc:title>Scaling Question Answering to the Web</swrc:title><swrc:year>2001</swrc:year><swrc:keywords>question_answering web </swrc:keywords><swrc:abstract>The wealth of information on the web makes it an attractive resource for seeking quick answers to simple, factual questions such as ``who was the first American in space?&#039;&#039; or ``what is the second tallest mountain in the world?&#039;&#039; Yet today&#039;s most advanced web search services (e.g., Google and AskJeeves) make it surprisingly tedious to locate answers to such questions. In this paper, we extend question-answering techniques, first studied in the information retrieval literature, to the web and experimentally evaluate their performance. First we introduce MULDER, which we believe to be the first general-purpose, fully-automated question-answering system available on the web. Second, we describe MULDER&#039;s architecture, which relies on multiple search-engine queries, natural-language parsing, and a novel voting procedure to yield reliable answers coupled with high recall. Finally, we compare MULDER&#039;s performance to that of Google and AskJeeves on questions drawn from the TREC-8 question track. We find that MULDER&#039;s recall is more than a factor of three higher than that of AskJeeves. In addition, we find that Google requires 6.6 times as much user effort to achieve the same level of recall as MULDER.</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Cody C.T Kwok"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Oren Etzioni"/></rdf:_2><rdf:_3><swrc:Person swrc:name="Daniel S. Weld"/></rdf:_3></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/24f4f5fdc33c1a9ea9bf44140c460e04b/diego_ma"><title>Information Retrieval on the Web</title><link>http://www.bibsonomy.org/bibtex/24f4f5fdc33c1a9ea9bf44140c460e04b/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:41:40+01:00</dc:date><dc:subject>inf_retrieval web </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Mei &lt;a href=&#034;http://www.bibsonomy.org/author/Kobayashi&#034;&gt;Kobayashi&lt;/a&gt;  and Koichi &lt;a href=&#034;http://www.bibsonomy.org/author/Takeda&#034;&gt;Takeda&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;ACM Computing Surveys&lt;/em&gt;&lt;em&gt;32(2):144-173&lt;/em&gt;(&lt;em&gt;2000&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/inf_retrieval"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/24f4f5fdc33c1a9ea9bf44140c460e04b/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/24f4f5fdc33c1a9ea9bf44140c460e04b/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#Article"/><owl:sameAs rdf:resource="http://portal.acm.org/citation.cfm?id=358934"/><swrc:date>Fri Dec 14 02:41:40 CET 2007</swrc:date><swrc:journal>ACM Computing Surveys</swrc:journal><swrc:number>2</swrc:number><swrc:pages>144-173</swrc:pages><swrc:title>Information Retrieval on the Web</swrc:title><swrc:volume>32</swrc:volume><swrc:year>2000</swrc:year><swrc:keywords>inf_retrieval web </swrc:keywords><swrc:abstract>In this paper we review studies of the growth of the Internet and technologies that are useful for information search and retrieval on the Web. We present data on the Internet from several different sources, e.g., current as well as projected number of users, hosts, and Web sites. Although numerical figures vary, overall trends cited by the sources are consistent and point to exponential growth in the past and in the coming decade. Hence it is not surprising that about 85% of Internet users surveyed claim using search engines and search services to find specific information. The same surveys show, however, that users are not satisfied with the performance of the current generation of search engines; the slow retrieval speed, communication delays, and poor quality of retrieved results (e.g., noise and broken links) are commonly cited problems. We discuss the development of new techniques targeted to resolve some of the problems associated with Web-based information retrieval and speculate on future trends.</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Mei Kobayashi"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Koichi Takeda"/></rdf:_2></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/2fe424bf3422feb1aefbe2ea9444a12e2/diego_ma"><title>Introduction to the Special Issue on the Web as Corpus</title><link>http://www.bibsonomy.org/bibtex/2fe424bf3422feb1aefbe2ea9444a12e2/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:41:32+01:00</dc:date><dc:subject>resources web </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Adam &lt;a href=&#034;http://www.bibsonomy.org/author/Kilgarriff&#034;&gt;Kilgarriff&lt;/a&gt;  and Gregory &lt;a href=&#034;http://www.bibsonomy.org/author/Grefenstette&#034;&gt;Grefenstette&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Computational Linguistics&lt;/em&gt;&lt;em&gt;29(3):333-347&lt;/em&gt;(&lt;em&gt;2003&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/resources"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/2fe424bf3422feb1aefbe2ea9444a12e2/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/2fe424bf3422feb1aefbe2ea9444a12e2/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#Article"/><swrc:date>Fri Dec 14 02:41:32 CET 2007</swrc:date><swrc:journal>Computational Linguistics</swrc:journal><swrc:number>3</swrc:number><swrc:pages>333-347</swrc:pages><swrc:title>Introduction to the Special Issue on the Web as Corpus</swrc:title><swrc:volume>29</swrc:volume><swrc:year>2003</swrc:year><swrc:keywords>resources web </swrc:keywords><swrc:abstract>The Web, teeming as it is with language data, of all manner of varieties and languages, in vast quantity and freely available, is a fabulous linguist&#039;s playground. This special issue of Computational Linguistics explores ways in which this dream is being explored.</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Adam Kilgarriff"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Gregory Grefenstette"/></rdf:_2></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/2dce5528d84d977afeba965b58738f219/diego_ma"><title>Omnibase: Uniform Access to Heterogeneous Data for Question Answering</title><link>http://www.bibsonomy.org/bibtex/2dce5528d84d977afeba965b58738f219/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:41:23+01:00</dc:date><dc:subject>web question_answering </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Boris &lt;a href=&#034;http://www.bibsonomy.org/author/Katz&#034;&gt;Katz&lt;/a&gt;  and Sue &lt;a href=&#034;http://www.bibsonomy.org/author/Felshin&#034;&gt;Felshin&lt;/a&gt;  and Deniz &lt;a href=&#034;http://www.bibsonomy.org/author/Yuret&#034;&gt;Yuret&lt;/a&gt;  and Ali &lt;a href=&#034;http://www.bibsonomy.org/author/Ibrahim&#034;&gt;Ibrahim&lt;/a&gt;  and Jimmy &lt;a href=&#034;http://www.bibsonomy.org/author/Lin&#034;&gt;Lin&lt;/a&gt;  and Gregory &lt;a href=&#034;http://www.bibsonomy.org/author/Marton&#034;&gt;Marton&lt;/a&gt;  and Alton Jerome &lt;a href=&#034;http://www.bibsonomy.org/author/McFarland&#034;&gt;McFarland&lt;/a&gt;  and Baris &lt;a href=&#034;http://www.bibsonomy.org/author/Temelkuran&#034;&gt;Temelkuran&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Proc. NLDB2002, &lt;/em&gt;(&lt;em&gt;2002&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/question_answering"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/2dce5528d84d977afeba965b58738f219/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/2dce5528d84d977afeba965b58738f219/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#InProceedings"/><owl:sameAs rdf:resource="http://www.ai.mit.edu/people/jimmylin/publications/index.html"/><swrc:date>Fri Dec 14 02:41:23 CET 2007</swrc:date><swrc:booktitle>Proc. NLDB2002</swrc:booktitle><swrc:title>Omnibase: Uniform Access to Heterogeneous Data for Question Answering</swrc:title><swrc:year>2002</swrc:year><swrc:keywords>web question_answering </swrc:keywords><swrc:abstract>Although the World Wide Web contains a tremendous amount of information, the lack of uniform structure makes finding the right knowledge difficult. A solution is to turn the Web into a ``virtual database&#039;&#039; and to access it through natural language. We built Omnibase, a system that integrates heterogeneous data sources using an \emph{object-property-value} model. With the help of Omnibase, our Start natural language system can now access numerous heterogeneous data sources on the Web in a uniform manner, and answers millions of user questions with high precision.</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Boris Katz"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Sue Felshin"/></rdf:_2><rdf:_3><swrc:Person swrc:name="Deniz Yuret"/></rdf:_3><rdf:_4><swrc:Person swrc:name="Ali Ibrahim"/></rdf:_4><rdf:_5><swrc:Person swrc:name="Jimmy Lin"/></rdf:_5><rdf:_6><swrc:Person swrc:name="Gregory Marton"/></rdf:_6><rdf:_7><swrc:Person swrc:name="Alton Jerome McFarland"/></rdf:_7><rdf:_8><swrc:Person swrc:name="Baris Temelkuran"/></rdf:_8></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/2c056611effc0d18aae71a6d535ff6c5a/diego_ma"><title>Topic-sensitive PageRank</title><link>http://www.bibsonomy.org/bibtex/2c056611effc0d18aae71a6d535ff6c5a/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:40:07+01:00</dc:date><dc:subject>inf_retrieval web </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Taher H. &lt;a href=&#034;http://www.bibsonomy.org/author/Haveliwala&#034;&gt;Haveliwala&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Proceedings of the Eleventh International World Wide Web Conference, &lt;/em&gt;&lt;em&gt;May 2002. &lt;/em&gt;</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/inf_retrieval"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/2c056611effc0d18aae71a6d535ff6c5a/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/2c056611effc0d18aae71a6d535ff6c5a/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#InProceedings"/><swrc:date>Fri Dec 14 02:40:07 CET 2007</swrc:date><swrc:booktitle>Proceedings of the Eleventh International World Wide Web Conference</swrc:booktitle><swrc:month>May</swrc:month><swrc:title>Topic-sensitive PageRank</swrc:title><swrc:year>2002</swrc:year><swrc:keywords>inf_retrieval web </swrc:keywords><swrc:abstract>In the original PageRank algorithm for improving the ranking of search-query results, a single PageRank vector is computed, using the link structure of the Web, to capture the relative ``importance&#039;&#039; of Web pages, independent of any particular search query. To yield more accurate search results, we propose computing a {\em set} of PageRank vectors, biased using a set of representative topics, to capture more accurately the notion of importance with respect to a particular topic...</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Taher H. Haveliwala"/></rdf:_1></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/28669140e43a903fa1038b3105509d36a/diego_ma"><title>The Indexable Web is more than 11.5 billion pages</title><link>http://www.bibsonomy.org/bibtex/28669140e43a903fa1038b3105509d36a/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:39:44+01:00</dc:date><dc:subject>web </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Antonio &lt;a href=&#034;http://www.bibsonomy.org/author/Gulli&#034;&gt;Gulli&lt;/a&gt;  and Alessio &lt;a href=&#034;http://www.bibsonomy.org/author/Signorini&#034;&gt;Signorini&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Website, &lt;/em&gt;(&lt;em&gt;2005&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/28669140e43a903fa1038b3105509d36a/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/28669140e43a903fa1038b3105509d36a/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#Misc"/><swrc:date>Fri Dec 14 02:39:44 CET 2007</swrc:date><swrc:howpublished>Website</swrc:howpublished><swrc:title>The Indexable Web is more than 11.5 billion pages</swrc:title><swrc:year>2005</swrc:year><swrc:keywords>web </swrc:keywords><swrc:abstract>What is the current size of the Web? At the time of this writing, Google claims to index more than 8 billion pages, MSN Beta claims about 5 billion pages, Yahoo! at least 4 billion and Ask/Teoma more than 2 billion. Two sources for tracking the growth of the Web are [6,7], although they are not kept up to date. Estimating the size of the whole Web is quite difficult, due to its dynamic nature (According to Andrei Broder, the size of the whole Web depends strongly on whether his laptop is on the web, since it can be configured to produce links to an infinite number of URLs!). Nevertheless, it is possible to assess the size of the publically indexable Web. The indexable Web [4] is defined as &#034;the part of the Web which is considered for indexing by the major engines&#034;. In 1997, Bharat and Broder [2] estimated the size of Web indexed by Hotbot, Altavista, Excite and Infoseek (the largest search engines at that time) at 200 million pages. They also pointed out that the estimated intersection of the indexes was less than 1.4\%, or about 2.2 million pages. Furthermore, in 1998, Lawrence and Giles [3] gave a lower bound 800 million pages. These estimates have now become obsolete. In this short paper, we revise and update the estimated size of the indexable Web to at least 11.5 billion pages as of the end of January 2005. We also estimate the relative size and overlap of the largest Web search engines. Precisely Google is the largest engine, followed by Yahoo!, by Ask/Teoma, and by MSN Beta. We adopted the methodology proposed in 1997 by Bharat and Broder [2], but extended the number of queries used for testing from 35,000 in English, to more than 438,141 in 75 different languages. We remark that an estimate of the size of the web is useful in many situations, such as when compressing, ranking, spidering, indexing and mining the Web.</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Antonio Gulli"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Alessio Signorini"/></rdf:_2></rdf:Seq></swrc:author></rdf:Description></burst:publication></item><item rdf:about="http://www.bibsonomy.org/bibtex/2605a293303beef0c307dc8da107bcb98/diego_ma"><title>Web Question Answering: is more always better?</title><link>http://www.bibsonomy.org/bibtex/2605a293303beef0c307dc8da107bcb98/diego_ma</link><dc:creator>diego_ma</dc:creator><dc:date>2007-12-14T02:38:24+01:00</dc:date><dc:subject>question_answering web </dc:subject><content:encoded>&lt;span style=&#034;color:#555555;&#034;&gt;Susan &lt;a href=&#034;http://www.bibsonomy.org/author/Dumais&#034;&gt;Dumais&lt;/a&gt;  and Michele &lt;a href=&#034;http://www.bibsonomy.org/author/Banko&#034;&gt;Banko&lt;/a&gt;  and Eric &lt;a href=&#034;http://www.bibsonomy.org/author/Brill&#034;&gt;Brill&lt;/a&gt;  and Jimmy &lt;a href=&#034;http://www.bibsonomy.org/author/Lin&#034;&gt;Lin&lt;/a&gt;  and Andrew &lt;a href=&#034;http://www.bibsonomy.org/author/Ng&#034;&gt;Ng&lt;/a&gt;  &lt;/span&gt;&lt;em&gt;Proc. ACM SIGIR 2002, &lt;/em&gt;(&lt;em&gt;2002&lt;/em&gt;)</content:encoded><taxo:topics><rdf:Bag><rdf:li rdf:resource="http://www.bibsonomy.org/tag/question_answering"/><rdf:li rdf:resource="http://www.bibsonomy.org/tag/web"/></rdf:Bag></taxo:topics><burst:publication><rdf:Description rdf:about="http://www.bibsonomy.org/bibtex/2605a293303beef0c307dc8da107bcb98/diego_ma"><owl:sameAs rdf:resource="http://www.bibsonomy.org/uri/bibtex/2605a293303beef0c307dc8da107bcb98/diego_ma"/><rdf:type rdf:resource="http://swrc.ontoware.org/ontology#InProceedings"/><owl:sameAs rdf:resource="http://research.microsoft.com/\~{}sdumais/"/><swrc:date>Fri Dec 14 02:38:24 CET 2007</swrc:date><swrc:booktitle>Proc. ACM SIGIR 2002</swrc:booktitle><swrc:title>Web Question Answering: is more always better?</swrc:title><swrc:year>2002</swrc:year><swrc:keywords>question_answering web </swrc:keywords><swrc:abstract>This paper describes a question answering system that is designed to capitalize on the tremendous amount of that that is available online. Most question answering systems use a wide variety of linguistic resources. We focus instead on the redundancy available on large corpora as an important resource...</swrc:abstract><swrc:author><rdf:Seq><rdf:_1><swrc:Person swrc:name="Susan Dumais"/></rdf:_1><rdf:_2><swrc:Person swrc:name="Michele Banko"/></rdf:_2><rdf:_3><swrc:Person swrc:name="Eric Brill"/></rdf:_3><rdf:_4><swrc:Person swrc:name="Jimmy Lin"/></rdf:_4><rdf:_5><swrc:Person swrc:name="Andrew Ng"/></rdf:_5></rdf:Seq></swrc:author></rdf:Description></burst:publication></item></rdf:RDF>