Challenging the implicit reliance on document collections,
this paper discusses the pros and cons of using query logs
rather than document collections, as self-contained sources
of data in textual information extraction. The differences
are quantified as part of a large-scale study on extracting
prominent attributes or quantifiable properties of classes
(e.g., top speed, price and fuel consumption for CarModel)
from unstructured text. In a head-to-head qualitative comparison,
a lightweight extraction method produces class attributes
that are 45% more accurate on average, when acquired
from query logs rather than Web documents.
%0 Conference Paper
%1 Pasca07
%A Pasca, Marius
%A Van Durme, Benjamin
%A Garera, Nikesh
%B CIKM '07: Proceedings of the Sixteenth ACM Conference on Information and Knowledge Management
%C New York, NY, USA
%D 2007
%I ACM
%K KnowledgeAcquisition QueryLogAnalysis TextMining
%P 485--494
%R 10.1145/1321440.1321510
%T The role of documents vs. queries in extracting class attributes from text
%U http://portal.acm.org/citation.cfm?id=1321440.1321510#
%X Challenging the implicit reliance on document collections,
this paper discusses the pros and cons of using query logs
rather than document collections, as self-contained sources
of data in textual information extraction. The differences
are quantified as part of a large-scale study on extracting
prominent attributes or quantifiable properties of classes
(e.g., top speed, price and fuel consumption for CarModel)
from unstructured text. In a head-to-head qualitative comparison,
a lightweight extraction method produces class attributes
that are 45% more accurate on average, when acquired
from query logs rather than Web documents.
%@ 978-1-59593-803-9
@comment{Pasca07: conference paper in the CIKM '07 proceedings. The doi field
holds the bare DOI (no resolver prefix); prepend https://doi.org/ to resolve it.}
@inproceedings{Pasca07,
  author     = {Pasca, Marius and Van Durme, Benjamin and Garera, Nikesh},
  title      = {The role of documents vs. queries in extracting class attributes from text},
  booktitle  = {{CIKM} '07: Proceedings of the Sixteenth {ACM} Conference on Information and Knowledge Management},
  pages      = {485--494},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  location   = {Lisbon, Portugal},
  year       = {2007},
  isbn       = {978-1-59593-803-9},
  doi        = {10.1145/1321440.1321510},
  url        = {http://portal.acm.org/citation.cfm?id=1321440.1321510#},
  keywords   = {KnowledgeAcquisition QueryLogAnalysis TextMining},
  abstract   = {Challenging the implicit reliance on document collections,
this paper discusses the pros and cons of using query logs
rather than document collections, as self-contained sources
of data in textual information extraction. The differences
are quantified as part of a large-scale study on extracting
prominent attributes or quantifiable properties of classes
(e.g., top speed, price and fuel consumption for CarModel)
from unstructured text. In a head-to-head qualitative comparison,
a lightweight extraction method produces class attributes
that are 45\% more accurate on average, when acquired
from query logs rather than Web documents.},
  added-at   = {2008-04-27T20:30:07.000+0200},
  timestamp  = {2009-08-06T14:51:22.000+0200},
  biburl     = {https://www.bibsonomy.org/bibtex/2e7efc07f0a928f6112c25ef40c5cbd68/mkroell},
  interhash  = {9074336a760490673809ee7eaeaa7be3},
  intrahash  = {e7efc07f0a928f6112c25ef40c5cbd68},
}