H. Joho and M. Sanderson. Large Scale Semantic Access to Content (Text, Image, Video, and Sound), pages 350--359. Paris, LE CENTRE DE HAUTES ETUDES INTERNATIONALES D'INFORMATIQUE DOCUMENTAIRE, (2007). Event place: Pittsburgh, Pennsylvania.
Abstract
Document frequency is used in various applications in Information Retrieval and other related fields. An assumption frequently made is that the document frequency represents a level of the term's specificity. However, empirical results to support this assumption are limited. Therefore, a large-scale experiment was carried out, using multiple corpora, to gain further insight into the relationship between the document frequency and term specificity. The results show that the assumption holds only at the very specific levels that cover the majority of vocabulary. The results also show that a larger corpus is more accurate at estimating the specificity. However, the co-occurrence information is shown to be effective for improving the accuracy when only a small corpus is available.
Large Scale Semantic Access to Content (Text, Image, Video, and Sound)
year
2007
pages
350--359
publisher
LE CENTRE DE HAUTES ETUDES INTERNATIONALES D'INFORMATIQUE DOCUMENTAIRE
series
RIAO '07
file
ACM Full Text PDF:C\:\\Users\\klaus\\Zotero\\storage\\RLWNP7ZT\\Joho und Sanderson - 2007 - Document Frequency and Term Specificity.pdf:application/pdf
%0 Conference Paper
%1 joho_document_2007
%A Joho, Hideo
%A Sanderson, Mark
%B Large Scale Semantic Access to Content (Text, Image, Video, and Sound)
%C Paris
%D 2007
%I LE CENTRE DE HAUTES ETUDES INTERNATIONALES D'INFORMATIQUE DOCUMENTAIRE
%K termspezifitaet
%P 350--359
%T Document frequency and term specificity
%U http://dl.acm.org/citation.cfm?id=1931390.1931425
%X Document frequency is used in various applications in Information Retrieval and other related fields. An assumption frequently made is that the document frequency represents a level of the term's specificity. However, empirical results to support this assumption are limited. Therefore, a large-scale experiment was carried out, using multiple corpora, to gain further insight into the relationship between the document frequency and term specificity. The results show that the assumption holds only at the very specific levels that cover the majority of vocabulary. The results also show that a larger corpus is more accurate at estimating the specificity. However, the co-occurrence information is shown to be effective for improving the accuracy when only a small corpus is available.
% Joho and Sanderson (2007): conference paper in the RIAO '07 proceedings.
% address holds the city listed with the publisher; the conference location
% (exported by Zotero as an "event-place" note) is kept in the venue field so
% it no longer prints as a raw note. Bookkeeping fields from the BibSonomy
% export (added-at, timestamp, biburl, interhash, intrahash) are retained.
@inproceedings{joho_document_2007,
  author    = {Joho, Hideo and Sanderson, Mark},
  title     = {Document Frequency and Term Specificity},
  booktitle = {Large Scale Semantic Access to Content (Text, Image, Video, and Sound)},
  series    = {{RIAO} '07},
  pages     = {350--359},
  publisher = {Le Centre de Hautes Etudes Internationales d'Informatique Documentaire},
  address   = {Paris},
  venue     = {Pittsburgh, Pennsylvania},
  year      = {2007},
  url       = {http://dl.acm.org/citation.cfm?id=1931390.1931425},
  urldate   = {2019-05-20},
  keywords  = {termspezifitaet},
  abstract  = {Document frequency is used in various applications in Information Retrieval and other related fields. An assumption frequently made is that the document frequency represents a level of the term's specificity. However, empirical results to support this assumption are limited. Therefore, a large-scale experiment was carried out, using multiple corpora, to gain further insight into the relationship between the document frequency and term specificity. The results show that the assumption holds only at the very specific levels that cover the majority of vocabulary. The results also show that a larger corpus is more accurate at estimating the specificity. However, the co-occurrence information is shown to be effective for improving the accuracy when only a small corpus is available.},
  file      = {ACM Full Text PDF:C\:\\Users\\klaus\\Zotero\\storage\\RLWNP7ZT\\Joho und Sanderson - 2007 - Document Frequency and Term Specificity.pdf:application/pdf},
  added-at  = {2019-05-21T15:25:32.000+0200},
  timestamp = {2019-05-21T15:25:32.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/2dd31fb2fe76fa63d4f5d8ce700a21e4b/lepsky},
  interhash = {68a50f8a6644ec1f1610021a10ea336f},
  intrahash = {dd31fb2fe76fa63d4f5d8ce700a21e4b},
}