We present a model, based on the maximum entropy method, for analyzing various measures of retrieval performance such as average precision, R-precision, and precision-at-cutoffs. Our methodology treats the value of such a measure as a constraint on the distribution of relevant documents in an unknown list, and the maximum entropy distribution can be determined subject to these constraints. For good measures of overall performance (such as average precision), the resulting maximum entropy distributions are highly correlated with actual distributions of relevant documents in lists as demonstrated through TREC data; for poor measures of overall performance, the correlation is weaker. As such, the maximum entropy method can be used to quantify the overall quality of a retrieval measure. Furthermore, for good measures of overall performance (such as average precision), we show that the corresponding maximum entropy distributions can be used to accurately infer precision-recall curves and the values of other measures of performance, and we demonstrate that the quality of these inferences far exceeds that predicted by simple retrieval measure correlation, as demonstrated through TREC data.
%0 Conference Paper
%1 1076042
%A Aslam, Javed A.
%A Yilmaz, Emine
%A Pavlu, Virgiliu
%B SIGIR '05: Proceedings of the 28th annual international ACM SIGIR conference on Research and development in information retrieval
%C New York, NY, USA
%D 2005
%I ACM Press
%K imported
%P 27--34
%R http://doi.acm.org/10.1145/1076034.1076042
%T The maximum entropy method for analyzing retrieval measures
%U http://portal.acm.org/citation.cfm?id=1076034.1076042&coll=GUIDE&dl=GUIDE&type=series&idx=1076034&part=Proceedings&WantType=Proceedings&title=Annual%20ACM%20Conference%20on%20Research%20and%20Development%20in%20Information%20Retrieval&CFID=29742571&CFTOKEN=47524788
%X We present a model, based on the maximum entropy method, for analyzing various measures of retrieval performance such as average precision, R-precision, and precision-at-cutoffs. Our methodology treats the value of such a measure as a constraint on the distribution of relevant documents in an unknown list, and the maximum entropy distribution can be determined subject to these constraints. For good measures of overall performance (such as average precision), the resulting maximum entropy distributions are highly correlated with actual distributions of relevant documents in lists as demonstrated through TREC data; for poor measures of overall performance, the correlation is weaker. As such, the maximum entropy method can be used to quantify the overall quality of a retrieval measure. Furthermore, for good measures of overall performance (such as average precision), we show that the corresponding maximum entropy distributions can be used to accurately infer precision-recall curves and the values of other measures of performance, and we demonstrate that the quality of these inferences far exceeds that predicted by simple retrieval measure correlation, as demonstrated through TREC data.
%@ 1-59593-034-5
@comment{
  Conference paper entry (inproceedings) for citation key 1076042.
  - doi holds the bare DOI with no resolver prefix; styles add the resolver.
  - address is the publisher city; location is kept as a separate field as
    exported (presumably the conference venue -- confirm against the source).
  - Acronyms in booktitle are brace-protected so styles cannot recase them.
  - added-at, biburl, description, interhash, intrahash, keywords and
    timestamp are bookkeeping fields from the export; standard styles are
    expected to ignore them.
}
@inproceedings{1076042,
  abstract    = {We present a model, based on the maximum entropy method, for analyzing various measures of retrieval performance such as average precision, R-precision, and precision-at-cutoffs. Our methodology treats the value of such a measure as a constraint on the distribution of relevant documents in an unknown list, and the maximum entropy distribution can be determined subject to these constraints. For good measures of overall performance (such as average precision), the resulting maximum entropy distributions are highly correlated with actual distributions of relevant documents in lists as demonstrated through TREC data; for poor measures of overall performance, the correlation is weaker. As such, the maximum entropy method can be used to quantify the overall quality of a retrieval measure. Furthermore, for good measures of overall performance (such as average precision), we show that the corresponding maximum entropy distributions can be used to accurately infer precision-recall curves and the values of other measures of performance, and we demonstrate that the quality of these inferences far exceeds that predicted by simple retrieval measure correlation, as demonstrated through TREC data.},
  added-at    = {2007-09-14T20:49:00.000+0200},
  address     = {New York, NY, USA},
  author      = {Aslam, Javed A. and Yilmaz, Emine and Pavlu, Virgiliu},
  biburl      = {https://www.bibsonomy.org/bibtex/29b8e6fcf8c2defe4ab796be2164ec9ab/xamde},
  booktitle   = {{SIGIR} '05: Proceedings of the 28th annual international {ACM} {SIGIR} conference on Research and development in information retrieval},
  description = {: SIGIR '05, The maximum entropy method ...},
  doi         = {10.1145/1076034.1076042},
  interhash   = {b80b027b7bb84135757bf3714647ea33},
  intrahash   = {9b8e6fcf8c2defe4ab796be2164ec9ab},
  isbn        = {1-59593-034-5},
  keywords    = {imported},
  location    = {Salvador, Brazil},
  pages       = {27--34},
  publisher   = {ACM Press},
  timestamp   = {2007-09-14T20:49:03.000+0200},
  title       = {The maximum entropy method for analyzing retrieval measures},
  url         = {http://portal.acm.org/citation.cfm?id=1076034.1076042&coll=GUIDE&dl=GUIDE&type=series&idx=1076034&part=Proceedings&WantType=Proceedings&title=Annual%20ACM%20Conference%20on%20Research%20and%20Development%20in%20Information%20Retrieval&CFID=29742571&CFTOKEN=47524788},
  year        = {2005},
}