Different evaluation measures assess different characteristics of machine learning algorithms. The empirical evaluation of
algorithms and classifiers is a matter of on-going debate among researchers. Most measures in use today focus on a classifier’s ability to identify classes correctly. We note other useful properties, such as failure avoidance or class discrimination, and we suggest measures to evaluate such properties. These measures – Youden’s index, likelihood, Discriminant power – are used in medical diagnosis. We show that they are interrelated, and we apply them to a case study from the field of electronic negotiations. We also list other learning problems which may benefit from the application of these measures.
%0 Journal Article
%1 Sokolova2006
%A Sokolova, Marina
%A Japkowicz, Nathalie
%A Szpakowicz, Stan
%D 2006
%J AI 2006: Advances in Artificial Intelligence
%K evaluationmetrics
%P 1015--1021
%T Beyond Accuracy, F-Score and ROC: A Family of Discriminant Measures for Performance Evaluation
%U http://dx.doi.org/10.1007/11941439_114
%X Different evaluation measures assess different characteristics of machine learning algorithms. The empirical evaluation of
algorithms and classifiers is a matter of on-going debate among researchers. Most measures in use today focus on a classifier’s ability to identify classes correctly. We note other useful properties, such as failure avoidance or class discrimination, and we suggest measures to evaluate such properties. These measures – Youden’s index, likelihood, Discriminant power – are used in medical diagnosis. We show that they are interrelated, and we apply them to a case study from the field of electronic negotiations. We also list other learning problems which may benefit from the application of these measures.
% Paper from the "AI 2006: Advances in Artificial Intelligence" proceedings volume,
% so it is typed as inproceedings with the volume title in booktitle.
% Fields from added-at onward are BibSonomy export metadata and are kept verbatim.
@inproceedings{Sokolova2006,
  author    = {Sokolova, Marina and Japkowicz, Nathalie and Szpakowicz, Stan},
  title     = {Beyond Accuracy, {F-Score} and {ROC}: A Family of Discriminant Measures for Performance Evaluation},
  booktitle = {{AI} 2006: Advances in Artificial Intelligence},
  pages     = {1015--1021},
  year      = {2006},
  doi       = {10.1007/11941439_114},
  url       = {http://dx.doi.org/10.1007/11941439_114},
  keywords  = {evaluationmetrics},
  abstract  = {Different evaluation measures assess different characteristics of machine learning algorithms. The empirical evaluation of algorithms and classifiers is a matter of on-going debate among researchers. Most measures in use today focus on a classifier’s ability to identify classes correctly. We note other useful properties, such as failure avoidance or class discrimination, and we suggest measures to evaluate such properties. These measures – Youden’s index, likelihood, Discriminant power – are used in medical diagnosis. We show that they are interrelated, and we apply them to a case study from the field of electronic negotiations. We also list other learning problems which may benefit from the application of these measures.},
  added-at  = {2009-05-14T11:04:02.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/28138c3287f60698b314ae9afc82583a7/jamesh},
  interhash = {d76c267e9efd92b3cba6ccbf382b4504},
  intrahash = {8138c3287f60698b314ae9afc82583a7},
  timestamp = {2009-05-14T11:04:02.000+0200},
}