The question addressed in this paper is: If we present to a user an AI system
that explains how it works, how do we know whether the explanation works and
the user has achieved a pragmatic understanding of the AI? In other words, how
do we know that an explainable AI system (XAI) is any good? Our focus is on
the key concepts of measurement. We discuss specific methods for evaluating:
(1) the goodness of explanations, (2) whether users are satisfied by
explanations, (3) how well users understand the AI systems, (4) how curiosity
motivates the search for explanations, (5) whether the user's trust and
reliance on the AI are appropriate, and finally, (6) how the human-XAI work
system performs. The recommendations we present derive from our integration of
extensive research literatures and our own psychometric evaluations.
Description
Metrics for Explainable AI: Challenges and Prospects
%0 Generic
%1 hoffman2018metrics
%A Hoffman, Robert R.
%A Mueller, Shane T.
%A Klein, Gary
%A Litman, Jordan
%D 2018
%K XAI evaluation metric
%T Metrics for Explainable AI: Challenges and Prospects
%U http://arxiv.org/abs/1812.04608
%X The question addressed in this paper is: If we present to a user an AI system
that explains how it works, how do we know whether the explanation works and
the user has achieved a pragmatic understanding of the AI? In other words, how
do we know that an explainable AI system (XAI) is any good? Our focus is on
the key concepts of measurement. We discuss specific methods for evaluating:
(1) the goodness of explanations, (2) whether users are satisfied by
explanations, (3) how well users understand the AI systems, (4) how curiosity
motivates the search for explanations, (5) whether the user's trust and
reliance on the AI are appropriate, and finally, (6) how the human-XAI work
system performs. The recommendations we present derive from our integration of
extensive research literatures and our own psychometric evaluations.
@misc{hoffman2018metrics,
  abstract      = {The question addressed in this paper is: If we present to a user an AI system
that explains how it works, how do we know whether the explanation works and
the user has achieved a pragmatic understanding of the AI? In other words, how
do we know that an explainable AI system (XAI) is any good? Our focus is on
the key concepts of measurement. We discuss specific methods for evaluating:
(1) the goodness of explanations, (2) whether users are satisfied by
explanations, (3) how well users understand the AI systems, (4) how curiosity
motivates the search for explanations, (5) whether the user's trust and
reliance on the AI are appropriate, and finally, (6) how the human-XAI work
system performs. The recommendations we present derive from our integration of
extensive research literatures and our own psychometric evaluations.},
  added-at      = {2019-11-18T12:48:58.000+0100},
  archiveprefix = {arXiv},
  author        = {Hoffman, Robert R. and Mueller, Shane T. and Klein, Gary and Litman, Jordan},
  biburl        = {https://www.bibsonomy.org/bibtex/22d8e2471ec6636e42c345c0f6a69f551/schwemmlein},
  description   = {Metrics for Explainable AI: Challenges and Prospects},
  eprint        = {1812.04608},
  interhash     = {e0affac5b826686347f34b51c03012bc},
  intrahash     = {2d8e2471ec6636e42c345c0f6a69f551},
  keywords      = {XAI evaluation metric},
  primaryclass  = {cs.AI},
  timestamp     = {2019-11-18T12:52:03.000+0100},
  title         = {Metrics for Explainable {AI}: Challenges and Prospects},
  url           = {http://arxiv.org/abs/1812.04608},
  year          = {2018},
}