The Shapley value has become a popular method to attribute the prediction of
a machine-learning model on an input to its base features. The Shapley value
[1] is known to be the unique method that satisfies certain desirable
properties, and this motivates its use. Unfortunately, despite this uniqueness
result, there is a multiplicity of Shapley values used in explaining a model's
prediction. This is because there are many ways to apply the Shapley value that
differ in how they reference the model, the training data, and the explanation
context. In this paper, we study an approach that applies the Shapley value to
conditional expectations (CES) of sets of features (cf. [2]) that subsumes
several prior approaches within a common framework. We provide the first
algorithm for the general version of CES. We show that CES can result in
counterintuitive attributions in theory and in practice (we study a diabetes
prediction task); for instance, CES can assign non-zero attributions to
features that are not referenced by the model. In contrast, we show that an
approach called the Baseline Shapley (BS) does not exhibit counterintuitive
attributions; we support this claim with a uniqueness (axiomatic) result. We
show that BS is a special case of CES, and CES with an independent feature
distribution coincides with a randomized version of BS. Thus, BS fits into the
CES framework, but does not suffer from many of CES's deficiencies.
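
As a notational sketch of the two set functions the abstract contrasts (the model f, explicand x, baseline x', feature set N, and subset S are our notation, not quoted from the paper): the Shapley value of feature i under a set function v is

  \phi_i(v) = \sum_{S \subseteq N \setminus \{i\}} \frac{|S|! \, (|N| - |S| - 1)!}{|N|!} \, \bigl( v(S \cup \{i\}) - v(S) \bigr),

and BS and CES differ only in the set function they evaluate:

  v_{BS}(S)  = f(x_S ; x'_{\bar{S}})                       (features outside S fixed at the baseline)
  v_{CES}(S) = \mathbb{E}\bigl[ f(X) \mid X_S = x_S \bigr]  (features outside S drawn from the conditional data distribution)

This contrast also explains the counterintuitive behavior the abstract describes: conditioning on X_S shifts the distribution of correlated features the model does read, so a feature the model never references can still change v_{CES}. If the features are independent, \mathbb{E}[ f(X) \mid X_S = x_S ] = \mathbb{E}_{X_{\bar{S}}}\bigl[ f(x_S ; X_{\bar{S}}) \bigr], i.e. BS averaged over baselines drawn from the data distribution, which is the randomized-BS correspondence the abstract asserts.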
@misc{sundararajan2019shapley,
author = {Sundararajan, Mukund and Najmi, Amir},
keywords = {interpretability},
note = {arXiv:1908.08474. Comment: 9 pages},
title = {The many Shapley values for model explanation},
url = {http://arxiv.org/abs/1908.08474},
year = 2019
}