Ensemble approaches for uncertainty estimation have recently been applied to
the tasks of misclassification detection, out-of-distribution input detection
and adversarial attack detection. Prior Networks have been proposed as an
approach to efficiently emulating an ensemble of models by parameterising a
Dirichlet prior distribution over output distributions. These models have been
shown to outperform ensemble approaches, such as Monte-Carlo Dropout, on the
task of out-of-distribution input detection. However, scaling Prior Networks to
complex datasets with many classes is difficult using the training criteria
originally proposed. This paper makes two contributions. Firstly, we show that
the appropriate training criterion for Prior Networks is the reverse
KL-divergence between Dirichlet distributions. Using this loss we successfully
train Prior Networks on image classification datasets with up to 200 classes
and improve out-of-distribution detection performance. Secondly, taking
advantage of the new training criterion, this paper investigates using Prior
Networks to detect adversarial attacks. It is shown that the construction of
successful adaptive whitebox attacks, which affect the prediction and evade
detection, against Prior Networks trained on CIFAR-10 and CIFAR-100 takes a
greater amount of computational effort than against standard neural networks,
adversarially trained neural networks and dropout-defended networks.
Description
[1905.13472] Reverse KL-Divergence Training of Prior Networks: Improved Uncertainty and Adversarial Robustness
%0 Journal Article
%1 malinin2019reverse
%A Malinin, Andrey
%A Gales, Mark
%D 2019
%K uncertainty
%T Reverse KL-Divergence Training of Prior Networks: Improved Uncertainty
and Adversarial Robustness
%U http://arxiv.org/abs/1905.13472
%X Ensemble approaches for uncertainty estimation have recently been applied to
the tasks of misclassification detection, out-of-distribution input detection
and adversarial attack detection. Prior Networks have been proposed as an
approach to efficiently emulating an ensemble of models by parameterising a
Dirichlet prior distribution over output distributions. These models have been
shown to outperform ensemble approaches, such as Monte-Carlo Dropout, on the
task of out-of-distribution input detection. However, scaling Prior Networks to
complex datasets with many classes is difficult using the training criteria
originally proposed. This paper makes two contributions. Firstly, we show that
the appropriate training criterion for Prior Networks is the reverse
KL-divergence between Dirichlet distributions. Using this loss we successfully
train Prior Networks on image classification datasets with up to 200 classes
and improve out-of-distribution detection performance. Secondly, taking
advantage of the new training criterion, this paper investigates using Prior
Networks to detect adversarial attacks. It is shown that the construction of
successful adaptive whitebox attacks, which affect the prediction and evade
detection, against Prior Networks trained on CIFAR-10 and CIFAR-100 takes a
greater amount of computational effort than against standard neural networks,
adversarially trained neural networks and dropout-defended networks.
@article{malinin2019reverse,
  abstract      = {Ensemble approaches for uncertainty estimation have recently been applied to
the tasks of misclassification detection, out-of-distribution input detection
and adversarial attack detection. Prior Networks have been proposed as an
approach to efficiently emulating an ensemble of models by parameterising a
Dirichlet prior distribution over output distributions. These models have been
shown to outperform ensemble approaches, such as Monte-Carlo Dropout, on the
task of out-of-distribution input detection. However, scaling Prior Networks to
complex datasets with many classes is difficult using the training criteria
originally proposed. This paper makes two contributions. Firstly, we show that
the appropriate training criterion for Prior Networks is the reverse
KL-divergence between Dirichlet distributions. Using this loss we successfully
train Prior Networks on image classification datasets with up to 200 classes
and improve out-of-distribution detection performance. Secondly, taking
advantage of the new training criterion, this paper investigates using Prior
Networks to detect adversarial attacks. It is shown that the construction of
successful adaptive whitebox attacks, which affect the prediction and evade
detection, against Prior Networks trained on CIFAR-10 and CIFAR-100 takes a
greater amount of computational effort than against standard neural networks,
adversarially trained neural networks and dropout-defended networks.},
  added-at      = {2019-11-12T17:34:35.000+0100},
  archiveprefix = {arXiv},
  author        = {Malinin, Andrey and Gales, Mark},
  biburl        = {https://www.bibsonomy.org/bibtex/268039da0c18342c3f287bd58134c6ae7/kirk86},
  description   = {[1905.13472] Reverse KL-Divergence Training of Prior Networks: Improved Uncertainty and Adversarial Robustness},
  eprint        = {1905.13472},
  interhash     = {154d2b19636ed0a6aa8d092d9bb98915},
  intrahash     = {68039da0c18342c3f287bd58134c6ae7},
  keywords      = {uncertainty},
  note          = {cite arxiv:1905.13472},
  timestamp     = {2019-11-12T17:34:35.000+0100},
  title         = {Reverse {KL}-Divergence Training of {Prior Networks}: Improved Uncertainty
and Adversarial Robustness},
  url           = {http://arxiv.org/abs/1905.13472},
  year          = {2019}
}