Confidence calibration -- the problem of predicting probability estimates
representative of the true correctness likelihood -- is important for
classification models in many applications. We discover that modern neural
networks, unlike those from a decade ago, are poorly calibrated. Through
extensive experiments, we observe that depth, width, weight decay, and Batch
Normalization are important factors influencing calibration. We evaluate the
performance of various post-processing calibration methods on state-of-the-art
architectures with image and document classification datasets. Our analysis and
experiments not only offer insights into neural network learning, but also
provide a simple and straightforward recipe for practical settings: on most
datasets, temperature scaling -- a single-parameter variant of Platt Scaling --
is surprisingly effective at calibrating predictions.
Description
[1706.04599] On Calibration of Modern Neural Networks
%0 Journal Article
%1 guo2017calibration
%A Guo, Chuan
%A Pleiss, Geoff
%A Sun, Yu
%A Weinberger, Kilian Q.
%D 2017
%K calibration deep-learning
%T On Calibration of Modern Neural Networks
%U http://arxiv.org/abs/1706.04599
%X Confidence calibration -- the problem of predicting probability estimates
representative of the true correctness likelihood -- is important for
classification models in many applications. We discover that modern neural
networks, unlike those from a decade ago, are poorly calibrated. Through
extensive experiments, we observe that depth, width, weight decay, and Batch
Normalization are important factors influencing calibration. We evaluate the
performance of various post-processing calibration methods on state-of-the-art
architectures with image and document classification datasets. Our analysis and
experiments not only offer insights into neural network learning, but also
provide a simple and straightforward recipe for practical settings: on most
datasets, temperature scaling -- a single-parameter variant of Platt Scaling --
is surprisingly effective at calibrating predictions.
@misc{guo2017calibration,
  abstract      = {Confidence calibration -- the problem of predicting probability estimates
representative of the true correctness likelihood -- is important for
classification models in many applications. We discover that modern neural
networks, unlike those from a decade ago, are poorly calibrated. Through
extensive experiments, we observe that depth, width, weight decay, and Batch
Normalization are important factors influencing calibration. We evaluate the
performance of various post-processing calibration methods on state-of-the-art
architectures with image and document classification datasets. Our analysis and
experiments not only offer insights into neural network learning, but also
provide a simple and straightforward recipe for practical settings: on most
datasets, temperature scaling -- a single-parameter variant of Platt Scaling --
is surprisingly effective at calibrating predictions.},
  added-at      = {2019-03-12T00:04:42.000+0100},
  archiveprefix = {arXiv},
  author        = {Guo, Chuan and Pleiss, Geoff and Sun, Yu and Weinberger, Kilian Q.},
  biburl        = {https://www.bibsonomy.org/bibtex/2ce2c6f8772a74f4a19234111b95e75da/kirk86},
  description   = {[1706.04599] On Calibration of Modern Neural Networks},
  eprint        = {1706.04599},
  interhash     = {315c62001aeac75c9ddacc6a8afa7f8e},
  intrahash     = {ce2c6f8772a74f4a19234111b95e75da},
  keywords      = {calibration deep-learning},
  note          = {ICML 2017},
  timestamp     = {2019-03-12T00:04:42.000+0100},
  title         = {On Calibration of Modern Neural Networks},
  url           = {http://arxiv.org/abs/1706.04599},
  year          = {2017},
}