We propose to reinterpret a standard discriminative classifier of p(y|x) as
an energy based model for the joint distribution p(x,y). In this setting, the
standard class probabilities can be easily computed as well as unnormalized
values of p(x) and p(x|y). Within this framework, standard discriminative
architectures may be used and the model can also be trained on unlabeled data.
We demonstrate that energy based training of the joint distribution improves
calibration, robustness, and out-of-distribution detection while also enabling
our models to generate samples rivaling the quality of recent GAN approaches. We
improve upon recently proposed techniques for scaling up the training of energy
based models and present an approach which adds little overhead compared to
standard classification training. Our approach is the first to achieve
performance rivaling the state-of-the-art in both generative and discriminative
learning within one hybrid model.
Description
[1912.03263] Your Classifier is Secretly an Energy Based Model and You Should Treat it Like One
%0 Journal Article
%1 grathwohl2019classifier
%A Grathwohl, Will
%A Wang, Kuan-Chieh
%A Jacobsen, Jörn-Henrik
%A Duvenaud, David
%A Norouzi, Mohammad
%A Swersky, Kevin
%D 2019
%K 2019 arxiv classification iclr machine-learning
%T Your Classifier is Secretly an Energy Based Model and You Should Treat
it Like One
%U http://arxiv.org/abs/1912.03263
%X We propose to reinterpret a standard discriminative classifier of p(y|x) as
an energy based model for the joint distribution p(x,y). In this setting, the
standard class probabilities can be easily computed as well as unnormalized
values of p(x) and p(x|y). Within this framework, standard discriminative
architectures may be used and the model can also be trained on unlabeled data.
We demonstrate that energy based training of the joint distribution improves
calibration, robustness, and out-of-distribution detection while also enabling
our models to generate samples rivaling the quality of recent GAN approaches. We
improve upon recently proposed techniques for scaling up the training of energy
based models and present an approach which adds little overhead compared to
standard classification training. Our approach is the first to achieve
performance rivaling the state-of-the-art in both generative and discriminative
learning within one hybrid model.
@misc{grathwohl2019classifier,
  abstract    = {We propose to reinterpret a standard discriminative classifier of p(y|x) as
an energy based model for the joint distribution p(x,y). In this setting, the
standard class probabilities can be easily computed as well as unnormalized
values of p(x) and p(x|y). Within this framework, standard discriminative
architectures may be used and the model can also be trained on unlabeled data.
We demonstrate that energy based training of the joint distribution improves
calibration, robustness, and out-of-distribution detection while also enabling
our models to generate samples rivaling the quality of recent GAN approaches. We
improve upon recently proposed techniques for scaling up the training of energy
based models and present an approach which adds little overhead compared to
standard classification training. Our approach is the first to achieve
performance rivaling the state-of-the-art in both generative and discriminative
learning within one hybrid model.},
  added-at    = {2019-12-10T06:39:37.000+0100},
  author      = {Grathwohl, Will and Wang, Kuan-Chieh and Jacobsen, J{\"o}rn-Henrik and Duvenaud, David and Norouzi, Mohammad and Swersky, Kevin},
  biburl      = {https://www.bibsonomy.org/bibtex/22f9c6df9af2436c39ef2df646a4e90c2/analyst},
  description = {[1912.03263] Your Classifier is Secretly an Energy Based Model and You Should Treat it Like One},
  eprint      = {1912.03263},
  eprinttype  = {arXiv},
  interhash   = {08286688c3587c03c2738cf2b8ccfbcd},
  intrahash   = {2f9c6df9af2436c39ef2df646a4e90c2},
  keywords    = {2019 arxiv classification iclr machine-learning},
  timestamp   = {2019-12-10T06:39:37.000+0100},
  title       = {Your Classifier is Secretly an Energy Based Model and You Should Treat it Like One},
  url         = {http://arxiv.org/abs/1912.03263},
  year        = {2019},
}