Gradient-based optimization is the foundation of deep learning and
reinforcement learning. Even when the mechanism being optimized is unknown or
not differentiable, optimization using high-variance or biased gradient
estimates is still often the best strategy. We introduce a general framework
for learning low-variance, unbiased gradient estimators for black-box functions
of random variables. Our method uses gradients of a neural network trained
jointly with model parameters or policies, and is applicable in both discrete
and continuous settings. We demonstrate this framework for training discrete
latent-variable models. We also give an unbiased, action-conditional extension
of the advantage actor-critic reinforcement learning algorithm.
%0 Generic
%1 grathwohl2017backpropagation
%A Grathwohl, Will
%A Choi, Dami
%A Wu, Yuhuai
%A Roeder, Geoff
%A Duvenaud, David
%D 2017
%K optimization reinforcement_learning
%T Backpropagation through the Void: Optimizing control variates for black-box gradient estimation
%U http://arxiv.org/abs/1711.00123
%X Gradient-based optimization is the foundation of deep learning and
reinforcement learning. Even when the mechanism being optimized is unknown or
not differentiable, optimization using high-variance or biased gradient
estimates is still often the best strategy. We introduce a general framework
for learning low-variance, unbiased gradient estimators for black-box functions
of random variables. Our method uses gradients of a neural network trained
jointly with model parameters or policies, and is applicable in both discrete
and continuous settings. We demonstrate this framework for training discrete
latent-variable models. We also give an unbiased, action-conditional extension
of the advantage actor-critic reinforcement learning algorithm.
@misc{grathwohl2017backpropagation,
  abstract             = {Gradient-based optimization is the foundation of deep learning and
                          reinforcement learning. Even when the mechanism being optimized is unknown or
                          not differentiable, optimization using high-variance or biased gradient
                          estimates is still often the best strategy. We introduce a general framework
                          for learning low-variance, unbiased gradient estimators for black-box functions
                          of random variables. Our method uses gradients of a neural network trained
                          jointly with model parameters or policies, and is applicable in both discrete
                          and continuous settings. We demonstrate this framework for training discrete
                          latent-variable models. We also give an unbiased, action-conditional extension
                          of the advantage actor-critic reinforcement learning algorithm.},
  added-at             = {2018-12-07T09:10:16.000+0100},
  archiveprefix        = {arXiv},
  author               = {Grathwohl, Will and Choi, Dami and Wu, Yuhuai and Roeder, Geoff and Duvenaud, David},
  biburl               = {https://www.bibsonomy.org/bibtex/2fb4b6e697d9e183a2fa3c0afe6a5a2dc/jpvaldes},
  citeulike-article-id = {14475941},
  citeulike-linkout-0  = {http://arxiv.org/abs/1711.00123},
  citeulike-linkout-1  = {http://arxiv.org/pdf/1711.00123},
  day                  = 8,
  eprint               = {1711.00123},
  interhash            = {dfd2ffa8662195547815bfbbf0b5476c},
  intrahash            = {fb4b6e697d9e183a2fa3c0afe6a5a2dc},
  keywords             = {optimization reinforcement_learning},
  month                = nov,
  posted-at            = {2017-11-14 15:29:54},
  priority             = {2},
  timestamp            = {2018-12-07T09:39:28.000+0100},
  title                = {Backpropagation through the {Void}: Optimizing control variates for black-box gradient estimation},
  url                  = {http://arxiv.org/abs/1711.00123},
  year                 = 2017,
}