We present a unifying framework for designing and analysing distributional
reinforcement learning (DRL) algorithms in terms of recursively estimating
statistics of the return distribution. Our key insight is that DRL algorithms
can be decomposed as the combination of some statistical estimator and a method
for imputing a return distribution consistent with that set of statistics. With
this new understanding, we are able to provide improved analyses of existing
DRL algorithms as well as construct a new algorithm (EDRL) based upon
estimation of the expectiles of the return distribution. We compare EDRL with
existing methods on a variety of MDPs to illustrate concrete aspects of our
analysis, and develop a deep RL variant of the algorithm, ER-DQN, which we
evaluate on the Atari-57 suite of games.
Description
[1902.08102] Statistics and Samples in Distributional Reinforcement Learning
%0 Journal Article
%1 rowland2019statistics
%A Rowland, Mark
%A Dadashi, Robert
%A Kumar, Saurabh
%A Munos, Rémi
%A Bellemare, Marc G.
%A Dabney, Will
%D 2019
%K reinforcement-learning sampling stats
%T Statistics and Samples in Distributional Reinforcement Learning
%U http://arxiv.org/abs/1902.08102
%X We present a unifying framework for designing and analysing distributional
reinforcement learning (DRL) algorithms in terms of recursively estimating
statistics of the return distribution. Our key insight is that DRL algorithms
can be decomposed as the combination of some statistical estimator and a method
for imputing a return distribution consistent with that set of statistics. With
this new understanding, we are able to provide improved analyses of existing
DRL algorithms as well as construct a new algorithm (EDRL) based upon
estimation of the expectiles of the return distribution. We compare EDRL with
existing methods on a variety of MDPs to illustrate concrete aspects of our
analysis, and develop a deep RL variant of the algorithm, ER-DQN, which we
evaluate on the Atari-57 suite of games.
% arXiv preprint recorded DBLP-style: journal = {CoRR} + volume = {abs/ID}
% satisfies @article's required journal field; eprint/archiveprefix carry the
% canonical arXiv identifier instead of leaving it only in note/url.
@article{rowland2019statistics,
  abstract      = {We present a unifying framework for designing and analysing distributional
reinforcement learning (DRL) algorithms in terms of recursively estimating
statistics of the return distribution. Our key insight is that DRL algorithms
can be decomposed as the combination of some statistical estimator and a method
for imputing a return distribution consistent with that set of statistics. With
this new understanding, we are able to provide improved analyses of existing
DRL algorithms as well as construct a new algorithm (EDRL) based upon
estimation of the expectiles of the return distribution. We compare EDRL with
existing methods on a variety of MDPs to illustrate concrete aspects of our
analysis, and develop a deep RL variant of the algorithm, ER-DQN, which we
evaluate on the Atari-57 suite of games.},
  added-at      = {2019-04-23T12:48:23.000+0200},
  archiveprefix = {arXiv},
  author        = {Rowland, Mark and Dadashi, Robert and Kumar, Saurabh and Munos, R{\'e}mi and Bellemare, Marc G. and Dabney, Will},
  biburl        = {https://www.bibsonomy.org/bibtex/25e91e1ebfdca91cd4594c892aa392c61/kirk86},
  description   = {[1902.08102] Statistics and Samples in Distributional Reinforcement Learning},
  eprint        = {1902.08102},
  interhash     = {8973f442e998be1c8bb8c88db5f89bdd},
  intrahash     = {5e91e1ebfdca91cd4594c892aa392c61},
  journal       = {CoRR},
  keywords      = {reinforcement-learning sampling stats},
  note          = {cite arxiv:1902.08102},
  timestamp     = {2019-04-23T12:48:23.000+0200},
  title         = {Statistics and Samples in Distributional Reinforcement Learning},
  url           = {http://arxiv.org/abs/1902.08102},
  volume        = {abs/1902.08102},
  year          = {2019},
}