Reinforcement learning (RL) typically defines a discount factor as part of
the Markov Decision Process. The discount factor values future rewards by an
exponential scheme that leads to theoretical convergence guarantees of the
Bellman equation. However, evidence from psychology, economics and neuroscience
suggests that humans and animals instead have hyperbolic time-preferences. In
this work we revisit the fundamentals of discounting in RL and bridge this
disconnect by implementing an RL agent that acts via hyperbolic discounting. We
demonstrate that a simple approach approximates hyperbolic discount functions
while still using familiar temporal-difference learning techniques in RL.
Additionally, and independent of hyperbolic discounting, we make a surprising
discovery that simultaneously learning value functions over multiple
time-horizons is an effective auxiliary task which often improves over a strong
value-based RL agent, Rainbow.
Description
[1902.06865] Hyperbolic Discounting and Learning over Multiple Horizons
%0 Journal Article
%1 fedus2019hyperbolic
%A Fedus, William
%A Gelada, Carles
%A Bengio, Yoshua
%A Bellemare, Marc G.
%A Larochelle, Hugo
%D 2019
%K reinforcement-learning
%T Hyperbolic Discounting and Learning over Multiple Horizons
%U http://arxiv.org/abs/1902.06865
%X Reinforcement learning (RL) typically defines a discount factor as part of
the Markov Decision Process. The discount factor values future rewards by an
exponential scheme that leads to theoretical convergence guarantees of the
Bellman equation. However, evidence from psychology, economics and neuroscience
suggests that humans and animals instead have hyperbolic time-preferences. In
this work we revisit the fundamentals of discounting in RL and bridge this
disconnect by implementing an RL agent that acts via hyperbolic discounting. We
demonstrate that a simple approach approximates hyperbolic discount functions
while still using familiar temporal-difference learning techniques in RL.
Additionally, and independent of hyperbolic discounting, we make a surprising
discovery that simultaneously learning value functions over multiple
time-horizons is an effective auxiliary task which often improves over a strong
value-based RL agent, Rainbow.
@article{fedus2019hyperbolic,
  abstract      = {Reinforcement learning (RL) typically defines a discount factor as part of
the Markov Decision Process. The discount factor values future rewards by an
exponential scheme that leads to theoretical convergence guarantees of the
Bellman equation. However, evidence from psychology, economics and neuroscience
suggests that humans and animals instead have hyperbolic time-preferences. In
this work we revisit the fundamentals of discounting in RL and bridge this
disconnect by implementing an RL agent that acts via hyperbolic discounting. We
demonstrate that a simple approach approximates hyperbolic discount functions
while still using familiar temporal-difference learning techniques in RL.
Additionally, and independent of hyperbolic discounting, we make a surprising
discovery that simultaneously learning value functions over multiple
time-horizons is an effective auxiliary task which often improves over a strong
value-based RL agent, Rainbow.},
  added-at      = {2019-07-12T14:50:01.000+0200},
  archiveprefix = {arXiv},
  author        = {Fedus, William and Gelada, Carles and Bengio, Yoshua and Bellemare, Marc G. and Larochelle, Hugo},
  biburl        = {https://www.bibsonomy.org/bibtex/2e79a415ac31be0e2668fa408a0d3b341/kirk86},
  description   = {[1902.06865] Hyperbolic Discounting and Learning over Multiple Horizons},
  eprint        = {1902.06865},
  interhash     = {72a30da278ee7b68985b52aba28fa80d},
  intrahash     = {e79a415ac31be0e2668fa408a0d3b341},
  keywords      = {reinforcement-learning},
  timestamp     = {2019-07-12T14:50:01.000+0200},
  title         = {Hyperbolic Discounting and Learning over Multiple Horizons},
  url           = {http://arxiv.org/abs/1902.06865},
  year          = {2019},
}