We propose a method for meta-learning reinforcement learning algorithms by
searching over the space of computational graphs that compute the loss
function for a value-based model-free RL agent to optimize. The learned
algorithms are domain-agnostic and can generalize to new environments not seen
during training. Our method can both learn from scratch and bootstrap from
existing algorithms, such as DQN, enabling interpretable modifications that
improve performance. Learning from scratch on simple classical control and
gridworld tasks, our method rediscovers the temporal-difference (TD) algorithm.
Bootstrapped from DQN, we highlight two learned algorithms that generalize
well to other classical control tasks, gridworld-type tasks, and Atari games.
Analysis of the learned algorithms' behavior shows resemblance to recently
proposed RL algorithms that address overestimation in value-based methods.
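To make the search space concrete, the sketch below (not the paper's code; function and variable names are illustrative) expresses the standard DQN TD loss as a small composition of primitive ops, the kind of computational graph the described search bootstraps from and mutates:

```python
# Minimal sketch, assuming the standard DQN squared-TD-error loss:
# L = (Q(s, a) - (r + gamma * max_a' Q_target(s', a')))^2
# Each step below is one node of the kind of graph the search operates on.
import numpy as np

def dqn_td_loss(q_sa, q_next, reward, discount, done):
    """Squared TD error for a batch of transitions."""
    bootstrap = np.max(q_next, axis=-1)               # node: max over actions
    target = reward + discount * (1.0 - done) * bootstrap  # node: Bellman target
    td_error = q_sa - target                          # node: subtract
    return np.mean(td_error ** 2)                     # nodes: square, mean

# Example usage with dummy values for a single transition:
loss = dqn_td_loss(
    q_sa=np.array([1.2]),            # Q(s, a) for the taken action
    q_next=np.array([[0.5, 0.9]]),   # target-network Q-values at s'
    reward=np.array([1.0]),
    discount=0.99,
    done=np.array([0.0]),            # 0.0 = non-terminal transition
)
```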
@misc{coreyes2021evolving,
  author   = {Co-Reyes, John D. and Miao, Yingjie and Peng, Daiyi and Real, Esteban and Levine, Sergey and Le, Quoc V. and Lee, Honglak and Faust, Aleksandra},
  title    = {Evolving Reinforcement Learning Algorithms},
  year     = {2021},
  url      = {http://arxiv.org/abs/2101.03958},
  note     = {arXiv:2101.03958},
  keywords = {ai dqn evolving rl}
}