In this work we aim to solve a large collection of tasks using a single
reinforcement learning agent with a single set of parameters. A key challenge
is to handle the increased amount of data and extended training time. We have
developed a new distributed agent IMPALA (Importance Weighted Actor-Learner
Architecture) that not only uses resources more efficiently in single-machine
training but also scales to thousands of machines without sacrificing data
efficiency or resource utilisation. We achieve stable learning at high
throughput by combining decoupled acting and learning with a novel off-policy
correction method called V-trace. We demonstrate the effectiveness of IMPALA
for multi-task reinforcement learning on DMLab-30 (a set of 30 tasks from the
DeepMind Lab environment (Beattie et al., 2016)) and Atari-57 (all available
Atari games in Arcade Learning Environment (Bellemare et al., 2013a)). Our
results show that IMPALA is able to achieve better performance than previous
agents with less data, and crucially exhibits positive transfer between tasks
as a result of its multi-task approach.
Description
IMPALA: Scalable Distributed Deep-RL with Importance Weighted Actor-Learner Architectures
%0 Generic
%1 espeholt2018impala
%A Espeholt, Lasse
%A Soyer, Hubert
%A Munos, Remi
%A Simonyan, Karen
%A Mnih, Volodymyr
%A Ward, Tom
%A Doron, Yotam
%A Firoiu, Vlad
%A Harley, Tim
%A Dunning, Iain
%A Legg, Shane
%A Kavukcuoglu, Koray
%D 2018
%K game
%T IMPALA: Scalable Distributed Deep-RL with Importance Weighted
Actor-Learner Architectures
%U http://arxiv.org/abs/1802.01561
%X In this work we aim to solve a large collection of tasks using a single
reinforcement learning agent with a single set of parameters. A key challenge
is to handle the increased amount of data and extended training time. We have
developed a new distributed agent IMPALA (Importance Weighted Actor-Learner
Architecture) that not only uses resources more efficiently in single-machine
training but also scales to thousands of machines without sacrificing data
efficiency or resource utilisation. We achieve stable learning at high
throughput by combining decoupled acting and learning with a novel off-policy
correction method called V-trace. We demonstrate the effectiveness of IMPALA
for multi-task reinforcement learning on DMLab-30 (a set of 30 tasks from the
DeepMind Lab environment (Beattie et al., 2016)) and Atari-57 (all available
Atari games in Arcade Learning Environment (Bellemare et al., 2013a)). Our
results show that IMPALA is able to achieve better performance than previous
agents with less data, and crucially exhibits positive transfer between tasks
as a result of its multi-task approach.
@comment{Cleaned auto-export: acronyms in title brace-protected, arXiv id moved
from note into eprint/archiveprefix/primaryclass, author spellings corrected
(Volodymyr Mnih, R{\'e}mi Munos). BibSonomy bookkeeping fields kept intact.}
@misc{espeholt2018impala,
  abstract      = {In this work we aim to solve a large collection of tasks using a single
reinforcement learning agent with a single set of parameters. A key challenge
is to handle the increased amount of data and extended training time. We have
developed a new distributed agent IMPALA (Importance Weighted Actor-Learner
Architecture) that not only uses resources more efficiently in single-machine
training but also scales to thousands of machines without sacrificing data
efficiency or resource utilisation. We achieve stable learning at high
throughput by combining decoupled acting and learning with a novel off-policy
correction method called V-trace. We demonstrate the effectiveness of IMPALA
for multi-task reinforcement learning on DMLab-30 (a set of 30 tasks from the
DeepMind Lab environment (Beattie et al., 2016)) and Atari-57 (all available
Atari games in Arcade Learning Environment (Bellemare et al., 2013a)). Our
results show that IMPALA is able to achieve better performance than previous
agents with less data, and crucially exhibits positive transfer between tasks
as a result of its multi-task approach.},
  added-at      = {2019-04-09T12:47:05.000+0200},
  author        = {Espeholt, Lasse and Soyer, Hubert and Munos, R{\'e}mi and Simonyan, Karen and Mnih, Volodymyr and Ward, Tom and Doron, Yotam and Firoiu, Vlad and Harley, Tim and Dunning, Iain and Legg, Shane and Kavukcuoglu, Koray},
  biburl        = {https://www.bibsonomy.org/bibtex/289e6264a58af8baac041e8a184f9a475/stuart10},
  description   = {IMPALA: Scalable Distributed Deep-RL with Importance Weighted Actor-Learner Architectures},
  eprint        = {1802.01561},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  interhash     = {65fd12ee63c346b7332337e13ec35847},
  intrahash     = {89e6264a58af8baac041e8a184f9a475},
  keywords      = {game},
  timestamp     = {2019-04-09T12:47:05.000+0200},
  title         = {{IMPALA}: Scalable Distributed Deep-{RL} with Importance Weighted
Actor-Learner Architectures},
  url           = {http://arxiv.org/abs/1802.01561},
  year          = {2018},
}