We present a study in Distributed Deep Reinforcement Learning (DDRL) focused
on scalability of a state-of-the-art Deep Reinforcement Learning algorithm
known as Batch Asynchronous Advantage Actor-Critic (BA3C). We show that using
the Adam optimization algorithm with a batch size of up to 2048 is a viable
choice for carrying out large scale machine learning computations. This,
combined with careful reexamination of the optimizer's hyperparameters, using
synchronous training on the node level (while keeping the local, single node
part of the algorithm asynchronous) and minimizing the memory footprint of the
model, allowed us to achieve linear scaling for up to 64 CPU nodes. This
corresponds to a training time of 21 minutes on 768 CPU cores, as opposed to 10
hours when using a single node with 24 cores achieved by a baseline single-node
implementation.
Description
[1801.02852] Distributed Deep Reinforcement Learning: Learn how to play Atari games in 21 minutes
%0 Conference Paper
%1 adamski2018distributed
%A Adamski, Igor
%A Adamski, Robert
%A Grel, Tomasz
%A Jędrych, Adam
%A Kaczmarek, Kamil
%A Michalewski, Henryk
%D 2018
%K A3C BA3C DRLAlgoComparison atari reinforcement_learning
%T Distributed Deep Reinforcement Learning: Learn how to play Atari games
in 21 minutes
%U http://arxiv.org/abs/1801.02852
%X We present a study in Distributed Deep Reinforcement Learning (DDRL) focused
on scalability of a state-of-the-art Deep Reinforcement Learning algorithm
known as Batch Asynchronous Advantage Actor-Critic (BA3C). We show that using
the Adam optimization algorithm with a batch size of up to 2048 is a viable
choice for carrying out large scale machine learning computations. This,
combined with careful reexamination of the optimizer's hyperparameters, using
synchronous training on the node level (while keeping the local, single node
part of the algorithm asynchronous) and minimizing the memory footprint of the
model, allowed us to achieve linear scaling for up to 64 CPU nodes. This
corresponds to a training time of 21 minutes on 768 CPU cores, as opposed to 10
hours when using a single node with 24 cores achieved by a baseline single-node
implementation.
@misc{adamski2018distributed,
  abstract      = {We present a study in Distributed Deep Reinforcement Learning (DDRL) focused
on scalability of a state-of-the-art Deep Reinforcement Learning algorithm
known as Batch Asynchronous Advantage Actor-Critic (BA3C). We show that using
the Adam optimization algorithm with a batch size of up to 2048 is a viable
choice for carrying out large scale machine learning computations. This,
combined with careful reexamination of the optimizer's hyperparameters, using
synchronous training on the node level (while keeping the local, single node
part of the algorithm asynchronous) and minimizing the memory footprint of the
model, allowed us to achieve linear scaling for up to 64 CPU nodes. This
corresponds to a training time of 21 minutes on 768 CPU cores, as opposed to 10
hours when using a single node with 24 cores achieved by a baseline single-node
implementation.},
  added-at      = {2020-01-24T08:44:48.000+0100},
  archiveprefix = {arXiv},
  author        = {Adamski, Igor and Adamski, Robert and Grel, Tomasz and J{\k{e}}drych, Adam and Kaczmarek, Kamil and Michalewski, Henryk},
  biburl        = {https://www.bibsonomy.org/bibtex/2b43bc257f70362d2e00f3df4d4fa986b/lanteunis},
  description   = {[1801.02852] Distributed Deep Reinforcement Learning: Learn how to play Atari games in 21 minutes},
  eprint        = {1801.02852},
  interhash     = {75fef4161116c41df33303c5cecfb96f},
  intrahash     = {b43bc257f70362d2e00f3df4d4fa986b},
  keywords      = {A3C BA3C DRLAlgoComparison atari reinforcement_learning},
  note          = {cite arxiv:1801.02852},
  timestamp     = {2020-01-24T08:47:43.000+0100},
  title         = {Distributed Deep Reinforcement Learning: Learn how to play {Atari} games
in 21 minutes},
  url           = {http://arxiv.org/abs/1801.02852},
  year          = 2018
}