This paper presents a theoretical analysis and practical evaluation of the
main bottlenecks on the way to a scalable distributed solution for training
Deep Neural Networks (DNNs). The presented results show that the current
state-of-the-art approach, data-parallelized Stochastic Gradient Descent
(SGD), quickly turns into a heavily communication-bound problem. In
addition, we present simple but fundamental theoretical constraints that prevent
effective scaling of DNN training beyond a few dozen nodes. This leads to
poor scalability of DNN training in most practical scenarios.
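To make the communication-bound argument concrete, the sketch below is a minimal back-of-the-envelope cost model of data-parallel SGD, not the authors' own analysis: each worker computes gradients on its share of the mini-batch, so compute time shrinks with the node count, while the full gradient still has to be exchanged every step, so the communication term does not. All numeric parameters (gradient size, link bandwidth, per-batch compute time) are illustrative assumptions.

    # Back-of-the-envelope model of data-parallel SGD scaling.
    # Illustrative only: all constants below are assumptions, not values from the paper.

    def step_time(n_workers,
                  t_compute=1.0,      # serial compute time per mini-batch (assumed 1 unit)
                  grad_bytes=250e6,   # ~250 MB of gradient data, e.g. a large CNN (assumption)
                  bandwidth=12.5e9):  # ~100 Gbit/s effective bandwidth (assumption)
        """Time per SGD step: compute shrinks with n_workers, communication does not."""
        compute = t_compute / n_workers
        # A ring all-reduce moves roughly 2 * (n-1)/n * grad_bytes per worker.
        communicate = 2.0 * (n_workers - 1) / n_workers * grad_bytes / bandwidth
        return compute + communicate

    if __name__ == "__main__":
        serial = step_time(1)
        for n in (1, 2, 4, 8, 16, 32, 64, 128):
            print(f"{n:4d} workers: speedup {serial / step_time(n):5.1f}x")

With these assumed numbers the per-step speedup flattens out around a few dozen workers, which is the qualitative behaviour the abstract describes.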
@misc{KeuPfr16Distributed,
author = {Keuper, Janis and Pfreundt, Franz-Josef},
keywords = {deep_learning distributed scaling},
note = {cite arxiv:1609.06870},
title = {Distributed Training of Deep Neural Networks: Theoretical and Practical
Limits of Parallel Scalability},
url = {http://arxiv.org/abs/1609.06870},
year = 2016
}