The ability to train large-scale neural networks has resulted in
state-of-the-art performance in many areas of computer vision. These results
have largely come from computational breakthroughs of two forms: model
parallelism, e.g. GPU accelerated training, which has seen quick adoption in
computer vision circles, and data parallelism, e.g. A-SGD, whose large scale
has been used mostly in industry. We report early experiments with a system
that makes use of both model parallelism and data parallelism, which we call GPU
A-SGD. We show using GPU A-SGD it is possible to speed up training of large
convolutional neural networks useful for computer vision. We believe GPU A-SGD
will make it possible to train larger networks on larger training sets in a
reasonable amount of time.
Description
GPU Asynchronous Stochastic Gradient Descent to Speed Up Neural Network Training
%0 Generic
%1 paine2013asynchronous
%A Paine, Thomas
%A Jin, Hailin
%A Yang, Jianchao
%A Lin, Zhe
%A Huang, Thomas
%D 2013
%K deep dl large-scale networks neural
%T GPU Asynchronous Stochastic Gradient Descent to Speed Up Neural Network
Training
%U http://arxiv.org/abs/1312.6186
%X The ability to train large-scale neural networks has resulted in
state-of-the-art performance in many areas of computer vision. These results
have largely come from computational breakthroughs of two forms: model
parallelism, e.g. GPU accelerated training, which has seen quick adoption in
computer vision circles, and data parallelism, e.g. A-SGD, whose large scale
has been used mostly in industry. We report early experiments with a system
that makes use of both model parallelism and data parallelism, which we call GPU
A-SGD. We show using GPU A-SGD it is possible to speed up training of large
convolutional neural networks useful for computer vision. We believe GPU A-SGD
will make it possible to train larger networks on larger training sets in a
reasonable amount of time.
% arXiv preprint (no formal venue), hence @misc. The arXiv identity is stored
% in eprint/archiveprefix so arXiv-aware styles can link the e-print directly;
% the note keeps a human-readable fallback for plain styles. "GPU" is braced
% in the title to survive sentence-casing .bst styles. The added-at/biburl/
% interhash/intrahash/timestamp fields are BibSonomy bookkeeping, kept as-is
% (unknown fields are ignored by BibTeX).
@misc{paine2013asynchronous,
  abstract      = {The ability to train large-scale neural networks has resulted in
state-of-the-art performance in many areas of computer vision. These results
have largely come from computational breakthroughs of two forms: model
parallelism, e.g. GPU accelerated training, which has seen quick adoption in
computer vision circles, and data parallelism, e.g. A-SGD, whose large scale
has been used mostly in industry. We report early experiments with a system
that makes use of both model parallelism and data parallelism, which we call GPU
A-SGD. We show using GPU A-SGD it is possible to speed up training of large
convolutional neural networks useful for computer vision. We believe GPU A-SGD
will make it possible to train larger networks on larger training sets in a
reasonable amount of time.},
  added-at      = {2019-06-04T16:20:53.000+0200},
  archiveprefix = {arXiv},
  author        = {Paine, Thomas and Jin, Hailin and Yang, Jianchao and Lin, Zhe and Huang, Thomas},
  biburl        = {https://www.bibsonomy.org/bibtex/23da98903a9d2e9ed2c335441c0d083a0/alrigazzi},
  description   = {GPU Asynchronous Stochastic Gradient Descent to Speed Up Neural Network Training},
  eprint        = {1312.6186},
  interhash     = {99711f68dbb04df2498cef7111ef1126},
  intrahash     = {3da98903a9d2e9ed2c335441c0d083a0},
  keywords      = {deep dl large-scale networks neural},
  note          = {arXiv:1312.6186. 6 pages, 4 figures},
  timestamp     = {2019-06-04T16:20:53.000+0200},
  title         = {{GPU} Asynchronous Stochastic Gradient Descent to Speed Up Neural Network Training},
  url           = {http://arxiv.org/abs/1312.6186},
  year          = {2013},
}