Efficiently executing convolutional neural nets (CNNs) is important in many
machine-learning tasks. Since the cost of moving a word of data, either between
levels of a memory hierarchy or between processors over a network, is much
higher than the cost of an arithmetic operation, minimizing data movement is
critical to performance optimization. In this paper, we present both new lower
bounds on data movement needed for CNNs, and optimal sequential algorithms that
attain these lower bounds. In most common cases, our optimal algorithms can
attain significantly more data reuse than matrix multiplication.
%0 Generic
%1 demmel2018communicationoptimal
%A Demmel, James
%A Dinh, Grace
%D 2018
%K CNN compression distributed to_read
%T Communication-Optimal Convolutional Neural Nets
%U http://arxiv.org/abs/1802.06905
%X Efficiently executing convolutional neural nets (CNNs) is important in many
machine-learning tasks. Since the cost of moving a word of data, either between
levels of a memory hierarchy or between processors over a network, is much
higher than the cost of an arithmetic operation, minimizing data movement is
critical to performance optimization. In this paper, we present both new lower
bounds on data movement needed for CNNs, and optimal sequential algorithms that
attain these lower bounds. In most common cases, our optimal algorithms can
attain significantly more data reuse than matrix multiplication.
@misc{demmel2018communicationoptimal,
  abstract      = {Efficiently executing convolutional neural nets (CNNs) is important in many
machine-learning tasks. Since the cost of moving a word of data, either between
levels of a memory hierarchy or between processors over a network, is much
higher than the cost of an arithmetic operation, minimizing data movement is
critical to performance optimization. In this paper, we present both new lower
bounds on data movement needed for CNNs, and optimal sequential algorithms that
attain these lower bounds. In most common cases, our optimal algorithms can
attain significantly more data reuse than matrix multiplication.},
  added-at      = {2018-02-21T11:57:39.000+0100},
  archiveprefix = {arXiv},
  author        = {Demmel, James and Dinh, Grace},
  biburl        = {https://www.bibsonomy.org/bibtex/2b9528d44b07faf8aacc7d73fa48ac4e9/jk_itwm},
  description   = {Communication-Optimal Convolutional Neural Nets},
  eprint        = {1802.06905},
  interhash     = {866475616a014674dbdb882a076a0d85},
  intrahash     = {b9528d44b07faf8aacc7d73fa48ac4e9},
  keywords      = {CNN compression distributed to_read},
  timestamp     = {2018-02-21T11:57:39.000+0100},
  title         = {Communication-Optimal Convolutional Neural Nets},
  url           = {http://arxiv.org/abs/1802.06905},
  year          = {2018},
}