Deep convolutional networks are well-known for their high computational and
memory demands. Given limited resources, how does one design a network that
balances its size, training time, and prediction accuracy? A surprisingly
effective approach to trade accuracy for size and speed is to simply reduce the
number of channels in each convolutional layer by a fixed fraction and retrain
the network. In many cases this leads to significantly smaller networks with
only minimal changes to accuracy. In this paper, we take a step further by
empirically examining a strategy for deactivating connections between filters
in convolutional layers, in a way that lets us harvest savings in both
run-time and memory across many network architectures. More specifically, we
generalize 2D convolution to use a channel-wise sparse connection structure and
show that this leads to significantly better results than the baseline approach
for large networks including VGG and Inception V3.
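
The channel-wise sparse connection structure described in the abstract can be
emulated by applying a fixed binary mask over (output channel, input channel)
pairs to the weights of a standard convolution. The PyTorch sketch below is
illustrative only, not the authors' implementation: the class name
ChannelSparseConv2d, the random mask, and the sparsity parameter are
assumptions, and masking a dense kernel reproduces only the connectivity
pattern; harvesting the run-time and memory savings in practice requires a
kernel that actually exploits the sparsity.

import torch
import torch.nn.functional as F

class ChannelSparseConv2d(torch.nn.Module):
    """Conv2d whose filters see only a subset of the input channels.

    A frozen binary mask over (out_channel, in_channel) pairs deactivates
    whole kernel slices, generalizing dense 2D convolution to a
    channel-wise sparse connection structure.
    """

    def __init__(self, in_channels, out_channels, kernel_size,
                 sparsity=0.5, padding=0):
        super().__init__()
        self.weight = torch.nn.Parameter(
            torch.empty(out_channels, in_channels, kernel_size, kernel_size))
        torch.nn.init.kaiming_normal_(self.weight)
        self.bias = torch.nn.Parameter(torch.zeros(out_channels))
        # Random connectivity, fixed at construction: entry (o, i) == 0
        # removes the entire k x k kernel between input channel i and
        # output channel o.
        mask = (torch.rand(out_channels, in_channels) >= sparsity).float()
        self.register_buffer("mask", mask[:, :, None, None])
        self.padding = padding

    def forward(self, x):
        # Masking the dense weight emulates the sparse connectivity;
        # gradients for deactivated connections are zeroed as well.
        return F.conv2d(x, self.weight * self.mask, self.bias,
                        padding=self.padding)

# Example: a layer keeping roughly 25% of channel-to-channel connections.
layer = ChannelSparseConv2d(64, 128, kernel_size=3, sparsity=0.75, padding=1)
out = layer(torch.randn(1, 64, 32, 32))   # shape: (1, 128, 32, 32)

For comparison, the baseline strategy of reducing the number of channels in
each layer by a fixed fraction amounts to constructing ordinary convolutions
with proportionally fewer channels and retraining.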
Description: [1702.06257] The Power of Sparsity in Convolutional Neural Networks
@misc{changpinyo2017power,
author = {Changpinyo, Soravit and Sandler, Mark and Zhmoginov, Andrey},
keywords = {cnn deep_learning incremental_learning training sparse sparsity},
note = {cite arxiv:1702.06257},
title = {The Power of Sparsity in Convolutional Neural Networks},
url = {http://arxiv.org/abs/1702.06257},
year = 2017
}