We explore techniques to significantly improve the compute efficiency and
performance of Deep Convolutional Networks without impacting their accuracy. To
improve the compute efficiency, we focus on achieving high accuracy with
extremely low-precision (2-bit) weight networks, and to accelerate the
execution time, we aggressively skip operations on zero-values. We achieve the
highest reported accuracy of 76.6% Top-1/93% Top-5 on the ImageNet object
classification challenge with a low-precision network (GitHub release of the
source code coming soon) while reducing the compute requirement by ~3x
compared to a full-precision network that achieves similar accuracy.
Furthermore, to fully exploit the benefits of our low-precision networks, we
build a deep learning accelerator core, dLAC, that can achieve up to 1
TFLOP/mm^2 equivalent for single-precision floating-point operations (~2
TFLOP/mm^2 for half-precision).
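
Since the record above is prose only, a minimal sketch of the two ideas the
abstract combines may help: 2-bit (ternary) weight quantization and
zero-skipping over the resulting sparse weights. The sketch is in Python/NumPy;
the function names, the 0.7 threshold heuristic, and the toy dot product are
illustrative assumptions in the spirit of ternary weight networks, not the
paper's exact scheme or the dLAC datapath.

    import numpy as np

    def ternarize(w, delta_scale=0.7):
        # Quantize float weights to {-alpha, 0, +alpha}, storable in 2 bits.
        # The delta = 0.7 * mean|w| threshold is a common ternary-weight
        # heuristic (an assumption here, not necessarily the paper's rule).
        delta = delta_scale * np.mean(np.abs(w))
        mask = np.abs(w) > delta
        # Scale factor: mean magnitude of the weights that survive the cut.
        alpha = np.abs(w[mask]).mean() if mask.any() else 0.0
        return alpha * np.sign(w) * mask

    def sparse_dot(wq, x):
        # Zero-skipping: accumulate only where the ternary weight is nonzero,
        # so the multiply-accumulates on zero weights are never issued.
        nz = np.flatnonzero(wq)
        return wq[nz] @ x[nz]

    rng = np.random.default_rng(0)
    w, x = rng.normal(size=1024), rng.normal(size=1024)
    wq = ternarize(w)
    print(sparse_dot(wq, x), w @ x)  # ternary vs. full-precision dot product

Ternarization drives a large fraction of the weights to exactly zero, which is
what makes the zero-skipping in sparse_dot worthwhile: fewer nonzero weights
means proportionally fewer operations to execute.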
@misc{venkatesh2016accelerating,
author = {Venkatesh, Ganesh and Nurvitadhi, Eriko and Marr, Debbie},
keywords = {acceleration architecture cnn deep_learning quantization sparse ternary zero_skipping},
note = {arXiv:1610.00324},
title = {Accelerating Deep Convolutional Networks using low-precision and sparsity},
url = {http://arxiv.org/abs/1610.00324},
year = 2016
}