Compression of deep neural networks (DNNs) for memory- and
computation-efficient compact feature representations has become a critical
problem, particularly for the deployment of DNNs on resource-limited platforms.
In this paper, we investigate lossy compression of DNNs by weight quantization
and lossless source coding for memory-efficient inference. Whereas previous
work addressed non-universal scalar quantization and entropy coding of DNN
weights, we introduce, for the first time, universal DNN compression by
universal vector quantization and universal source coding. In particular, we
examine universal randomized lattice quantization of DNNs, which randomizes DNN
weights by uniform random dithering before lattice quantization and can perform
near-optimally on any source without relying on knowledge of its probability
distribution. Entropy coding schemes such as Huffman codes require a prior
pass to calculate the source statistics, which is computationally costly.
Instead, we employ universal lossless source coding schemes, such as variants
of Lempel-Ziv-Welch or the Burrows-Wheeler transform, which need no such
statistics. Finally, we present methods for fine-tuning vector-quantized DNNs
to recover the performance loss after quantization. Our experimental results
show that the proposed universal DNN compression scheme achieves compression
ratios of 124.80, 47.10, and 42.46 for LeNet5, 32-layer ResNet, and AlexNet,
respectively.
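
To make the randomized lattice quantization step concrete, here is a minimal
NumPy sketch, not the authors' implementation: for simplicity it uses a
one-dimensional (scalar) lattice applied element-wise, whereas the paper
quantizes weight vectors on a multi-dimensional lattice. Uniform dither drawn
from [-Delta/2, Delta/2) is added before rounding and subtracted after
reconstruction, so the end-to-end error is uniform and independent of the
weight distribution; the dither itself need not be stored if encoder and
decoder share a pseudo-random seed. The step size below is a hypothetical
choice for illustration.

```python
import numpy as np

STEP = 0.02  # lattice cell size (Delta); illustrative value

def dithered_quantize(weights, step, seed):
    """Randomized (dithered) uniform quantization of a weight array.

    The dither is reproducible from the seed, so only the integer
    lattice indices need to be stored or transmitted.
    """
    dither = np.random.default_rng(seed).uniform(-step / 2, step / 2, weights.shape)
    return np.round((weights + dither) / step).astype(np.int32)

def dithered_dequantize(indices, step, seed):
    """Map indices back to lattice points, then cancel the shared dither."""
    dither = np.random.default_rng(seed).uniform(-step / 2, step / 2, indices.shape)
    return indices * step - dither

w = np.random.default_rng(0).standard_normal(10_000)  # stand-in for DNN weights
idx = dithered_quantize(w, STEP, seed=42)
w_hat = dithered_dequantize(idx, STEP, seed=42)
print(np.max(np.abs(w - w_hat)))  # bounded by STEP / 2 = 0.01, whatever the source
```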
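
The quantized indices can then be compressed without first estimating their
statistics. As a rough stand-in for the coder variants studied in the paper,
the Python standard library's bz2 (a Burrows-Wheeler-based coder) and lzma (a
Lempel-Ziv-family dictionary coder) can both be applied directly to the index
stream; the data below is illustrative, not weights from a real model:

```python
import bz2
import lzma

import numpy as np

# A toy index stream with the peaked, zero-heavy distribution typical of
# quantized DNN weights.
rng = np.random.default_rng(0)
indices = np.round(1.5 * rng.standard_normal(100_000)).astype(np.int8)
raw = indices.tobytes()

bwt_coded = bz2.compress(raw)  # bzip2: Burrows-Wheeler transform based
lz_coded = lzma.compress(raw)  # LZMA: a Lempel-Ziv dictionary coder

print(f"raw: {len(raw)} B, bz2: {len(bwt_coded)} B, lzma: {len(lz_coded)} B")
```

Unlike a Huffman code, neither coder needs a precomputed symbol table: both
adapt to the source as they read it, which is what makes the coding stage
universal.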
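
Finally, a sketch of the fine-tuning idea under an assumed formulation in
which weights assigned to the same codeword stay tied together: the codeword
assignments are frozen after quantization, and each shared codeword is updated
from the loss gradients of the weights mapped to it. The names and the scalar
codebook below are illustrative; the paper fine-tunes vector-quantized
codewords analogously.

```python
import numpy as np

def finetune_codebook_step(codebook, assignments, weight_grads, lr=1e-3):
    """One gradient step on shared codewords with frozen assignments.

    codebook:     (K,) shared codeword values
    assignments:  (N,) codeword index for each weight
    weight_grads: (N,) dL/dw for each individual weight
    """
    for k in range(codebook.size):
        members = assignments == k
        if members.any():
            # Each shared codeword moves by the mean gradient of the
            # weights tied to it.
            codebook[k] -= lr * weight_grads[members].mean()
    return codebook

# Toy usage: 3 codewords shared by 10 weights.
codebook = np.array([-0.5, 0.0, 0.5])
assignments = np.array([0, 1, 1, 2, 0, 2, 1, 1, 0, 2])
grads = np.random.default_rng(1).standard_normal(10)
codebook = finetune_codebook_step(codebook, assignments, grads)
weights = codebook[assignments]  # quantized weights used at inference time
print(weights)
```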
@misc{choi2018universal,
author = {Choi, Yoojin and El-Khamy, Mostafa and Lee, Jungwon},
keywords = {compression},
note = {arXiv:1802.02271},
title = {Universal Deep Neural Network Compression},
url = {http://arxiv.org/abs/1802.02271},
year = 2018
}