The early layers of a deep neural net have the fewest parameters, but account for the most computation. In this extended abstract, we propose training the hidden layers only for a set portion of the training run, freezing them out one by one and excluding them from the backward pass. Through experiments on CIFAR, we empirically demonstrate that FreezeOut yields savings of up to 20% wall-clock time during training at a 3% loss in accuracy for DenseNets, a 20% speedup without loss of accuracy for ResNets, and no improvement for VGG networks. Our code is publicly available at https://github.com/ajbrock/FreezeOut
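The freezing idea lends itself to a short sketch. Below is a minimal PyTorch illustration of the scheme the abstract describes: each layer is assigned a point in the training run after which its parameters are excluded from the backward pass, shallowest layer first. The toy model, the linear spacing of freeze times, and the hyperparameter `t0` are illustrative assumptions, not the paper's exact schedule; see the linked repository for the authors' implementation.

```python
# A minimal sketch of progressive layer freezing in PyTorch. The toy
# model, the linear spacing of freeze times, and the hyperparameter t0
# are illustrative assumptions, not the paper's exact schedule.
import torch
import torch.nn as nn

model = nn.Sequential(
    nn.Linear(32, 64), nn.ReLU(),
    nn.Linear(64, 64), nn.ReLU(),
    nn.Linear(64, 10),
)
layers = [model[0], model[2], model[4]]  # trainable blocks, shallow to deep

total_iters = 1000
t0 = 0.5  # fraction of the run after which the first layer freezes (assumed)

# Layer i stops training at a fraction of the run that grows from t0
# (shallowest layer) to 1.0 (deepest layer, which is never frozen).
n = len(layers)
freeze_at = [t0 + (1.0 - t0) * i / (n - 1) for i in range(n)]

opt = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
for it in range(total_iters):
    progress = it / total_iters
    for layer, t_i in zip(layers, freeze_at):
        if progress >= t_i:
            # Freeze: exclude this layer's parameters from the backward
            # pass and clear any stale gradient so the optimizer skips it.
            for p in layer.parameters():
                p.requires_grad_(False)
                p.grad = None

    x = torch.randn(16, 32)          # stand-in for a training batch
    loss = model(x).pow(2).mean()    # stand-in for a real loss
    opt.zero_grad()
    loss.backward()
    opt.step()
```

Because layers freeze in shallow-to-deep order, once layer i is frozen every layer below it is frozen too, so autograd can truncate the backward pass at the deepest unfrozen layer; that truncation is where the wall-clock savings come from.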
@misc{brock2017freezeout,
author = {Brock, Andrew and Lim, Theodore and Ritchie, J. M. and Weston, Nick},
keywords = {2017 arxiv deep-learning},
note = {cite arxiv:1706.04983. Comment: Extended Abstract},
title = {FreezeOut: Accelerate Training by Progressively Freezing Layers},
url = {http://arxiv.org/abs/1706.04983},
year = 2017
}