The recent "Lottery Ticket Hypothesis" paper by Frankle & Carbin showed that
a simple approach to creating sparse networks (keep the large weights) results
in models that are trainable from scratch, but only when starting from the same
initial weights. The performance of these networks often exceeds the
performance of the non-sparse base model, but for reasons that were not well
understood. In this paper we study the three critical components of the Lottery
Ticket (LT) algorithm, showing that each may be varied significantly without
impacting the overall results. Ablating these factors leads to new insights for
why LT networks perform as well as they do. We show why setting weights to zero
is important, how signs are all you need to make the re-initialized network
train, and why masking behaves like training. Finally, we discover the
existence of Supermasks, or masks that can be applied to an untrained, randomly
initialized network to produce a model with performance far better than chance
(86% on MNIST, 41% on CIFAR-10).
Description
[1905.01067] Deconstructing Lottery Tickets: Zeros, Signs, and the Supermask
%0 Generic
%1 zhou2019deconstructing
%A Zhou, Hattie
%A Lan, Janice
%A Liu, Rosanne
%A Yosinski, Jason
%D 2019
%K nas pruning
%T Deconstructing Lottery Tickets: Zeros, Signs, and the Supermask
%U http://arxiv.org/abs/1905.01067
%X The recent "Lottery Ticket Hypothesis" paper by Frankle & Carbin showed that
a simple approach to creating sparse networks (keep the large weights) results
in models that are trainable from scratch, but only when starting from the same
initial weights. The performance of these networks often exceeds the
performance of the non-sparse base model, but for reasons that were not well
understood. In this paper we study the three critical components of the Lottery
Ticket (LT) algorithm, showing that each may be varied significantly without
impacting the overall results. Ablating these factors leads to new insights for
why LT networks perform as well as they do. We show why setting weights to zero
is important, how signs are all you need to make the re-initialized network
train, and why masking behaves like training. Finally, we discover the
existence of Supermasks, or masks that can be applied to an untrained, randomly
initialized network to produce a model with performance far better than chance
(86% on MNIST, 41% on CIFAR-10).
@misc{zhou2019deconstructing,
  abstract      = {The recent ``Lottery Ticket Hypothesis'' paper by Frankle \& Carbin showed that
a simple approach to creating sparse networks (keep the large weights) results
in models that are trainable from scratch, but only when starting from the same
initial weights. The performance of these networks often exceeds the
performance of the non-sparse base model, but for reasons that were not well
understood. In this paper we study the three critical components of the Lottery
Ticket (LT) algorithm, showing that each may be varied significantly without
impacting the overall results. Ablating these factors leads to new insights for
why LT networks perform as well as they do. We show why setting weights to zero
is important, how signs are all you need to make the re-initialized network
train, and why masking behaves like training. Finally, we discover the
existence of Supermasks, or masks that can be applied to an untrained, randomly
initialized network to produce a model with performance far better than chance
(86\% on MNIST, 41\% on CIFAR-10).},
  added-at      = {2019-05-09T12:21:13.000+0200},
  archiveprefix = {arXiv},
  author        = {Zhou, Hattie and Lan, Janice and Liu, Rosanne and Yosinski, Jason},
  biburl        = {https://www.bibsonomy.org/bibtex/21fbbc84c4d73ea345e1c74d5bfeb7d50/straybird321},
  description   = {[1905.01067] Deconstructing Lottery Tickets: Zeros, Signs, and the Supermask},
  eprint        = {1905.01067},
  interhash     = {2494e71abc195dd7f041a8bc98fce481},
  intrahash     = {1fbbc84c4d73ea345e1c74d5bfeb7d50},
  keywords      = {nas pruning},
  primaryclass  = {cs.LG},
  timestamp     = {2019-05-09T12:21:13.000+0200},
  title         = {Deconstructing {Lottery Tickets}: Zeros, Signs, and the {Supermask}},
  url           = {https://arxiv.org/abs/1905.01067},
  year          = {2019},
}