The lottery ticket hypothesis proposes that over-parameterization of deep
neural networks (DNNs) aids training by increasing the probability of a "lucky"
sub-network initialization being present rather than by helping the
optimization process. This phenomenon is intriguing and suggests that
initialization strategies for DNNs can be improved substantially, but the
lottery ticket hypothesis has only previously been tested in the context of
supervised learning for natural image tasks. Here, we evaluate whether "winning
ticket" initializations exist in two different domains: reinforcement learning
(RL) and in natural language processing (NLP). For RL, we analyzed a number of
discrete-action space tasks, including both classic control and pixel control.
For NLP, we examined both recurrent LSTM models and large-scale Transformer
models. Consistent with work in supervised image classification, we confirm
that winning ticket initializations generally outperform parameter-matched
random initializations, even at extreme pruning rates. Together, these results
suggest that the lottery ticket hypothesis is not restricted to supervised
learning of natural images, but rather represents a broader phenomenon in DNNs.
Description
[1906.02768] Playing the lottery with rewards and multiple languages: lottery tickets in RL and NLP
%0 Journal Article
%1 yu2019playing
%A Yu, Haonan
%A Edunov, Sergey
%A Tian, Yuandong
%A Morcos, Ari S.
%D 2019
%K compression deep-learning sparsity theory
%T Playing the lottery with rewards and multiple languages: lottery tickets
in RL and NLP
%U http://arxiv.org/abs/1906.02768
%X The lottery ticket hypothesis proposes that over-parameterization of deep
neural networks (DNNs) aids training by increasing the probability of a "lucky"
sub-network initialization being present rather than by helping the
optimization process. This phenomenon is intriguing and suggests that
initialization strategies for DNNs can be improved substantially, but the
lottery ticket hypothesis has only previously been tested in the context of
supervised learning for natural image tasks. Here, we evaluate whether "winning
ticket" initializations exist in two different domains: reinforcement learning
(RL) and in natural language processing (NLP). For RL, we analyzed a number of
discrete-action space tasks, including both classic control and pixel control.
For NLP, we examined both recurrent LSTM models and large-scale Transformer
models. Consistent with work in supervised image classification, we confirm
that winning ticket initializations generally outperform parameter-matched
random initializations, even at extreme pruning rates. Together, these results
suggest that the lottery ticket hypothesis is not restricted to supervised
learning of natural images, but rather represents a broader phenomenon in DNNs.
@comment{arXiv preprint; eprint/archiveprefix fields carry the identifier so
  url-aware and arXiv-aware styles can link it. Acronyms in the title are
  braced to survive sentence-casing bibliography styles.}
@article{yu2019playing,
  abstract      = {The lottery ticket hypothesis proposes that over-parameterization of deep
neural networks (DNNs) aids training by increasing the probability of a "lucky"
sub-network initialization being present rather than by helping the
optimization process. This phenomenon is intriguing and suggests that
initialization strategies for DNNs can be improved substantially, but the
lottery ticket hypothesis has only previously been tested in the context of
supervised learning for natural image tasks. Here, we evaluate whether "winning
ticket" initializations exist in two different domains: reinforcement learning
(RL) and in natural language processing (NLP). For RL, we analyzed a number of
discrete-action space tasks, including both classic control and pixel control.
For NLP, we examined both recurrent LSTM models and large-scale Transformer
models. Consistent with work in supervised image classification, we confirm
that winning ticket initializations generally outperform parameter-matched
random initializations, even at extreme pruning rates. Together, these results
suggest that the lottery ticket hypothesis is not restricted to supervised
learning of natural images, but rather represents a broader phenomenon in DNNs.},
  added-at      = {2019-06-11T01:09:47.000+0200},
  archiveprefix = {arXiv},
  author        = {Yu, Haonan and Edunov, Sergey and Tian, Yuandong and Morcos, Ari S.},
  biburl        = {https://www.bibsonomy.org/bibtex/206c046e05182b59f2acd986b5ceb7c00/kirk86},
  description   = {[1906.02768] Playing the lottery with rewards and multiple languages: lottery tickets in RL and NLP},
  eprint        = {1906.02768},
  interhash     = {6c8cef16c7f9dae4c2f29260db98b79d},
  intrahash     = {06c046e05182b59f2acd986b5ceb7c00},
  keywords      = {compression deep-learning sparsity theory},
  note          = {cite arxiv:1906.02768},
  timestamp     = {2019-06-11T01:09:47.000+0200},
  title         = {Playing the lottery with rewards and multiple languages: lottery tickets
in {RL} and {NLP}},
  url           = {http://arxiv.org/abs/1906.02768},
  year          = 2019
}