This paper introduces Adaptive Computation Time (ACT), an algorithm that
allows recurrent neural networks to learn how many computational steps to take
between receiving an input and emitting an output. ACT requires minimal changes
to the network architecture, is deterministic and differentiable, and does not
add any noise to the parameter gradients. Experimental results are provided for
four synthetic problems: determining the parity of binary vectors, applying
binary logic operations, adding integers, and sorting real numbers. Overall,
performance is dramatically improved by the use of ACT, which successfully
adapts the number of computational steps to the requirements of the problem. We
also present character-level language modelling results on the Hutter prize
Wikipedia dataset. In this case ACT does not yield large gains in performance;
however it does provide intriguing insight into the structure of the data, with
more computation allocated to harder-to-predict transitions, such as spaces
between words and ends of sentences. This suggests that ACT or other adaptive
computation methods could provide a generic method for inferring segment
boundaries in sequence data.
Description
[1603.08983] Adaptive Computation Time for Recurrent Neural Networks
%0 Generic
%1 graves2016adaptive
%A Graves, Alex
%D 2016
%K act rnn
%T Adaptive Computation Time for Recurrent Neural Networks
%U http://arxiv.org/abs/1603.08983
%X This paper introduces Adaptive Computation Time (ACT), an algorithm that
allows recurrent neural networks to learn how many computational steps to take
between receiving an input and emitting an output. ACT requires minimal changes
to the network architecture, is deterministic and differentiable, and does not
add any noise to the parameter gradients. Experimental results are provided for
four synthetic problems: determining the parity of binary vectors, applying
binary logic operations, adding integers, and sorting real numbers. Overall,
performance is dramatically improved by the use of ACT, which successfully
adapts the number of computational steps to the requirements of the problem. We
also present character-level language modelling results on the Hutter prize
Wikipedia dataset. In this case ACT does not yield large gains in performance;
however it does provide intriguing insight into the structure of the data, with
more computation allocated to harder-to-predict transitions, such as spaces
between words and ends of sentences. This suggests that ACT or other adaptive
computation methods could provide a generic method for inferring segment
boundaries in sequence data.
@misc{graves2016adaptive,
  author        = {Graves, Alex},
  title         = {Adaptive {Computation Time} for {Recurrent Neural Networks}},
  year          = {2016},
  eprint        = {1603.08983},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1603.08983},
  abstract      = {This paper introduces Adaptive Computation Time (ACT), an algorithm that
allows recurrent neural networks to learn how many computational steps to take
between receiving an input and emitting an output. ACT requires minimal changes
to the network architecture, is deterministic and differentiable, and does not
add any noise to the parameter gradients. Experimental results are provided for
four synthetic problems: determining the parity of binary vectors, applying
binary logic operations, adding integers, and sorting real numbers. Overall,
performance is dramatically improved by the use of ACT, which successfully
adapts the number of computational steps to the requirements of the problem. We
also present character-level language modelling results on the Hutter prize
Wikipedia dataset. In this case ACT does not yield large gains in performance;
however it does provide intriguing insight into the structure of the data, with
more computation allocated to harder-to-predict transitions, such as spaces
between words and ends of sentences. This suggests that ACT or other adaptive
computation methods could provide a generic method for inferring segment
boundaries in sequence data.},
  keywords      = {act rnn},
  note          = {cite arxiv:1603.08983},
  description   = {[1603.08983] Adaptive Computation Time for Recurrent Neural Networks},
  added-at      = {2018-03-23T12:53:34.000+0100},
  timestamp     = {2018-03-23T12:53:34.000+0100},
  biburl        = {https://www.bibsonomy.org/bibtex/2423864e2c7e894daa1b765dce3805e8e/rcb},
  interhash     = {960640a28f3e578a76d374b587717749},
  intrahash     = {423864e2c7e894daa1b765dce3805e8e},
}