For most deep learning practitioners, sequence modeling is synonymous with
recurrent networks. Yet recent results indicate that convolutional
architectures can outperform recurrent networks on tasks such as audio
synthesis and machine translation. Given a new sequence modeling task or
dataset, which architecture should one use? We conduct a systematic evaluation
of generic convolutional and recurrent architectures for sequence modeling. The
models are evaluated across a broad range of standard tasks that are commonly
used to benchmark recurrent networks. Our results indicate that a simple
convolutional architecture outperforms canonical recurrent networks such as
LSTMs across a diverse range of tasks and datasets, while demonstrating longer
effective memory. We conclude that the common association between sequence
modeling and recurrent networks should be reconsidered, and convolutional
networks should be regarded as a natural starting point for sequence modeling
tasks. To assist related work, we have made code available at
http://github.com/locuslab/TCN .
Description
An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling
%0 Generic
%1 bai2018empirical
%A Bai, Shaojie
%A Kolter, J. Zico
%A Koltun, Vladlen
%D 2018
%K cnn deep_learning sequence_modeling
%T An Empirical Evaluation of Generic Convolutional and Recurrent Networks
for Sequence Modeling
%U http://arxiv.org/abs/1803.01271
%X For most deep learning practitioners, sequence modeling is synonymous with
recurrent networks. Yet recent results indicate that convolutional
architectures can outperform recurrent networks on tasks such as audio
synthesis and machine translation. Given a new sequence modeling task or
dataset, which architecture should one use? We conduct a systematic evaluation
of generic convolutional and recurrent architectures for sequence modeling. The
models are evaluated across a broad range of standard tasks that are commonly
used to benchmark recurrent networks. Our results indicate that a simple
convolutional architecture outperforms canonical recurrent networks such as
LSTMs across a diverse range of tasks and datasets, while demonstrating longer
effective memory. We conclude that the common association between sequence
modeling and recurrent networks should be reconsidered, and convolutional
networks should be regarded as a natural starting point for sequence modeling
tasks. To assist related work, we have made code available at
http://github.com/locuslab/TCN .
@misc{bai2018empirical,
  abstract      = {For most deep learning practitioners, sequence modeling is synonymous with
recurrent networks. Yet recent results indicate that convolutional
architectures can outperform recurrent networks on tasks such as audio
synthesis and machine translation. Given a new sequence modeling task or
dataset, which architecture should one use? We conduct a systematic evaluation
of generic convolutional and recurrent architectures for sequence modeling. The
models are evaluated across a broad range of standard tasks that are commonly
used to benchmark recurrent networks. Our results indicate that a simple
convolutional architecture outperforms canonical recurrent networks such as
LSTMs across a diverse range of tasks and datasets, while demonstrating longer
effective memory. We conclude that the common association between sequence
modeling and recurrent networks should be reconsidered, and convolutional
networks should be regarded as a natural starting point for sequence modeling
tasks. To assist related work, we have made code available at
http://github.com/locuslab/TCN .},
  added-at      = {2020-07-14T20:45:28.000+0200},
  archiveprefix = {arXiv},
  author        = {Bai, Shaojie and Kolter, J. Zico and Koltun, Vladlen},
  biburl        = {https://www.bibsonomy.org/bibtex/2a3f887593af7810527f7f70aa9cc786b/dallmann},
  description   = {An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling},
  eprint        = {1803.01271},
  interhash     = {916d486217a893422a2518fa736577cd},
  intrahash     = {a3f887593af7810527f7f70aa9cc786b},
  keywords      = {cnn deep_learning sequence_modeling},
  timestamp     = {2020-07-14T20:45:28.000+0200},
  title         = {An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling},
  url           = {http://arxiv.org/abs/1803.01271},
  year          = {2018},
}