In this work we explore recent advances in Recurrent Neural Networks for
large-scale Language Modeling, a task central to language understanding. We
extend current models to deal with two key challenges present in this task:
corpora and vocabulary sizes, and the complex, long-term structure of language.
We perform an exhaustive study on techniques such as character Convolutional
Neural Networks or Long Short-Term Memory, on the One Billion Word Benchmark.
Our best single model significantly improves the state-of-the-art perplexity
from 51.3 down to 30.0 (whilst reducing the number of parameters by a factor of
20), while an ensemble of models sets a new record by improving perplexity from
41.0 down to 23.7. We also release these models for the NLP and ML community to
study and improve upon.
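The headline numbers in the abstract are perplexities on the One Billion Word Benchmark test set; perplexity is the exponential of the average per-token negative log-likelihood, so lower is better. The following is a minimal sketch of how that quantity would be computed from per-token log-probabilities. It is not code from the paper or from the released models; the function name and toy values are illustrative assumptions only.

import math

def perplexity(token_log_probs):
    # Corpus perplexity: exp of the mean negative natural-log probability
    # assigned to each token. The 51.3 -> 30.0 improvement quoted in the
    # abstract is measured on this scale.
    avg_nll = -sum(token_log_probs) / len(token_log_probs)
    return math.exp(avg_nll)

# Toy usage (made-up values, not real model output): a model assigning
# probability 0.1 to every token has perplexity 10.
print(perplexity([math.log(0.1)] * 5))  # ~10.0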
@misc{jozefowicz2016exploring,
author = {Jozefowicz, Rafal and Vinyals, Oriol and Schuster, Mike and Shazeer, Noam and Wu, Yonghui},
keywords = {LM RNN language},
note = {cite arxiv:1602.02410},
title = {Exploring the Limits of Language Modeling},
url = {http://arxiv.org/abs/1602.02410},
year = 2016
}