Syntactic constituency parsing is a fundamental problem in natural language
processing and has been the subject of intensive research and engineering for
decades. As a result, the most accurate parsers are domain specific, complex,
and inefficient. In this paper we show that the domain agnostic
attention-enhanced sequence-to-sequence model achieves state-of-the-art results
on the most widely used syntactic constituency parsing dataset, when trained on
a large synthetic corpus that was annotated using existing parsers. It also
matches the performance of standard parsers when trained only on a small
human-annotated dataset, which shows that this model is highly data-efficient,
in contrast to sequence-to-sequence models without the attention mechanism. Our
parser is also fast, processing over a hundred sentences per second with an
unoptimized CPU implementation.
%0 Generic
%1 vinyals2014grammar
%A Vinyals, Oriol
%A Kaiser, Lukasz
%A Koo, Terry
%A Petrov, Slav
%A Sutskever, Ilya
%A Hinton, Geoffrey
%D 2014
%K nlp
%T Grammar as a Foreign Language
%U http://arxiv.org/abs/1412.7449
%X Syntactic constituency parsing is a fundamental problem in natural language
processing and has been the subject of intensive research and engineering for
decades. As a result, the most accurate parsers are domain specific, complex,
and inefficient. In this paper we show that the domain agnostic
attention-enhanced sequence-to-sequence model achieves state-of-the-art results
on the most widely used syntactic constituency parsing dataset, when trained on
a large synthetic corpus that was annotated using existing parsers. It also
matches the performance of standard parsers when trained only on a small
human-annotated dataset, which shows that this model is highly data-efficient,
in contrast to sequence-to-sequence models without the attention mechanism. Our
parser is also fast, processing over a hundred sentences per second with an
unoptimized CPU implementation.
@misc{vinyals2014grammar,
  abstract      = {Syntactic constituency parsing is a fundamental problem in natural language
processing and has been the subject of intensive research and engineering for
decades. As a result, the most accurate parsers are domain specific, complex,
and inefficient. In this paper we show that the domain agnostic
attention-enhanced sequence-to-sequence model achieves state-of-the-art results
on the most widely used syntactic constituency parsing dataset, when trained on
a large synthetic corpus that was annotated using existing parsers. It also
matches the performance of standard parsers when trained only on a small
human-annotated dataset, which shows that this model is highly data-efficient,
in contrast to sequence-to-sequence models without the attention mechanism. Our
parser is also fast, processing over a hundred sentences per second with an
unoptimized CPU implementation.},
  added-at      = {2019-01-15T12:09:50.000+0100},
  author        = {Vinyals, Oriol and Kaiser, Lukasz and Koo, Terry and Petrov, Slav and Sutskever, Ilya and Hinton, Geoffrey},
  biburl        = {https://www.bibsonomy.org/bibtex/27520219e6dcc41d90551fa95b2818a0e/bechr7},
  description   = {Grammar as a Foreign Language},
  eprint        = {1412.7449},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  interhash     = {c5b0edb5edc545270318aeb86184fce7},
  intrahash     = {7520219e6dcc41d90551fa95b2818a0e},
  keywords      = {nlp},
  timestamp     = {2019-01-15T12:09:50.000+0100},
  title         = {Grammar as a Foreign Language},
  url           = {http://arxiv.org/abs/1412.7449},
  year          = {2014}
}