Echo State Networks (ESNs) have been shown to be effective for a number
of tasks, including motor control, dynamic time series prediction,
and memorizing musical sequences. However, their performance on natural
language tasks has been largely unexplored until now. Simple Recurrent
Networks (SRNs) have a long history in language modeling and show
a striking similarity in architecture to ESNs. A comparison of SRNs
and ESNs on a natural language task is therefore a natural choice
for experimentation. Elman applied SRNs to a standard task in statistical
NLP: predicting the next word in a corpus, given the previous words.
Using a simple context-free grammar and an SRN with backpropagation
through time (BPTT), Elman showed that the network was able to learn
internal representations that were sensitive to linguistic processes
that were useful for the prediction task. Here, using ESNs, we show
that training such internal representations is unnecessary to achieve
levels of performance comparable to SRNs. We also compare the processing
capabilities of ESNs to bigrams and trigrams. Due to some unexpected
regularities of Elman's grammar, these statistical techniques are
capable of maintaining dependencies over greater distances than might
be initially expected. However, we show that the memory of ESNs in
this word-prediction task, although noisy, extends significantly
beyond that of bigrams and trigrams, enabling ESNs to make good predictions
of verb agreement at distances over which these methods operate at
chance. Overall, our results indicate a surprising ability of ESNs
to learn a grammar, suggesting that they form useful internal representations
without learning them.
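
The claim that ESNs reach SRN-level performance "without learning" their internal representations comes down to the architecture: the recurrent reservoir weights are random and fixed, and only a linear readout is fitted to the next-word targets. The sketch below is a minimal illustration of that idea under assumed settings (a toy vocabulary, a hypothetical sentence stream, placeholder reservoir size, spectral radius, and ridge penalty); it is not the authors' experimental setup, and it adds a count-based bigram baseline of the kind the paper compares against.

# Minimal ESN sketch for next-word prediction, plus a bigram baseline.
# All names and numbers below are illustrative assumptions, not the paper's settings.
import numpy as np
from collections import Counter, defaultdict

rng = np.random.default_rng(0)

# Toy corpus standing in for sentences from an Elman-style grammar.
vocab = ["boy", "girl", "dogs", "cats", "who", "sees", "see", "chases", "chase", "."]
word_to_id = {w: i for i, w in enumerate(vocab)}
V = len(vocab)

sentences = ["boy sees dogs .", "dogs chase cats .",
             "girl who sees dogs chases cats ."]
ids = np.array([word_to_id[w] for s in 60 * sentences for w in s.split()])

X = np.eye(V)[ids]          # one-hot inputs, shape (T, V)
targets = np.roll(ids, -1)  # next-word targets

# Fixed, untrained reservoir: the recurrent weights are random and stay random.
n_res = 200
W_in = rng.uniform(-0.5, 0.5, (n_res, V))
W = rng.uniform(-0.5, 0.5, (n_res, n_res))
W *= 0.9 / np.max(np.abs(np.linalg.eigvals(W)))   # rescale to spectral radius 0.9

def run_reservoir(X):
    """Drive the fixed reservoir with the input sequence; no weights are updated here."""
    states, h = np.zeros((len(X), n_res)), np.zeros(n_res)
    for t, x in enumerate(X):
        h = np.tanh(W_in @ x + W @ h)
        states[t] = h
    return states

H = run_reservoir(X)

# Only the linear readout is trained, via ridge regression onto one-hot targets.
Y = np.eye(V)[targets]
W_out = np.linalg.solve(H.T @ H + 1e-2 * np.eye(n_res), H.T @ Y)
esn_pred = (H @ W_out).argmax(axis=1)
print("ESN next-word accuracy:", (esn_pred == targets).mean())

# Bigram baseline: predict the most frequent successor of the previous word.
counts = defaultdict(Counter)
for prev, nxt in zip(ids[:-1], ids[1:]):
    counts[prev][nxt] += 1
bigram_pred = np.array([counts[prev].most_common(1)[0][0] for prev in ids[:-1]])
print("bigram next-word accuracy:", (bigram_pred == targets[:-1]).mean())

The bigram predictor can only look one word back, whereas the reservoir state summarizes several preceding words; that difference in usable memory is the property the paper probes with long-distance verb agreement.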
@article{Tong2007,
author = {Tong, Matthew H and Bickett, Adam D and Christiansen, Eric M and Cottrell, Garrison W},
doi = {10.1016/j.neunet.2007.04.013},
journal = {Neural Networks},
keywords = {Artificial Intelligence; Humans; Language; Learning; Models, Neurological; Nerve Net; Neural Networks (Computer)},
number = 3,
pages = {424--432},
pii = {S0893-6080(07)00035-4},
pmid = {17556116},
title = {Learning grammatical structure with Echo State Networks.},
url = {http://dx.doi.org/10.1016/j.neunet.2007.04.013},
volume = 20,
year = 2007
}