Many real-world sequence learning tasks require the prediction of sequences of labels from noisy, unsegmented input data. In speech recognition, for example, an acoustic signal is transcribed into words or sub-word units. Recurrent neural networks (RNNs) are powerful sequence learners that would seem well suited to such tasks. However, because they require pre-segmented training data, and post-processing to transform their outputs into label sequences, their applicability has so far been limited. This paper presents a novel method for training RNNs to label unsegmented sequences directly, thereby solving both problems. An experiment on the TIMIT speech corpus demonstrates its advantages over both a baseline HMM and a hybrid HMM-RNN.
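The CTC objective introduced in this paper has since become a standard component of deep-learning toolkits. As an illustration only (not part of the citation record), here is a minimal sketch using PyTorch's `torch.nn.CTCLoss`, which implements the loss from this paper; the tensor shapes and random data are placeholder assumptions standing in for a real acoustic model.

```python
import torch
import torch.nn as nn

# Minimal sketch of the CTC loss from Graves et al. (2006), as exposed by
# PyTorch's nn.CTCLoss. Shapes and random data are illustrative placeholders.
T, N, C = 50, 4, 20   # input time steps, batch size, classes (index 0 = blank)
S = 30                # maximum label-sequence length

# Per-frame log-probabilities over the C classes, shape (T, N, C).
log_probs = torch.randn(T, N, C, requires_grad=True).log_softmax(2)

# Padded label sequences (no blanks), plus true input/target lengths;
# CTC marginalises over all alignments of each (input, target) pair,
# which is what removes the need for pre-segmented training data.
targets = torch.randint(1, C, (N, S), dtype=torch.long)
input_lengths = torch.full((N,), T, dtype=torch.long)
target_lengths = torch.randint(10, S + 1, (N,), dtype=torch.long)

ctc = nn.CTCLoss(blank=0)
loss = ctc(log_probs, targets, input_lengths, target_lengths)
loss.backward()       # gradients flow to the unsegmented frame scores
print(loss.item())
```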
%0 Conference Paper
%1 graves2006connectionist
%A Graves, Alex
%A Fernández, Santiago
%A Gomez, Faustino
%A Schmidhuber, Jürgen
%B Proceedings of the 23rd International Conference on Machine Learning
%C New York, NY, USA
%D 2006
%I ACM
%P 369--376
%R 10.1145/1143844.1143891
%T Connectionist Temporal Classification: Labelling Unsegmented Sequence Data with Recurrent Neural Networks
%U http://dx.doi.org/10.1145/1143844.1143891
%@ 1-59593-383-2
@inproceedings{graves2006connectionist,
address = {New York, NY, USA},
author = {Graves, Alex and Fern\'{a}ndez, Santiago and Gomez, Faustino and Schmidhuber, J\"{u}rgen},
booktitle = {Proceedings of the 23rd International Conference on Machine Learning},
doi = {10.1145/1143844.1143891},
isbn = {1-59593-383-2},
location = {Pittsburgh, Pennsylvania, USA},
pages = {369--376},
publisher = {ACM},
series = {ICML '06},
title = {{Connectionist Temporal Classification: Labelling Unsegmented Sequence Data with Recurrent Neural Networks}},
url = {http://dx.doi.org/10.1145/1143844.1143891},
year = 2006
}