In this study, we propose advancing all-neural speech recognition by directly
incorporating attention modeling within the Connectionist Temporal
Classification (CTC) framework. In particular, we derive new context vectors
using time convolution features to model attention as part of the CTC network.
To further improve attention modeling, we utilize content information extracted
from a network representing an implicit language model. Finally, we introduce
vector based attention weights that are applied on context vectors across both
time and their individual components. We evaluate our system on a 3400 hours
Microsoft Cortana voice assistant task and demonstrate that our proposed model
consistently outperforms the baseline model achieving about 20% relative
reduction in word error rates.
Description
[1803.05563] Advancing Connectionist Temporal Classification With Attention Modeling
%0 Generic
%1 das2018advancing
%A Das, Amit
%A Li, Jinyu
%A Zhao, Rui
%A Gong, Yifan
%D 2018
%K attention ctc
%T Advancing Connectionist Temporal Classification With Attention Modeling
%U http://arxiv.org/abs/1803.05563
%X In this study, we propose advancing all-neural speech recognition by directly
incorporating attention modeling within the Connectionist Temporal
Classification (CTC) framework. In particular, we derive new context vectors
using time convolution features to model attention as part of the CTC network.
To further improve attention modeling, we utilize content information extracted
from a network representing an implicit language model. Finally, we introduce
vector based attention weights that are applied on context vectors across both
time and their individual components. We evaluate our system on a 3400 hours
Microsoft Cortana voice assistant task and demonstrate that our proposed model
consistently outperforms the baseline model achieving about 20% relative
reduction in word error rates.
@misc{das2018advancing,
  abstract      = {In this study, we propose advancing all-neural speech recognition by directly
incorporating attention modeling within the Connectionist Temporal
Classification (CTC) framework. In particular, we derive new context vectors
using time convolution features to model attention as part of the CTC network.
To further improve attention modeling, we utilize content information extracted
from a network representing an implicit language model. Finally, we introduce
vector based attention weights that are applied on context vectors across both
time and their individual components. We evaluate our system on a 3400 hours
Microsoft Cortana voice assistant task and demonstrate that our proposed model
consistently outperforms the baseline model achieving about 20% relative
reduction in word error rates.},
  added-at      = {2018-03-16T14:45:44.000+0100},
  archiveprefix = {arXiv},
  author        = {Das, Amit and Li, Jinyu and Zhao, Rui and Gong, Yifan},
  biburl        = {https://www.bibsonomy.org/bibtex/2f493fa91cba2a8db550ef82b063f3248/rcb},
  description   = {[1803.05563] Advancing Connectionist Temporal Classification With Attention Modeling},
  eprint        = {1803.05563},
  interhash     = {c81729e5524f2cfe66709bd008e7a17d},
  intrahash     = {f493fa91cba2a8db550ef82b063f3248},
  keywords      = {attention ctc},
  note          = {Accepted at ICASSP 2018},
  timestamp     = {2018-03-16T14:45:44.000+0100},
  title         = {Advancing {Connectionist Temporal Classification} With Attention Modeling},
  url           = {http://arxiv.org/abs/1803.05563},
  year          = 2018
}