Most current attempts at automatic speech recognition are formulated in an artificial intelligence framework. In this paper we approach the problem from an information-theoretic point of view. We describe the overall structure of a linguistic statistical decoder (LSD) for the recognition of continuous speech. The input to the decoder is a string of phonetic symbols estimated by an acoustic processor (AP). For each phonetic string, the decoder finds the most likely input sentence. The decoder consists of four major subparts: 1) a statistical model of the language being recognized; 2) a phonemic dictionary and statistical phonological rules characterizing the speaker; 3) a phonetic matching algorithm that computes the similarity between phonetic strings, using the performance characteristics of the AP; 4) a word level search control. The details of each of the subparts and their interaction during the decoding process are discussed.
%0 Journal Article
%1 Jelinek1975
%A Jelinek, Frederick
%A Bahl, Lalit R.
%A Mercer, Robert L.
%D 1975
%J IEEE Transactions on Information Theory
%K Decoding;Speech control;Automatic intelligence;Automatic languages;Speech recognition;Artificial recognition;Character recognition;Decoding;Dictionaries;Loudspeakers;Natural recognition;Vocabulary speech
%N 3
%P 250-256
%R 10.1109/TIT.1975.1055384
%T Design of a linguistic statistical decoder for the recognition of continuous speech
%V 21
%X Most current attempts at automatic speech recognition are formulated in an artificial intelligence framework. In this paper we approach the problem from an information-theoretic point of view. We describe the overall structure of a linguistic statistical decoder (LSD) for the recognition of continuous speech. The input to the decoder is a string of phonetic symbols estimated by an acoustic processor (AP). For each phonetic string, the decoder finds the most likely input sentence. The decoder consists of four major subparts: 1) a statistical model of the language being recognized; 2) a phonemic dictionary and statistical phonological rules characterizing the speaker; 3) a phonetic matching algorithm that computes the similarity between phonetic strings, using the performance characteristics of the AP; 4) a word level search control. The details of each of the subparts and their interaction during the decoding process are discussed.
@comment{Jelinek1975: journal article record. Bibliographic fields come first; added-at, biburl, file, interhash, intrahash, owner and timestamp are bookkeeping fields carried over from the export and are not printed by standard bibliography styles.}
@article{Jelinek1975,
  author    = {Jelinek, Frederick and Bahl, Lalit R. and Mercer, Robert L.},
  title     = {Design of a linguistic statistical decoder for the recognition of continuous speech},
  journal   = {IEEE Transactions on Information Theory},
  volume    = {21},
  number    = {3},
  pages     = {250--256},
  month     = may,
  year      = {1975},
  issn      = {0018-9448},
  doi       = {10.1109/TIT.1975.1055384},
  keywords  = {Decoding;Speech control;Automatic intelligence;Automatic languages;Speech recognition;Artificial recognition;Character recognition;Decoding;Dictionaries;Loudspeakers;Natural recognition;Vocabulary speech},
  abstract  = {Most current attempts at automatic speech recognition are formulated in an artificial intelligence framework. In this paper we approach the problem from an information-theoretic point of view. We describe the overall structure of a linguistic statistical decoder (LSD) for the recognition of continuous speech. The input to the decoder is a string of phonetic symbols estimated by an acoustic processor (AP). For each phonetic string, the decoder finds the most likely input sentence. The decoder consists of four major subparts: 1) a statistical model of the language being recognized; 2) a phonemic dictionary and statistical phonological rules characterizing the speaker; 3) a phonetic matching algorithm that computes the similarity between phonetic strings, using the performance characteristics of the AP; 4) a word level search control. The details of each of the subparts and their interaction during the decoding process are discussed.},
  added-at  = {2021-02-01T10:51:23.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/2cc3755cb9053e78805572e8849b5d38a/m-toman},
  file      = {:pdfs/jelinek_transinftheo_1975.pdf:PDF},
  interhash = {b87fd07f97cf06c0edeb831654d90e59},
  intrahash = {cc3755cb9053e78805572e8849b5d38a},
  owner     = {schabus},
  timestamp = {2021-02-01T10:51:23.000+0100},
}