The past decade has witnessed exciting work in the field of Statistical Machine Translation (SMT). However, accurate evaluation of its potential in real-life contexts is still an open question. In this study, we investigate the behavior of an SMT engine faced with a corpus far different from the one it has been trained on. We show that terminological databases are obvious resources that should be used to boost the performance of a statistical engine. We propose and evaluate one way of integrating terminology into an SMT engine which yields a significant reduction in word error rate.
%0 Journal Article
%1 Langlais2004
%A Langlais, Philippe
%A Carl, Michael
%D 2004
%J Terminology
%K Estad{\'{\i}}stica,Internet,Traducci{\'{o}}n,Traducci{\'{o}}n autom{\'{a}}tica,World web wide
%N 1
%P 131--154
%T General-purpose statistical translation engine and domain specific texts: Would it work?
%U http://www.iai.uni-sb.de/~carl/papers/GP-SMT.ps
%V 10
%X The past decade has witnessed exciting work in the field of Statistical Machine Translation (SMT). However, accurate evaluation of its potential in real-life contexts is still an open question. In this study, we investigate the behavior of an SMT engine faced with a corpus far different from the one it has been trained on. We show that terminological databases are obvious resources that should be used to boost the performance of a statistical engine. We propose and evaluate one way of integrating terminology into an SMT engine which yields a significant reduction in word error rate.
%Z Language: eng
@comment{Langlais2004: journal article by Langlais and Carl, Terminology 10(1), 2004. The added-at, timestamp, biburl, interhash and intrahash fields are bookkeeping metadata carried over from the BibSonomy record (see biburl) and are ignored by standard bibliography styles.}
@article{Langlais2004,
  author    = {Langlais, Philippe and Carl, Michael},
  title     = {General-purpose statistical translation engine and domain specific texts: Would it work?},
  journal   = {Terminology},
  volume    = {10},
  number    = {1},
  pages     = {131--154},
  year      = {2004},
  url       = {http://www.iai.uni-sb.de/~carl/papers/GP-SMT.ps},
  abstract  = {The past decade has witnessed exciting work in the field of Statistical Machine Translation (SMT). However, accurate evaluation of its potential in real-life contexts is still an open question. In this study, we investigate the behavior of an SMT engine faced with a corpus far different from the one it has been trained on. We show that terminological databases are obvious resources that should be used to boost the performance of a statistical engine. We propose and evaluate one way of integrating terminology into an SMT engine which yields a significant reduction in word error rate.},
  keywords  = {Estad{\'{\i}}stica,Internet,Traducci{\'{o}}n,Traducci{\'{o}}n autom{\'{a}}tica,World web wide},
  annote    = {Language: eng},
  added-at  = {2015-12-01T11:33:23.000+0100},
  timestamp = {2015-12-01T11:33:23.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/29c7782dfd07355b28a9fb7b442bb9407/sofiagruiz92},
  interhash = {41727fa155b52fb22a8cbd5ce9de406a},
  intrahash = {9c7782dfd07355b28a9fb7b442bb9407},
}