One of the biggest challenges hindering progress in low-resource and
multilingual machine translation is the lack of good evaluation benchmarks.
Current evaluation benchmarks either lack good coverage of low-resource
languages, consider only restricted domains, or are low quality because they
are constructed using semi-automatic procedures. In this work, we introduce the
FLORES-101 evaluation benchmark, consisting of 3001 sentences extracted from
English Wikipedia and covering a variety of different topics and domains. These
sentences have been translated in 101 languages by professional translators
through a carefully controlled process. The resulting dataset enables better
assessment of model quality on the long tail of low-resource languages,
including the evaluation of many-to-many multilingual translation systems, as
all translations are multilingually aligned. By publicly releasing such a
high-quality and high-coverage dataset, we hope to foster progress in the
machine translation community and beyond.
Description
The FLORES-101 Evaluation Benchmark for Low-Resource and Multilingual Machine Translation
%0 Generic
%1 goyal2021flores101
%A Goyal, Naman
%A Gao, Cynthia
%A Chaudhary, Vishrav
%A Chen, Peng-Jen
%A Wenzek, Guillaume
%A Ju, Da
%A Krishnan, Sanjana
%A Ranzato, Marc'Aurelio
%A Guzman, Francisco
%A Fan, Angela
%D 2021
%K HornMT dataset
%T The FLORES-101 Evaluation Benchmark for Low-Resource and Multilingual
Machine Translation
%U http://arxiv.org/abs/2106.03193
%X One of the biggest challenges hindering progress in low-resource and
multilingual machine translation is the lack of good evaluation benchmarks.
Current evaluation benchmarks either lack good coverage of low-resource
languages, consider only restricted domains, or are low quality because they
are constructed using semi-automatic procedures. In this work, we introduce the
FLORES-101 evaluation benchmark, consisting of 3001 sentences extracted from
English Wikipedia and covering a variety of different topics and domains. These
sentences have been translated in 101 languages by professional translators
through a carefully controlled process. The resulting dataset enables better
assessment of model quality on the long tail of low-resource languages,
including the evaluation of many-to-many multilingual translation systems, as
all translations are multilingually aligned. By publicly releasing such a
high-quality and high-coverage dataset, we hope to foster progress in the
machine translation community and beyond.
@misc{goyal2021flores101,
  abstract      = {One of the biggest challenges hindering progress in low-resource and
multilingual machine translation is the lack of good evaluation benchmarks.
Current evaluation benchmarks either lack good coverage of low-resource
languages, consider only restricted domains, or are low quality because they
are constructed using semi-automatic procedures. In this work, we introduce the
FLORES-101 evaluation benchmark, consisting of 3001 sentences extracted from
English Wikipedia and covering a variety of different topics and domains. These
sentences have been translated in 101 languages by professional translators
through a carefully controlled process. The resulting dataset enables better
assessment of model quality on the long tail of low-resource languages,
including the evaluation of many-to-many multilingual translation systems, as
all translations are multilingually aligned. By publicly releasing such a
high-quality and high-coverage dataset, we hope to foster progress in the
machine translation community and beyond.},
  added-at      = {2021-12-11T23:32:03.000+0100},
  archiveprefix = {arXiv},
  author        = {Goyal, Naman and Gao, Cynthia and Chaudhary, Vishrav and Chen, Peng-Jen and Wenzek, Guillaume and Ju, Da and Krishnan, Sanjana and Ranzato, Marc'Aurelio and Guzm{\'a}n, Francisco and Fan, Angela},
  biburl        = {https://www.bibsonomy.org/bibtex/23e9038e94dbdd62f4e61276f1e929d04/asmelash},
  description   = {The FLORES-101 Evaluation Benchmark for Low-Resource and Multilingual Machine Translation},
  eprint        = {2106.03193},
  interhash     = {1605f84324796cfd4051a7fcbdb83b6b},
  intrahash     = {3e9038e94dbdd62f4e61276f1e929d04},
  keywords      = {HornMT dataset},
  primaryclass  = {cs.CL},
  timestamp     = {2021-12-11T23:32:03.000+0100},
  title         = {The {FLORES-101} Evaluation Benchmark for Low-Resource and Multilingual Machine Translation},
  url           = {http://arxiv.org/abs/2106.03193},
  year          = {2021}
}