Hierarchical attention networks have recently achieved remarkable performance
for document classification in a given language. However, when multilingual
document collections are considered, training such models separately for each
language entails linear parameter growth and lack of cross-language transfer.
Learning a single multilingual model with fewer parameters is therefore a
challenging but potentially beneficial objective. To this end, we propose
multilingual hierarchical attention networks for learning document structures,
with shared encoders and/or attention mechanisms across languages, using
multi-task learning and an aligned semantic space as input. We evaluate the
proposed models on multilingual document classification with disjoint label
sets, on a large dataset which we provide, with 600k news documents in 8
languages, and 5k labels. The multilingual models outperform strong monolingual
ones in low-resource as well as full-resource settings, and use fewer
parameters, thus confirming their computational efficiency and the utility of
cross-language transfer.
Description
Multilingual Hierarchical Attention Networks for Document Classification
%0 Generic
%1 pappas2017multilingual
%A Pappas, Nikolaos
%A Popescu-Belis, Andrei
%D 2017
%K classification multilingualism
%T Multilingual Hierarchical Attention Networks for Document Classification
%U http://arxiv.org/abs/1707.00896
%X Hierarchical attention networks have recently achieved remarkable performance
for document classification in a given language. However, when multilingual
document collections are considered, training such models separately for each
language entails linear parameter growth and lack of cross-language transfer.
Learning a single multilingual model with fewer parameters is therefore a
challenging but potentially beneficial objective. To this end, we propose
multilingual hierarchical attention networks for learning document structures,
with shared encoders and/or attention mechanisms across languages, using
multi-task learning and an aligned semantic space as input. We evaluate the
proposed models on multilingual document classification with disjoint label
sets, on a large dataset which we provide, with 600k news documents in 8
languages, and 5k labels. The multilingual models outperform strong monolingual
ones in low-resource as well as full-resource settings, and use fewer
parameters, thus confirming their computational efficiency and the utility of
cross-language transfer.
@misc{pappas2017multilingual,
  abstract      = {Hierarchical attention networks have recently achieved remarkable performance
for document classification in a given language. However, when multilingual
document collections are considered, training such models separately for each
language entails linear parameter growth and lack of cross-language transfer.
Learning a single multilingual model with fewer parameters is therefore a
challenging but potentially beneficial objective. To this end, we propose
multilingual hierarchical attention networks for learning document structures,
with shared encoders and/or attention mechanisms across languages, using
multi-task learning and an aligned semantic space as input. We evaluate the
proposed models on multilingual document classification with disjoint label
sets, on a large dataset which we provide, with 600k news documents in 8
languages, and 5k labels. The multilingual models outperform strong monolingual
ones in low-resource as well as full-resource settings, and use fewer
parameters, thus confirming their computational efficiency and the utility of
cross-language transfer.},
  added-at      = {2017-07-07T16:29:01.000+0200},
  archiveprefix = {arXiv},
  author        = {Pappas, Nikolaos and Popescu-Belis, Andrei},
  biburl        = {https://www.bibsonomy.org/bibtex/289121702659711775cd1650f381fcc44/nik0spapp},
  description   = {Multilingual Hierarchical Attention Networks for Document Classification},
  eprint        = {1707.00896},
  interhash     = {a6c20bc48a5f5c33b38a7cdd8fe72935},
  intrahash     = {89121702659711775cd1650f381fcc44},
  keywords      = {classification multilingualism},
  timestamp     = {2017-07-07T16:33:42.000+0200},
  title         = {Multilingual Hierarchical Attention Networks for Document Classification},
  url           = {http://arxiv.org/abs/1707.00896},
  year          = {2017}
}