In this work we present the experiments which led to the creation of our
BERT and ELECTRA based German language models, GBERT and GELECTRA. By varying
the input training data, model size, and the presence of Whole Word Masking
(WWM) we were able to attain SoTA performance across a set of document
classification and named entity recognition (NER) tasks for both models of base
and large size. We adopt an evaluation driven approach in training these models
and our results indicate that both adding more data and utilizing WWM improve
model performance. By benchmarking against existing German models, we show that
these models are the best German models to date. Our trained models will be
made publicly available to the research community.
%0 Generic
%1 chan2020germans
%A Chan, Branden
%A Schweter, Stefan
%A Möller, Timo
%D 2020
%K bert gbert languagemodel lm
%T German's Next Language Model
%U http://arxiv.org/abs/2010.10906
%X In this work we present the experiments which led to the creation of our
BERT and ELECTRA based German language models, GBERT and GELECTRA. By varying
the input training data, model size, and the presence of Whole Word Masking
(WWM) we were able to attain SoTA performance across a set of document
classification and named entity recognition (NER) tasks for both models of base
and large size. We adopt an evaluation driven approach in training these models
and our results indicate that both adding more data and utilizing WWM improve
model performance. By benchmarking against existing German models, we show that
these models are the best German models to date. Our trained models will be
made publicly available to the research community.
@misc{chan2020germans,
  abstract      = {In this work we present the experiments which led to the creation of our
BERT and ELECTRA based German language models, GBERT and GELECTRA. By varying
the input training data, model size, and the presence of Whole Word Masking
(WWM) we were able to attain SoTA performance across a set of document
classification and named entity recognition (NER) tasks for both models of base
and large size. We adopt an evaluation driven approach in training these models
and our results indicate that both adding more data and utilizing WWM improve
model performance. By benchmarking against existing German models, we show that
these models are the best German models to date. Our trained models will be
made publicly available to the research community.},
  added-at      = {2023-01-19T10:34:13.000+0100},
  archiveprefix = {arXiv},
  author        = {Chan, Branden and Schweter, Stefan and M{\"o}ller, Timo},
  biburl        = {https://www.bibsonomy.org/bibtex/26dcbc2196481189fd2666b645980e4df/albinzehe},
  description   = {German's Next Language Model},
  eprint        = {2010.10906},
  interhash     = {094bcd9b41d50f003184a4b742c1909f},
  intrahash     = {6dcbc2196481189fd2666b645980e4df},
  keywords      = {bert gbert languagemodel lm},
  note          = {Accepted at COLING 2020},
  timestamp     = {2023-01-19T10:34:13.000+0100},
  title         = {{German}'s Next Language Model},
  url           = {http://arxiv.org/abs/2010.10906},
  year          = {2020}
}