Language models pretrained on text from a wide variety of sources form the
foundation of today's NLP. In light of the success of these broad-coverage
models, we investigate whether it is still helpful to tailor a pretrained model
to the domain of a target task. We present a study across four domains
(biomedical and computer science publications, news, and reviews) and eight
classification tasks, showing that a second phase of pretraining in-domain
(domain-adaptive pretraining) leads to performance gains, under both high- and
low-resource settings. Moreover, adapting to the task's unlabeled data
(task-adaptive pretraining) improves performance even after domain-adaptive
pretraining. Finally, we show that adapting to a task corpus augmented using
simple data selection strategies is an effective alternative, especially when
resources for domain-adaptive pretraining might be unavailable. Overall, we
consistently find that multi-phase adaptive pretraining offers large gains in
task performance.
%0 Generic
%1 gururangan2020pretraining
%A Gururangan, Suchin
%A Marasović, Ana
%A Swayamdipta, Swabha
%A Lo, Kyle
%A Beltagy, Iz
%A Downey, Doug
%A Smith, Noah A.
%D 2020
%K adaptation deep deeplearning domain language learning machine model nlp pretraining training
%T Don't Stop Pretraining: Adapt Language Models to Domains and Tasks
%U http://arxiv.org/abs/2004.10964
%X Language models pretrained on text from a wide variety of sources form the
foundation of today's NLP. In light of the success of these broad-coverage
models, we investigate whether it is still helpful to tailor a pretrained model
to the domain of a target task. We present a study across four domains
(biomedical and computer science publications, news, and reviews) and eight
classification tasks, showing that a second phase of pretraining in-domain
(domain-adaptive pretraining) leads to performance gains, under both high- and
low-resource settings. Moreover, adapting to the task's unlabeled data
(task-adaptive pretraining) improves performance even after domain-adaptive
pretraining. Finally, we show that adapting to a task corpus augmented using
simple data selection strategies is an effective alternative, especially when
resources for domain-adaptive pretraining might be unavailable. Overall, we
consistently find that multi-phase adaptive pretraining offers large gains in
task performance.
@misc{gururangan2020pretraining,
  abstract      = {Language models pretrained on text from a wide variety of sources form the
foundation of today's NLP. In light of the success of these broad-coverage
models, we investigate whether it is still helpful to tailor a pretrained model
to the domain of a target task. We present a study across four domains
(biomedical and computer science publications, news, and reviews) and eight
classification tasks, showing that a second phase of pretraining in-domain
(domain-adaptive pretraining) leads to performance gains, under both high- and
low-resource settings. Moreover, adapting to the task's unlabeled data
(task-adaptive pretraining) improves performance even after domain-adaptive
pretraining. Finally, we show that adapting to a task corpus augmented using
simple data selection strategies is an effective alternative, especially when
resources for domain-adaptive pretraining might be unavailable. Overall, we
consistently find that multi-phase adaptive pretraining offers large gains in
task performance.},
  added-at      = {2020-09-22T08:40:53.000+0200},
  archiveprefix = {arXiv},
  author        = {Gururangan, Suchin and Marasovi{\'c}, Ana and Swayamdipta, Swabha and Lo, Kyle and Beltagy, Iz and Downey, Doug and Smith, Noah A.},
  biburl        = {https://www.bibsonomy.org/bibtex/2f2447bc1b0f0882e52e02d5351051969/jaeschke},
  description   = {2004.10964.pdf},
  eprint        = {2004.10964},
  interhash     = {5c763619429ef5a5a82ccf5af07f01ac},
  intrahash     = {f2447bc1b0f0882e52e02d5351051969},
  keywords      = {adaptation deep deeplearning domain language learning machine model nlp pretraining training},
  note          = {ACL 2020},
  primaryclass  = {cs.CL},
  timestamp     = {2021-05-19T08:35:34.000+0200},
  title         = {Don't Stop Pretraining: Adapt Language Models to Domains and Tasks},
  url           = {http://arxiv.org/abs/2004.10964},
  year          = {2020},
}