In this work, we release COVID-Twitter-BERT (CT-BERT), a transformer-based
model, pretrained on a large corpus of Twitter messages on the topic of
COVID-19. Our model shows a 10-30% marginal improvement compared to its base
model, BERT-Large, on five different classification datasets. The largest
improvements are on the target domain. Pretrained transformer models, such as
CT-BERT, are trained on a specific target domain and can be used for a wide
variety of natural language processing tasks, including classification,
question-answering and chatbots. CT-BERT is optimised to be used on COVID-19
content, in particular social media posts from Twitter.
Description
COVID-Twitter-BERT: A Natural Language Processing Model to Analyse COVID-19 Content on Twitter
%0 Generic
%1 muller2020covidtwitterbert
%A Müller, Martin
%A Salathé, Marcel
%A Kummervold, Per E
%D 2020
%K antrag bert covid deconspire language model nlp twitter
%T COVID-Twitter-BERT: A Natural Language Processing Model to Analyse
COVID-19 Content on Twitter
%U http://arxiv.org/abs/2005.07503
%X In this work, we release COVID-Twitter-BERT (CT-BERT), a transformer-based
model, pretrained on a large corpus of Twitter messages on the topic of
COVID-19. Our model shows a 10-30% marginal improvement compared to its base
model, BERT-Large, on five different classification datasets. The largest
improvements are on the target domain. Pretrained transformer models, such as
CT-BERT, are trained on a specific target domain and can be used for a wide
variety of natural language processing tasks, including classification,
question-answering and chatbots. CT-BERT is optimised to be used on COVID-19
content, in particular social media posts from Twitter.
@misc{muller2020covidtwitterbert,
  abstract      = {In this work, we release COVID-Twitter-BERT (CT-BERT), a transformer-based
model, pretrained on a large corpus of Twitter messages on the topic of
COVID-19. Our model shows a 10--30\% marginal improvement compared to its base
model, BERT-Large, on five different classification datasets. The largest
improvements are on the target domain. Pretrained transformer models, such as
CT-BERT, are trained on a specific target domain and can be used for a wide
variety of natural language processing tasks, including classification,
question-answering and chatbots. CT-BERT is optimised to be used on COVID-19
content, in particular social media posts from Twitter.},
  added-at      = {2020-09-14T23:04:08.000+0200},
  author        = {Müller, Martin and Salathé, Marcel and Kummervold, Per E},
  biburl        = {https://www.bibsonomy.org/bibtex/28cd18e9d5662cb1036a86e066a6e0df4/schwemmlein},
  description   = {COVID-Twitter-BERT: A Natural Language Processing Model to Analyse COVID-19 Content on Twitter},
  archiveprefix = {arXiv},
  eprint        = {2005.07503},
  primaryclass  = {cs.CL},
  interhash     = {7e5919fdd413eae4c980e7897c98c356},
  intrahash     = {8cd18e9d5662cb1036a86e066a6e0df4},
  keywords      = {antrag bert covid deconspire language model nlp twitter},
  timestamp     = {2020-09-14T23:04:08.000+0200},
  title         = {{COVID-Twitter-BERT}: A Natural Language Processing Model to Analyse
{COVID-19} Content on {Twitter}},
  url           = {http://arxiv.org/abs/2005.07503},
  year          = {2020},
}