Crowdsourcing has been the prevalent paradigm for creating natural language
understanding datasets in recent years. A common crowdsourcing practice is to
recruit a small number of high-quality workers, and have them massively
generate examples. Having only a few workers generate the majority of examples
raises concerns about data diversity, especially when workers freely generate
sentences. In this paper, we perform a series of experiments showing these
concerns are evident in three recent NLP datasets. We show that model
performance improves when training with annotator identifiers as features, and
that models are able to recognize the most productive annotators. Moreover, we
show that often models do not generalize well to examples from annotators that
did not contribute to the training set. Our findings suggest that annotator
bias should be monitored during dataset creation, and that test set annotators
should be disjoint from training set annotators.
Description
Are We Modeling the Task or the Annotator? An Investigation of Annotator Bias in Natural Language Understanding Datasets
%0 Generic
%1 geva2019modeling
%A Geva, Mor
%A Goldberg, Yoav
%A Berant, Jonathan
%D 2019
%K annotation dataset task
%T Are We Modeling the Task or the Annotator? An Investigation of Annotator
Bias in Natural Language Understanding Datasets
%U http://arxiv.org/abs/1908.07898
%X Crowdsourcing has been the prevalent paradigm for creating natural language
understanding datasets in recent years. A common crowdsourcing practice is to
recruit a small number of high-quality workers, and have them massively
generate examples. Having only a few workers generate the majority of examples
raises concerns about data diversity, especially when workers freely generate
sentences. In this paper, we perform a series of experiments showing these
concerns are evident in three recent NLP datasets. We show that model
performance improves when training with annotator identifiers as features, and
that models are able to recognize the most productive annotators. Moreover, we
show that often models do not generalize well to examples from annotators that
did not contribute to the training set. Our findings suggest that annotator
bias should be monitored during dataset creation, and that test set annotators
should be disjoint from training set annotators.
@misc{geva2019modeling,
  abstract    = {Crowdsourcing has been the prevalent paradigm for creating natural language
understanding datasets in recent years. A common crowdsourcing practice is to
recruit a small number of high-quality workers, and have them massively
generate examples. Having only a few workers generate the majority of examples
raises concerns about data diversity, especially when workers freely generate
sentences. In this paper, we perform a series of experiments showing these
concerns are evident in three recent NLP datasets. We show that model
performance improves when training with annotator identifiers as features, and
that models are able to recognize the most productive annotators. Moreover, we
show that often models do not generalize well to examples from annotators that
did not contribute to the training set. Our findings suggest that annotator
bias should be monitored during dataset creation, and that test set annotators
should be disjoint from training set annotators.},
  added-at    = {2021-06-01T16:42:26.000+0200},
  author      = {Geva, Mor and Goldberg, Yoav and Berant, Jonathan},
  biburl      = {https://www.bibsonomy.org/bibtex/29c97822e47ebfbe24c3aa51bed152ee8/parismic},
  description = {Are We Modeling the Task or the Annotator? An Investigation of Annotator Bias in Natural Language Understanding Datasets},
  eprint      = {1908.07898},
  eprinttype  = {arXiv},
  interhash   = {ca54bb16a70f0cd268c3e9639547e2fa},
  intrahash   = {9c97822e47ebfbe24c3aa51bed152ee8},
  keywords    = {annotation dataset task},
  note        = {Presented at EMNLP-IJCNLP 2019},
  timestamp   = {2021-06-01T16:42:26.000+0200},
  title       = {Are We Modeling the Task or the Annotator? An Investigation of Annotator Bias in Natural Language Understanding Datasets},
  url         = {http://arxiv.org/abs/1908.07898},
  year        = {2019}
}