In the last year, new models and methods for pretraining and transfer
learning have driven striking performance improvements across a range of
language understanding tasks. The GLUE benchmark, introduced a little over one
year ago, offers a single-number metric that summarizes progress on a diverse
set of such tasks, but performance on the benchmark has recently surpassed the
level of non-expert humans, suggesting limited headroom for further research.
In this paper we present SuperGLUE, a new benchmark styled after GLUE with a
new set of more difficult language understanding tasks, a software toolkit, and
a public leaderboard. SuperGLUE is available at super.gluebenchmark.com.
Description
[1905.00537] SuperGLUE: A Stickier Benchmark for General-Purpose Language Understanding Systems
%0 Generic
%1 wang2019superglue
%A Wang, Alex
%A Pruksachatkun, Yada
%A Nangia, Nikita
%A Singh, Amanpreet
%A Michael, Julian
%A Hill, Felix
%A Levy, Omer
%A Bowman, Samuel R.
%D 2019
%K idea:bee_audio_llm idea:big_data_geo_2 superglue transformer
%T SuperGLUE: A Stickier Benchmark for General-Purpose Language
Understanding Systems
%U http://arxiv.org/abs/1905.00537
%X In the last year, new models and methods for pretraining and transfer
learning have driven striking performance improvements across a range of
language understanding tasks. The GLUE benchmark, introduced a little over one
year ago, offers a single-number metric that summarizes progress on a diverse
set of such tasks, but performance on the benchmark has recently surpassed the
level of non-expert humans, suggesting limited headroom for further research.
In this paper we present SuperGLUE, a new benchmark styled after GLUE with a
new set of more difficult language understanding tasks, a software toolkit, and
a public leaderboard. SuperGLUE is available at super.gluebenchmark.com.
@misc{wang2019superglue,
  abstract      = {In the last year, new models and methods for pretraining and transfer
learning have driven striking performance improvements across a range of
language understanding tasks. The GLUE benchmark, introduced a little over one
year ago, offers a single-number metric that summarizes progress on a diverse
set of such tasks, but performance on the benchmark has recently surpassed the
level of non-expert humans, suggesting limited headroom for further research.
In this paper we present SuperGLUE, a new benchmark styled after GLUE with a
new set of more difficult language understanding tasks, a software toolkit, and
a public leaderboard. SuperGLUE is available at super.gluebenchmark.com.},
  added-at      = {2023-05-03T11:39:06.000+0200},
  archiveprefix = {arXiv},
  author        = {Wang, Alex and Pruksachatkun, Yada and Nangia, Nikita and Singh, Amanpreet and Michael, Julian and Hill, Felix and Levy, Omer and Bowman, Samuel R.},
  biburl        = {https://www.bibsonomy.org/bibtex/260ea09a487da3a54832010ec576102b6/annakrause},
  description   = {[1905.00537] SuperGLUE: A Stickier Benchmark for General-Purpose Language Understanding Systems},
  eprint        = {1905.00537},
  interhash     = {80a8d4ee72826f1a620e8cc17e78ee34},
  intrahash     = {60ea09a487da3a54832010ec576102b6},
  keywords      = {idea:bee_audio_llm idea:big_data_geo_2 superglue transformer},
  note          = {NeurIPS 2019},
  timestamp     = {2023-05-03T11:39:06.000+0200},
  title         = {{SuperGLUE}: A Stickier Benchmark for General-Purpose Language Understanding Systems},
  url           = {http://arxiv.org/abs/1905.00537},
  year          = {2019},
}