State-of-the-art models for multi-hop question answering typically augment
large-scale language models like BERT with additional, intuitively useful
capabilities such as named entity recognition, graph-based reasoning, and
question decomposition. However, does their strong performance on popular
multi-hop datasets really justify this added design complexity? Our results
suggest that the answer may be no, because even our simple pipeline based on
BERT, named Quark, performs surprisingly well. Specifically, on HotpotQA, Quark
outperforms these models on both question answering and support identification
(and achieves performance very close to a RoBERTa model). Our pipeline has
three steps: 1) use BERT to identify potentially relevant sentences
independently of each other; 2) feed the set of selected sentences as context
into a standard BERT span prediction model to choose an answer; and 3) use the
sentence selection model, now with the chosen answer, to produce supporting
sentences. The strong performance of Quark resurfaces the importance of
carefully exploring simple model designs before using popular benchmarks to
justify the value of complex techniques.
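The abstract describes the three-step pipeline only at a high level. Below is a minimal sketch of that flow using the Hugging Face `transformers` library; it is not the authors' released implementation. The model checkpoints, the `top_k` cut-off, the 0.5 support threshold, and the `score_sentences` / `answer` helpers are illustrative assumptions, and the relevance scorer would in practice be fine-tuned on HotpotQA supporting-fact labels rather than used with a fresh classification head.

```python
# Hedged sketch of the three-step pipeline from the abstract (not the Quark code).
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Steps 1 and 3: a relevance scorer over (query, sentence) pairs.
# Assumption: a BERT checkpoint with a 2-way classification head; in practice this
# head would be fine-tuned on supporting-fact labels. Here it only shows data flow.
scorer_name = "bert-base-uncased"
scorer_tok = AutoTokenizer.from_pretrained(scorer_name)
scorer = AutoModelForSequenceClassification.from_pretrained(scorer_name, num_labels=2)
scorer.eval()

def score_sentences(query: str, sentences: list[str]) -> list[float]:
    """Score each sentence independently against the query (steps 1 and 3)."""
    scores = []
    for sent in sentences:
        inputs = scorer_tok(query, sent, return_tensors="pt", truncation=True)
        with torch.no_grad():
            logits = scorer(**inputs).logits
        # Probability of the "relevant" class (treating index 1 as relevant is an assumption).
        scores.append(torch.softmax(logits, dim=-1)[0, 1].item())
    return scores

# Step 2: a standard extractive span-prediction model over the selected context.
# Assumption: a SQuAD-tuned checkpoint stands in for the paper's BERT QA model.
qa = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

def answer(question: str, sentences: list[str], top_k: int = 5):
    # 1) Select potentially relevant sentences, each scored independently of the others.
    rel = score_sentences(question, sentences)
    selected = [s for _, s in sorted(zip(rel, sentences), reverse=True)[:top_k]]

    # 2) Feed the selected sentences as context into the span-prediction model.
    context = " ".join(selected)
    span = qa(question=question, context=context)["answer"]

    # 3) Re-run sentence selection, now conditioned on the chosen answer,
    #    to produce the supporting sentences (threshold is illustrative).
    support_scores = score_sentences(f"{question} {span}", sentences)
    supporting = [s for sc, s in zip(support_scores, sentences) if sc > 0.5]
    return span, supporting

if __name__ == "__main__":
    sents = [
        "Quark is a simple BERT-based pipeline for HotpotQA.",
        "HotpotQA is a multi-hop question answering dataset.",
        "The sky is blue.",
    ]
    span, support = answer("What kind of dataset is HotpotQA?", sents)
    print("answer:", span)
    print("supporting sentences:", support)
```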
@misc{groeneveld2020simple,
author = {Groeneveld, Dirk and Khot, Tushar and {Mausam} and Sabharwal, Ashish},
keywords = {bert masterthesis qna},
note = {arXiv:2004.06753},
title = {A Simple Yet Strong Pipeline for HotpotQA},
url = {http://arxiv.org/abs/2004.06753},
year = 2020
}