We show that generating English Wikipedia articles can be approached as a multi-document summarization of source documents. We use extractive summarization to coarsely identify salient information and a neural abstractive model to generate the article. For the abstractive model, we introduce a decoder-only architecture that can scalably attend to very long sequences, much longer than typical encoder-decoder architectures used in sequence transduction. We show that this model can generate fluent, coherent multi-sentence paragraphs and even whole Wikipedia articles. When given reference documents, we show it can extract relevant factual information as reflected in perplexity, ROUGE scores and human evaluations.
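The decoder-only setup the abstract describes can be made concrete with a small sketch. The following is a minimal illustration only, assuming a PyTorch implementation: the class name DecoderOnlyLM, all hyperparameters, and the use of a causally masked self-attention stack are assumptions for illustration, not the architecture released with the paper, and the paper's attention mechanisms for scaling to very long sequences are not reproduced here.

# Minimal sketch (an assumption, not the authors' code) of a decoder-only language model:
# extracted source text and the target Wikipedia article are concatenated into one long
# token sequence and modeled left-to-right with causal self-attention.
import torch
import torch.nn as nn

class DecoderOnlyLM(nn.Module):
    def __init__(self, vocab_size=32000, d_model=512, n_heads=8, n_layers=6, max_len=4096):
        super().__init__()
        self.tok_embed = nn.Embedding(vocab_size, d_model)
        self.pos_embed = nn.Embedding(max_len, d_model)
        block = nn.TransformerEncoderLayer(
            d_model, n_heads, dim_feedforward=4 * d_model, batch_first=True
        )
        # With a causal mask, a stack of self-attention blocks acts as a decoder-only LM
        # (no separate encoder, unlike typical encoder-decoder transduction models).
        self.blocks = nn.TransformerEncoder(block, n_layers)
        self.lm_head = nn.Linear(d_model, vocab_size)

    def forward(self, tokens):
        # tokens: (batch, seq_len) ids of "[extracted sources] <sep> [article]"
        batch, seq_len = tokens.shape
        positions = torch.arange(seq_len, device=tokens.device)
        x = self.tok_embed(tokens) + self.pos_embed(positions)
        # Upper-triangular -inf mask forbids attending to future tokens.
        causal_mask = torch.triu(
            torch.full((seq_len, seq_len), float("-inf"), device=tokens.device), diagonal=1
        )
        hidden = self.blocks(x, mask=causal_mask)
        return self.lm_head(hidden)  # next-token logits at every position

# Training would minimize cross-entropy on next-token prediction over the concatenation;
# at inference the model conditions on the extracted sources and decodes the article.
model = DecoderOnlyLM()
dummy = torch.randint(0, 32000, (2, 128))
logits = model(dummy)  # shape: (2, 128, 32000)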
@article{liu2018generating,
abstract = {We show that generating English Wikipedia articles can be approached as a multi-document summarization of source documents. We use extractive summarization to coarsely identify salient information and a neural abstractive model to generate the article. For the abstractive model, we introduce a decoder-only architecture that can scalably attend to very long sequences, much longer than typical encoder-decoder architectures used in sequence transduction. We show that this model can generate fluent, coherent multi-sentence paragraphs and even whole Wikipedia articles. When given reference documents, we show it can extract relevant factual information as reflected in perplexity, {ROUGE} scores and human evaluations.},
author = {Liu, Peter J. and Saleh, Mohammad and Pot, Etienne and Goodrich, Ben and Sepassi, Ryan and Kaiser, Lukasz and Shazeer, Noam},
biburl = {https://www.bibsonomy.org/bibtex/2c04db6f79b2e942ebff0f9ea43e99402/schneeschmelze},
eprint = {1801.10198},
eprinttype = {arxiv},
journal = {{arXiv}:1801.10198 [cs]},
keywords = {wikipedia},
language = {en},
title = {Generating Wikipedia by Summarizing Long Sequences},
url = {http://arxiv.org/abs/1801.10198},
urldate = {2018-02-19},
year = 2018
}