We describe a deployed scalable system for organizing published scientific literature into a heterogeneous graph to facilitate algorithmic manipulation and discovery. The resulting literature graph consists of more than 280M nodes, representing papers, authors, entities and various interactions between them (e.g., authorships, citations, entity mentions). We reduce literature graph construction into familiar NLP tasks (e.g., entity extraction and linking), point out research challenges due to differences from standard formulations of these tasks, and report empirical results for each task. The methods described in this paper are used to enable semantic features in www.semanticscholar.org.
Description
Construction of the Literature Graph in Semantic Scholar - ACL Anthology
Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 3 (Industry Papers)
%0 Conference Paper
%1 ammar2018construction
%A Ammar, Waleed
%A Groeneveld, Dirk
%A Bhagavatula, Chandra
%A Beltagy, Iz
%A Crawford, Miles
%A Downey, Doug
%A Dunkelberger, Jason
%A Elgohary, Ahmed
%A Feldman, Sergey
%A Ha, Vu
%A Kinney, Rodney
%A Kohlmeier, Sebastian
%A Lo, Kyle
%A Murray, Tyler
%A Ooi, Hsu-Han
%A Peters, Matthew
%A Power, Joanna
%A Skjonsberg, Sam
%A Wang, Lucy
%A Wilhelm, Chris
%A Yuan, Zheng
%A van Zuylen, Madeleine
%A Etzioni, Oren
%B Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 3 (Industry Papers)
%C New Orleans, Louisiana
%D 2018
%I Association for Computational Linguistics
%K citations constructing graph inline literature scholar semantic
%P 84--91
%R 10.18653/v1/N18-3011
%T Construction of the Literature Graph in Semantic Scholar
%U https://www.aclweb.org/anthology/N18-3011
%X We describe a deployed scalable system for organizing published scientific literature into a heterogeneous graph to facilitate algorithmic manipulation and discovery. The resulting literature graph consists of more than 280M nodes, representing papers, authors, entities and various interactions between them (e.g., authorships, citations, entity mentions). We reduce literature graph construction into familiar NLP tasks (e.g., entity extraction and linking), point out research challenges due to differences from standard formulations of these tasks, and report empirical results for each task. The methods described in this paper are used to enable semantic features in www.semanticscholar.org.
@comment{Conference paper from the NAACL-HLT 2018 industry-papers volume, as exported from BibSonomy. The added-at, biburl, description, interhash, intrahash, keywords and timestamp fields are BibSonomy bookkeeping that standard styles ignore. Braced words in title and booktitle are proper nouns protected from sentence-casing by the bibliography style.}
@inproceedings{ammar2018construction,
  abstract    = {We describe a deployed scalable system for organizing published scientific literature into a heterogeneous graph to facilitate algorithmic manipulation and discovery. The resulting literature graph consists of more than 280M nodes, representing papers, authors, entities and various interactions between them (e.g., authorships, citations, entity mentions). We reduce literature graph construction into familiar NLP tasks (e.g., entity extraction and linking), point out research challenges due to differences from standard formulations of these tasks, and report empirical results for each task. The methods described in this paper are used to enable semantic features in \url{www.semanticscholar.org}.},
  added-at    = {2020-04-29T16:42:43.000+0200},
  address     = {New Orleans, Louisiana},
  author      = {Ammar, Waleed and Groeneveld, Dirk and Bhagavatula, Chandra and Beltagy, Iz and Crawford, Miles and Downey, Doug and Dunkelberger, Jason and Elgohary, Ahmed and Feldman, Sergey and Ha, Vu and Kinney, Rodney and Kohlmeier, Sebastian and Lo, Kyle and Murray, Tyler and Ooi, Hsu-Han and Peters, Matthew and Power, Joanna and Skjonsberg, Sam and Wang, Lucy and Wilhelm, Chris and Yuan, Zheng and van Zuylen, Madeleine and Etzioni, Oren},
  biburl      = {https://www.bibsonomy.org/bibtex/27975b1d621f06ef4d5185336f7ccd528/nosebrain},
  booktitle   = {Proceedings of the 2018 Conference of the North {American} Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 3 (Industry Papers)},
  description = {Construction of the Literature Graph in Semantic Scholar - ACL Anthology},
  doi         = {10.18653/v1/N18-3011},
  interhash   = {1849dca44475c3d55589eb0d85f7151b},
  intrahash   = {7975b1d621f06ef4d5185336f7ccd528},
  keywords    = {citations constructing graph inline literature scholar semantic},
  month       = jun,
  pages       = {84--91},
  publisher   = {Association for Computational Linguistics},
  timestamp   = {2020-04-29T16:42:43.000+0200},
  title       = {Construction of the Literature Graph in {Semantic} {Scholar}},
  url         = {https://www.aclweb.org/anthology/N18-3011},
  year        = {2018},
}