Scholarly data is growing continuously containing information about the articles from a plethora of venues including conferences, journals, etc. Many initiatives have been taken to make scholarly data available in the form of Knowledge Graphs (KGs). These efforts to standardize these data and make them accessible have also led to many challenges such as exploration of scholarly articles, ambiguous authors, etc. This study more specifically targets the problem of Author Name Disambiguation (AND) on Scholarly KGs and presents a novel framework, Literally Author Name Disambiguation (LAND), which utilizes Knowledge Graph Embeddings (KGEs) using multimodal literal information generated from these KGs. This framework is based on three components: (1) multimodal KGEs, (2) a blocking procedure, and finally, (3) hierarchical Agglomerative Clustering. Extensive experiments have been conducted on two newly created KGs: (i) a KG containing information from Scientometrics Journal from 1978 onwards (OC-782K), and (ii) a KG extracted from a well-known benchmark for AND provided by AMiner (AMiner-534K). The results show that our proposed architecture outperforms our baselines by 8–14% in terms of F1 score and shows competitive performance on a challenging benchmark such as AMiner. The code and the datasets are publicly available through GitHub (https://github.com/sntcristian/and-kge) and Zenodo (https://doi.org/10.5281/zenodo.6309855), respectively.
Full Text PDF:C\:\\Users\\CSA\\Zotero\\storage\\M775WUKF\\Santini et al. - 2022 - A knowledge graph embeddings based approach for au.pdf:application/pdf
%0 Journal Article
%1 santini_knowledge_2022
%A Santini, Cristian
%A Gesese, Genet Asefa
%A Peroni, Silvio
%A Gangemi, Aldo
%A Sack, Harald
%A Alam, Mehwish
%D 2022
%J Scientometrics
%K Author Name Disambiguation, Bibliographic data, Citation data, Clustering, Open citations, Knowledge graph embeddings, fiziseown, ise
%R 10.1007/s11192-022-04426-2
%T A knowledge graph embeddings based approach for author name disambiguation using literals
%U https://doi.org/10.1007/s11192-022-04426-2
%X Scholarly data is growing continuously containing information about the articles from a plethora of venues including conferences, journals, etc. Many initiatives have been taken to make scholarly data available in the form of Knowledge Graphs (KGs). These efforts to standardize these data and make them accessible have also led to many challenges such as exploration of scholarly articles, ambiguous authors, etc. This study more specifically targets the problem of Author Name Disambiguation (AND) on Scholarly KGs and presents a novel framework, Literally Author Name Disambiguation (LAND), which utilizes Knowledge Graph Embeddings (KGEs) using multimodal literal information generated from these KGs. This framework is based on three components: (1) multimodal KGEs, (2) a blocking procedure, and finally, (3) hierarchical Agglomerative Clustering. Extensive experiments have been conducted on two newly created KGs: (i) a KG containing information from Scientometrics Journal from 1978 onwards (OC-782K), and (ii) a KG extracted from a well-known benchmark for AND provided by AMiner (AMiner-534K). The results show that our proposed architecture outperforms our baselines by 8–14% in terms of F1 score and shows competitive performance on a challenging benchmark such as AMiner. The code and the datasets are publicly available through GitHub (https://github.com/sntcristian/and-kge) and Zenodo (https://doi.org/10.5281/zenodo.6309855), respectively.
% Journal article in Scientometrics (2022); DOI 10.1007/s11192-022-04426-2.
% NOTE(review): `keywords` was reconstructed from a whitespace-tokenised,
% alphabetically sorted tag list -- confirm the phrases against the publisher
% record. `fiziseown` and `ise` look like collection tags and are kept as-is.
% NOTE(review): volume/number/pages are absent from this record; add them from
% the publisher metadata. The `file` value is a machine-local attachment path.
@article{santini_knowledge_2022,
  abstract  = {Scholarly data is growing continuously containing information about the articles from a plethora of venues including conferences, journals, etc. Many initiatives have been taken to make scholarly data available in the form of Knowledge Graphs (KGs). These efforts to standardize these data and make them accessible have also led to many challenges such as exploration of scholarly articles, ambiguous authors, etc. This study more specifically targets the problem of Author Name Disambiguation (AND) on Scholarly KGs and presents a novel framework, Literally Author Name Disambiguation (LAND), which utilizes Knowledge Graph Embeddings (KGEs) using multimodal literal information generated from these KGs. This framework is based on three components: (1) multimodal KGEs, (2) a blocking procedure, and finally, (3) hierarchical Agglomerative Clustering. Extensive experiments have been conducted on two newly created KGs: (i) a KG containing information from Scientometrics Journal from 1978 onwards (OC-782K), and (ii) a KG extracted from a well-known benchmark for AND provided by AMiner (AMiner-534K). The results show that our proposed architecture outperforms our baselines by 8--14\% in terms of F1 score and shows competitive performance on a challenging benchmark such as AMiner. The code and the datasets are publicly available through GitHub (https://github.com/sntcristian/and-kge) and Zenodo (https://doi.org/10.5281/zenodo.6309855), respectively.},
  added-at  = {2022-09-08T16:32:22.000+0200},
  author    = {Santini, Cristian and Gesese, Genet Asefa and Peroni, Silvio and Gangemi, Aldo and Sack, Harald and Alam, Mehwish},
  biburl    = {https://www.bibsonomy.org/bibtex/2db6f4fcb70219b39f9637a8b35eb54fc/vivienvetter},
  doi       = {10.1007/s11192-022-04426-2},
  file      = {Full Text PDF:C\:\\Users\\CSA\\Zotero\\storage\\M775WUKF\\Santini et al. - 2022 - A knowledge graph embeddings based approach for au.pdf:application/pdf},
  interhash = {0aef9afcdec02f8ce0def7eadb53ae6d},
  intrahash = {db6f4fcb70219b39f9637a8b35eb54fc},
  issn      = {1588-2861},
  journal   = {Scientometrics},
  keywords  = {Author Name Disambiguation, Bibliographic data, Citation data, Clustering, Open citations, Knowledge graph embeddings, fiziseown, ise},
  language  = {en},
  month     = jul,
  timestamp = {2022-09-08T16:32:22.000+0200},
  title     = {A knowledge graph embeddings based approach for author name disambiguation using literals},
  url       = {https://doi.org/10.1007/s11192-022-04426-2},
  urldate   = {2022-07-27},
  year      = {2022},
}