This paper addresses one of the largest and most complex data curation workflows in existence: Wikipedia and Wikidata with a high number of users and curators adding factual information from external sources via a non-systematic Wiki workflow to Wikipedia’s infoboxes and Wikidata items. We present high-level analyses of the current state, the challenges and limitations in this workflow and supplement it with a quantitative and semantic analysis of the resulting data spaces by deploying DBpedia’s integration and extraction capabilities. Based on an analysis of millions of references from Wikipedia infoboxes in different languages, we can find the most important sources which can be used to enrich other knowledge bases with information of better quality. An initial tool is presented, the GlobalFactSync browser, as a prototype to discuss further measures to develop a more systematic approach for data curation in the WikiVerse.
%0 Conference Paper
%1 hellmann2020gfs
%A Hellmann, Sebastian
%A Frey, Johannes
%A Hofer, Marvin
%A Dojchinovski, Milan
%A Węcel, Krzysztof
%A Lewoniewski, Wlodzimierz
%B Proceedings of the Conference on Digital Curation Technologies
%D 2020
%K frey group_aksw hellmann hofer kilt
%T Towards a Systematic Approach to Sync Factual Data across Wikipedia, Wikidata and External Data Sources
%U https://svn.aksw.org/papers/2020/qurator_gfs/public.pdf
%X This paper addresses one of the largest and most complex data curation workflows in existence: Wikipedia and Wikidata with a high number of users and curators adding factual information from external sources via a non-systematic Wiki workflow to Wikipedia’s infoboxes and Wikidata items. We present high-level analyses of the current state, the challenges and limitations in this workflow and supplement it with a quantitative and semantic analysis of the resulting data spaces by deploying DBpedia’s integration and extraction capabilities. Based on an analysis of millions of references from Wikipedia infoboxes in different languages, we can find the most important sources which can be used to enrich other knowledge bases with information of better quality. An initial tool is presented, the GlobalFactSync browser, as a prototype to discuss further measures to develop a more systematic approach for data curation in the WikiVerse.
@inproceedings{hellmann2020gfs,
  abstract  = {This paper addresses one of the largest and most complex data curation workflows in existence: Wikipedia and Wikidata with a high number of users and curators adding factual information from external sources via a non-systematic Wiki workflow to Wikipedia's infoboxes and Wikidata items. We present high-level analyses of the current state, the challenges and limitations in this workflow and supplement it with a quantitative and semantic analysis of the resulting data spaces by deploying DBpedia's integration and extraction capabilities. Based on an analysis of millions of references from Wikipedia infoboxes in different languages, we can find the most important sources which can be used to enrich other knowledge bases with information of better quality. An initial tool is presented, the GlobalFactSync browser, as a prototype to discuss further measures to develop a more systematic approach for data curation in the WikiVerse.},
  added-at  = {2024-06-18T09:46:32.000+0200},
  author    = {Hellmann, Sebastian and Frey, Johannes and Hofer, Marvin and Dojchinovski, Milan and W{\k{e}}cel, Krzysztof and Lewoniewski, W{\l}odzimierz},
  biburl    = {https://www.bibsonomy.org/bibtex/2136b32fe330390ea8d417a395d6d394e/aksw},
  booktitle = {Proceedings of the Conference on Digital Curation Technologies},
  interhash = {4dc8fbb0056b5491541899d9ec7fb872},
  intrahash = {136b32fe330390ea8d417a395d6d394e},
  keywords  = {frey group_aksw hellmann hofer kilt},
  timestamp = {2024-06-18T09:46:32.000+0200},
  title     = {Towards a Systematic Approach to Sync Factual Data across {Wikipedia}, {Wikidata} and External Data Sources},
  url       = {https://svn.aksw.org/papers/2020/qurator_gfs/public.pdf},
  year      = {2020}
}