The creation of an RDF knowledge graph for a particular application commonly involves a pipeline of tools that transform a set of input data sources into an RDF knowledge graph in a process called dataset augmentation. The components of such augmentation pipelines often require extensive configuration to lead to satisfactory results. Thus, non-experts are often unable to use them. We present an efficient supervised algorithm based on genetic programming for learning knowledge graph augmentation pipelines of arbitrary length. Our approach uses multi-expression learning to learn augmentation pipelines able to achieve a high F-measure on the training data. Our evaluation suggests that our approach can efficiently learn a larger class of RDF dataset augmentation tasks than the state of the art while using only a single training example. Even on the most complex augmentation problem we posed, our approach consistently achieves an average F1-measure of 99% in under 500 iterations with an average runtime of 16 seconds.
%0 Conference Paper
%1 ht22_adagio
%A Dreßler, Kevin
%A Sherif, Mohamed Ahmed
%A Ngomo, Axel-Cyrille Ngonga
%B Proceedings of the 33rd ACM Conference on Hypertext and Hypermedia
%D 2022
%K 2022 RAKI SFB901 deer dice kevin knowgraphs limes ngonga sherif simba
%R 10.1145/3511095.3531287
%T ADAGIO - Automated Data Augmentation of Knowledge Graphs Using Multi-expression Learning
%U https://papers.dice-research.org/2022/HT_ADAGIO/public.pdf
%X The creation of an RDF knowledge graph for a particular application commonly involves a pipeline of tools that transform a set of input data sources into an RDF knowledge graph in a process called dataset augmentation. The components of such augmentation pipelines often require extensive configuration to lead to satisfactory results. Thus, non-experts are often unable to use them. We present an efficient supervised algorithm based on genetic programming for learning knowledge graph augmentation pipelines of arbitrary length. Our approach uses multi-expression learning to learn augmentation pipelines able to achieve a high F-measure on the training data. Our evaluation suggests that our approach can efficiently learn a larger class of RDF dataset augmentation tasks than the state of the art while using only a single training example. Even on the most complex augmentation problem we posed, our approach consistently achieves an average F1-measure of 99% in under 500 iterations with an average runtime of 16 seconds.
@comment{Conference paper entry (key ht22_adagio). Acronyms in title/booktitle are brace-protected against style recasing; the percent sign in the abstract is escaped so the field is safe to typeset.}
@inproceedings{ht22_adagio,
  abstract   = {The creation of an RDF knowledge graph for a particular application commonly involves a pipeline of tools that transform a set of input data sources into an RDF knowledge graph in a process called dataset augmentation. The components of such augmentation pipelines often require extensive configuration to lead to satisfactory results. Thus, non-experts are often unable to use them. We present an efficient supervised algorithm based on genetic programming for learning knowledge graph augmentation pipelines of arbitrary length. Our approach uses multi-expression learning to learn augmentation pipelines able to achieve a high F-measure on the training data. Our evaluation suggests that our approach can efficiently learn a larger class of RDF dataset augmentation tasks than the state of the art while using only a single training example. Even on the most complex augmentation problem we posed, our approach consistently achieves an average F1-measure of 99\% in under 500 iterations with an average runtime of 16 seconds.},
  added-at   = {2023-04-25T16:34:32.000+0200},
  author     = {Dre{\ss}ler, Kevin and Sherif, Mohamed Ahmed and Ngomo, Axel-Cyrille Ngonga},
  bdsk-url-1 = {https://papers.dice-research.org/2022/HT_ADAGIO/public.pdf},
  bdsk-url-2 = {https://doi.org/10.1145/3511095.3531287},
  biburl     = {https://www.bibsonomy.org/bibtex/2f87fc8c04badfd17356dcd61c1c847b7/dice-research},
  booktitle  = {Proceedings of the 33rd {ACM} Conference on Hypertext and Hypermedia},
  doi        = {10.1145/3511095.3531287},
  interhash  = {f5fe7a97978393d64814c50bc281ce09},
  intrahash  = {f87fc8c04badfd17356dcd61c1c847b7},
  keywords   = {2022 RAKI SFB901 deer dice kevin knowgraphs limes ngonga sherif simba},
  timestamp  = {2024-09-18T16:18:29.000+0200},
  title      = {{ADAGIO} - Automated Data Augmentation of Knowledge Graphs Using Multi-expression Learning},
  url        = {https://papers.dice-research.org/2022/HT_ADAGIO/public.pdf},
  year       = {2022},
}