One approach to continuously achieve a certain data quality level is to use an integration pipeline that continuously checks and monitors the quality of a data set according to defined metrics. This approach is inspired by Continuous Integration pipelines, that have been introduced in the area of software development and DevOps to perform continuous source code checks. By investigating in possible tools to use and discussing the specific requirements for RDF data sets, an integration pipeline is derived that joins current approaches of the areas of software-development and semantic-web as well as reuses existing tools. As these tools have not been built explicitly for CI usage, we evaluate their usability and propose possible workarounds and improvements. Furthermore, a real-world usage scenario is discussed, outlining the benefit of the usage of such a pipeline.
%0 Conference Paper
%1 meissner-semantics-2016-DevOps
%A Meissner, Roy
%A Junghanns, Kurt
%B 12th International Conference on Semantic Systems Proceedings (SEMANTiCS 2016)
%C Leipzig, Germany
%D 2016
%K 2016 es group_aksw junghanns meissner slidewiki
%R 10.1145/2993318.2993351
%T Using DevOps Principles to Continuously Monitor RDF Data Quality
%U https://svn.aksw.org/papers/2016/Semantics_DevOps/public.pdf
%X One approach to continuously achieve a certain data quality level is to use an integration pipeline that continuously checks and monitors the quality of a data set according to defined metrics. This approach is inspired by Continuous Integration pipelines, that have been introduced in the area of software development and DevOps to perform continuous source code checks. By investigating in possible tools to use and discussing the specific requirements for RDF data sets, an integration pipeline is derived that joins current approaches of the areas of software-development and semantic-web as well as reuses existing tools. As these tools have not been built explicitly for CI usage, we evaluate their usability and propose possible workarounds and improvements. Furthermore, a real-world usage scenario is discussed, outlining the benefit of the usage of such a pipeline.
@inproceedings{meissner-semantics-2016-DevOps,
  author        = {Meissner, Roy and Junghanns, Kurt},
  title         = {Using {DevOps} Principles to Continuously Monitor {RDF} Data Quality},
  booktitle     = {12th International Conference on Semantic Systems Proceedings ({SEMANTiCS} 2016)},
  series        = {CEUR Workshop Proceedings},
  address       = {Leipzig, Germany},
  month         = sep,
  year          = {2016},
  doi           = {10.1145/2993318.2993351},
  issn          = {1613-0073},
  url           = {https://svn.aksw.org/papers/2016/Semantics_DevOps/public.pdf},
  abstract      = {One approach to continuously achieve a certain data quality level is to use an integration pipeline that continuously checks and monitors the quality of a data set according to defined metrics. This approach is inspired by Continuous Integration pipelines, that have been introduced in the area of software development and DevOps to perform continuous source code checks. By investigating in possible tools to use and discussing the specific requirements for RDF data sets, an integration pipeline is derived that joins current approaches of the areas of software-development and semantic-web as well as reuses existing tools. As these tools have not been built explicitly for CI usage, we evaluate their usability and propose possible workarounds and improvements. Furthermore, a real-world usage scenario is discussed, outlining the benefit of the usage of such a pipeline.},
  keywords      = {2016 es group_aksw junghanns meissner slidewiki},
  added-at      = {2024-03-04T14:14:40.000+0100},
  timestamp     = {2024-03-04T14:14:40.000+0100},
  biburl        = {https://www.bibsonomy.org/bibtex/211982efe4c34ca82aea80cc2843ce72b/aksw},
  interhash     = {35737278ec6a9bfff2eb3b1b96a08010},
  intrahash     = {11982efe4c34ca82aea80cc2843ce72b},
  owner         = {meissner},
  internal-note = {NOTE(review): series/issn identify CEUR-WS (ISSN 1613-0073) while the DOI prefix 10.1145 belongs to ACM -- confirm which publisher actually issued these proceedings and drop the mismatched fields. Also, address holds the conference venue (Leipzig) rather than the publisher's city; confirm this matches the bibliography style in use.},
}