Over the last decade, we observed a steadily increasing amount of RDF datasets made available on the web of data. The decentralized nature of the web, however, makes it hard to identify all these datasets. Even more so, when downloadable data distributions are discovered, only insufficient metadata is available to describe the datasets properly, thus posing barriers on its usefulness and reuse. In this paper, we describe an attempt to exhaustively identify the whole linked open data cloud by harvesting metadata from multiple sources, providing insights about duplicated data and the general quality of the available metadata. This was only possible by using a probabilistic data structure called Bloom filter.
Finally, we published a dump file containing metadata which can further be used to enrich existent datasets.
%0 Conference Paper
%1 BaronKKPEH2017IDOL
%A Baron Neto, Ciro
%A Kontokostas, Dimitris
%A Kirschenbaum, Amit
%A Publio, Gustavo
%A Esteves, Diego
%A Hellmann, Sebastian
%B Proceedings of the 13th International Conference on Semantic Systems (SEMANTiCS 2017)
%D 2017
%K aligned aligned-project baron esteves group_aksw hellmann kilt kilt_publications kirschenbaum kontokostas publio sdw
%T IDOL: Comprehensive & Complete LOD Insights
%U https://svn.aksw.org/papers/2017/SEMANTiCS_IDOL/public.pdf
%X Over the last decade, we observed a steadily increasing amount of RDF datasets made available on the web of data. The decentralized nature of the web, however, makes it hard to identify all these datasets. Even more so, when downloadable data distributions are discovered, only insufficient metadata is available to describe the datasets properly, thus posing barriers on its usefulness and reuse. In this paper, we describe an attempt to exhaustively identify the whole linked open data cloud by harvesting metadata from multiple sources, providing insights about duplicated data and the general quality of the available metadata. This was only possible by using a probabilistic data structure called Bloom filter.
Finally, we published a dump file containing metadata which can further be used to enrich existent datasets.
@inproceedings{BaronKKPEH2017IDOL,
  abstract  = {Over the last decade, we observed a steadily increasing amount of RDF datasets made available on the web of data. The decentralized nature of the web, however, makes it hard to identify all these datasets. Even more so, when downloadable data distributions are discovered, only insufficient metadata is available to describe the datasets properly, thus posing barriers on its usefulness and reuse. In this paper, we describe an attempt to exhaustively identify the whole linked open data cloud by harvesting metadata from multiple sources, providing insights about duplicated data and the general quality of the available metadata. This was only possible by using a probabilistic data structure called Bloom filter.
Finally, we published a dump file containing metadata which can further be used to enrich existent datasets.},
  added-at  = {2024-06-18T09:44:22.000+0200},
  author    = {Baron Neto, Ciro and Kontokostas, Dimitris and Kirschenbaum, Amit and Publio, Gustavo and Esteves, Diego and Hellmann, Sebastian},
  biburl    = {https://www.bibsonomy.org/bibtex/22af68e822c37160ed1ea3819a35504cc/aksw},
  booktitle = {Proceedings of the 13th International Conference on Semantic Systems ({SEMANTiCS} 2017)},
  interhash = {2342f253437ba0b1a206efce05563f54},
  intrahash = {2af68e822c37160ed1ea3819a35504cc},
  keywords  = {aligned aligned-project baron esteves group_aksw hellmann kilt kilt_publications kirschenbaum kontokostas publio sdw},
  timestamp = {2024-06-18T09:44:22.000+0200},
  title     = {{IDOL}: Comprehensive \& Complete {LOD} Insights},
  url       = {https://svn.aksw.org/papers/2017/SEMANTiCS_IDOL/public.pdf},
  year      = {2017},
}