Handling huge amount of data scalably is a matter of concern for a long time. Same is true for semantic web data. Current semantic web frameworks lack this ability. In this paper, we describe a framework that we built using Hadoop to store and retrieve large number of RDF triples. We describe our schema to store RDF data in Hadoop Distribute File System. We also present our algorithms to answer a SPARQL query. We make use of Hadoop's MapReduce framework to actually answer the queries. Our results reveal that we can store huge amount of semantic web data in Hadoop clusters built mostly by cheap commodity class hardware and still can answer queries fast enough. We conclude that ours is a scalable framework, able to handle large amount of RDF data efficiently.
%0 Book Section
%1 citeulike:7145436
%A Husain, Mohammad Farhan
%A Doshi, Pankil
%A Khan, Latifur
%A Thuraisingham, Bhavani
%B Cloud Computing
%C Berlin, Heidelberg
%D 2009
%E Jaatun, Martin G.
%E Zhao, Gansen
%E Rong, Chunming
%I Springer Berlin Heidelberg
%K MapReduce awm2010 awmhadoop graph hadoop hadoop-group rdf retrieval storage
%P 680--686
%R 10.1007/978-3-642-10665-1_72
%T Storage and Retrieval of Large RDF Graph Using Hadoop and MapReduce
%U https://doi.org/10.1007/978-3-642-10665-1_72
%V 5931
%X Handling huge amount of data scalably is a matter of concern for a long time. Same is true for semantic web data. Current semantic web frameworks lack this ability. In this paper, we describe a framework that we built using Hadoop to store and retrieve large number of RDF triples. We describe our schema to store RDF data in Hadoop Distribute File System. We also present our algorithms to answer a SPARQL query. We make use of Hadoop's MapReduce framework to actually answer the queries. Our results reveal that we can store huge amount of semantic web data in Hadoop clusters built mostly by cheap commodity class hardware and still can answer queries fast enough. We conclude that ours is a scalable framework, able to handle large amount of RDF data efficiently.
%& 72
%@ 978-3-642-10664-4
@incollection{citeulike:7145436,
  abstract             = {Handling huge amount of data scalably is a matter of concern for a long time. Same is true for semantic web data. Current semantic web frameworks lack this ability. In this paper, we describe a framework that we built using Hadoop to store and retrieve large number of RDF triples. We describe our schema to store RDF data in Hadoop Distribute File System. We also present our algorithms to answer a SPARQL query. We make use of Hadoop's MapReduce framework to actually answer the queries. Our results reveal that we can store huge amount of semantic web data in Hadoop clusters built mostly by cheap commodity class hardware and still can answer queries fast enough. We conclude that ours is a scalable framework, able to handle large amount of RDF data efficiently.},
  added-at             = {2010-06-21T12:50:40.000+0200},
  address              = {Berlin, Heidelberg},
  author               = {Husain, Mohammad Farhan and Doshi, Pankil and Khan, Latifur and Thuraisingham, Bhavani},
  biburl               = {https://www.bibsonomy.org/bibtex/21ff62a04b48311beade4646ddbfa539f/flowolf},
  booktitle            = {Cloud Computing},
  chapter              = 72,
  citeulike-article-id = {7145436},
  citeulike-linkout-0  = {https://doi.org/10.1007/978-3-642-10665-1_72},
  citeulike-linkout-1  = {http://www.springerlink.com/content/l805560670136163},
  doi                  = {10.1007/978-3-642-10665-1_72},
  editor               = {Jaatun, Martin G. and Zhao, Gansen and Rong, Chunming},
  interhash            = {89b7bbc1e943d76458b702a7c722e30e},
  intrahash            = {1ff62a04b48311beade4646ddbfa539f},
  isbn                 = {978-3-642-10664-4},
  keywords             = {MapReduce awm2010 awmhadoop graph hadoop hadoop-group rdf retrieval storage},
  pages                = {680--686},
  posted-at            = {2010-05-10 08:47:48},
  priority             = {2},
  publisher            = {Springer Berlin Heidelberg},
  series               = {Lecture Notes in Computer Science},
  timestamp            = {2010-06-21T13:09:28.000+0200},
  title                = {Storage and Retrieval of Large {RDF} Graph Using {Hadoop} and {MapReduce}},
  url                  = {https://doi.org/10.1007/978-3-642-10665-1_72},
  volume               = 5931,
  year                 = 2009
}