Graphs are increasingly used to model data in many disciplines. Structure search, which matches a query graph against a data graph, is a common information retrieval paradigm for graph structured data. A crucial factor in optimizing such searches is the ability to estimate the frequency of substructures within a query graph. In this work, we present and evaluate two techniques for estimating the frequency of subgraphs from a summary of the data graph. In the first technique, we assume that edge occurrences on edge sequences are position independent and summarize only the most informative dependencies. In the second technique, we prune small subgraphs based on a valuation scheme that blends information about their importance and estimation power. In both techniques, we assume conditional independence to estimate the frequencies of larger subgraphs. We validate the effectiveness of our techniques using experiments on real and synthetic datasets.
%0 Conference Paper
%1 maduko2008graph
%A Maduko, Angela
%A Anyanwu, Kemafor
%A Sheth, Amit
%A Schliekelman, Paul
%B Proceedings of the 5th European Semantic Web Conference
%C Berlin, Heidelberg
%D 2008
%E Hauswirth, Manfred
%E Koubarakis, Manolis
%E Bechhofer, Sean
%I Springer Verlag
%K cardinality result estimation graph subgraph summaries query-processing-2
%T Graph Summaries for Subgraph Frequency Estimation
%U http://data.semanticweb.org/conference/eswc/2008/papers/330
%X Graphs are increasingly used to model data in many disciplines. Structure search, which matches a query graph against a data graph, is a common information retrieval paradigm for graph structured data. A crucial factor in optimizing such searches is the ability to estimate the frequency of substructures within a query graph. In this work, we present and evaluate two techniques for estimating the frequency of subgraphs from a summary of the data graph. In the first technique, we assume that edge occurrences on edge sequences are position independent and summarize only the most informative dependencies. In the second technique, we prune small subgraphs based on a valuation scheme that blends information about their importance and estimation power. In both techniques, we assume conditional independence to estimate the frequencies of larger subgraphs. We validate the effectiveness of our techniques using experiments on real and synthetic datasets.
% Conference paper entry; record exported from BibSonomy (see the biburl field).
% added-at, biburl, interhash, intrahash, keywords and timestamp are not
% standard BibTeX fields; standard styles ignore unknown fields.
@inproceedings{maduko2008graph,
  abstract  = {Graphs are increasingly used to model data in many disciplines. Structure search, which matches a query graph against a data graph, is a common information retrieval paradigm for graph structured data. A crucial factor in optimizing such searches is the ability to estimate the frequency of substructures within a query graph. In this work, we present and evaluate two techniques for estimating the frequency of subgraphs from a summary of the data graph. In the first technique, we assume that edge occurrences on edge sequences are position independent and summarize only the most informative dependencies. In the second technique, we prune small subgraphs based on a valuation scheme that blends information about their importance and estimation power. In both techniques, we assume conditional independence to estimate the frequencies of larger subgraphs. We validate the effectiveness of our techniques using experiments on real and synthetic datasets.},
  added-at  = {2008-05-28T14:50:06.000+0200},
  address   = {Berlin, Heidelberg},
  author    = {Maduko, Angela and Anyanwu, Kemafor and Sheth, Amit and Schliekelman, Paul},
  biburl    = {https://www.bibsonomy.org/bibtex/22a2a25f0c3ca04f9657805a465179975/eswc2008},
  booktitle = {Proceedings of the 5th European Semantic Web Conference},
  editor    = {Hauswirth, Manfred and Koubarakis, Manolis and Bechhofer, Sean},
  interhash = {fbeb1b5500bd7857136e14e25518bea1},
  intrahash = {2a2a25f0c3ca04f9657805a465179975},
  keywords  = {cardinality result estimation graph subgraph summaries query-processing-2},
  month     = jun,
  publisher = {Springer Verlag},
  series    = {Lecture Notes in Computer Science},
  timestamp = {2008-05-28T14:50:06.000+0200},
  title     = {Graph Summaries for Subgraph Frequency Estimation},
  url       = {http://data.semanticweb.org/conference/eswc/2008/papers/330},
  year      = {2008},
}