d'Amato, C.; Fanizzi, N. & Esposito, F.
(2008):
Query Answering and Ontology Population: an Inductive Approach.
In: Proceedings of the 5th European Semantic Web Conference,
Berlin, Heidelberg.
[Volltext]
[Kurzfassung] [BibTeX][Endnote]
In the context of Semantic Web, deductive reasoning is used for making explicit the implicit knowledge of a knowledge base (KB). Anyway, purely logic-based approaches can fail when data comes from distributed sources, where contradictions usually turn out. Inductive instance-based learning methods can be effectively used in such a case, since they are well known to be efficient and fault tolerant. In this paper we propose an inductive method for improving the concept retrieval and for the performing the ontology population in a (semi-)automatic way. By casting concept retrieval to a classification problem with the goal of assessing the individual memberships w.r.t. the query concepts, we propose an extension of the k-Nearest Neighbor algorithm for Description Logic KBs. It is based on the exploitation of an entropy-based dissimilarity measure. The procedure retrieves individuals belonging to query concepts, by analogy with other training instances, on the grounds of the classification of the nearest ones w.r.t. the dissimilarity measure. We experimentally show that the behavior of the classifier is comparable with the one of a standard reasoner. Moreover we show that new knowledge (not logically derivable) is induced. It can be suggested to the knowledge engineer for validation, during the ontology population task.
% d'Amato, Fanizzi & Esposito (2008): inductive query answering and ontology
% population; paper in the ESWC 2008 proceedings. Citation key kept as-is so
% existing \cite commands keep working.
@inproceedings{d'amato2008query,
  author    = {d'Amato, Claudia and Fanizzi, Nicola and Esposito, Floriana},
  title     = {Query Answering and Ontology Population: An Inductive Approach},
  editor    = {Hauswirth, Manfred and Koubarakis, Manolis and Bechhofer, Sean},
  booktitle = {Proceedings of the 5th European Semantic Web Conference},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Verlag},
  address   = {Berlin, Heidelberg},
  year      = {2008},
  url       = {http://data.semanticweb.org/conference/eswc/2008/papers/252},
  keywords  = {similarity inductive learning answering uncertainty ontology logic description population measure query},
  abstract  = {In the context of Semantic Web, deductive reasoning is used for making explicit the implicit knowledge of a knowledge base (KB). Anyway, purely logic-based approaches can fail when data comes from distributed sources, where contradictions usually turn out. Inductive instance-based learning methods can be effectively used in such a case, since they are well known to be efficient and fault tolerant. In this paper we propose an inductive method for improving the concept retrieval and for performing the ontology population in a (semi-)automatic way. By casting concept retrieval to a classification problem with the goal of assessing the individual memberships w.r.t. the query concepts, we propose an extension of the k-Nearest Neighbor algorithm for Description Logic KBs. It is based on the exploitation of an entropy-based dissimilarity measure. The procedure retrieves individuals belonging to query concepts, by analogy with other training instances, on the grounds of the classification of the nearest ones w.r.t. the dissimilarity measure. We experimentally show that the behavior of the classifier is comparable with the one of a standard reasoner. Moreover we show that new knowledge (not logically derivable) is induced. It can be suggested to the knowledge engineer for validation, during the ontology population task.},
}
%0 = inproceedings
%A = d'Amato, Claudia and Fanizzi, Nicola and Esposito, Floriana
%B = Proceedings of the 5th European Semantic Web Conference
%C = Berlin, Heidelberg
%D = 2008
%I = Springer Verlag
%T = Query Answering and Ontology Population: an Inductive Approach
%U = http://data.semanticweb.org/conference/eswc/2008/papers/252
Fanizzi, N.; d'Amato, C. & Esposito, F.
(2008):
Conceptual Clustering and its Application to Concept Drift and Novelty Detection.
In: Proceedings of the 5th European Semantic Web Conference,
Berlin, Heidelberg.
[Volltext]
[Kurzfassung] [BibTeX][Endnote]
We present a method based on clustering techniques to detect concept drift or novelty in a knowledge based expressed in Description Logics. The method exploits an effective and language-independent semi-distance measure defined for the space of individuals, that is based on a finite number of dimensions corresponding to a committee of discriminating features (represented by concept descriptions). A maximally discriminating group of features can be obtained with the randomized optimization methods described in the paper. An experimentation with some ontologies proves the feasibility of our method and its effectiveness in terms of clustering validity indices. Then, with a supervised learning phase, each cluster can be assigned with a refined or newly constructed intensional definition expressed in the adopted language. We propose a method for exploiting the clustering results for concept drift and novelty detection
% Fanizzi, d'Amato & Esposito (2008): conceptual clustering applied to concept
% drift and novelty detection; paper in the ESWC 2008 proceedings.
@inproceedings{fanizzi2008conceptual,
  author    = {Fanizzi, Nicola and d'Amato, Claudia and Esposito, Floriana},
  title     = {Conceptual Clustering and its Application to Concept Drift and Novelty Detection},
  editor    = {Hauswirth, Manfred and Koubarakis, Manolis and Bechhofer, Sean},
  booktitle = {Proceedings of the 5th European Semantic Web Conference},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Verlag},
  address   = {Berlin, Heidelberg},
  year      = {2008},
  url       = {http://data.semanticweb.org/conference/eswc/2008/papers/273},
  keywords  = {clustering drift conceptual similarity semantic concept detection novelty learning},
  abstract  = {We present a method based on clustering techniques to detect concept drift or novelty in a knowledge base expressed in Description Logics. The method exploits an effective and language-independent semi-distance measure defined for the space of individuals, that is based on a finite number of dimensions corresponding to a committee of discriminating features (represented by concept descriptions). A maximally discriminating group of features can be obtained with the randomized optimization methods described in the paper. An experimentation with some ontologies proves the feasibility of our method and its effectiveness in terms of clustering validity indices. Then, with a supervised learning phase, each cluster can be assigned with a refined or newly constructed intensional definition expressed in the adopted language. We propose a method for exploiting the clustering results for concept drift and novelty detection.},
}
%0 = inproceedings
%A = Fanizzi, Nicola and d'Amato, Claudia and Esposito, Floriana
%B = Proceedings of the 5th European Semantic Web Conference
%C = Berlin, Heidelberg
%D = 2008
%I = Springer Verlag
%T = Conceptual Clustering and its Application to Concept Drift and Novelty Detection
%U = http://data.semanticweb.org/conference/eswc/2008/papers/273
Grimnes, G.; Edwards, P. & Preece, A.
(2008):
Distance Based clustering of Semantic Web Resources.
In: Proceedings of the 5th European Semantic Web Conference,
Berlin, Heidelberg.
[Volltext]
[Kurzfassung] [BibTeX][Endnote]
The original Semantic Web vision was explicit in the need for intelligent autonomous agents that would represent users and help them navigate the Semantic Web. We argue that an essential feature for such agents is the capability to analyse data and learn. In this paper we outline the challenges and issues surrounding the application of clustering algorithms to Semantic Web data. We present several ways to extract instances from a large RDF graph and computing the distance between these. We evaluate our approaches on three different data-sets, one representing a typical relational database to RDF conversion, one based on data from a ontologically rich Semantic Web enabled application, and one consisting of a crawl of FOAF documents; applying both supervised and unsupervised evaluation metrics. Our evaluation did not support choosing a single combination of instance extraction method and similarity metric as superior in all cases, and as expected the behaviour depends greatly on the data being clustered. Instead, we attempt to identify characteristics of data that make particular methods more suitable.
% Grimnes, Edwards & Preece (2008): distance-based clustering of Semantic Web
% resources; paper in the ESWC 2008 proceedings.
@inproceedings{grimnes2008distance,
  author    = {Grimnes, Gunnar and Edwards, Peter and Preece, Alun},
  title     = {Distance Based Clustering of {Semantic Web} Resources},
  editor    = {Hauswirth, Manfred and Koubarakis, Manolis and Bechhofer, Sean},
  booktitle = {Proceedings of the 5th European Semantic Web Conference},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Verlag},
  address   = {Berlin, Heidelberg},
  year      = {2008},
  url       = {http://data.semanticweb.org/conference/eswc/2008/papers/246},
  keywords  = {rdf measure clustering distance learning},
  abstract  = {The original Semantic Web vision was explicit in the need for intelligent autonomous agents that would represent users and help them navigate the Semantic Web. We argue that an essential feature for such agents is the capability to analyse data and learn. In this paper we outline the challenges and issues surrounding the application of clustering algorithms to Semantic Web data. We present several ways to extract instances from a large RDF graph and to compute the distance between these. We evaluate our approaches on three different data-sets, one representing a typical relational database to RDF conversion, one based on data from an ontologically rich Semantic Web enabled application, and one consisting of a crawl of FOAF documents; applying both supervised and unsupervised evaluation metrics. Our evaluation did not support choosing a single combination of instance extraction method and similarity metric as superior in all cases, and as expected the behaviour depends greatly on the data being clustered. Instead, we attempt to identify characteristics of data that make particular methods more suitable.},
}
%0 = inproceedings
%A = Grimnes, Gunnar and Edwards, Peter and Preece, Alun
%B = Proceedings of the 5th European Semantic Web Conference
%C = Berlin, Heidelberg
%D = 2008
%I = Springer Verlag
%T = Distance Based clustering of Semantic Web Resources
%U = http://data.semanticweb.org/conference/eswc/2008/papers/246
Kiefer, C.; Bernstein, A. & Locher, A.
(2008):
Adding Data Mining Support to SPARQL via Statistical Relational Learning Methods.
In: Proceedings of the 5th European Semantic Web Conference,
Berlin, Heidelberg.
[Volltext]
[Kurzfassung] [BibTeX][Endnote]
In machine learning/data mining, people have been exploring how to learn models of relational data for a long time. The rational behind this is that exploiting the rich and complex structure of relational data enables to build better models by taking into account the additional information provided by the links between objects. These links are usually hard to model by traditional propositional learning techniques. We extend this idea to the Semantic Web. In this paper we introduce a novel approach we call SPARQL-ML to perform data mining for Semantic Web data. Our approach is based on traditional SPARQL and statistical relational learning methods, such as Relational Probability Trees and Relational Bayesian Classifiers. We analyze our approach thoroughly conducting three sets of experiments on synthetic as well as real-world datasets. Our analytical results show that our approach can be used for any Semantic Web dataset to perform instance-based learning and classification. A comparison to kernel methods used in Support Vector Machines shows that our approach is superior in terms of classification accuracy. Moreover, we show how our approach can be used for Semantic Web service classification and automatic semantic annotation.
% Kiefer, Bernstein & Locher (2008): SPARQL-ML, data mining support for SPARQL
% via statistical relational learning; paper in the ESWC 2008 proceedings.
% The accented given name uses a LaTeX escape so classic 8-bit BibTeX sorts
% and labels it correctly.
@inproceedings{kiefer2008adding,
  author    = {Kiefer, Christoph and Bernstein, Abraham and Locher, Andr{\'e}},
  title     = {Adding Data Mining Support to {SPARQL} via Statistical Relational Learning Methods},
  editor    = {Hauswirth, Manfred and Koubarakis, Manolis and Bechhofer, Sean},
  booktitle = {Proceedings of the 5th European Semantic Web Conference},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Verlag},
  address   = {Berlin, Heidelberg},
  year      = {2008},
  url       = {http://data.semanticweb.org/conference/eswc/2008/papers/32},
  keywords  = {sparql data statistical evaluation mining relational learning query-processing-1},
  abstract  = {In machine learning/data mining, people have been exploring how to learn models of relational data for a long time. The rationale behind this is that exploiting the rich and complex structure of relational data enables building better models by taking into account the additional information provided by the links between objects. These links are usually hard to model by traditional propositional learning techniques. We extend this idea to the Semantic Web. In this paper we introduce a novel approach we call SPARQL-ML to perform data mining for Semantic Web data. Our approach is based on traditional SPARQL and statistical relational learning methods, such as Relational Probability Trees and Relational Bayesian Classifiers. We analyze our approach thoroughly conducting three sets of experiments on synthetic as well as real-world datasets. Our analytical results show that our approach can be used for any Semantic Web dataset to perform instance-based learning and classification. A comparison to kernel methods used in Support Vector Machines shows that our approach is superior in terms of classification accuracy. Moreover, we show how our approach can be used for Semantic Web service classification and automatic semantic annotation.},
}
%0 = inproceedings
%A = Kiefer, Christoph and Bernstein, Abraham and Locher, André
%B = Proceedings of the 5th European Semantic Web Conference
%C = Berlin, Heidelberg
%D = 2008
%I = Springer Verlag
%T = Adding Data Mining Support to SPARQL via Statistical Relational Learning Methods
%U = http://data.semanticweb.org/conference/eswc/2008/papers/32
Kiefer, C. & Bernstein, A.
(2008):
The Creation and Evaluation of iSPARQL Strategies for Matchmaking.
In: Proceedings of the 5th European Semantic Web Conference,
Berlin, Heidelberg.
[Volltext]
[Kurzfassung] [BibTeX][Endnote]
This research explores our novel method for Semantic Web service matchmaking based on iSPARQL queries, which enable the user to query the Semantic Web with techniques from traditional information retrieval. The strategies for matchmaking which we develop and evaluate in the paper make use of a plethora of similarity measures and combination functions from SimPack - our library of similarity measures for the use in ontologies. We show how our combination of structured and imprecise querying can be used to perform hybrid Semantic Web service matchmaking in simple and amazingly fast fashion. We analyze our approach thoroughly on a large OWL-S service test collection, and show how our initial strategies can be improved by applying machine learning algorithms such as regression, decision trees, or support vector machines to result in the most effective strategies for matchmaking.
% Kiefer & Bernstein (2008): iSPARQL-based strategies for Semantic Web service
% matchmaking; paper in the ESWC 2008 proceedings.
@inproceedings{kiefer2008creation,
  author    = {Kiefer, Christoph and Bernstein, Abraham},
  title     = {The Creation and Evaluation of {iSPARQL} Strategies for Matchmaking},
  editor    = {Hauswirth, Manfred and Koubarakis, Manolis and Bechhofer, Sean},
  booktitle = {Proceedings of the 5th European Semantic Web Conference},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Verlag},
  address   = {Berlin, Heidelberg},
  year      = {2008},
  url       = {http://data.semanticweb.org/conference/eswc/2008/papers/133},
  keywords  = {retrieval information matchmaking machine learning sparql evaluation query-processing-1},
  abstract  = {This research explores our novel method for Semantic Web service matchmaking based on iSPARQL queries, which enable the user to query the Semantic Web with techniques from traditional information retrieval. The strategies for matchmaking which we develop and evaluate in the paper make use of a plethora of similarity measures and combination functions from SimPack -- our library of similarity measures for the use in ontologies. We show how our combination of structured and imprecise querying can be used to perform hybrid Semantic Web service matchmaking in a simple and amazingly fast fashion. We analyze our approach thoroughly on a large OWL-S service test collection, and show how our initial strategies can be improved by applying machine learning algorithms such as regression, decision trees, or support vector machines to result in the most effective strategies for matchmaking.},
}
%0 = inproceedings
%A = Kiefer, Christoph and Bernstein, Abraham
%B = Proceedings of the 5th European Semantic Web Conference
%C = Berlin, Heidelberg
%D = 2008
%I = Springer Verlag
%T = The Creation and Evaluation of iSPARQL Strategies for Matchmaking
%U = http://data.semanticweb.org/conference/eswc/2008/papers/133
Spiliopoulos, V.; Valarakos, A. & Vouros, G.
(2008):
CSR: Discovering Subsumption Relations for the Alignment of Ontologies.
In: Proceedings of the 5th European Semantic Web Conference,
Berlin, Heidelberg.
[Volltext]
[Kurzfassung] [BibTeX][Endnote]
For the effective alignment of ontologies, the computation of equivalence relations between elements of ontologies is not enough: Subsumption relations play a crucial role as well. In this paper we propose the "Classification-Based Learning of Subsumption Relations for the Alignment of Ontologies" (CSR) method. Given a pair of concepts from two ontologies, the objective of CSR is to identify patterns of concepts' features that provide evidence for the subsumption relation among them. This is achieved by means of a classification task, using state of the art supervised machine learning methods. For the learning of the classifiers, CSR generates training datasets from the source ontologies', considering each ontology in isolation: This allows the method to tune itself to the idiosyncrasies of each of the source ontologies. The paper describes thoroughly the method, provides experimental results over an extended version of benchmarking series and discusses the potential of the method.
% Spiliopoulos, Valarakos & Vouros (2008): the CSR method for discovering
% subsumption relations between ontologies; paper in the ESWC 2008 proceedings.
@inproceedings{spiliopoulos2008discovering,
  author    = {Spiliopoulos, Vassilis and Valarakos, Alexandros and Vouros, George},
  title     = {{CSR}: Discovering Subsumption Relations for the Alignment of Ontologies},
  editor    = {Hauswirth, Manfred and Koubarakis, Manolis and Bechhofer, Sean},
  booktitle = {Proceedings of the 5th European Semantic Web Conference},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Verlag},
  address   = {Berlin, Heidelberg},
  year      = {2008},
  url       = {http://data.semanticweb.org/conference/eswc/2008/papers/107},
  keywords  = {classification supervised machine learning subsumption alignment ontology binary ontology-alignment},
  abstract  = {For the effective alignment of ontologies, the computation of equivalence relations between elements of ontologies is not enough: Subsumption relations play a crucial role as well. In this paper we propose the ``Classification-Based Learning of Subsumption Relations for the Alignment of Ontologies'' (CSR) method. Given a pair of concepts from two ontologies, the objective of CSR is to identify patterns of concepts' features that provide evidence for the subsumption relation among them. This is achieved by means of a classification task, using state-of-the-art supervised machine learning methods. For the learning of the classifiers, CSR generates training datasets from the source ontologies, considering each ontology in isolation: This allows the method to tune itself to the idiosyncrasies of each of the source ontologies. The paper describes thoroughly the method, provides experimental results over an extended version of benchmarking series and discusses the potential of the method.},
}
%0 = inproceedings
%A = Spiliopoulos, Vassilis and Valarakos, Alexandros and Vouros, George
%B = Proceedings of the 5th European Semantic Web Conference
%C = Berlin, Heidelberg
%D = 2008
%I = Springer Verlag
%T = CSR: Discovering Subsumption Relations for the Alignment of Ontologies
%U = http://data.semanticweb.org/conference/eswc/2008/papers/107