Given a large collection of transactions containing items, a basic
common data mining problem is to extract the so-called frequent
itemsets (i.e., set of items appearing in at least a given number of
transactions). In this paper, we propose a structure called free-sets,
from which we can approximate any itemset support (i.e., the number of
transactions containing the itemset) and we formalize this notion in
the framework of -adequate representation 10.We show that frequent
free-sets can be efficiently extracted using pruning strategies
developed for frequent itemset discovery, and that they can be used to
approximate the support of any frequent itemset. Experiments run on
real dense data sets show a significant reduction of the size of the
output when compared with standard frequent itemsets extraction.
Furthermore, the experiments show that the extraction of frequent
free-sets is still possible when the extraction of frequent itemsets
becomes intractable. Finally, we show that the error made when
approximating frequent itemset support remains very low in practice.
1
%0 Book Section
%1 boulicaut2000approximation
%A Boulicaut, Jean-Francois
%A Bykowski, Artur
%A Rigotti, Christophe
%B Principles of Data Mining and Knowledge Discovery
%D 2000
%I Springer
%K imported
%P 75--85
%T Approximation of frequency queries by means of free-sets
%U http://link.springer.com/chapter/10.1007/3-540-45372-5_8
%X Given a large collection of transactions containing items, a basic
common data mining problem is to extract the so-called frequent
itemsets (i.e., set of items appearing in at least a given number of
transactions). In this paper, we propose a structure called free-sets,
from which we can approximate any itemset support (i.e., the number of
transactions containing the itemset) and we formalize this notion in
the framework of -adequate representation 10.We show that frequent
free-sets can be efficiently extracted using pruning strategies
developed for frequent itemset discovery, and that they can be used to
approximate the support of any frequent itemset. Experiments run on
real dense data sets show a significant reduction of the size of the
output when compared with standard frequent itemsets extraction.
Furthermore, the experiments show that the extraction of frequent
free-sets is still possible when the extraction of frequent itemsets
becomes intractable. Finally, we show that the error made when
approximating frequent itemset support remains very low in practice.
1
@incollection{boulicaut2000approximation,
abstract = {Given a large collection of transactions containing items, a basic
common data mining problem is to extract the so-called frequent
itemsets (i.e., set of items appearing in at least a given number of
transactions). In this paper, we propose a structure called free-sets,
from which we can approximate any itemset support (i.e., the number of
transactions containing the itemset) and we formalize this notion in
the framework of -adequate representation [10].We show that frequent
free-sets can be efficiently extracted using pruning strategies
developed for frequent itemset discovery, and that they can be used to
approximate the support of any frequent itemset. Experiments run on
real dense data sets show a significant reduction of the size of the
output when compared with standard frequent itemsets extraction.
Furthermore, the experiments show that the extraction of frequent
free-sets is still possible when the extraction of frequent itemsets
becomes intractable. Finally, we show that the error made when
approximating frequent itemset support remains very low in practice.
1},
added-at = {2013-08-04T16:29:50.000+0200},
author = {Boulicaut, Jean-Francois and Bykowski, Artur and Rigotti, Christophe},
biburl = {https://www.bibsonomy.org/bibtex/2fea5cba7dc02da87b91f181dee87bac4/francesco.k},
booktitle = {Principles of Data Mining and Knowledge Discovery},
citations = {141},
citedbyid = {12998834731688042943},
file = {file://Approximation of Frequency Queries.pdf:pdf},
interhash = {7b3e7b0c982db3afa3b6633cade025b4},
intrahash = {fea5cba7dc02da87b91f181dee87bac4},
keywords = {imported},
mailhosts = {insa-lyon.fr},
md5sum = {73d406b9eded94890ef07c85bf112ffe},
pages = {75--85},
pdfmeat = {timestamp: 2013-08-04 16:29:15; queries: 3; inode: 1949864},
publisher = {Springer},
timestamp = {2013-08-04T16:29:51.000+0200},
title = {Approximation of frequency queries by means of free-sets},
url = {http://link.springer.com/chapter/10.1007/3-540-45372-5_8},
year = 2000
}