% Bibliography entries on the dimensionality and modelling of binary (0/1) data.
% Citation keys are unchanged so that existing citations keep resolving.

% Same title and author list as the Tatti2006 entry further down; kept as a
% separate record under its existing key. No journal is recorded for it, so it
% is typed as misc rather than article.
@misc{keyhere,
  author      = {Tatti, Nikolaj and Mielik{\"a}inen, Taneli and Gionis, Aristides and Mannila, Heikki},
  title       = {What is the Dimension of Your Binary Data?},
  year        = 2006,
  url         = {http://eprints.pascal-network.org/archive/00002228/},
  typesource  = {Simple CitationSource},
  description = {PASCAL -},
  biburl      = {http://www.bibsonomy.org/bibtex/2e8e5a5608e9b3e0d76f54137b107bd2e/wnpxrz},
  keywords    = {dimension binary data imported}
}

% Conference paper; month uses the predefined three-letter macro.
@inproceedings{hollmen03:mixture,
  author      = {Hollm{\'e}n, Jaakko and Sepp{\"a}nen, Jouni K. and Mannila, Heikki},
  title       = {Mixture Models and Frequent Sets: Combining Global and Local Methods for 0--1 Data},
  booktitle   = {SIAM International Conference on Data Mining (SDM'03)},
  address     = {San Francisco},
  month       = may,
  year        = 2003,
  url         = {http://citeseer.ist.psu.edu/698125.html},
  description = {Mixture Models and Frequent Sets: Combining Global and Local Methods for 0-1 Data. (ResearchIndex)},
  biburl      = {http://www.bibsonomy.org/bibtex/220b9e565c0156cbc035e7e9f112558b2/wnpxrz},
  keywords    = {model binary set mixture data frequent imported}
}

% Journal article; the doi field holds the bare DOI taken from the ee link.
@article{Tatti2006Safe,
  author      = {Tatti, Nikolaj},
  title       = {Safe Projections of Binary Data Sets},
  journal     = {Acta Informatica},
  volume      = 42,
  number      = {8--9},
  pages       = {617--638},
  year        = 2006,
  doi         = {10.1007/s00236-006-0009-9},
  url         = {http://dblp.uni-trier.de/db/journals/acta/acta42.html#Tatti06},
  ee          = {http://dx.doi.org/10.1007/s00236-006-0009-9},
  date        = {2006-05-10},
  description = {dblp},
  biburl      = {http://www.bibsonomy.org/bibtex/206a0059b5439582a083b0c4724d88751/wnpxrz},
  keywords    = {proj:tags proj:bk binary paper projection data toread proj:et}
}

% Conference paper; the doi field holds the bare DOI without a resolver prefix.
@inproceedings{Tatti2006,
  author      = {Tatti, Nikolaj and Mielik{\"a}inen, Taneli and Gionis, Aristides and Mannila, Heikki},
  title       = {What is the Dimension of Your Binary Data?},
  booktitle   = {ICDM '06: Proceedings of the Sixth International Conference on Data Mining},
  pages       = {603--612},
  publisher   = {IEEE Computer Society},
  address     = {Washington, DC, USA},
  year        = 2006,
  isbn        = {0-7695-2701-9},
  doi         = {10.1109/ICDM.2006.167},
  url         = {http://portal.acm.org/citation.cfm?id=1193362},
  description = {What is the Dimension of Your Binary Data?},
  abstract    = {Many 0/1 datasets have a very large number of variables; however, they are sparse and the dependency structure of the variables is simpler than the number of variables would suggest. Defining the effective dimensionality of such a dataset is a nontrivial problem. We consider the problem of defining a robust measure of dimension for 0/1 datasets, and show that the basic idea of fractal dimension can be adapted for binary data. However, as such the fractal dimension is difficult to interpret. Hence we introduce the concept of normalized fractal dimension. For a dataset D, its normalized fractal dimension counts the number of independent columns needed to achieve the unnormalized fractal dimension of D. The normalized fractal dimension measures the degree of dependency structure of the data. We study the properties of the normalized fractal dimension and discuss its computation. We give empirical results on the normalized fractal dimension, comparing it against PCA.},
  biburl      = {http://www.bibsonomy.org/bibtex/2fa246f9a5491796291788980ccf3d930/wnpxrz},
  keywords    = {proj:tags proj:bk dimension binary paper data toread proj:et imported}
}