We study data fusion under the assumption that data source-specific variation is irrelevant and only shared variation is relevant. Traditionally the shared variation has been sought by maximizing a dependency measure, such as correlation of linear projections in Canonical Correlation Analysis. In this traditional framework it is hard to tackle overfitting and model order selection, and thus we turn to probabilistic generative modeling which makes all tools of Bayesian inference applicable. We introduce a family of probabilistic models for the same task, and present conditions under which they seek dependency. We show that probabilistic CCA is a special case of the model family, and derive a new dependency-seeking clustering algorithm as another example. The solution is computed with variational Bayes.
Description
Scientific Commons: Preprinted with permission. Probabilistic approach to detecting dependencies between data sets (2010), 2010-05-24 [Arto Klami, Samuel Kaski]
%0 Journal Article
%1 ArtoKlami2010
%A Klami, Arto
%A Kaski, Samuel
%D 2010
%K bio imported
%T Probabilistic approach to detecting dependencies between data sets
%Z Preprinted with permission.
%U http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.164.7736
%X We study data fusion under the assumption that data source-specific variation is irrelevant and only shared variation is relevant. Traditionally the shared variation has been sought by maximizing a dependency measure, such as correlation of linear projections in Canonical Correlation Analysis. In this traditional framework it is hard to tackle overfitting and model order selection, and thus we turn to probabilistic generative modeling which makes all tools of Bayesian inference applicable. We introduce a family of probabilistic models for the same task, and present conditions under which they seek dependency. We show that probabilistic CCA is a special case of the model family, and derive a new dependency-seeking clustering algorithm as another example. The solution is computed with variational Bayes.
@comment{
  Entry ArtoKlami2010: Klami and Kaski, imported from a CiteSeerX record via
  Scientific Commons and BibSonomy.
  - Typed as misc because the record names no journal; an article entry
    without a journal field is rejected/warned about by every style.
  - The phrase "Preprinted with permission." was fused into the title by the
    importer; it is kept in the note field instead.
  - The Scientific Commons landing page was stored in the location field,
    which is reserved for a publisher's place; it now lives in the ignored
    field scientificcommons-url.
  - added-at, biburl, description, interhash, intrahash, keywords and
    timestamp are BibSonomy bookkeeping fields and are kept verbatim.
  NOTE(review): the year is the one given by the imported record. If a
  peer-reviewed version of this work exists, switch back to the article type
  and add journal, volume and pages -- TODO confirm against the publisher.
}
@misc{ArtoKlami2010,
  author                = {Klami, Arto and Kaski, Samuel},
  title                 = {Probabilistic approach to detecting dependencies between data sets},
  year                  = {2010},
  note                  = {Preprinted with permission},
  url                   = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.164.7736},
  institution           = {CiteSeerX - Scientific Literature Digital Library and Search Engine [http://citeseerx.ist.psu.edu/oai2] (United States)},
  abstract              = {We study data fusion under the assumption that data source-specific variation is irrelevant and only shared variation is relevant. Traditionally the shared variation has been sought by maximizing a dependency measure, such as correlation of linear projections in Canonical Correlation Analysis. In this traditional framework it is hard to tackle overfitting and model order selection, and thus we turn to probabilistic generative modeling which makes all tools of Bayesian inference applicable. We introduce a family of probabilistic models for the same task, and present conditions under which they seek dependency. We show that probabilistic CCA is a special case of the model family, and derive a new dependency-seeking clustering algorithm as another example. The solution is computed with variational Bayes.},
  keywords              = {bio imported},
  description           = {Scientific Commons: Preprinted with permission. Probabilistic approach to detecting dependencies between data sets (2010), 2010-05-24 [Arto Klami, Samuel Kaski]},
  scientificcommons-url = {http://www.scientificcommons.org/57569435},
  added-at              = {2010-05-25T14:37:09.000+0200},
  timestamp             = {2010-05-25T14:37:09.000+0200},
  biburl                = {https://www.bibsonomy.org/bibtex/21856e90e218e550c30a625cd9274e972/wnpxrz},
  interhash             = {8ff43d23bfe78bc78c242a278b8c50b3},
  intrahash             = {1856e90e218e550c30a625cd9274e972},
}