Molecular gene-expression datasets consist of samples with
tens of thousands of measured quantities (e.g., high dimensional data).
However, there exist lower-dimensional representations that retain the
useful information. We present a novel algorithm for such dimensionality
reduction called Pathway Activity Score Learning (PASL). The major
novelty of PASL is that the constructed features directly correspond to
known molecular pathways and can be interpreted as pathway activity
scores. Hence, unlike PCA and similar methods, PASL’s latent space
has a relatively straight-forward biological interpretation. As a use-case,
PASL is applied on two collections of breast cancer and leukemia gene
expression datasets. We show that PASL does retain the predictive information for disease classification on new, unseen datasets, as well as
outperforming PLIER, a recently proposed competitive method. We also
show that differential activation pathway analysis provides complementary information to standard gene set enrichment analysis. The code is
available at https://github.com/mensxmachina/PASL.
%0 Journal Article
%1 noauthororeditor
%A Karagiannaki, Ioulia
%A Pantazis, Yannis
%A Chatzaki, Ekaterini
%A Tsamardinos, Ioannis
%D 2020
%E Tsoumakas, G
%E Manolopoulos, Y
%E Matwin, S
%J Discovery Science. DS 2020. Lecture Notes in Computer Science
%K mxmcausalpath
%P 246-261
%R 10.1007/978-3-030-61527-7_17
%T Pathway Activity Score Learning for Dimensionality Reduction of Gene Expression Data
%U https://link.springer.com/chapter/10.1007%2F978-3-030-61527-7_17
%V 12323
%X Molecular gene-expression datasets consist of samples with
tens of thousands of measured quantities (e.g., high dimensional data).
However, there exist lower-dimensional representations that retain the
useful information. We present a novel algorithm for such dimensionality
reduction called Pathway Activity Score Learning (PASL). The major
novelty of PASL is that the constructed features directly correspond to
known molecular pathways and can be interpreted as pathway activity
scores. Hence, unlike PCA and similar methods, PASL’s latent space
has a relatively straight-forward biological interpretation. As a use-case,
PASL is applied on two collections of breast cancer and leukemia gene
expression datasets. We show that PASL does retain the predictive information for disease classification on new, unseen datasets, as well as
outperforming PLIER, a recently proposed competitive method. We also
show that differential activation pathway analysis provides complementary information to standard gene set enrichment analysis. The code is
available at https://github.com/mensxmachina/PASL.
@comment{
  Paper in the Discovery Science (DS 2020) proceedings, published as
  volume 12323 of Lecture Notes in Computer Science. Stored as an
  inproceedings entry so the proceedings title and the series are
  separate fields. The citation key is the one generated by the
  BibSonomy export and is kept so existing citations keep resolving.
  The added-at, biburl, interhash, intrahash and timestamp fields are
  BibSonomy bookkeeping and are ignored by bibliography styles.
}
@inproceedings{noauthororeditor,
  author    = {Karagiannaki, Ioulia and Pantazis, Yannis and Chatzaki, Ekaterini and Tsamardinos, Ioannis},
  title     = {Pathway Activity Score Learning for Dimensionality Reduction of Gene Expression Data},
  booktitle = {Discovery Science ({DS} 2020)},
  editor    = {Tsoumakas, G and Manolopoulos, Y and Matwin, S},
  series    = {Lecture Notes in Computer Science},
  volume    = {12323},
  pages     = {246--261},
  year      = {2020},
  doi       = {10.1007/978-3-030-61527-7_17},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-61527-7_17},
  keywords  = {mxmcausalpath},
  abstract  = {Molecular gene-expression datasets consist of samples with
tens of thousands of measured quantities (e.g., high dimensional data).
However, there exist lower-dimensional representations that retain the
useful information. We present a novel algorithm for such dimensionality
reduction called Pathway Activity Score Learning (PASL). The major
novelty of PASL is that the constructed features directly correspond to
known molecular pathways and can be interpreted as pathway activity
scores. Hence, unlike PCA and similar methods, PASL's latent space
has a relatively straight-forward biological interpretation. As a use-case,
PASL is applied on two collections of breast cancer and leukemia gene
expression datasets. We show that PASL does retain the predictive information for disease classification on new, unseen datasets, as well as
outperforming PLIER, a recently proposed competitive method. We also
show that differential activation pathway analysis provides complementary information to standard gene set enrichment analysis. The code is
available at https://github.com/mensxmachina/PASL.},
  added-at  = {2020-09-07T12:31:50.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/25aa1c97303026e34c4c5d8a76d116652/mensxmachina},
  interhash = {250e1c55d999f5493581587cf0627a28},
  intrahash = {5aa1c97303026e34c4c5d8a76d116652},
  timestamp = {2021-03-18T08:43:09.000+0100},
}