Development of a robust and efficient approach for
extracting useful information from microarray data
continues to be a significant and challenging task.
Microarray data are characterised by a high dimension,
high signal-to-noise ratio, and high correlations
between genes, but with a relatively small sample size.
Current methods for dimensional reduction can further
be improved for the scenario of the presence of a
single (or a few) high influential gene(s) in which its
effect in the feature subset would prohibit inclusion
of other important genes. We have formalised a robust
gene selection approach based on a hybrid between
genetic algorithm and support vector machine. The major
goal of this hybridisation was to exploit fully their
respective merits (e.g., robustness to the size of
solution space and capability of handling a very large
dimension of feature genes) for identification of key
feature genes (or molecular signatures) for a complex
biological phenotype. We have applied the approach to
the microarray data of diffuse large B cell lymphoma to
demonstrate its behaviours and properties for mining
the high-dimension data of genome-wide gene expression
profiles. The resulting classifier(s) (the optimal gene
subset(s)) has achieved the highest accuracy
(99%) for prediction of independent microarray
samples in comparisons with marginal filters and a
hybrid between genetic algorithm and K nearest
neighbours.
Department of Bioinformatics, Harbin Medical
University, Harbin 150086, People's Republic of China
College of Biological Science and Technology, Tongji
University, Shanghai 200092, People's Republic of
China
Department of Computer Science, Harbin Institute of
Technology, Harbin 150080, People's Republic of
China
Department of Medicine, Institute of Human Genetics,
University of Minnesota, Minneapolis-St. Paul, MN
55455, USA
Department of Otorhinolaryngology/Head and Neck
Surgery, Institute of Otolaryngology, Chinese PLA
General Hospital, Beijing 100853, People's Republic of
China
Department of Cardiovascular Medicine and Department of
Molecular Cardiology, The Cleveland Clinic Foundation,
Cleveland, OH 44195,
USA
http://www.elsevier.com/wps/find/journaldescription.cws_home/622838/description#description
%0 Journal Article
%1 GA-SVM_optgenesubset
%A Li, Li
%A Jiang, Wei
%A Li, Xia
%A Moser, Kathy L.
%A Guo, Zheng
%A Du, Lei
%A Wang, Qiuju
%A Topol, Eric J.
%A Wang, Qing
%A Rao, Shaoqi
%D 2005
%J Genomics
%K DNA Feature Microarray Support algorithms, gene genetic machine, programming, selection, vector
%N 1
%P 16--23
%R doi:10.1016/j.ygeno.2004.09.007
%T A robust hybrid between genetic algorithm and support
vector machine for extracting an optimal feature gene
subset
%V 85
%X Development of a robust and efficient approach for
extracting useful information from microarray data
continues to be a significant and challenging task.
Microarray data are characterised by a high dimension,
high signal-to-noise ratio, and high correlations
between genes, but with a relatively small sample size.
Current methods for dimensional reduction can further
be improved for the scenario of the presence of a
single (or a few) high influential gene(s) in which its
effect in the feature subset would prohibit inclusion
of other important genes. We have formalised a robust
gene selection approach based on a hybrid between
genetic algorithm and support vector machine. The major
goal of this hybridisation was to exploit fully their
respective merits (e.g., robustness to the size of
solution space and capability of handling a very large
dimension of feature genes) for identification of key
feature genes (or molecular signatures) for a complex
biological phenotype. We have applied the approach to
the microarray data of diffuse large B cell lymphoma to
demonstrate its behaviours and properties for mining
the high-dimension data of genome-wide gene expression
profiles. The resulting classifier(s) (the optimal gene
subset(s)) has achieved the highest accuracy
(99%) for prediction of independent microarray
samples in comparisons with marginal filters and a
hybrid between genetic algorithm and K nearest
neighbours.
@comment{Li et al. (2005), Genomics 85(1):16--23. BibSonomy export.
The doi field holds the bare DOI (no resolver or doi: prefix) and month
uses the standard jan macro so that styles can localise or abbreviate it.
The non-standard notes field lists the author affiliations and is ignored
by standard bibliography styles.}
@article{GA-SVM_optgenesubset,
abstract = {Development of a robust and efficient approach for
extracting useful information from microarray data
continues to be a significant and challenging task.
Microarray data are characterised by a high dimension,
high signal-to-noise ratio, and high correlations
between genes, but with a relatively small sample size.
Current methods for dimensional reduction can further
be improved for the scenario of the presence of a
single (or a few) high influential gene(s) in which its
effect in the feature subset would prohibit inclusion
of other important genes. We have formalised a robust
gene selection approach based on a hybrid between
genetic algorithm and support vector machine. The major
goal of this hybridisation was to exploit fully their
respective merits (e.g., robustness to the size of
solution space and capability of handling a very large
dimension of feature genes) for identification of key
feature genes (or molecular signatures) for a complex
biological phenotype. We have applied the approach to
the microarray data of diffuse large B cell lymphoma to
demonstrate its behaviours and properties for mining
the high-dimension data of genome-wide gene expression
profiles. The resulting classifier(s) (the optimal gene
subset(s)) has achieved the highest accuracy
(99\%) for prediction of independent microarray
samples in comparisons with marginal filters and a
hybrid between genetic algorithm and K nearest
neighbours.},
added-at = {2008-06-19T17:35:00.000+0200},
author = {Li, Li and Jiang, Wei and Li, Xia and Moser, Kathy L. and Guo, Zheng and Du, Lei and Wang, Qiuju and Topol, Eric J. and Wang, Qing and Rao, Shaoqi},
biburl = {https://www.bibsonomy.org/bibtex/274f8bb37259d69cdcd9285a31ccedbcd/brazovayeye},
doi = {10.1016/j.ygeno.2004.09.007},
interhash = {200beeab92f70076ace1321f442f35a8},
intrahash = {74f8bb37259d69cdcd9285a31ccedbcd},
journal = {Genomics},
keywords = {DNA Feature Microarray Support algorithms, gene genetic machine, programming, selection, vector},
month = jan,
notes = {Department of Bioinformatics, Harbin Medical
University, Harbin 150086, People's Republic of China
College of Biological Science and Technology, Tongji
University, Shanghai 200092, People's Republic of
China
Department of Computer Science, Harbin Institute of
Technology, Harbin 150080, People's Republic of
China
Department of Medicine, Institute of Human Genetics,
University of Minnesota, Minneapolis--St. Paul, MN
55455, USA
Department of Otorhinolaryngology/Head and Neck
Surgery, Institute of Otolaryngology, Chinese PLA
General Hospital, Beijing 100853, People's Republic of
China
Department of Cardiovascular Medicine and Department of
Molecular Cardiology, The Cleveland Clinic Foundation,
Cleveland, OH 44195,
USA
http://www.elsevier.com/wps/find/journaldescription.cws_home/622838/description#description},
number = 1,
pages = {16--23},
timestamp = {2008-06-19T17:45:30.000+0200},
title = {A robust hybrid between genetic algorithm and support
vector machine for extracting an optimal feature gene
subset},
volume = 85,
year = 2005
}