| Authors: |
Zheng Rong Yang
and Rebecca Thomson
and T. Charles Hodgman
and Jonathan Dry
and Austin K. Doyle
and Ajit Narayanan
and XiKun Wu
|
| URL: |
http://www.sciencedirect.com/science/article/B6T2K-49N9DN6-2/2/0d63ebb7904ac33ae0d20ce4f6477a57 |
| Tags: |
genetic algorithms,
genetic programming,
Proteolytic cleavage analysis,
Amino acid similarity matrix,
The reverse Polish notation
|
| Abstract: |
We present an algorithm which is able to extract
discriminant rules from oligopeptides for protease
proteolytic cleavage activity prediction. The algorithm
is developed using previous genetic programming. Three
important components in the algorithm are a min-max
scoring function, the reverse Polish notation (RPN) and
the use of minimum description length. The min-max
scoring function is developed using amino acid
similarity matrices for measuring the similarity
between an oligopeptide and a rule, which is a complex
algebraic equation of amino acids rather than a simple
pattern sequence. The Fisher ratio is then calculated
on the scoring values using the class label associated
with the oligopeptides. The discriminant ability of
each rule can therefore be evaluated. The use of RPN
makes the evolutionary operations simpler and therefore
reduces the computational cost. To prevent overfitting,
the concept of minimum description length is used to
penalize over-complicated rules. A fitness function is
therefore composed of the Fisher ratio and the use of
minimum description length for an efficient
evolutionary process. In the application to four
protease datasets (Trypsin, Factor Xa, Hepatitis C
Virus and HIV protease cleavage site prediction), our
algorithm is superior to C5, a conventional method for
deriving decision trees. |
% Journal article: genetic-programming search for discriminant rules that
% predict protease proteolytic cleavage activity (Biosystems 72, 2003).
% Conventions used in this entry: names are in "Last, First" form, month is
% the standard three-letter macro, ranges use "--", and the DOI is stored
% bare (no "doi:" or resolver prefix) so styles can build the link themselves.
@article{ZhengRongYang:2003:BS,
  author   = {Yang, Zheng Rong and
              Thomson, Rebecca and
              Hodgman, T. Charles and
              Dry, Jonathan and
              Doyle, Austin K. and
              Narayanan, Ajit and
              Wu, XiKun},
  title    = {Searching for discrimination rules in protease
              proteolytic cleavage activity using genetic programming
              with a min-max scoring function},
  journal  = {Biosystems},
  volume   = {72},
  number   = {1--2},
  pages    = {159--176},
  month    = nov,
  year     = {2003},
  doi      = {10.1016/S0303-2647(03)00141-2},
  url      = {http://www.sciencedirect.com/science/article/B6T2K-49N9DN6-2/2/0d63ebb7904ac33ae0d20ce4f6477a57},
  keywords = {genetic algorithms, genetic programming, Proteolytic cleavage analysis, Amino acid similarity matrix, The reverse Polish notation},
  abstract = {We present an algorithm which is able to extract
              discriminant rules from oligopeptides for protease
              proteolytic cleavage activity prediction. The algorithm
              is developed using previous genetic programming. Three
              important components in the algorithm are a min-max
              scoring function, the reverse Polish notation (RPN) and
              the use of minimum description length. The min-max
              scoring function is developed using amino acid
              similarity matrices for measuring the similarity
              between an oligopeptide and a rule, which is a complex
              algebraic equation of amino acids rather than a simple
              pattern sequence. The Fisher ratio is then calculated
              on the scoring values using the class label associated
              with the oligopeptides. The discriminant ability of
              each rule can therefore be evaluated. The use of RPN
              makes the evolutionary operations simpler and therefore
              reduces the computational cost. To prevent overfitting,
              the concept of minimum description length is used to
              penalize over-complicated rules. A fitness function is
              therefore composed of the Fisher ratio and the use of
              minimum description length for an efficient
              evolutionary process. In the application to four
              protease datasets (Trypsin, Factor Xa, Hepatitis C
              Virus and HIV protease cleavage site prediction), our
              algorithm is superior to C5, a conventional method for
              deriving decision trees.},
}