MOTIVATION: Progressive Multiple Sequence Alignment (MSA) methods depend on reducing an MSA to a linear profile for each alignment step. However, this leads to loss of information needed for accurate alignment, and gap scoring artifacts. RESULTS: We present a graph representation of an MSA that can itself be aligned directly by pairwise dynamic programming, eliminating the need to reduce the MSA to a profile. This enables our algorithm (Partial Order Alignment (POA)) to guarantee that the optimal alignment of each new sequence versus each sequence in the MSA will be considered. Moreover, this algorithm introduces a new edit operator, homologous recombination, important for multidomain sequences. The algorithm has improved speed (linear time complexity) over existing MSA algorithms, enabling construction of massive and complex alignments (e.g. an alignment of 5000 sequences in 4 h on a Pentium II). We demonstrate the utility of this algorithm on a family of multidomain SH2 proteins, and on EST assemblies containing alternative splicing and polymorphism. AVAILABILITY: The partial order alignment program POA is available at http://www.bioinformatics.ucla.edu/poa.
%0 Journal Article
%1 Lee:2002aa
%A Lee, Christopher
%A Grasso, Catherine
%A Sharlow, Mark F
%D 2002
%J Bioinformatics
%K Adenylyltransferase, Algorithms, Alignment, Base Control DNA, Data, Databases, Expressed Factors, Genetic, Glucose-1-Phosphate Homology, Humans, Messenger, Models, Molecular Nucleotidyltransferases, Plant Proteins, Quality RNA, Sensitivity Sequence Sequence, Software, Specificity, Statistical, Tags, Time and
%N 3
%P 452--64
%T Multiple sequence alignment using partial order graphs
%V 18
%X MOTIVATION: Progressive Multiple Sequence Alignment (MSA) methods depend on reducing an MSA to a linear profile for each alignment step. However, this leads to loss of information needed for accurate alignment, and gap scoring artifacts. RESULTS: We present a graph representation of an MSA that can itself be aligned directly by pairwise dynamic programming, eliminating the need to reduce the MSA to a profile. This enables our algorithm (Partial Order Alignment (POA)) to guarantee that the optimal alignment of each new sequence versus each sequence in the MSA will be considered. Moreover, this algorithm introduces a new edit operator, homologous recombination, important for multidomain sequences. The algorithm has improved speed (linear time complexity) over existing MSA algorithms, enabling construction of massive and complex alignments (e.g. an alignment of 5000 sequences in 4 h on a Pentium II). We demonstrate the utility of this algorithm on a family of multidomain SH2 proteins, and on EST assemblies containing alternative splicing and polymorphism. AVAILABILITY: The partial order alignment program POA is available at http://www.bioinformatics.ucla.edu/poa.
@article{Lee:2002aa,
abstract = {MOTIVATION: Progressive Multiple Sequence Alignment (MSA) methods depend on reducing an MSA to a linear profile for each alignment step. However, this leads to loss of information needed for accurate alignment, and gap scoring artifacts. RESULTS: We present a graph representation of an MSA that can itself be aligned directly by pairwise dynamic programming, eliminating the need to reduce the MSA to a profile. This enables our algorithm (Partial Order Alignment (POA)) to guarantee that the optimal alignment of each new sequence versus each sequence in the MSA will be considered. Moreover, this algorithm introduces a new edit operator, homologous recombination, important for multidomain sequences. The algorithm has improved speed (linear time complexity) over existing MSA algorithms, enabling construction of massive and complex alignments (e.g. an alignment of 5000 sequences in 4 h on a Pentium II). We demonstrate the utility of this algorithm on a family of multidomain SH2 proteins, and on EST assemblies containing alternative splicing and polymorphism. AVAILABILITY: The partial order alignment program POA is available at http://www.bioinformatics.ucla.edu/poa.},
added-at = {2007-09-17T20:19:41.000+0200},
affiliation = {Department of Chemistry and Biochemistry, University of California, Los Angeles, Los Angeles, CA 90095-1570, USA. leec@mbi.ucla.edu},
author = {Lee, Christopher and Grasso, Catherine and Sharlow, Mark F},
biburl = {https://www.bibsonomy.org/bibtex/281238c67b322e3148e22776c9d9cbf3d/dzerbino},
interhash = {3f4f1dd6276f6899961c841019646a24},
intrahash = {81238c67b322e3148e22776c9d9cbf3d},
issue = {3},
journal = {Bioinformatics},
keywords = {Adenylyltransferase, Algorithms, Alignment, Base Control DNA, Data, Databases, Expressed Factors, Genetic, Glucose-1-Phosphate Homology, Humans, Messenger, Models, Molecular Nucleotidyltransferases, Plant Proteins, Quality RNA, Sensitivity Sequence Sequence, Software, Specificity, Statistical, Tags, Time and},
language = {English},
local-url = {file://localhost/Users/danielzerbino/Documents/Papers/2002/Lee/Bioinformatics%202002%20Lee.pdf},
month = Mar,
number = 3,
pages = {452--64},
pmid = {11934745},
timestamp = {2007-09-17T20:19:43.000+0200},
title = {Multiple sequence alignment using partial order graphs},
uri = {papers://055852FE-1648-42FE-91D0-8CA474D2B905/Paper/p9},
volume = 18,
year = 2002
}