We have produced an open source, freely available, algorithm (Open Parser for Systematic IUPAC Nomenclature, OPSIN) that interprets the majority of organic chemical nomenclature in a fast and precise manner. This has been achieved using an approach based on a regular grammar. This grammar is used to guide tokenization, a potentially difficult problem in chemical names. From the parsed chemical name, an XML parse tree is constructed that is operated on in a stepwise manner until the structure has been reconstructed from the name. Results from OPSIN on various computer generated name/structure pair sets are presented. These show exceptionally high precision (99.8\%+) and, when using general organic chemical nomenclature, high recall (98.7-99.2\%). This software can serve as the basis for future open source developments of chemical name interpretation.
%0 Journal Article
%1 Lowe2011
%A Lowe, Daniel M.
%A Corbett, Peter T.
%A Murray-Rust, Peter
%A Glen, Robert C.
%B Journal of Chemical Information and Modeling
%D 2011
%I American Chemical Society
%J Journal of Chemical Information and Modeling
%K *cul iupac-nomenclature oclcml open-source opsin xml
%N 3
%P 739-753
%R 10.1021/ci100384d
%T Chemical Name to Structure: OPSIN, an Open Source Solution
%U http://dx.doi.org/10.1021/ci100384d
%V 51
%X We have produced an open source, freely available, algorithm (Open Parser for Systematic IUPAC Nomenclature, OPSIN) that interprets the majority of organic chemical nomenclature in a fast and precise manner. This has been achieved using an approach based on a regular grammar. This grammar is used to guide tokenization, a potentially difficult problem in chemical names. From the parsed chemical name, an XML parse tree is constructed that is operated on in a stepwise manner until the structure has been reconstructed from the name. Results from OPSIN on various computer generated name/structure pair sets are presented. These show exceptionally high precision (99.8\%+) and, when using general organic chemical nomenclature, high recall (98.7-99.2\%). This software can serve as the basis for future open source developments of chemical name interpretation.
% Journal article introducing OPSIN (Journal of Chemical Information and
% Modeling, vol. 51, no. 3, 2011). Exported from BibSonomy/CiteULike: the
% citeulike-*, interhash, intrahash, added-at, posted-at, priority and
% timestamp fields are service metadata that standard styles ignore.
% Only the acronym in the title is brace-protected so styles can still recase
% the remaining words.
% NOTE(review): `comment` holds an ISSN-shaped value that differs from `issn`,
% presumably the print ISSN; confirm before moving it to a dedicated field.
@article{Lowe2011,
  abstract             = {We have produced an open source, freely available, algorithm (Open Parser for Systematic IUPAC Nomenclature, OPSIN) that interprets the majority of organic chemical nomenclature in a fast and precise manner. This has been achieved using an approach based on a regular grammar. This grammar is used to guide tokenization, a potentially difficult problem in chemical names. From the parsed chemical name, an XML parse tree is constructed that is operated on in a stepwise manner until the structure has been reconstructed from the name. Results from OPSIN on various computer generated name/structure pair sets are presented. These show exceptionally high precision (99.8\%+) and, when using general organic chemical nomenclature, high recall (98.7-99.2\%). This software can serve as the basis for future open source developments of chemical name interpretation.},
  added-at             = {2019-03-11T21:00:05.000+0100},
  author               = {Lowe, Daniel M. and Corbett, Peter T. and Murray-Rust, Peter and Glen, Robert C.},
  biburl               = {https://www.bibsonomy.org/bibtex/297f54667d4707c58b255881831ec97f9/fairybasslet},
  citeulike-article-id = {8972090},
  citeulike-linkout-0  = {http://dx.doi.org/10.1021/ci100384d},
  citeulike-linkout-1  = {http://pubs.acs.org/doi/abs/10.1021/ci100384d},
  citeulike-linkout-2  = {http://view.ncbi.nlm.nih.gov/pubmed/21384929},
  citeulike-linkout-3  = {http://www.hubmed.org/display.cgi?uids=21384929},
  comment              = {1549-9596},
  day                  = 28,
  doi                  = {10.1021/ci100384d},
  interhash            = {974c56acf0dd197a239077a477b76e83},
  intrahash            = {97f54667d4707c58b255881831ec97f9},
  issn                 = {1549-960X},
  journal              = {Journal of Chemical Information and Modeling},
  keywords             = {*cul iupac-nomenclature oclcml open-source opsin xml},
  month                = mar,
  number               = 3,
  pages                = {739--753},
  pmid                 = {21384929},
  posted-at            = {2011-11-27 12:10:49},
  priority             = {2},
  publisher            = {American Chemical Society},
  timestamp            = {2019-03-11T21:06:37.000+0100},
  title                = {Chemical Name to Structure: {OPSIN}, an Open Source Solution},
  url                  = {https://doi.org/10.1021/ci100384d},
  volume               = 51,
  year                 = 2011,
}