Semantic similarity between words is becoming a generic problem for many applications of computational linguistics and artificial intelligence. This paper explores the determination of semantic similarity by a number of information sources, which consist of structural semantic information from a lexical taxonomy and information content from a corpus. To investigate how information sources could be used effectively, a variety of strategies for using various possible information sources are implemented. A new measure is then proposed which combines information sources nonlinearly. Experimental evaluation against a benchmark set of human similarity ratings demonstrates that the proposed measure significantly outperforms traditional similarity measures.
%0 Journal Article
%1 LBM03
%A Li, Yuhua
%A Bandar, Zuhair A.
%A McLean, David
%C Piscataway, NJ, USA
%D 2003
%I IEEE Educational Activities Department
%J IEEE Transactions on Knowledge and Data Engineering
%K WT08_goalmatch ranking semantic_similarity
%N 4
%P 871--882
%R 10.1109/TKDE.2003.1209005
%T An Approach for Measuring Semantic Similarity between Words Using Multiple Information Sources
%U http://dx.doi.org/10.1109/TKDE.2003.1209005
%V 15
%X Semantic similarity between words is becoming a generic problem for many applications of computational linguistics and artificial intelligence. This paper explores the determination of semantic similarity by a number of information sources, which consist of structural semantic information from a lexical taxonomy and information content from a corpus. To investigate how information sources could be used effectively, a variety of strategies for using various possible information sources are implemented. A new measure is then proposed which combines information sources nonlinearly. Experimental evaluation against a benchmark set of human similarity ratings demonstrates that the proposed measure significantly outperforms traditional similarity measures.
@comment{LBM03: doi is stored bare (no resolver prefix) so styles can build the link themselves; journal is given by its full title; url points at the current doi.org resolver.}
@article{LBM03,
  abstract             = {Semantic similarity between words is becoming a generic problem for many applications of computational linguistics and artificial intelligence. This paper explores the determination of semantic similarity by a number of information sources, which consist of structural semantic information from a lexical taxonomy and information content from a corpus. To investigate how information sources could be used effectively, a variety of strategies for using various possible information sources are implemented. A new measure is then proposed which combines information sources nonlinearly. Experimental evaluation against a benchmark set of human similarity ratings demonstrates that the proposed measure significantly outperforms traditional similarity measures.},
  added-at             = {2009-03-26T12:50:01.000+0100},
  address              = {Piscataway, NJ, USA},
  author               = {Li, Yuhua and Bandar, Zuhair A. and McLean, David},
  biburl               = {https://www.bibsonomy.org/bibtex/2dc79dd1172250cd936302ab6c00f75b5/wt_08},
  citeulike-article-id = {3850810},
  doi                  = {10.1109/TKDE.2003.1209005},
  interhash            = {cf273fc35c8c460e481c9b51d42d7058},
  intrahash            = {dc79dd1172250cd936302ab6c00f75b5},
  issn                 = {1041-4347},
  journal              = {IEEE Transactions on Knowledge and Data Engineering},
  keywords             = {WT08_goalmatch ranking semantic_similarity},
  number               = {4},
  pages                = {871--882},
  posted-at            = {2009-01-05 15:08:48},
  priority             = {5},
  publisher            = {IEEE Educational Activities Department},
  timestamp            = {2009-03-26T12:50:01.000+0100},
  title                = {An Approach for Measuring Semantic Similarity between Words Using Multiple Information Sources},
  url                  = {https://doi.org/10.1109/TKDE.2003.1209005},
  volume               = {15},
  year                 = {2003},
}