We present a method for measuring the semantic similarity of texts using a corpus-based measure of semantic word similarity and a normalized and modified version of the Longest Common Subsequence (LCS) string matching algorithm. Existing methods for computing text similarity have
focused mainly on either large documents or individual words. We focus on computing the similarity between two sentences or two short paragraphs. The proposed method can be exploited in a variety of applications involving textual knowledge representation and knowledge discovery. Evaluation
results on two different data sets show that our method outperforms several competing methods.
%0 Journal Article
%1 Islam:2008
%A Islam, Aminul
%A Inkpen, Diana
%C New York, NY, USA
%D 2008
%I ACM
%J ACM Transactions on Knowledge Discovery from Data
%K similarity
%N 2
%P 1--25
%R 10.1145/1376815.1376819
%T Semantic text similarity using corpus-based word similarity and string similarity
%U http://dx.doi.org/10.1145/1376815.1376819
%V 2
%X We present a method for measuring the semantic similarity of texts using a corpus-based measure of semantic word similarity and a normalized and modified version of the Longest Common Subsequence (LCS) string matching algorithm. Existing methods for computing text similarity have
focused mainly on either large documents or individual words. We focus on computing the similarity between two sentences or two short paragraphs. The proposed method can be exploited in a variety of applications involving textual knowledge representation and knowledge discovery. Evaluation
results on two different data sets show that our method outperforms several competing methods.
% Journal article: Islam and Inkpen (2008), ACM TKDD volume 2, number 2.
% The doi field stores the bare identifier (no resolver prefix); the resolvable
% link is kept in url. month uses the standard three-letter macro so the
% bibliography style controls how it is printed.
% Fields such as added-at, biburl, interhash, intrahash, posted-at, priority and
% timestamp are not standard BibTeX fields and are ignored by standard styles
% (presumably bookmarking-service metadata; the biburl points at bibsonomy.org).
@article{Islam:2008,
  abstract             = {We present a method for measuring the semantic similarity of texts using a corpus-based measure of semantic word similarity and a normalized and modified version of the Longest Common Subsequence (LCS) string matching algorithm. Existing methods for computing text similarity have
focused mainly on either large documents or individual words. We focus on computing the similarity between two sentences or two short paragraphs. The proposed method can be exploited in a variety of applications involving textual knowledge representation and knowledge discovery. Evaluation
results on two different data sets show that our method outperforms several competing methods.},
  added-at             = {2009-02-13T09:32:13.000+0100},
  address              = {New York, NY, USA},
  author               = {Islam, Aminul and Inkpen, Diana},
  biburl               = {https://www.bibsonomy.org/bibtex/2141625e4f5eafd7300a8357246bb3cfa/diego_ma},
  citeulike-article-id = {3108227},
  doi                  = {10.1145/1376815.1376819},
  interhash            = {12bf810e5b23105c037d0a1503e417da},
  intrahash            = {141625e4f5eafd7300a8357246bb3cfa},
  issn                 = {1556-4681},
  journal              = {ACM Transactions on Knowledge Discovery from Data},
  keywords             = {similarity},
  month                = jul,
  number               = {2},
  pages                = {1--25},
  posted-at            = {2008-12-02 02:56:17},
  priority             = {2},
  publisher            = {ACM},
  timestamp            = {2009-02-13T09:32:13.000+0100},
  title                = {Semantic text similarity using corpus-based word similarity and string similarity},
  url                  = {http://dx.doi.org/10.1145/1376815.1376819},
  volume               = {2},
  year                 = {2008},
}