We have found a method to automatically extract the
meaning of words and phrases from the world-wide-web
using Google page counts. The approach is novel in its
unrestricted problem domain, simplicity of
implementation, and manifestly ontological
underpinnings. The world-wide-web is the largest
database on earth, and the latent semantic context
information entered by millions of independent users
averages out to provide automatic meaning of useful
quality. We demonstrate positive correlations,
evidencing an underlying semantic structure, in both
numerical symbol notations and number-name words in a
variety of natural languages and contexts. Next, we
demonstrate the ability to distinguish between colours
and numbers, and to distinguish between 17th century
Dutch painters; the ability to understand electrical
terms, religious terms, and emergency incidents; we
conduct a massive experiment in understanding WordNet
categories; and finally we demonstrate the ability to
do a simple automatic English-Spanish translation.
%0 Journal Article
%1 cs.CL/0412098
%A Cilibrasi, Rudi
%A Vitanyi, Paul M. B.
%D 2004
%K distance-measure google machine-learning
%N cs.CL/0412098
%P 370-383
%T Automatic Meaning Discovery Using Google
%U http://homepages.cwi.nl/~paulv/papers/amdug.pdf
%X We have found a method to automatically extract the
meaning of words and phrases from the world-wide-web
using Google page counts. The approach is novel in its
unrestricted problem domain, simplicity of
implementation, and manifestly ontological
underpinnings. The world-wide-web is the largest
database on earth, and the latent semantic context
information entered by millions of independent users
averages out to provide automatic meaning of useful
quality. We demonstrate positive correlations,
evidencing an underlying semantic structure, in both
numerical symbol notations and number-name words in a
variety of natural languages and contexts. Next, we
demonstrate the ability to distinguish between colours
and numbers, and to distinguish between 17th century
Dutch painters; the ability to understand electrical
terms, religious terms, and emergency incidents; we
conduct a massive experiment in understanding WordNet
categories; and finally we demonstrate the ability to
do a simple automatic English-Spanish translation.
@article{cs.CL/0412098,
abstract = {We have found a method to automatically extract the
meaning of words and phrases from the world-wide-web
using Google page counts. The approach is novel in its
unrestricted problem domain, simplicity of
implementation, and manifestly ontological
underpinnings. The world-wide-web is the largest
database on earth, and the latent semantic context
information entered by millions of independent users
averages out to provide automatic meaning of useful
quality. We demonstrate positive correlations,
evidencing an underlying semantic structure, in both
numerical symbol notations and number-name words in a
variety of natural languages and contexts. Next, we
demonstrate the ability to distinguish between colours
and numbers, and to distinguish between 17th century
Dutch painters; the ability to understand electrical
terms, religious terms, and emergency incidents; we
conduct a massive experiment in understanding WordNet
categories; and finally we demonstrate the ability to
do a simple automatic English-Spanish translation.},
added-at = {2011-11-23T16:24:13.000+0100},
archiveprefix = {arXiv},
author = {Cilibrasi, Rudi and Vitanyi, Paul M. B.},
biburl = {https://www.bibsonomy.org/bibtex/250308d5168f519ce89a71fa67574ac25/gromgull},
eprint = {cs/0412098},
interhash = {d0a6d81e08a236b41c69d12bad6406de},
intrahash = {50308d5168f519ce89a71fa67574ac25},
keywords = {distance-measure google machine-learning},
month = mar,
note = {v2},
notes = {ACM-class: I.2.4; I.2.7
Date (v1): Tue, 21 Dec 2004 16:05:36 GMT (127kb,S) Date
(revised v2): Tue, 15 Mar 2005 16:53:43 GMT
(58kb)
cited by \cite{graham-rowe:2005:complearn}
Code http://www.complearn.org/},
number = {cs.CL/0412098},
pages = {370--383},
primaryclass = {cs.CL},
size = {31 pages},
timestamp = {2011-11-23T16:25:57.000+0100},
title = {Automatic Meaning Discovery Using {Google}},
url = {http://homepages.cwi.nl/~paulv/papers/amdug.pdf},
year = 2004
}