A problem which arises in the course of research on mechanical translation is the prediction of dictionary size. This article investigates the relation between empirical frequency laws and the function V(n)-the expected number of different words in an n-word sample of text. It is found that the probability-law proposed by Joos (1936) yields results which do not check well with experiments, and it is concluded that some modification of it is necessary for the purpose of vocabulary prediction.
%0 Journal Article
%1 koutsoudas1957
%A Koutsoudas, Andreas
%D 1957
%I Linguistic Society of America
%J Language
%K d4.1 dictionary law linguistics size statistics tagora text words zipf
%N 4
%P 545-552
%T Mechanical Translation and Zipf's Law
%U http://links.jstor.org/sici?sici=0097-8507%28195710%2F12%2933%3A4%3C545%3AMTAZL%3E2.0.CO%3B2-Q
%V 33
%X A problem which arises in the course of research on mechanical translation is the prediction of dictionary size. This article investigates the relation between empirical frequency laws and the function V(n)-the expected number of different words in an n-word sample of text. It is found that the probability-law proposed by Joos (1936) yields results which do not check well with experiments, and it is concluded that some modification of it is necessary for the purpose of vocabulary prediction.
@comment{
  koutsoudas1957: journal article in Language 33(4), 1957.
  Bibliographic fields come first; the trailing block (keywords, added-at,
  timestamp, biburl, interhash, intrahash) is bookkeeping carried over from
  the BibSonomy export and is ignored by standard bibliography styles.
  "Zipf's" is brace-protected so sentence-casing styles keep the capital.
}
@article{koutsoudas1957,
  author    = {Koutsoudas, Andreas},
  title     = {Mechanical Translation and {Zipf's} Law},
  journal   = {Language},
  volume    = {33},
  number    = {4},
  pages     = {545--552},
  year      = {1957},
  publisher = {Linguistic Society of America},
  url       = {http://links.jstor.org/sici?sici=0097-8507%28195710%2F12%2933%3A4%3C545%3AMTAZL%3E2.0.CO%3B2-Q},
  abstract  = {A problem which arises in the course of research on mechanical translation is the prediction of dictionary size. This article investigates the relation between empirical frequency laws and the function V(n)---the expected number of different words in an n-word sample of text. It is found that the probability-law proposed by Joos (1936) yields results which do not check well with experiments, and it is concluded that some modification of it is necessary for the purpose of vocabulary prediction.},
  keywords  = {d4.1 dictionary law linguistics size statistics tagora text words zipf},
  added-at  = {2006-12-04T17:56:39.000+0100},
  timestamp = {2007-03-09T18:50:40.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/2d2041174b1dca948c7183399f9a76a20/andreab},
  interhash = {ec0508c04198cf5db4b823de959dc002},
  intrahash = {d2041174b1dca948c7183399f9a76a20},
}