Scaling to very very large corpora for natural language disambiguation
M. Banko, and E. Brill. ACL '01: Proceedings of the 39th Annual Meeting on Association for Computational Linguistics, page 26--33. Morristown, NJ, USA, Association for Computational Linguistics, (2001)
DOI: http://dx.doi.org/10.3115/1073012.1073017
Description
With a billion word corpus, your algorithm doesn't matter - and you can skip all your clever tricks.
Also, active learning works better with huge data sets to pick interesting examples from.
%0 Conference Paper
%1 1073017
%A Banko, Michele
%A Brill, Eric
%B ACL '01: Proceedings of the 39th Annual Meeting on Association for Computational Linguistics
%C Morristown, NJ, USA
%D 2001
%I Association for Computational Linguistics
%K data machine-learning more-data natural-language-processing
%P 26--33
%R http://dx.doi.org/10.3115/1073012.1073017
%T Scaling to very very large corpora for natural language disambiguation
@comment{Banko and Brill (2001), conference paper in the ACL '01 proceedings.
  doi holds the bare identifier with no resolver prefix, so styles can build the link themselves.
  address is the publisher's city; venue is the city recorded for the meeting.
  added-at, biburl, description, interhash, intrahash, keywords and timestamp
  are carried over unchanged from the BibSonomy export.}
@inproceedings{1073017,
  author      = {Banko, Michele and Brill, Eric},
  title       = {Scaling to very very large corpora for natural language disambiguation},
  booktitle   = {{ACL} '01: Proceedings of the 39th Annual Meeting on Association for Computational Linguistics},
  pages       = {26--33},
  publisher   = {Association for Computational Linguistics},
  address     = {Morristown, NJ, USA},
  venue       = {Toulouse, France},
  year        = {2001},
  doi         = {10.3115/1073012.1073017},
  keywords    = {data machine-learning more-data natural-language-processing},
  description = {With a billion word corpus, your algorithm doesn't matter - and you can skip all your clever tricks.
Also, active learning works better with huge data sets to pick interesting examples from. },
  biburl      = {https://www.bibsonomy.org/bibtex/27d0bd5b964c1fc33bd303c0ecb143d47/gromgull},
  interhash   = {6b6b98539e848e6d0fb9b427be12dd9e},
  intrahash   = {7d0bd5b964c1fc33bd303c0ecb143d47},
  added-at    = {2011-10-14T15:24:09.000+0200},
  timestamp   = {2011-10-14T15:24:09.000+0200},
}