Proportional k-Interval Discretization for Naive-Bayes Classifiers
Y. Yang and G. Webb. Lecture Notes in Computer Science 2167: Proceedings of the 12th European Conference on Machine Learning (ECML'01), pages 564-575. Springer-Verlag, Berlin/Heidelberg, 2001.
Abstract
This paper argues that two commonly used discretization approaches, fixed k-interval discretization and entropy-based discretization, have sub-optimal characteristics for naive-Bayes classification. This analysis leads to a new discretization method, Proportional k-Interval Discretization (PKID), which adjusts the number and size of the discretized intervals to the number of training instances, thus seeking an appropriate trade-off between the bias and variance of the probability estimation for naive-Bayes classifiers. We justify PKID in theory and test it on a wide cross-section of datasets. Our experimental results suggest that, in comparison to its alternatives, PKID provides naive-Bayes classifiers with competitive classification performance on smaller datasets and better classification performance on larger datasets.
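The trade-off the abstract describes can be illustrated in code. The sketch below shows equal-frequency binning in which both the number of intervals and the expected number of training values per interval grow roughly as the square root of the training-set size, which is the proportional rule PKID is built on. It is a minimal illustration, not the paper's implementation: the function names (pkid_cut_points, discretize) and the midpoint tie-breaking at block boundaries are illustrative choices.

import math

def pkid_cut_points(values):
    """Sketch of proportional k-interval discretization.

    With n training values, both the number of intervals and the
    approximate number of values per interval are set to sqrt(n),
    then equal-frequency binning yields the cut points.
    """
    n = len(values)
    k = max(1, int(math.sqrt(n)))   # number of intervals ~ sqrt(n)
    size = max(1, n // k)           # values per interval ~ sqrt(n)
    ordered = sorted(values)
    cuts = []
    # Place a cut between consecutive blocks of `size` sorted values.
    for i in range(size, n, size):
        if len(cuts) < k - 1:
            cuts.append((ordered[i - 1] + ordered[i]) / 2.0)
    return cuts

def discretize(value, cuts):
    """Map a numeric value to its interval index given cut points."""
    for i, c in enumerate(cuts):
        if value <= c:
            return i
    return len(cuts)

if __name__ == "__main__":
    import random
    random.seed(0)
    train = [random.gauss(0.0, 1.0) for _ in range(100)]
    cuts = pkid_cut_points(train)
    print(len(cuts) + 1, "intervals")        # 10 intervals for 100 values
    print("interval of 0.0:", discretize(0.0, cuts))

With more training data, each interval's probability estimate rests on more instances (lower variance) while the intervals themselves become narrower (lower bias), which is the behaviour the abstract attributes to PKID.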
@inproceedings{YangWebb01,
  author    = {Yang, Y. and Webb, G. I.},
  title     = {Proportional k-Interval Discretization for Naive-Bayes Classifiers},
  booktitle = {Lecture Notes in Computer Science 2167: Proceedings of the 12th European Conference on Machine Learning (ECML'01)},
  editor    = {De Raedt, L. and Flach, P. A.},
  location  = {Freiburg, Germany},
  pages     = {564--575},
  publisher = {Springer-Verlag},
  address   = {Berlin/Heidelberg},
  year      = {2001}
}