Many stream classification algorithms use the Hoeffding Inequality to identify the best split attribute during tree induction.
We show that the prerequisites of the Inequality are violated by these algorithms, and we propose corrective steps. The new stream classification core, correctedVFDT, satisfies the prerequisites of the Hoeffding Inequality and thus provides the expected performance guarantees.
The goal of our work is not to improve accuracy, but to guarantee a reliable and interpretable error bound. Nonetheless, we show that our solution achieves lower error rates regarding split attributes and sooner split decisions while maintaining a similar level of accuracy.
%0 Book Section
%1 noKey
%A Matuszyk, Pawel
%A Krempl, Georg
%A Spiliopoulou, Myra
%B Advances in Intelligent Data Analysis XII
%D 2013
%E Tucker, Allan
%E Höppner, Frank
%E Siebes, Arno
%E Swift, Stephen
%I Springer Berlin Heidelberg
%K from:matuszyk
%P 298-309
%R 10.1007/978-3-642-41398-8_26
%T Correcting the Usage of the Hoeffding Inequality in Stream Mining
%U https://kmd.cs.ovgu.de/pub/matuszyk/MatuszykEtAl13.pdf
%V 8207
%X Many stream classification algorithms use the Hoeffding Inequality to identify the best split attribute during tree induction.
We show that the prerequisites of the Inequality are violated by these algorithms, and we propose corrective steps. The new stream classification core, correctedVFDT, satisfies the prerequisites of the Hoeffding Inequality and thus provides the expected performance guarantees.
The goal of our work is not to improve accuracy, but to guarantee a reliable and interpretable error bound. Nonetheless, we show that our solution achieves lower error rates regarding split attributes and sooner split decisions while maintaining a similar level of accuracy.
%@ 978-3-642-41397-1
% Chapter "Correcting the Usage of the Hoeffding Inequality in Stream Mining"
% (Matuszyk, Krempl, Spiliopoulou; 2013) in the edited volume
% "Advances in Intelligent Data Analysis XII", LNCS volume 8207.
% The citation key is kept exactly as exported so existing \cite commands keep working.
% Conventions applied below: page range written with "--", the proper noun in the
% title brace-protected against style down-casing, and the umlaut in the editor
% list written as a BibTeX special character so 8-bit BibTeX sorts it correctly.
% The fields added-at, biburl, interhash, intrahash and timestamp are not standard
% BibTeX fields and are ignored by standard styles; they are preserved as exported.
@incollection{noKey,
  abstract  = {Many stream classification algorithms use the Hoeffding Inequality to identify the best split attribute during tree induction.
We show that the prerequisites of the Inequality are violated by these algorithms, and we propose corrective steps. The new stream classification core, correctedVFDT, satisfies the prerequisites of the Hoeffding Inequality and thus provides the expected performance guarantees.
The goal of our work is not to improve accuracy, but to guarantee a reliable and interpretable error bound. Nonetheless, we show that our solution achieves lower error rates regarding split attributes and sooner split decisions while maintaining a similar level of accuracy.},
  added-at  = {2015-10-15T21:32:42.000+0200},
  author    = {Matuszyk, Pawel and Krempl, Georg and Spiliopoulou, Myra},
  biburl    = {https://www.bibsonomy.org/bibtex/2fb09ed0fcff3fdd9439fe68f381936db/kmd-ovgu},
  booktitle = {Advances in Intelligent Data Analysis XII},
  doi       = {10.1007/978-3-642-41398-8_26},
  editor    = {Tucker, Allan and H{\"o}ppner, Frank and Siebes, Arno and Swift, Stephen},
  interhash = {7c61250bd3c9f634bd5f85946af660b8},
  intrahash = {fb09ed0fcff3fdd9439fe68f381936db},
  isbn      = {978-3-642-41397-1},
  keywords  = {from:matuszyk},
  language  = {English},
  pages     = {298--309},
  publisher = {Springer Berlin Heidelberg},
  series    = {Lecture Notes in Computer Science},
  timestamp = {2018-03-09T12:42:52.000+0100},
  title     = {Correcting the Usage of the {Hoeffding} Inequality in Stream Mining},
  url       = {https://kmd.cs.ovgu.de/pub/matuszyk/MatuszykEtAl13.pdf},
  volume    = {8207},
  year      = {2013}
}