Recent work has shown that combining multiple versions of unstable classifiers such as trees or neural nets results in reduced test set error. One of the more effective is bagging. Here, modified training sets are formed by resampling from the original training set, classifiers constructed using these training sets and then combined by voting. Freund and Schapire propose an algorithm the basis of which is to adaptively resample and combine (hence the acronym “arcing”) so that the weights in the resampling are increased for those cases most often misclassified and the combining is done by weighted voting. Arcing is more successful than bagging in test set error reduction. We explore two arcing algorithms, compare them to each other and to bagging, and try to understand how arcing works. We introduce the definitions of bias and variance for a classifier as components of the test set error. Unstable classifiers can have low bias on a large range of data sets. Their problem is high variance. Combining multiple versions either through bagging or arcing reduces variance significantly.
%0 Journal Article
%1 breiman_arcing_1998
%A Breiman, Leo
%D 1998
%J The Annals of Statistics
%K bagging, boosting, decision trees, ensemble methods, error-correcting output coding, Markov chain Monte Carlo, neural networks
%N 3
%P 801--849
%R 10.1214/aos/1024691079
%T Arcing classifier (with discussion and a rejoinder by the author)
%U http://projecteuclid.org/euclid.aos/1024691079
%V 26
%X Recent work has shown that combining multiple versions of unstable classifiers such as trees or neural nets results in reduced test set error. One of the more effective is bagging. Here, modified training sets are formed by resampling from the original training set, classifiers constructed using these training sets and then combined by voting. Freund and Schapire propose an algorithm the basis of which is to adaptively resample and combine (hence the acronym “arcing”) so that the weights in the resampling are increased for those cases most often misclassified and the combining is done by weighted voting. Arcing is more successful than bagging in test set error reduction. We explore two arcing algorithms, compare them to each other and to bagging, and try to understand how arcing works. We introduce the definitions of bias and variance for a classifier as components of the test set error. Unstable classifiers can have low bias on a large range of data sets. Their problem is high variance. Combining multiple versions either through bagging or arcing reduces variance significantly.
@article{breiman_arcing_1998,
  abstract  = {Recent work has shown that combining multiple versions of unstable classifiers such as trees or neural nets results in reduced test set error. One of the more effective is bagging. Here, modified training sets are formed by resampling from the original training set, classifiers constructed using these training sets and then combined by voting. Freund and Schapire propose an algorithm the basis of which is to adaptively resample and combine (hence the acronym ``arcing'') so that the weights in the resampling are increased for those cases most often misclassified and the combining is done by weighted voting. Arcing is more successful than bagging in test set error reduction. We explore two arcing algorithms, compare them to each other and to bagging, and try to understand how arcing works. We introduce the definitions of bias and variance for a classifier as components of the test set error. Unstable classifiers can have low bias on a large range of data sets. Their problem is high variance. Combining multiple versions either through bagging or arcing reduces variance significantly.},
  added-at  = {2017-01-09T13:57:26.000+0100},
  author    = {Breiman, Leo},
  biburl    = {https://www.bibsonomy.org/bibtex/256048c9cfaeb0a84e7a8f740b97f8c05/yourwelcome},
  doi       = {10.1214/aos/1024691079},
  interhash = {d6f0509dcad79e916b03962918a4482a},
  intrahash = {56048c9cfaeb0a84e7a8f740b97f8c05},
  issn      = {0090-5364, 2168-8966},
  journal   = {The Annals of Statistics},
  keywords  = {bagging, boosting, decision trees, ensemble methods, error-correcting output coding, Markov chain Monte Carlo, neural networks},
  month     = jun,
  mrnumber  = {MR1635406},
  number    = {3},
  pages     = {801--849},
  timestamp = {2017-01-09T14:01:11.000+0100},
  title     = {Arcing classifier (with discussion and a rejoinder by the author)},
  url       = {http://projecteuclid.org/euclid.aos/1024691079},
  urldate   = {2016-04-18},
  volume    = {26},
  year      = {1998},
}