Linear classifiers have been shown to be effective for many discrimination tasks. Irrespective of the learning algorithm itself, the final classifier has a weight to multiply by each feature. This suggests that ideally each input feature should be linearly correlated with the target variable (or anti-correlated), whereas raw features may be highly non-linear. In this paper, we attempt to re-shape each input feature so that it is appropriate to use with a linear weight and to scale the different features in proportion to their predictive value. We demonstrate that this pre-processing is beneficial for linear SVM classifiers on a large benchmark of text classification tasks as well as UCI datasets.
%0 Conference Paper
%1 Forman09featureShaping
%A Forman, George
%A Scholz, Martin
%A Rajaram, Shyamsundar
%B KDD '09: Proceedings of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining
%C New York, NY, USA
%D 2009
%I ACM
%K 09 Forman SVM classification feature selection shaping text
%P 299--308
%R 10.1145/1557019.1557057
%T Feature shaping for linear SVM classifiers
%U http://portal.acm.org/citation.cfm?id=1557057&dl=ACM
%X Linear classifiers have been shown to be effective for many discrimination tasks. Irrespective of the learning algorithm itself, the final classifier has a weight to multiply by each feature. This suggests that ideally each input feature should be linearly correlated with the target variable (or anti-correlated), whereas raw features may be highly non-linear. In this paper, we attempt to re-shape each input feature so that it is appropriate to use with a linear weight and to scale the different features in proportion to their predictive value. We demonstrate that this pre-processing is beneficial for linear SVM classifiers on a large benchmark of text classification tasks as well as UCI datasets.
%@ 978-1-60558-495-9
@inproceedings{Forman09featureShaping,
  abstract    = {Linear classifiers have been shown to be effective for many discrimination tasks. Irrespective of the learning algorithm itself, the final classifier has a weight to multiply by each feature. This suggests that ideally each input feature should be linearly correlated with the target variable (or anti-correlated), whereas raw features may be highly non-linear. In this paper, we attempt to re-shape each input feature so that it is appropriate to use with a linear weight and to scale the different features in proportion to their predictive value. We demonstrate that this pre-processing is beneficial for linear SVM classifiers on a large benchmark of text classification tasks as well as UCI datasets.},
  added-at    = {2010-01-14T17:55:54.000+0100},
  address     = {New York, NY, USA},
  author      = {Forman, George and Scholz, Martin and Rajaram, Shyamsundar},
  biburl      = {https://www.bibsonomy.org/bibtex/26eab469522a9f90fe40f3837b7a9cfd3/lee_peck},
  booktitle   = {KDD '09: Proceedings of the 15th {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  description = {Feature shaping for linear SVM classifiers},
  doi         = {10.1145/1557019.1557057},
  interhash   = {6be034bd1eca68dcfe9aef7e1be479b5},
  intrahash   = {6eab469522a9f90fe40f3837b7a9cfd3},
  isbn        = {978-1-60558-495-9},
  keywords    = {09 Forman SVM classification feature selection shaping text},
  location    = {Paris, France},
  pages       = {299--308},
  publisher   = {ACM},
  timestamp   = {2010-01-14T17:55:54.000+0100},
  title       = {Feature Shaping for Linear {SVM} Classifiers},
  url         = {http://portal.acm.org/citation.cfm?id=1557057&dl=ACM},
  year        = {2009},
}