The k-Nearest Neighbor (kNN) classification approach is conceptually simple — yet widely applied since it often performs well in practical applications. However, using a global constant k does not always provide an optimal solution, e.g., for datasets with an irregular density distribution of data points. This paper proposes an adaptive kNN classifier where k is chosen dynamically for each instance (point) to be classified, such that the expected accuracy of classification is maximized. We define the expected accuracy as the accuracy of a set of structurally similar observations. An arbitrary similarity function can be used to find these observations. We introduce and evaluate different similarity functions. For the evaluation, we use five different classification tasks based on geo-spatial data. Each classification task consists of (tens of) thousands of items. We demonstrate that the presented expected accuracy measures can be a good estimator for kNN performance, and that the proposed adaptive kNN classifier outperforms common kNN and previously introduced adaptive kNN algorithms. Also, we show that the range of considered k can be significantly reduced to speed up the algorithm without negative influence on classification accuracy.
Описание
impactfactor = {0.995},
impactfactor-year = 2019,
impactfactor-source = {https://www.scimagojr.com/journalsearch.php?q=89358&tip=sid&clean=0},
impactfactor-notes = {See "Citations per Document"; The two years line is equivalent to journal impact factor (Thomson Reuters) metric.}
%0 Conference Paper
%1 kibanov2018adaptive
%A Kibanov, Mark
%A Becker, Martin
%A Mueller, Juergen
%A Atzmueller, Martin
%A Hotho, Andreas
%A Stumme, Gerd
%B Proceedings of the 33rd Annual ACM Symposium on Applied Computing
%C New York, NY, USA
%D 2018
%I ACM
%K accuracy adaptive eva everyaware expected geo kassel knn myown spatial
%P 857--865
%R 10.1145/3167132.3167226
%T Adaptive kNN Using Expected Accuracy for Classification of Geo-spatial Data
%U http://doi.acm.org/10.1145/3167132.3167226
%X The k-Nearest Neighbor (kNN) classification approach is conceptually simple - yet widely applied since it often performs well in practical applications. However, using a global constant k does not always provide an optimal solution, e. g., for datasets with an irregular density distribution of data points. This paper proposes an adaptive kNN classifier where k is chosen dynamically for each instance (point) to be classified, such that the expected accuracy of classification is maximized. We define the expected accuracy as the accuracy of a set of structurally similar observations. An arbitrary similarity function can be used to find these observations. We introduce and evaluate different similarity functions. For the evaluation, we use five different classification tasks based on geo-spatial data. Each classification task consists of (tens of) thousands of items. We demonstrate, that the presented expected accuracy measures can be a good estimator for kNN performance, and the proposed adaptive kNN classifier outperforms common kNN and previously introduced adaptive kNN algorithms. Also, we show that the range of considered k can be significantly reduced to speed up the algorithm without negative influence on classification accuracy.
%ISBN 978-1-4503-5191-1
@inproceedings{kibanov2018adaptive,
  abstract     = {The k-Nearest Neighbor (kNN) classification approach is conceptually simple - yet widely applied since it often performs well in practical applications. However, using a global constant k does not always provide an optimal solution, e. g., for datasets with an irregular density distribution of data points. This paper proposes an adaptive kNN classifier where k is chosen dynamically for each instance (point) to be classified, such that the expected accuracy of classification is maximized. We define the expected accuracy as the accuracy of a set of structurally similar observations. An arbitrary similarity function can be used to find these observations. We introduce and evaluate different similarity functions. For the evaluation, we use five different classification tasks based on geo-spatial data. Each classification task consists of (tens of) thousands of items. We demonstrate, that the presented expected accuracy measures can be a good estimator for kNN performance, and the proposed adaptive kNN classifier outperforms common kNN and previously introduced adaptive kNN algorithms. Also, we show that the range of considered k can be significantly reduced to speed up the algorithm without negative influence on classification accuracy.},
  acmid        = {3167226},
  added-at     = {2018-08-09T21:41:25.000+0200},
  address      = {New York, NY, USA},
  author       = {Kibanov, Mark and Becker, Martin and Mueller, Juergen and Atzmueller, Martin and Hotho, Andreas and Stumme, Gerd},
  biburl       = {https://www.bibsonomy.org/bibtex/278823ec8f2c976d28ee85a3a8c72b8e5/becker},
  booktitle    = {Proceedings of the 33rd Annual {ACM} Symposium on Applied Computing},
  description  = {impactfactor = {0.995},
impactfactor-year = 2019,
impactfactor-source = {https://www.scimagojr.com/journalsearch.php?q=89358&tip=sid&clean=0},
impactfactor-notes = {See "Citations per Document"; The two years line is equivalent to journal impact factor (Thomson Reuters) metric.}},
  doi          = {10.1145/3167132.3167226},
  interhash    = {fc6e457c689c1fe36a1b6d037b495d80},
  intrahash    = {78823ec8f2c976d28ee85a3a8c72b8e5},
  isbn         = {978-1-4503-5191-1},
  keywords     = {accuracy adaptive eva everyaware expected geo kassel knn myown spatial},
  location     = {Pau, France},
  numpages     = {9},
  pages        = {857--865},
  publisher    = {ACM},
  series       = {SAC '18},
  timestamp    = {2022-02-22T00:01:32.000+0100},
  title        = {Adaptive {kNN} Using Expected Accuracy for Classification of {Geo-spatial} Data},
  url          = {https://doi.org/10.1145/3167132.3167226},
  year         = 2018
}