% ---------------------------------------------------------------------------
% Dong and Khosla, "Revisiting Feature Selection with Data Complexity",
% IEEE BIBE 2020, pp. 211--216, DOI 10.1109/BIBE50027.2020.00042.
%
% The three entries below all share this DOI and the same interhash, i.e.
% they are three exports of one paper under different citation keys.
% All three keys are kept because documents may cite any of them (not
% verifiable from this file); cite only one of them per document to avoid
% the paper appearing several times in the bibliography.
%
% NOTE(review): the first entry has crossref = conf/bibe/2020; that parent
% entry is not visible in this section. Classic BibTeX requires the parent
% to appear after the child in the file -- confirm it exists.
% ---------------------------------------------------------------------------

@inproceedings{conf/bibe/DongK20,
  added-at  = {2022-06-23T00:00:00.000+0200},
  author    = {Dong, Ngan Thi and Khosla, Megha},
  biburl    = {https://www.bibsonomy.org/bibtex/2af558096f0222673b06c75ccb7123be7/dblp},
  booktitle = {BIBE},
  crossref  = {conf/bibe/2020},
  doi       = {10.1109/BIBE50027.2020.00042},
  ee        = {https://doi.org/10.1109/BIBE50027.2020.00042},
  interhash = {cde6de85e687a53f16f436b76f130036},
  intrahash = {af558096f0222673b06c75ccb7123be7},
  isbn      = {978-1-7281-9574-2},
  keywords  = {dblp},
  pages     = {211--216},
  publisher = {IEEE},
  timestamp = {2024-04-09T12:49:20.000+0200},
  title     = {Revisiting Feature Selection with Data Complexity},
  url       = {http://dblp.uni-trier.de/db/conf/bibe/bibe2020.html#DongK20},
  year      = 2020
}

@inproceedings{9288146,
  abstract    = {The identification of biomarkers or predictive features that are indicative of a specific biological or disease state is a major research topic in biomedical applications. Several feature selection (FS) methods ranging from simple univariate methods to recent deep-learning methods have been proposed to select a minimal set of the most predictive features. However, the main question of which method to use when remains unanswered. We study the above problem from the perspective of data complexity and ask if data complexity measures can be used to guide the selection of the most-suitable method. We perform a comparative study of 11 feature selection methods over 27 publicly available datasets evaluated over a range of the number of selected features using classification as the downstream task. We (empirically) show that as regard to classification, the performance of all studied feature selection methods is highly correlated with the error rate of a nearest-neighbor based classifier. We also argue about the non-suitability of studied complexity measures to determine the optimal number of relevant features.
While looking closely at several other aspects, we provide recommendations for choosing a particular FS method for a given dataset.},
  added-at    = {2021-03-03T17:25:48.000+0100},
  author      = {Dong, Ngan Thi and Khosla, Megha},
  biburl      = {https://www.bibsonomy.org/bibtex/2b0c8388b8c6df126161228cde8853ac6/ngandong},
  booktitle   = {2020 IEEE 20th International Conference on Bioinformatics and Bioengineering (BIBE)},
  description = {Revisiting Feature Selection with Data Complexity - IEEE Conference Publication},
  doi         = {10.1109/BIBE50027.2020.00042},
  interhash   = {cde6de85e687a53f16f436b76f130036},
  intrahash   = {b0c8388b8c6df126161228cde8853ac6},
  issn        = {2471-7819},
  keywords    = {2020 PRESENt myown},
  month       = oct,
  pages       = {211--216},
  publisher   = {IEEE},
  timestamp   = {2021-03-03T17:25:48.000+0100},
  title       = {Revisiting Feature Selection with Data Complexity},
  url         = {https://ieeexplore.ieee.org/document/9288146},
  year        = 2020
}

@inproceedings{dong2020,
  added-at  = {2020-09-17T10:56:32.000+0200},
  author    = {Dong, Ngan Thi and Khosla, Megha},
  biburl    = {https://www.bibsonomy.org/bibtex/258c1637456986e9a83a82817895b4581/khosla},
  booktitle = {Proceedings of the 20th IEEE International Conference on Bioinformatics and Bioengineering, BIBE 2020},
  doi       = {10.1109/BIBE50027.2020.00042},
  interhash = {cde6de85e687a53f16f436b76f130036},
  intrahash = {58c1637456986e9a83a82817895b4581},
  keywords  = {myown},
  pages     = {211--216},
  publisher = {IEEE},
  timestamp = {2021-03-04T14:35:07.000+0100},
  title     = {Revisiting Feature Selection with Data Complexity},
  year      = 2020
}