Machine learning applications often need large amounts of training data to perform well. Whereas unlabeled data can be easily gathered, the labeling process is difficult, time-consuming, or expensive in most applications. Active learning can help solve this problem by querying labels for those data points that will improve the performance the most. Thereby, the goal is that the learning algorithm performs sufficiently well with fewer labels. We provide a library called scikit-activeml that covers the most relevant query strategies and implements tools to work with partially labeled data. It is programmed in Python and builds on top of scikit-learn.
%0 Journal Article
%1 kottke2021scikit
%A Kottke, Daniel
%A Herde, Marek
%A Minh, Tuan Pham
%A Benz, Alexander
%A Mergard, Pascal
%A Roghman, Atal
%A Sandrock, Christoph
%A Sick, Bernhard
%D 2021
%J Preprints
%K imported itegpub isac-www
%P 2021030194
%T scikit-activeml: A Library and Toolbox for Active Learning Algorithms
%U https://www.preprints.org/manuscript/202103.0194/
%X Machine learning applications often need large amounts of training data to perform well. Whereas unlabeled data can be easily gathered, the labeling process is difficult, time-consuming, or expensive in most applications. Active learning can help solve this problem by querying labels for those data points that will improve the performance the most. Thereby, the goal is that the learning algorithm performs sufficiently well with fewer labels. We provide a library called scikit-activeml that covers the most relevant query strategies and implements tools to work with partially labeled data. It is programmed in Python and builds on top of scikit-learn.
@article{kottke2021scikit,
  author    = {Kottke, Daniel and Herde, Marek and Minh, Tuan Pham and Benz, Alexander and Mergard, Pascal and Roghman, Atal and Sandrock, Christoph and Sick, Bernhard},
  title     = {{scikit-activeml}: {A} Library and Toolbox for Active Learning Algorithms},
  journal   = {Preprints},
  year      = {2021},
  pages     = {2021030194},
  eid       = {2021030194},
  eprint    = {2021030194},
  url       = {https://www.preprints.org/manuscript/202103.0194/},
  codeurl   = {https://github.com/scikit-activeml/scikit-activeml},
  abstract  = {Machine learning applications often need large amounts of training data to perform well. Whereas unlabeled data can be easily gathered, the labeling process is difficult, time-consuming, or expensive in most applications. Active learning can help solve this problem by querying labels for those data points that will improve the performance the most. Thereby, the goal is that the learning algorithm performs sufficiently well with fewer labels. We provide a library called scikit-activeml that covers the most relevant query strategies and implements tools to work with partially labeled data. It is programmed in Python and builds on top of scikit-learn.},
  keywords  = {imported itegpub isac-www},
  added-at  = {2022-11-02T15:38:04.000+0100},
  timestamp = {2022-11-02T15:38:04.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/2a7db00caaf86181e6c70a90afc8cd20e/ies},
  interhash = {6b1f79d3366ca8c51dc338385417a776},
  intrahash = {a7db00caaf86181e6c70a90afc8cd20e},
}