Publicly available labelled data sets are necessary for evaluating anomaly-based Intrusion Detection Systems (IDS). However, existing data sets are often not up-to-date or not yet published because of privacy concerns. This paper identifies requirements for good data sets and proposes an approach for their generation. The key idea is to use a test environment and emulate realistic user behaviour with parameterised scripts on the clients. Comprehensive logging mechanisms provide additional information which may be used for a better understanding of the inner dynamics of an IDS. Finally, the proposed approach is used to generate the flow-based CIDDS-002 data set.
%0 Journal Article
%1 ring2017creation
%A Ring, Markus
%A Wunderlich, Sarah
%A Grüdl, Dominik
%A Landes, Dieter
%A Hotho, Andreas
%D 2017
%J Journal of Information Warfare
%K mr
%N 4
%P 41-54
%T Creation of Flow-Based Data Sets for Intrusion Detection
%U https://www.jinfowar.com/journal/volume-16-issue-4/creation-flow-based-data-sets-intrusion-detection
%V 16
%X Publicly available labelled data sets are necessary for evaluating anomaly-based Intrusion Detection Systems (IDS). However, existing data sets are often not up-to-date or not yet published because of privacy concerns. This paper identifies requirements for good data sets and proposes an approach for their generation. The key idea is to use a test environment and emulate realistic user behaviour with parameterised scripts on the clients. Comprehensive logging mechanisms provide additional information which may be used for a better understanding of the inner dynamics of an IDS. Finally, the proposed approach is used to generate the flow-based CIDDS-002 data set.
@comment{Ring et al. 2017, Journal of Information Warfare 16(4). The biburl field points to BibSonomy; added-at, biburl, interhash, intrahash and timestamp look like BibSonomy bookkeeping fields. They are not standard BibTeX fields, so standard styles should ignore them. They are kept unchanged so the record can still be matched to its BibSonomy post.}
@article{ring2017creation,
  abstract  = {Publicly available labelled data sets are necessary for evaluating anomaly-based Intrusion Detection Systems (IDS). However, existing data sets are often not up-to-date or not yet published because of privacy concerns. This paper identifies requirements for good data sets and proposes an approach for their generation. The key idea is to use a test environment and emulate realistic user behaviour with parameterised scripts on the clients. Comprehensive logging mechanisms provide additional information which may be used for a better understanding of the inner dynamics of an IDS. Finally, the proposed approach is used to generate the flow-based CIDDS-002 data set.},
  added-at  = {2018-03-14T12:31:01.000+0100},
  author    = {Ring, Markus and Wunderlich, Sarah and Gr{\"u}dl, Dominik and Landes, Dieter and Hotho, Andreas},
  biburl    = {https://www.bibsonomy.org/bibtex/2d9d77561c5045bdc3c6d236f88d89a96/baywiss1},
  interhash = {7f7e9343f15548875591aeb845367b8a},
  intrahash = {d9d77561c5045bdc3c6d236f88d89a96},
  issn      = {1445-3312},
  journal   = {Journal of Information Warfare},
  keywords  = {mr},
  month     = dec,
  number    = {4},
  pages     = {41--54},
  timestamp = {2018-03-14T12:31:01.000+0100},
  title     = {Creation of Flow-Based Data Sets for Intrusion Detection},
  url       = {https://www.jinfowar.com/journal/volume-16-issue-4/creation-flow-based-data-sets-intrusion-detection},
  volume    = {16},
  year      = {2017},
}