Spam, also known as Unsolicited Commercial Email (UCE), is the bane of email communication. Many data mining researchers have addressed the problem of detecting spam, generally by treating it as a static text classification problem. True <i>in vivo</i> spam filtering has characteristics that make it a rich and challenging domain for data mining. Indeed, real-world datasets with these characteristics are typically difficult to acquire and to share. This paper demonstrates some of these characteristics and argues that researchers should pursue <i>in vivo</i> spam filtering as an accessible domain for investigating them.
%0 Journal Article
%1 fawcett2003filtering
%8 December
%@ 1931-0145
%A Fawcett, Tom
%C New York, NY, USA
%D 2003
%I ACM
%J SIGKDD Explor. Newsl.
%K challenges spam spam-detection survey
%N 2
%P 140--148
%R 10.1145/980972.980990
%T "In vivo" spam filtering: a challenge problem for KDD
%U http://doi.acm.org/10.1145/980972.980990
%V 5
%X Spam, also known as Unsolicited Commercial Email (UCE), is the bane of email communication. Many data mining researchers have addressed the problem of detecting spam, generally by treating it as a static text classification problem. True <i>in vivo</i> spam filtering has characteristics that make it a rich and challenging domain for data mining. Indeed, real-world datasets with these characteristics are typically difficult to acquire and to share. This paper demonstrates some of these characteristics and argues that researchers should pursue <i>in vivo</i> spam filtering as an accessible domain for investigating them.
@comment{
  Fawcett (2003): journal article, SIGKDD Explor. Newsl. 5(2), pp. 140-148.
  The biburl field points at BibSonomy; acmid, added-at, biburl, description,
  interhash, intrahash, numpages and timestamp are bookkeeping fields that are
  not part of the classic BibTeX article schema.
  The braces inside the title keep the capital of "In" and the acronym "KDD"
  intact under styles that lowercase titles.
}
@article{fawcett2003filtering,
  abstract    = {Spam, also known as Unsolicited Commercial Email (UCE), is the bane of email communication. Many data mining researchers have addressed the problem of detecting spam, generally by treating it as a static text classification problem. True \emph{in vivo} spam filtering has characteristics that make it a rich and challenging domain for data mining. Indeed, real-world datasets with these characteristics are typically difficult to acquire and to share. This paper demonstrates some of these characteristics and argues that researchers should pursue \emph{in vivo} spam filtering as an accessible domain for investigating them.},
  acmid       = {980990},
  added-at    = {2012-02-13T23:20:51.000+0100},
  address     = {New York, NY, USA},
  author      = {Fawcett, Tom},
  biburl      = {https://www.bibsonomy.org/bibtex/2000eaa395ab34d01885bf70fbd9cd1ac/beate},
  description = {"In vivo" spam filtering},
  doi         = {10.1145/980972.980990},
  interhash   = {c506a35462b2a4acece0416cc16f2139},
  intrahash   = {000eaa395ab34d01885bf70fbd9cd1ac},
  issn        = {1931-0145},
  journal     = {SIGKDD Explor. Newsl.},
  keywords    = {challenges spam spam-detection survey},
  month       = dec,
  number      = {2},
  numpages    = {9},
  pages       = {140--148},
  publisher   = {ACM},
  timestamp   = {2012-02-13T23:20:51.000+0100},
  title       = {{``In vivo''} spam filtering: a challenge problem for {KDD}},
  url         = {http://doi.acm.org/10.1145/980972.980990},
  volume      = {5},
  year        = {2003},
}