Y. Yang, J. Zhang, J. Carbonell, and C. Jin. KDD '02: Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining, pages 688--693. New York, NY, USA, ACM (2002)
DOI: 10.1145/775047.775150
Abstract
Automated detection of the first document reporting each new event in temporally-sequenced streams of documents is an open challenge. In this paper we propose a new approach which addresses this problem in two stages: 1) using a supervised learning algorithm to classify the on-line document stream into pre-defined broad topic categories, and 2) performing topic-conditioned novelty detection for documents in each topic. We also focus on exploiting named-entities for event-level novelty detection and using feature-based heuristics derived from the topic histories. Evaluating these methods using a set of broadcast news stories, our results show substantial performance gains over the traditional one-level approach to the novelty detection problem.
%0 Conference Paper
%1 citeulike:1925072
%A Yang, Yiming
%A Zhang, Jian
%A Carbonell, Jaime
%A Jin, Chun
%B KDD '02: Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining
%C New York, NY, USA
%D 2002
%I ACM
%K named-entity news novelty
%P 688--693
%R 10.1145/775047.775150
%T Topic-conditioned novelty detection
%U http://dx.doi.org/10.1145/775047.775150
%X Automated detection of the first document reporting each new event in temporally-sequenced streams of documents is an open challenge. In this paper we propose a new approach which addresses this problem in two stages: 1) using a supervised learning algorithm to classify the on-line document stream into pre-defined broad topic categories, and 2) performing topic-conditioned novelty detection for documents in each topic. We also focus on exploiting named-entities for event-level novelty detection and using feature-based heuristics derived from the topic histories. Evaluating these methods using a set of broadcast news stories, our results show substantial performance gains over the traditional one-level approach to the novelty detection problem.
%@ 1-58113-567-X
% Conference paper exported via CiteULike/BibSonomy; the citation key is kept as exported.
% "address" holds the publisher's city and "venue" holds the conference city.
% The bookkeeping fields at the end (added-at, biburl, citeulike-*, interhash, ...) are
% export metadata carried over unchanged.
@inproceedings{citeulike:1925072,
  author               = {Yang, Yiming and Zhang, Jian and Carbonell, Jaime and Jin, Chun},
  title                = {Topic-Conditioned Novelty Detection},
  booktitle            = {{KDD} '02: Proceedings of the Eighth {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  venue                = {Edmonton, Alberta, Canada},
  pages                = {688--693},
  publisher            = {ACM},
  address              = {New York, NY, USA},
  year                 = {2002},
  isbn                 = {1-58113-567-X},
  doi                  = {10.1145/775047.775150},
  url                  = {http://dx.doi.org/10.1145/775047.775150},
  keywords             = {named-entity news novelty},
  abstract             = {Automated detection of the first document reporting each new event in temporally-sequenced streams of documents is an open challenge. In this paper we propose a new approach which addresses this problem in two stages: 1) using a supervised learning algorithm to classify the on-line document stream into pre-defined broad topic categories, and 2) performing topic-conditioned novelty detection for documents in each topic. We also focus on exploiting named-entities for event-level novelty detection and using feature-based heuristics derived from the topic histories. Evaluating these methods using a set of broadcast news stories, our results show substantial performance gains over the traditional one-level approach to the novelty detection problem.},
  added-at             = {2018-03-19T12:24:51.000+0100},
  biburl               = {https://www.bibsonomy.org/bibtex/2d41ab2d084174b5d304b1f9fa4434e30/aho},
  citeulike-article-id = {1925072},
  citeulike-linkout-0  = {http://portal.acm.org/citation.cfm?id=775047.775150},
  citeulike-linkout-1  = {http://dx.doi.org/10.1145/775047.775150},
  interhash            = {1f0f7b64ea4ec066e07f55dae1503375},
  intrahash            = {d41ab2d084174b5d304b1f9fa4434e30},
  posted-at            = {2008-09-18 20:03:22},
  priority             = {2},
  timestamp            = {2018-03-19T12:24:51.000+0100},
}