| Authors: |
Gianna M. Del Corso
and Antonio Gull\'{\i}
and Francesco Romani
|
| URL: |
http://www2005.org/cdrom/docs/p97.pdf |
| Tags: |
News
|
| Abstract: |
According to a recent survey made by Nielsen NetRatings, searching
on news articles is one of the most important activity online. Indeed,
Google, Yahoo, MSN and many others have proposed commercial search
engines for indexing news feeds. Despite this commercial interest,
no academic research has focused on ranking a stream of news articles
and a set of news sources. In this paper, we introduce this problem
by proposing a ranking framework which models: (1) the process of
generation of a stream of news articles, (2) the news articles clustering
by topics, and (3) the evolution of news story over the time. The
ranking algorithm proposed ranks news information, finding the most
authoritative news sources and identifying the most interesting events
in the different categories to which news article belongs. All these
ranking measures take in account the time and can be obtained without
a predefined sliding window of observation over the stream. The complexity
of our algorithm is linear in the number of pieces of news still
under consideration at the time of a new posting. This allow a continuous
on-line process of ranking. Our ranking framework is validated on
a collection of more than 300,000 pieces of news, produced in two
months by more then 2000 news sources belonging to 13 different categories
(World, U.S, Europe, Sports, Business, etc). This collection is extracted
from the index of comeToMyHead, an academic news search engine available
online. |
@inproceedings{Corso2005,
  title = {Ranking a stream of news},
  author = {Del Corso, Gianna M. and Gull{\'\i}, Antonio and Romani, Francesco},
  booktitle = {International Conference on World Wide Web},
  pages = {97--106},
  url = {http://www2005.org/cdrom/docs/p97.pdf},
  year = {2005},
  abstract = {According to a recent survey made by Nielsen NetRatings, searching
on news articles is one of the most important activity online. Indeed,
Google, Yahoo, MSN and many others have proposed commercial search
engines for indexing news feeds. Despite this commercial interest,
no academic research has focused on ranking a stream of news articles
and a set of news sources. In this paper, we introduce this problem
by proposing a ranking framework which models: (1) the process of
generation of a stream of news articles, (2) the news articles clustering
by topics, and (3) the evolution of news story over the time. The
ranking algorithm proposed ranks news information, finding the most
authoritative news sources and identifying the most interesting events
in the different categories to which news article belongs. All these
ranking measures take in account the time and can be obtained without
a predefined sliding window of observation over the stream. The complexity
of our algorithm is linear in the number of pieces of news still
under consideration at the time of a new posting. This allow a continuous
on-line process of ranking. Our ranking framework is validated on
a collection of more than 300,000 pieces of news, produced in two
months by more then 2000 news sources belonging to 13 different categories
(World, U.S, Europe, Sports, Business, etc). This collection is extracted
from the index of comeToMyHead, an academic news search engine available
online.},
  keywords = {News}
}