Detecting clusters or communities in large real-world graphs such as large
social or information networks is a problem of considerable interest. In
practice, one typically chooses an objective function that captures the
intuition of a network cluster as set of nodes with better internal
connectivity than external connectivity, and then one applies approximation
algorithms or heuristics to extract sets of nodes that are related to the
objective function and that "look like" good communities for the application of
interest. In this paper, we explore a range of network community detection
methods in order to compare them and to understand their relative performance
and the systematic biases in the clusters they identify. We evaluate several
common objective functions that are used to formalize the notion of a network
community, and we examine several different classes of approximation algorithms
that aim to optimize such objective functions. In addition, rather than simply
fixing an objective and asking for an approximation to the best cluster of any
size, we consider a size-resolved version of the optimization problem.
Considering community quality as a function of its size provides a much finer
lens with which to examine community detection algorithms, since objective
functions and approximation algorithms often have non-obvious size-dependent
behavior.
Description
Empirical Comparison of Algorithms for Network Community Detection
%0 Generic
%1 Leskovec2010
%A Leskovec, Jure
%A Lang, Kevin J.
%A Mahoney, Michael W.
%D 2010
%K clustering community comparision empirical evaluation graph toread
%T Empirical Comparison of Algorithms for Network Community Detection
%U http://arxiv.org/abs/1004.3539
%X Detecting clusters or communities in large real-world graphs such as large
social or information networks is a problem of considerable interest. In
practice, one typically chooses an objective function that captures the
intuition of a network cluster as set of nodes with better internal
connectivity than external connectivity, and then one applies approximation
algorithms or heuristics to extract sets of nodes that are related to the
objective function and that "look like" good communities for the application of
interest. In this paper, we explore a range of network community detection
methods in order to compare them and to understand their relative performance
and the systematic biases in the clusters they identify. We evaluate several
common objective functions that are used to formalize the notion of a network
community, and we examine several different classes of approximation algorithms
that aim to optimize such objective functions. In addition, rather than simply
fixing an objective and asking for an approximation to the best cluster of any
size, we consider a size-resolved version of the optimization problem.
Considering community quality as a function of its size provides a much finer
lens with which to examine community detection algorithms, since objective
functions and approximation algorithms often have non-obvious size-dependent
behavior.
@misc{Leskovec2010,
abstract = { Detecting clusters or communities in large real-world graphs such as large
social or information networks is a problem of considerable interest. In
practice, one typically chooses an objective function that captures the
intuition of a network cluster as set of nodes with better internal
connectivity than external connectivity, and then one applies approximation
algorithms or heuristics to extract sets of nodes that are related to the
objective function and that "look like" good communities for the application of
interest. In this paper, we explore a range of network community detection
methods in order to compare them and to understand their relative performance
and the systematic biases in the clusters they identify. We evaluate several
common objective functions that are used to formalize the notion of a network
community, and we examine several different classes of approximation algorithms
that aim to optimize such objective functions. In addition, rather than simply
fixing an objective and asking for an approximation to the best cluster of any
size, we consider a size-resolved version of the optimization problem.
Considering community quality as a function of its size provides a much finer
lens with which to examine community detection algorithms, since objective
functions and approximation algorithms often have non-obvious size-dependent
behavior.
},
added-at = {2010-04-28T20:43:59.000+0200},
author = {Leskovec, Jure and Lang, Kevin J. and Mahoney, Michael W.},
biburl = {https://www.bibsonomy.org/bibtex/2410a9cbea51ea5dd3c56aad26a0e11b2/hotho},
description = {Empirical Comparison of Algorithms for Network Community Detection},
interhash = {0e58de655596b2198f4a7001facd0c32},
intrahash = {410a9cbea51ea5dd3c56aad26a0e11b2},
keywords = {clustering community comparision empirical evaluation graph toread},
note = {cite arxiv:1004.3539
},
timestamp = {2010-04-28T20:43:59.000+0200},
title = {Empirical Comparison of Algorithms for Network Community Detection},
url = {http://arxiv.org/abs/1004.3539},
year = 2010
}