We consider a setting where multiple players sequentially choose among a
common set of actions (arms). Motivated by a cognitive radio networks
application, we assume that players incur a loss upon colliding, and that
communication between players is not possible. Existing approaches assume that
the system is stationary. Yet this assumption is often violated in practice,
e.g., due to signal strength fluctuations. In this work, we design the first
Multi-player Bandit algorithm that provably works in arbitrarily changing
environments, where the losses of the arms may even be chosen by an adversary.
This resolves an open problem posed by Rosenski, Shamir, and Szlak (2016).
Description
[1902.08036] Multi-Player Bandits: The Adversarial Case
%0 Journal Article
%1 alatur2019multiplayer
%A Alatur, Pragnya
%A Levy, Kfir Y.
%A Krause, Andreas
%D 2019
%K bandits optimization
%T Multi-Player Bandits: The Adversarial Case
%U http://arxiv.org/abs/1902.08036
%X We consider a setting where multiple players sequentially choose among a
common set of actions (arms). Motivated by a cognitive radio networks
application, we assume that players incur a loss upon colliding, and that
communication between players is not possible. Existing approaches assume that
the system is stationary. Yet this assumption is often violated in practice,
e.g., due to signal strength fluctuations. In this work, we design the first
Multi-player Bandit algorithm that provably works in arbitrarily changing
environments, where the losses of the arms may even be chosen by an adversary.
This resolves an open problem posed by Rosenski, Shamir, and Szlak (2016).
@comment{arXiv preprint (no journal), so @misc + eprint fields rather than
  @article, which would warn about a missing required journal field.
  Citation key is unchanged. Title words that must keep their capitals under
  sentence-casing styles are brace-protected.}
@misc{alatur2019multiplayer,
  abstract      = {We consider a setting where multiple players sequentially choose among a
common set of actions (arms). Motivated by a cognitive radio networks
application, we assume that players incur a loss upon colliding, and that
communication between players is not possible. Existing approaches assume that
the system is stationary. Yet this assumption is often violated in practice,
e.g., due to signal strength fluctuations. In this work, we design the first
Multi-player Bandit algorithm that provably works in arbitrarily changing
environments, where the losses of the arms may even be chosen by an adversary.
This resolves an open problem posed by Rosenski, Shamir, and Szlak (2016).},
  added-at      = {2019-06-02T22:20:31.000+0200},
  archiveprefix = {arXiv},
  author        = {Alatur, Pragnya and Levy, Kfir Y. and Krause, Andreas},
  biburl        = {https://www.bibsonomy.org/bibtex/2c61263ba59c4f8b2000cddf499d36949/kirk86},
  description   = {[1902.08036] Multi-Player Bandits: The Adversarial Case},
  eprint        = {1902.08036},
  interhash     = {64b1657c2627566d65e488aee5f860aa},
  intrahash     = {c61263ba59c4f8b2000cddf499d36949},
  keywords      = {bandits optimization},
  timestamp     = {2019-06-02T22:20:31.000+0200},
  title         = {{Multi-Player} Bandits: The {Adversarial} Case},
  url           = {http://arxiv.org/abs/1902.08036},
  year          = {2019},
}