We study a novel multi-armed bandit problem that models the
challenge faced by a company wishing to explore new strategies to
maximize revenue whilst simultaneously maintaining their revenue
above a fixed baseline, uniformly over time. While previous work
addressed the problem under the weaker requirement of maintaining
the revenue constraint only at a given fixed time in the future, the
algorithms previously proposed are, by design, unsuitable
under the more stringent constraints. We consider both the
stochastic and the adversarial settings, where we propose natural,
yet novel strategies and analyze the price for maintaining the
constraints. Amongst other things, we prove both high probability
and expectation bounds on the regret, while we also consider
the problem of maintaining the constraints either with high
probability or in expectation. For the adversarial setting, the price of maintaining
the constraint appears to be higher, at least for the algorithm
considered. A lower bound is given showing that the algorithm for
the stochastic setting is almost optimal. Empirical results
obtained in synthetic environments complement our theoretical
findings.
%0 Conference Paper
%1 SWLSz16:Conservative
%A Shariff, R.
%A Wu, Y.
%A Lattimore, T.
%A Szepesvári, Cs.
%B ICML
%D 2016
%K bandits, constrained, finite-armed, learning, online, stochastic, theory
%P 1254--1262
%T Conservative Bandits
%X We study a novel multi-armed bandit problem that models the
challenge faced by a company wishing to explore new strategies to
maximize revenue whilst simultaneously maintaining their revenue
above a fixed baseline, uniformly over time. While previous work
addressed the problem under the weaker requirement of maintaining
the revenue constraint only at a given fixed time in the future, the
algorithms previously proposed are, by design, unsuitable
under the more stringent constraints. We consider both the
stochastic and the adversarial settings, where we propose natural,
yet novel strategies and analyze the price for maintaining the
constraints. Amongst other things, we prove both high probability
and expectation bounds on the regret, while we also consider
the problem of maintaining the constraints either with high
probability or in expectation. For the adversarial setting, the price of maintaining
the constraint appears to be higher, at least for the algorithm
considered. A lower bound is given showing that the algorithm for
the stochastic setting is almost optimal. Empirical results
obtained in synthetic environments complement our theoretical
findings.
@inproceedings{SWLSz16:Conservative,
  abstract      = {We study a novel multi-armed bandit problem that models the
    challenge faced by a company wishing to explore new strategies to
    maximize revenue whilst simultaneously maintaining their revenue
    above a fixed baseline, uniformly over time. While previous work
    addressed the problem under the weaker requirement of maintaining
    the revenue constraint only at a given fixed time in the future,
    the algorithms previously proposed are, by design, unsuitable
    under the more stringent constraints. We consider both the
    stochastic and the adversarial settings, where we propose natural,
    yet novel strategies and analyze the price for maintaining the
    constraints. Amongst other things, we prove both high probability
    and expectation bounds on the regret, while we also consider
    the problem of maintaining the constraints either with high
    probability or in expectation. For the adversarial setting, the
    price of maintaining the constraint appears to be higher, at least
    for the algorithm considered. A lower bound is given showing that
    the algorithm for the stochastic setting is almost optimal.
    Empirical results obtained in synthetic environments complement
    our theoretical findings.},
  added-at      = {2020-03-17T03:03:01.000+0100},
  author        = {Shariff, Roshan and Wu, Yifan and Lattimore, Tor and Szepesv{\'a}ri, Csaba},
  biburl        = {https://www.bibsonomy.org/bibtex/2b8fbf11a7a656b613474dfc8de7c087e/csaba},
  booktitle     = {Proceedings of the 33rd International Conference on Machine Learning ({ICML})},
  date-added    = {2016-05-09 08:37:59 +0000},
  date-modified = {2016-07-29 14:18:10 +0000},
  interhash     = {2ee9bb1a303c87f107e2ff35b224f623},
  intrahash     = {b8fbf11a7a656b613474dfc8de7c087e},
  keywords      = {bandits, constrained, finite-armed, learning, online, stochastic, theory},
  pages         = {1254--1262},
  pdf           = {papers/ICML16-cbandit.pdf},
  timestamp     = {2020-03-17T03:03:01.000+0100},
  title         = {Conservative Bandits},
  year          = {2016},
}