We study an idealised sequential resource allocation problem. In each time step the learner
chooses an allocation of several resource types between a number of tasks. Assigning more
resources to a task increases the probability that it is completed. The problem is challenging
because the alignment of the tasks to the resource types is unknown and the feedback is noisy.
Our main contribution is the new setting and an algorithm with nearly-optimal
regret analysis. Along the way we draw connections to the problem of minimising regret
for stochastic linear bandits with heteroscedastic noise. We also present some new results for stochastic linear
bandits on the hypercube that significantly improve on existing work, especially in the sparse case.
%0 Conference Paper
%1 LaCrSze15
%A Lattimore, T.
%A Crammer, K.
%A Szepesvári, Cs.
%B NIPS
%D 2015
%K allocation, bandits, information, learning, linear, monitoring, online, partial, resource, stochastic, theory
%P 964--972
%T Linear Multi-Resource Allocation with Semi-Bandit Feedback
%X We study an idealised sequential resource allocation problem. In each time step the learner
chooses an allocation of several resource types between a number of tasks. Assigning more
resources to a task increases the probability that it is completed. The problem is challenging
because the alignment of the tasks to the resource types is unknown and the feedback is noisy.
Our main contribution is the new setting and an algorithm with nearly-optimal
regret analysis. Along the way we draw connections to the problem of minimising regret
for stochastic linear bandits with heteroscedastic noise. We also present some new results for stochastic linear
bandits on the hypercube that significantly improve on existing work, especially in the sparse case.
@inproceedings{LaCrSze15,
  abstract      = {We study an idealised sequential resource allocation problem. In each time step the learner
chooses an allocation of several resource types between a number of tasks. Assigning more
resources to a task increases the probability that it is completed. The problem is challenging
because the alignment of the tasks to the resource types is unknown and the feedback is noisy.
Our main contribution is the new setting and an algorithm with nearly-optimal
regret analysis. Along the way we draw connections to the problem of minimising regret
for stochastic linear bandits with heteroscedastic noise. We also present some new results for stochastic linear
bandits on the hypercube that significantly improve on existing work, especially in the sparse case.},
  added-at      = {2020-03-17T03:03:01.000+0100},
  author        = {Lattimore, Tor and Crammer, Koby and Szepesv{\'a}ri, Csaba},
  biburl        = {https://www.bibsonomy.org/bibtex/23c296df603b0d160d92a934c4d2d786c/csaba},
  booktitle     = {Advances in Neural Information Processing Systems 28 ({NIPS})},
  date-added    = {2015-12-02 01:31:55 +0000},
  date-modified = {2016-08-01 03:14:20 +0000},
  interhash     = {92f8d298a6ab4513a62caba907d720c8},
  intrahash     = {3c296df603b0d160d92a934c4d2d786c},
  keywords      = {allocation, bandits, information, learning, linear, monitoring, online, partial, resource, stochastic, theory},
  month         = dec,
  pages         = {964--972},
  pdf           = {papers/NIPS15-mr-bandit.pdf},
  timestamp     = {2020-03-17T03:03:01.000+0100},
  title         = {Linear Multi-Resource Allocation with Semi-Bandit Feedback},
  year          = {2015},
}