We study a sequential resource allocation problem involving a fixed number of recurring jobs.
At each time-step the manager should distribute available resources among the jobs in order to maximise the expected number of completed jobs.
Allocating more resources to a given job increases the probability
that it completes, but with a cut-off.
Specifically, we assume a linear
model where the probability increases linearly until it equals one, after which
allocating additional resources is wasteful.
We assume the difficulty of each job is unknown
and present the first algorithm for this problem and prove upper and lower bounds on its regret.
Despite its apparent simplicity, the problem has a rich structure:
we show that an appropriate optimistic algorithm can improve its learning speed dramatically
beyond the results one normally expects for similar problems
as the problem becomes resource-laden.
%0 Conference Paper
%1 LaCrSze14
%A Lattimore, T.
%A Crammer, K.
%A Szepesvári, Cs.
%B UAI
%D 2014
%K allocation bandits, information, learning, monitoring, online partial resource stochastic theory,
%P 477--486
%T Optimal Resource Allocation with Semi-Bandit Feedback
%X We study a sequential resource allocation problem involving a fixed number of recurring jobs.
At each time-step the manager should distribute available resources among the jobs in order to maximise the expected number of completed jobs.
Allocating more resources to a given job increases the probability
that it completes, but with a cut-off.
Specifically, we assume a linear
model where the probability increases linearly until it equals one, after which
allocating additional resources is wasteful.
We assume the difficulty of each job is unknown
and present the first algorithm for this problem and prove upper and lower bounds on its regret.
Despite its apparent simplicity, the problem has a rich structure:
we show that an appropriate optimistic algorithm can improve its learning speed dramatically
beyond the results one normally expects for similar problems
as the problem becomes resource-laden.
@inproceedings{LaCrSze14,
abstract = {We study a sequential resource allocation problem involving a fixed number of recurring jobs.
At each time-step the manager should distribute available resources among the jobs in order to maximise the expected number of completed jobs.
Allocating more resources to a given job increases the probability
that it completes, but with a cut-off.
Specifically, we assume a linear
model where the probability increases linearly until it equals one, after which
allocating additional resources is wasteful.
We assume the difficulty of each job is unknown
and present the first algorithm for this problem and prove upper and lower bounds on its regret.
Despite its apparent simplicity, the problem has a rich structure:
we show that an appropriate optimistic algorithm can improve its learning speed dramatically
beyond the results one normally expects for similar problems
as the problem becomes resource-laden.},
added-at = {2020-03-17T03:03:01.000+0100},
author = {Lattimore, T. and Crammer, K. and {Sz}epesv{\'a}ri, {Cs}.},
biburl = {https://www.bibsonomy.org/bibtex/253c10cf755a349501406fb36d8cd4fa5/csaba},
booktitle = {UAI},
date-added = {2014-07-17 20:16:41 -0700},
date-modified = {2015-12-02 01:33:33 +0000},
interhash = {f767a9026ebfe14cbdded70903232a25},
intrahash = {53c10cf755a349501406fb36d8cd4fa5},
keywords = {allocation bandits, information, learning, monitoring, online partial resource stochastic theory,},
pages = {477--486},
pdf = {papers/lcs14mem-alloc.pdf},
timestamp = {2020-03-17T03:03:01.000+0100},
title = {Optimal Resource Allocation with Semi-Bandit Feedback},
year = 2014
}