In a partial monitoring game, the learner repeatedly chooses an action, the
environment responds with an outcome, and then the learner suffers a loss and
receives a feedback signal, both of which are fixed functions of the action and
the outcome. The goal of the learner is to minimize his regret, which is the
difference between his total cumulative loss and the total loss of the best
fixed action in hindsight.
Assuming that the outcomes are generated in an i.i.d. fashion from an arbitrary and
unknown probability distribution, we characterize the minimax regret of any
partial monitoring game with finitely many actions and
outcomes. It turns out that the minimax regret of any such game is either zero,
Theta(T^{1/2}), Theta(T^{2/3}), or Theta(T). We provide a computationally efficient learning
algorithm that achieves the minimax regret within a logarithmic factor for any game.
%0 Conference Paper
%1 BaPaSze11
%A Bartók, G.
%A Pál, D.
%A Szepesvári, Cs.
%B COLT
%D 2011
%K game theory, online learning, partial information, theory
%P 133--154
%T Minimax Regret of Finite Partial-Monitoring Games in Stochastic Environments
%X In a partial monitoring game, the learner repeatedly chooses an action, the
environment responds with an outcome, and then the learner suffers a loss and
receives a feedback signal, both of which are fixed functions of the action and
the outcome. The goal of the learner is to minimize his regret, which is the
difference between his total cumulative loss and the total loss of the best
fixed action in hindsight.
Assuming that the outcomes are generated in an i.i.d. fashion from an arbitrary and
unknown probability distribution, we characterize the minimax regret of any
partial monitoring game with finitely many actions and
outcomes. It turns out that the minimax regret of any such game is either zero,
Theta(T^{1/2}), Theta(T^{2/3}), or Theta(T). We provide a computationally efficient learning
algorithm that achieves the minimax regret within a logarithmic factor for any game.
@inproceedings{BaPaSze11,
  abstract      = {In a partial monitoring game, the learner repeatedly chooses an action, the
environment responds with an outcome, and then the learner suffers a loss and
receives a feedback signal, both of which are fixed functions of the action and
the outcome. The goal of the learner is to minimize his regret, which is the
difference between his total cumulative loss and the total loss of the best
fixed action in hindsight.
Assuming that the outcomes are generated in an i.i.d. fashion from an arbitrary and
unknown probability distribution, we characterize the minimax regret of any
partial monitoring game with finitely many actions and
outcomes. It turns out that the minimax regret of any such game is either zero,
{$\Theta(T^{1/2})$}, {$\Theta(T^{2/3})$}, or {$\Theta(T)$}. We provide a computationally efficient learning
algorithm that achieves the minimax regret within a logarithmic factor for any game.},
  added-at      = {2020-03-17T03:03:01.000+0100},
  author        = {Bart{\'o}k, G{\'a}bor and P{\'a}l, D{\'a}vid and Szepesv{\'a}ri, Csaba},
  biburl        = {https://www.bibsonomy.org/bibtex/2f0111519df5aac01eb83974da5d0a97f/csaba},
  booktitle     = {Proceedings of the 24th Annual Conference on Learning Theory ({COLT})},
  date-added    = {2011-07-03 20:57:58 -0600},
  date-modified = {2012-06-03 14:10:11 -0600},
  interhash     = {f54107952268dbcb2ed86a6ca0060dca},
  intrahash     = {f0111519df5aac01eb83974da5d0a97f},
  keywords      = {game theory, online learning, partial information, theory},
  month         = jul,
  pages         = {133--154},
  pdf           = {papers/partmon_colt_final.pdf},
  timestamp     = {2020-03-17T03:03:01.000+0100},
  title         = {Minimax Regret of Finite Partial-Monitoring Games in Stochastic Environments},
  year          = {2011},
}