We study the problem of regret minimization in partially observable linear
quadratic control systems when the model dynamics are unknown a priori. We
propose ExpCommit, an explore-then-commit algorithm that learns the model
Markov parameters and then follows the principle of optimism in the face of
uncertainty to design a controller. We propose a novel way to decompose the
regret and provide an end-to-end sublinear regret upper bound for partially
observable linear quadratic control. Finally, we provide stability guarantees
and establish a regret upper bound of $\tilde{\mathcal{O}}(T^{2/3})$ for
ExpCommit, where $T$ is the time horizon of the problem.
Description
[2002.00082] Regret Minimization in Partially Observable Linear Quadratic Control
%0 Journal Article
%1 lale2020regret
%A Lale, Sahin
%A Azizzadenesheli, Kamyar
%A Hassibi, Babak
%A Anandkumar, Anima
%D 2020
%K bounds control optimization theory
%T Regret Minimization in Partially Observable Linear Quadratic Control
%U http://arxiv.org/abs/2002.00082
%X We study the problem of regret minimization in partially observable linear
quadratic control systems when the model dynamics are unknown a priori. We
propose ExpCommit, an explore-then-commit algorithm that learns the model
Markov parameters and then follows the principle of optimism in the face of
uncertainty to design a controller. We propose a novel way to decompose the
regret and provide an end-to-end sublinear regret upper bound for partially
observable linear quadratic control. Finally, we provide stability guarantees
and establish a regret upper bound of $\tilde{\mathcal{O}}(T^{2/3})$ for
ExpCommit, where $T$ is the time horizon of the problem.
@comment{
  lale2020regret is an arXiv preprint, so it is typed as misc and carries its
  identifier in eprint/archiveprefix rather than in a free-form note.
  The fields added-at, biburl, description, interhash, intrahash, keywords and
  timestamp are bookmarking-service export metadata; standard styles ignore them.
}
@misc{lale2020regret,
  author        = {Lale, Sahin and Azizzadenesheli, Kamyar and Hassibi, Babak and Anandkumar, Anima},
  title         = {Regret Minimization in Partially Observable Linear Quadratic Control},
  year          = {2020},
  eprint        = {2002.00082},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2002.00082},
  abstract      = {We study the problem of regret minimization in partially observable linear
    quadratic control systems when the model dynamics are unknown a priori. We
    propose ExpCommit, an explore-then-commit algorithm that learns the model
    Markov parameters and then follows the principle of optimism in the face of
    uncertainty to design a controller. We propose a novel way to decompose the
    regret and provide an end-to-end sublinear regret upper bound for partially
    observable linear quadratic control. Finally, we provide stability guarantees
    and establish a regret upper bound of $\tilde{\mathcal{O}}(T^{2/3})$ for
    ExpCommit, where $T$ is the time horizon of the problem.},
  keywords      = {bounds control optimization theory},
  description   = {[2002.00082] Regret Minimization in Partially Observable Linear Quadratic Control},
  added-at      = {2020-05-22T03:38:51.000+0200},
  timestamp     = {2020-05-22T03:38:51.000+0200},
  biburl        = {https://www.bibsonomy.org/bibtex/2b73740f1eb9af8f5b8d105117ac53b30/kirk86},
  interhash     = {8bc2b6f85ced1b4a41d46b398ddb344c},
  intrahash     = {b73740f1eb9af8f5b8d105117ac53b30},
}