We consider planning in a Markovian decision problem, i.e., the problem of finding a good policy given access to a generative model of the environment. We propose to use fitted Q-iteration with penalized (or regularized) least-squares regression as the regression subroutine to address the problem of controlling model-complexity. The algorithm is presented in detail for the case when the function space is a reproducing kernel Hilbert space underlying a user-chosen kernel function. We derive bounds on the quality of the solution and argue that data-dependent penalties can lead to almost optimal performance. A simple example is used to illustrate the benefits of using a penalized procedure.
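For readers who want a concrete picture of the procedure sketched in the abstract, below is a minimal Python illustration of regularized (penalized) fitted Q-iteration, with kernel ridge regression in the RKHS of a Gaussian kernel as the regression subroutine. This is a sketch under simplifying assumptions, not the authors' implementation: the generative-model helper sample_transitions, the class KernelRidgeQ, the fixed penalty coefficient reg_coef, and all hyper-parameter values are hypothetical, and the data-dependent choice of the penalty analysed in the paper is not reproduced here.

import numpy as np

def rbf_kernel(X, Y, bandwidth=1.0):
    # Gaussian (RBF) kernel matrix between the rows of X and the rows of Y.
    d2 = ((X[:, None, :] - Y[None, :, :]) ** 2).sum(axis=-1)
    return np.exp(-d2 / (2.0 * bandwidth ** 2))

class KernelRidgeQ:
    # Penalized least-squares fit of a Q-function in the RKHS of an RBF kernel.
    def __init__(self, reg_coef=1e-2, bandwidth=1.0):
        self.reg_coef = reg_coef
        self.bandwidth = bandwidth
        self.X = None
        self.alpha = None

    def fit(self, X, y):
        # Kernel ridge regression: alpha = (K + n * lambda * I)^{-1} y.
        n = X.shape[0]
        K = rbf_kernel(X, X, self.bandwidth)
        self.alpha = np.linalg.solve(K + n * self.reg_coef * np.eye(n), y)
        self.X = X
        return self

    def predict(self, X_new):
        if self.alpha is None:                     # before the first fit, Q is identically 0
            return np.zeros(X_new.shape[0])
        return rbf_kernel(X_new, self.X, self.bandwidth) @ self.alpha

def fitted_q_iteration(sample_transitions, actions, n_samples=500,
                       n_iterations=50, gamma=0.95):
    # Regularized fitted Q-iteration with a generative model.
    # sample_transitions(n) is assumed (hypothetically) to return arrays
    # (states, chosen_actions, rewards, next_states) of n transitions drawn
    # with the generative model of the MDP.
    q = KernelRidgeQ()
    states, acts, rewards, next_states = sample_transitions(n_samples)
    X = np.column_stack([states, acts])            # regressor input: (state, action)
    for _ in range(n_iterations):
        # Bellman targets: r + gamma * max over a' of Q_k(s', a').
        next_q = np.column_stack([
            q.predict(np.column_stack([next_states, np.full(len(next_states), a)]))
            for a in actions
        ])
        y = rewards + gamma * next_q.max(axis=1)
        q = KernelRidgeQ().fit(X, y)               # penalized regression step
    return q

# Example use with a hypothetical toy MDP (illustrative only):
#   q = fitted_q_iteration(my_generative_model, actions=[-1.0, +1.0])
#   greedy action at state s: argmax over a of q.predict(np.array([[s, a]]))

Reusing a single sample batch across iterations and fixing the penalty coefficient are simplifications of the setup; the paper analyses the penalized fitting step per iteration and argues that data-dependent penalties can lead to almost optimal performance.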
@inproceedings{farahmand2008,
author = {Farahmand, A.{m}. and Ghavamzadeh, M. and Szepesv{\'a}ri, {Cs}. and Mannor, S.},
booktitle = {EWRL},
crossref = {EWRL2008},
doi = {10.1007/978-3-540-89722-4_5},
keywords = {approximation, function iteration learning, nonparametrics, planning, regularization, reinforcement theory, value},
pages = {55--68},
pdf = {papers/RegFQI-Plan-EWRL08.pdf},
title = {Regularized Fitted {Q}-Iteration: Application to Planning},
year = 2008
}