Reinforcement learning is the problem of generating optimal behavior in a sequential decision-making environment given the opportunity of interacting with it. Many algorithms for solving reinforcement-learning problems work by computing improved estimates of the optimal value function. We extend prior analyses of reinforcement-learning algorithms and present a powerful new theorem that can provide a unified analysis of value-function-based reinforcement-learning algorithms. The usefulness of the theorem lies in how it allows the asynchronous convergence of a complex reinforcement-learning algorithm to be proven by verifying that a simpler synchronous algorithm converges. We illustrate the application of the theorem by analyzing the convergence of Q-learning, model-based reinforcement learning, Q-learning with multi-state updates, Q-learning for Markov games, and risk-sensitive reinforcement learning.
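To make the abstract concrete: the best-known algorithm the paper covers is Q-learning. A minimal sketch of its update in standard textbook notation (the step size \alpha_t, discount \gamma, and operator T below are conventional symbols, assumed here rather than quoted from the paper):

% Asynchronous Q-learning update on an observed transition (s_t, a_t, r_t, s_{t+1}):
\[ Q_{t+1}(s_t,a_t) = (1-\alpha_t)\,Q_t(s_t,a_t) + \alpha_t\bigl(r_t + \gamma \max_{a'} Q_t(s_{t+1},a')\bigr) \]
% Its synchronous counterpart applies the Bellman optimality operator to every state-action pair at once:
\[ (TQ)(s,a) = \sum_{s'} P(s' \mid s,a)\bigl(R(s,a,s') + \gamma \max_{a'} Q(s',a')\bigr) \]
% T is a \gamma-contraction in the max norm, so synchronous iteration converges;
% the paper's theorem is what lifts such a synchronous guarantee to the
% asynchronous, sample-based update above.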
%0 Journal Article
%1 szepesvari1999
%A Szepesvári, Cs.
%A Littman, M.L.
%D 1999
%J Neural Computation
%K MDPs, approximation, asymptotic, convergence, finite, learning, reinforcement, stochastic, theory
%P 2017--2059
%T A Unified Analysis of Value-Function-Based Reinforcement-Learning Algorithms
%V 11
%X Reinforcement learning is the problem of generating optimal behavior in a sequential decision-making environment given the opportunity of interacting with it. Many algorithms for solving reinforcement-learning problems work by computing improved estimates of the optimal value function. We extend prior analyses of reinforcement-learning algorithms and present a powerful new theorem that can provide a unified analysis of value-function-based reinforcement-learning algorithms. The usefulness of the theorem lies in how it allows the asynchronous convergence of a complex reinforcement-learning algorithm to be proven by verifying that a simpler synchronous algorithm converges. We illustrate the application of the theorem by analyzing the convergence of Q-learning, model-based reinforcement learning, Q-learning with multi-state updates, Q-learning for Markov games, and risk-sensitive reinforcement learning.
@article{szepesvari1999,
abstract = {Reinforcement learning is the problem of generating optimal behavior in a sequential decision-making environment given the opportunity of interacting with it. Many algorithms for solving reinforcement-learning problems work by computing improved estimates of the optimal value function. We extend prior analyses of reinforcement-learning algorithms and present a powerful new theorem that can provide a unified analysis of value-function-based reinforcement-learning algorithms. The usefulness of the theorem lies in how it allows the asynchronous convergence of a complex reinforcement-learning algorithm to be proven by verifying that a simpler synchronous algorithm converges. We illustrate the application of the theorem by analyzing the convergence of Q-learning, model-based reinforcement learning, Q-learning with multi-state updates, Q-learning for Markov games, and risk-sensitive reinforcement learning.},
author = {Szepesv{\'a}ri, {Cs}. and Littman, M.L.},
journal = {Neural Computation},
keywords = {MDPs, approximation, asymptotic, convergence, finite, learning, reinforcement, stochastic, theory},
pages = {2017--2059},
pdf = {papers/nc-97-gmdp.ps.pdf},
title = {A Unified Analysis of Value-Function-Based Reinforcement-Learning Algorithms},
volume = 11,
year = 1999
}