Q-learning (Watkins, 1989) is a simple way for agents to learn how to act optimally in controlled Markovian domains. It amounts to an incremental method for dynamic programming which imposes limited computational demands. It works by successively improving its evaluations of the quality of particular actions at particular states.
Пожалуйста, войдите в систему, чтобы принять участие в дискуссии (добавить собственную рецензию или комментарий)
Цитировать эту публикацию
%0 Journal Article
%1 Watkins1992
%A Watkins, Christopher J. C. H.
%A Dayan, Peter
%D 1992
%J Machine Learning
%K DRLAlgoComparison q-learning reinforcement_learning
%N 3
%P 279--292
%R 10.1007/BF00992698
%T Q-learning
%U https://doi.org/10.1007/BF00992698
%V 8
%X Q-learning (Watkins, 1989) is a simple way for agents to learn how to act optimally in controlled Markovian domains. It amounts to an incremental method for dynamic programming which imposes limited computational demands. It works by successively improving its evaluations of the quality of particular actions at particular states.
@article{Watkins1992,
  author   = {Watkins, Christopher J. C. H. and Dayan, Peter},
  title    = {Q-learning},
  journal  = {Machine Learning},
  year     = {1992},
  month    = may,
  volume   = {8},
  number   = {3},
  pages    = {279--292},
  doi      = {10.1007/BF00992698},
  issn     = {1573-0565},
  keywords = {DRLAlgoComparison q-learning reinforcement_learning},
  abstract = {Q-learning (Watkins, 1989) is a simple way for agents to learn how to act optimally in controlled Markovian domains. It amounts to an incremental method for dynamic programming which imposes limited computational demands. It works by successively improving its evaluations of the quality of particular actions at particular states.},
}