The idea of temporal abstraction, i.e. learning, planning and representing the world at multiple time scales, has been a constant thread in AI research, spanning sub-fields from classical planning and search to control and reinforcement learning. For example, programming a robot typically involves making decisions over a set of controllers, rather than working at the level of motor torques. While temporal abstraction is a very natural concept, learning such abstractions with no human input has proved quite daunting. In this paper, we present a general architecture, called option-critic, which allows learning temporal abstractions automatically, end-to-end, simply from the agent's experience. This approach allows continual learning and provides interesting qualitative and quantitative results in several tasks.
%0 Journal Article
%1 BaconPrecup18aimag
%A Bacon, Pierre-Luc
%A Precup, Doina
%D 2018
%J AI Magazine
%K 2018 paper reinforcement-learning temporal
%N 1
%P 39--50
%R 10.1609/aimag.v39i1.2780
%T Constructing Temporal Abstractions Autonomously in Reinforcement Learning
%V 39
%X The idea of temporal abstraction, i.e. learning, planning and representing the world at multiple time scales, has been a constant thread in AI research, spanning sub-fields from classical planning and search to control and reinforcement learning. For example, programming a robot typically involves making decisions over a set of controllers, rather than working at the level of motor torques. While temporal abstraction is a very natural concept, learning such abstractions with no human input has proved quite daunting. In this paper, we present a general architecture, called option-critic, which allows learning temporal abstractions automatically, end-to-end, simply from the agent's experience. This approach allows continual learning and provides interesting qualitative and quantitative results in several tasks.
@article{BaconPrecup18aimag,
  abstract  = {The idea of temporal abstraction, i.e. learning, planning and representing the world at multiple time scales, has been a constant thread in AI research, spanning sub-fields from classical planning and search to control and reinforcement learning. For example, programming a robot typically involves making decisions over a set of controllers, rather than working at the level of motor torques. While temporal abstraction is a very natural concept, learning such abstractions with no human input has proved quite daunting. In this paper, we present a general architecture, called option-critic, which allows learning temporal abstractions automatically, end-to-end, simply from the agent's experience. This approach allows continual learning and provides interesting qualitative and quantitative results in several tasks.},
  added-at  = {2018-04-17T17:14:06.000+0200},
  author    = {Bacon, Pierre-Luc and Precup, Doina},
  biburl    = {https://www.bibsonomy.org/bibtex/2aba2e954e7e24b94f9255cd3a9ddf722/achakraborty},
  doi       = {10.1609/aimag.v39i1.2780},
  file      = {AAAI online:2018/BaconPrecup18aimag.pdf:PDF},
  groups    = {public},
  interhash = {0a86efdccd29a900719f07ddae15148f},
  intrahash = {aba2e954e7e24b94f9255cd3a9ddf722},
  issn      = {0738-4602},
  journal   = {AI Magazine},
  keywords  = {2018 paper reinforcement-learning temporal},
  month     = mar,
  number    = {1},
  pages     = {39--50},
  timestamp = {2018-04-17T17:14:06.000+0200},
  title     = {Constructing Temporal Abstractions Autonomously in Reinforcement Learning},
  username  = {flint63},
  volume    = {39},
  year      = {2018},
}