Y. Nishiyama, A. Boularias, A. Gretton, and K. Fukumizu. (2012). arXiv:1210.4887. Comment: Appears in Proceedings of the Twenty-Eighth Conference on Uncertainty in Artificial Intelligence (UAI 2012).
Abstract
A nonparametric approach for policy learning for POMDPs is proposed. The
approach represents distributions over the states, observations, and actions as
embeddings in feature spaces, which are reproducing kernel Hilbert spaces.
Distributions over states given the observations are obtained by applying the
kernel Bayes' rule to these distribution embeddings. Policies and value
functions are defined on the feature space over states, which leads to a
feature space expression for the Bellman equation. Value iteration may then be
used to estimate the optimal value function and associated policy. Experimental
results confirm that the correct policy is learned using the feature space
representation.
%0 Generic
%1 nishiyama2012hilbert
%A Nishiyama, Yu
%A Boularias, Abdeslam
%A Gretton, Arthur
%A Fukumizu, Kenji
%D 2012
%K kernel pomdp
%T Hilbert Space Embeddings of POMDPs
%U http://arxiv.org/abs/1210.4887
%X A nonparametric approach for policy learning for POMDPs is proposed. The
approach represents distributions over the states, observations, and actions as
embeddings in feature spaces, which are reproducing kernel Hilbert spaces.
Distributions over states given the observations are obtained by applying the
kernel Bayes' rule to these distribution embeddings. Policies and value
functions are defined on the feature space over states, which leads to a
feature space expression for the Bellman equation. Value iteration may then be
used to estimate the optimal value function and associated policy. Experimental
results confirm that the correct policy is learned using the feature space
representation.
% Published in the UAI 2012 proceedings (per the original note field), so this is
% @inproceedings rather than @misc. The auto-exported junk note
% ("cite arxiv:1210.4887Comment: ...") is replaced by proper eprint/archiveprefix
% fields and a booktitle. Citation key is unchanged.
@inproceedings{nishiyama2012hilbert,
  abstract      = {A nonparametric approach for policy learning for POMDPs is proposed. The
approach represents distributions over the states, observations, and actions as
embeddings in feature spaces, which are reproducing kernel Hilbert spaces.
Distributions over states given the observations are obtained by applying the
kernel Bayes' rule to these distribution embeddings. Policies and value
functions are defined on the feature space over states, which leads to a
feature space expression for the Bellman equation. Value iteration may then be
used to estimate the optimal value function and associated policy. Experimental
results confirm that the correct policy is learned using the feature space
representation.},
  added-at      = {2014-04-16T23:24:02.000+0200},
  author        = {Nishiyama, Yu and Boularias, Abdeslam and Gretton, Arthur and Fukumizu, Kenji},
  biburl        = {https://www.bibsonomy.org/bibtex/23ff4535ad8efb8cf7d0247316a15326f/wittawatj},
  booktitle     = {Proceedings of the Twenty-Eighth Conference on Uncertainty in Artificial Intelligence ({UAI} 2012)},
  description   = {[1210.4887] Hilbert Space Embeddings of POMDPs},
  archiveprefix = {arXiv},
  eprint        = {1210.4887},
  interhash     = {54a1a004fad42cb98ab00c6a7c7798fb},
  intrahash     = {3ff4535ad8efb8cf7d0247316a15326f},
  keywords      = {kernel pomdp},
  timestamp     = {2014-04-16T23:24:02.000+0200},
  title         = {{Hilbert} Space Embeddings of {POMDPs}},
  url           = {http://arxiv.org/abs/1210.4887},
  year          = {2012},
}