R. Sałustowicz, M. Wiering, and J. Schmidhuber. Progress in Connectionist-based Information Systems:
Proceedings of the Fourth International Conference on
Neural Information Processing ICONIP'97, volume 1, pages 502--505. Springer-Verlag, (1997)
Abstract
We study multiagent learning in a simulated soccer
scenario. Players from the same team share a common
policy for mapping inputs to actions. They get rewarded
or punished collectively in case of goals. For varying
team sizes we compare the following learning
algorithms: TD-Q learning with linear neural networks
(TD-Q-LIN), with a neural gas network (TD-Q-NG),
Probabilistic Incremental Program Evolution (PIPE), and
a PIPE variant based on coevolution (CO-PIPE). TD-Q-LIN
and TD-Q-NG try to learn evaluation functions (EFs)
mapping input/action pairs to expected reward. PIPE and
CO-PIPE search policy space directly. They use adaptive
probability distributions to synthesize programs that
calculate action probabilities from current inputs. We
find that learning appropriate EFs is hard for both
EF-based approaches. Direct search in policy space
discovers more reliable policies and is faster.
%0 Conference Paper
%1 Salustowicz:97iconip
%A Sałustowicz, R. P.
%A Wiering, M. A.
%A Schmidhuber, J.
%B Progress in Connectionist-based Information Systems:
Proceedings of the Fourth International Conference on
Neural Information Processing ICONIP'97
%D 1997
%E Kasabov, N.
%E Kozma, R.
%E Ko, K.
%E O'Shea, R.
%E Coghill, G.
%E Gedeon, T.
%I Springer-Verlag
%K PIPE
%P 502--505
%T Evolving Soccer Strategies
%U ftp://ftp.idsia.ch/pub/rafal/ICONIP_soccer.ps.gz
%V 1
%X We study multiagent learning in a simulated soccer
scenario. Players from the same team share a common
policy for mapping inputs to actions. They get rewarded
or punished collectively in case of goals. For varying
team sizes we compare the following learning
algorithms: TD-Q learning with linear neural networks
(TD-Q-LIN), with a neural gas network (TD-Q-NG),
Probabilistic Incremental Program Evolution (PIPE), and
a PIPE variant based on coevolution (CO-PIPE). TD-Q-LIN
and TD-Q-NG try to learn evaluation functions (EFs)
mapping input/action pairs to expected reward. PIPE and
CO-PIPE search policy space directly. They use adaptive
probability distributions to synthesize programs that
calculate action probabilities from current inputs. We
find that learning appropriate EFs is hard for both
EF-based approaches. Direct search in policy space
discovers more reliable policies and is faster.
@inproceedings{Salustowicz:97iconip,
  abstract  = {We study multiagent learning in a simulated soccer
scenario. Players from the same team share a common
policy for mapping inputs to actions. They get rewarded
or punished collectively in case of goals. For varying
team sizes we compare the following learning
algorithms: TD-Q learning with linear neural networks
(TD-Q-LIN), with a neural gas network (TD-Q-NG),
Probabilistic Incremental Program Evolution (PIPE), and
a PIPE variant based on coevolution (CO-PIPE). TD-Q-LIN
and TD-Q-NG try to learn evaluation functions (EFs)
mapping input/action pairs to expected reward. PIPE and
CO-PIPE search policy space directly. They use adaptive
probability distributions to synthesize programs that
calculate action probabilities from current inputs. We
find that learning appropriate EFs is hard for both
EF-based approaches. Direct search in policy space
discovers more reliable policies and is faster.},
  added-at  = {2008-06-19T17:46:40.000+0200},
  address   = {Singapore},
  author    = {Sa{\l}ustowicz, R. P. and Wiering, M. A. and Schmidhuber, J.},
  biburl    = {https://www.bibsonomy.org/bibtex/2f7f3a571b64039aff5f7d47deec0a7f5/brazovayeye},
  booktitle = {Progress in Connectionist-based Information Systems:
Proceedings of the Fourth International Conference on
Neural Information Processing {ICONIP}'97},
  editor    = {Kasabov, N. and Kozma, R. and Ko, K. and O'Shea, R. and Coghill, G. and Gedeon, T.},
  interhash = {0900b2c8ef73147204eb76f9ecb8b322},
  intrahash = {f7f3a571b64039aff5f7d47deec0a7f5},
  keywords  = {PIPE},
  pages     = {502--505},
  publisher = {Springer-Verlag},
  size      = {5 pages},
  timestamp = {2008-06-19T17:50:57.000+0200},
  title     = {Evolving Soccer Strategies},
  url       = {ftp://ftp.idsia.ch/pub/rafal/ICONIP_soccer.ps.gz},
  volume    = {1},
  year      = {1997}
}