We use simulated soccer to study multiagent learning.
Each team's players (agents) share an action set and a
policy, but may behave differently due to
position-dependent inputs. All agents on a team are
collectively rewarded or punished when goals are scored.
We conduct simulations with varying team sizes and
compare several learning algorithms: TD-Q learning with
linear neural networks (TD-Q), Probabilistic
Incremental Program Evolution (PIPE), and a PIPE
variant that learns by coevolution (CO-PIPE). TD-Q is
based on learning evaluation functions (EFs) that map
input/action pairs to expected reward. PIPE and CO-PIPE
search policy space directly: they use adaptive
probability distributions to synthesize programs that
compute action probabilities from current inputs. Our
results show that linear TD-Q encounters several
difficulties in learning appropriate shared EFs,
whereas PIPE and CO-PIPE do not depend on EFs and find
good policies faster and more reliably. This suggests
that in some multiagent learning scenarios, direct
search in policy space can offer advantages over
EF-based approaches.
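
To make the two approaches concrete, here is a minimal sketch of the
kind of linear evaluation function TD-Q relies on. It is not the
authors' implementation: the feature size, action count, learning
rate, and discount below are placeholder assumptions, and the paper's
TD-Q variant (with eligibility traces and the soccer input encoding)
differs in detail.

    import numpy as np

    N_FEATURES = 8             # assumed size of an agent's position-dependent input
    N_ACTIONS = 4              # assumed size of the shared action set
    ALPHA, GAMMA = 0.01, 0.95  # assumed learning rate and discount factor

    # One linear EF per action: Q(x, a) = W[a] @ x estimates expected reward.
    W = np.zeros((N_ACTIONS, N_FEATURES))

    def td_q_update(x, a, reward, x_next, done):
        # One Q-learning step with linear function approximation.
        # x and x_next are NumPy feature vectors of length N_FEATURES.
        target = reward if done else reward + GAMMA * np.max(W @ x_next)
        td_error = target - W[a] @ x
        W[a] += ALPHA * td_error * x   # gradient step on the squared TD error

PIPE and CO-PIPE instead adapt a probability distribution from which
candidate programs are sampled. The sketch below is a strong
simplification under stated assumptions: it uses fixed-length token
programs with one categorical distribution per slot, whereas PIPE
proper grows variable-size program trees from a probabilistic
prototype tree; the token set, fitness interface, and adaptation rate
are all placeholders.

    import numpy as np

    rng = np.random.default_rng(0)

    TOKENS = ["x0", "x1", "0.5", "+", "*"]  # placeholder instruction set
    SLOTS = 7                               # fixed program length (PIPE grows trees instead)
    LR = 0.1                                # probability adaptation rate

    # Adaptive distribution: one categorical over tokens per program slot.
    probs = np.full((SLOTS, len(TOKENS)), 1.0 / len(TOKENS))

    def sample_program():
        return [rng.choice(len(TOKENS), p=p) for p in probs]

    def search(fitness, generations=100, pop=20):
        # Sample programs, score them, and shift probabilities toward the best one.
        for _ in range(generations):
            population = [sample_program() for _ in range(pop)]
            best = max(population, key=fitness)
            for slot, token in enumerate(best):
                probs[slot] *= (1.0 - LR)
                probs[slot, token] += LR   # each row still sums to one
        return probs

In the coevolutionary CO-PIPE setting, the fitness signal would come
from playing sampled team policies against each other rather than
against a fixed opponent.
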
@article{Salustowicz:97mlj,
author = {Sa\l{}ustowicz, R. P. and Wiering, M. A. and Schmidhuber, J.},
issn = {0885-6125},
journal = {Machine Learning},
keywords = {coevolution, evaluation functions, multiagent learning, PIPE, probabilistic incremental program evolution, reinforcement learning, soccer, TD-Q},
month = nov,
number = {2-3},
pages = {263--282},
title = {Learning Team Strategies: Soccer Case Studies},
url = {ftp://ftp.idsia.ch/pub/rafal/soccer.ps.gz},
volume = 33,
year = 1998
}