We propose a unified mechanism for achieving coordination and communication
in Multi-Agent Reinforcement Learning (MARL): rewarding agents for
having causal influence over other agents' actions. Causal influence is
assessed using counterfactual reasoning. At each timestep, an agent simulates
alternate actions that it could have taken and computes their effect on the
behavior of other agents. Actions that lead to larger changes in other agents'
behavior are considered influential and are rewarded. We show that this is
equivalent to rewarding agents for having high mutual information between their
own actions and the actions of other agents. Empirical results demonstrate that
influence leads to enhanced coordination and communication in challenging
social dilemma environments, dramatically improving the learning curves of the
deep RL agents and leading to more meaningful learned communication protocols.
The influence rewards for all agents can be computed in a decentralized way by
having each agent learn a model of the other agents using deep neural networks.
In contrast, key previous works on emergent communication in the MARL setting
were unable to learn diverse policies in a decentralized manner and had to
resort to centralized training. Consequently, the influence reward opens up new
opportunities for research in this area.
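As a rough sketch of the mechanism the abstract describes (the notation below is
ours, not quoted from the paper, so the exact form in the paper may differ): the
influence reward for agent k at time t can be written as the divergence between
another agent j's policy conditioned on k's actual action and that same policy
marginalized over k's counterfactual actions,

c_t^k = \sum_{j \neq k} D_{\mathrm{KL}}\!\left[ p\!\left(a_t^j \mid a_t^k, s_t\right) \,\middle\|\, \sum_{\tilde{a}_t^k} p\!\left(a_t^j \mid \tilde{a}_t^k, s_t\right) p\!\left(\tilde{a}_t^k \mid s_t\right) \right],

whose expectation over agent k's own policy is the conditional mutual
information I(a_t^k ; a_t^j \mid s_t), which is the equivalence the abstract
refers to.

The same computation in code, as a minimal sketch: it assumes a discrete action
space and two hypothetical learned callables, own_policy (the agent's policy)
and other_model (the learned model of another agent mentioned in the abstract);
neither name comes from the paper.

import numpy as np

def kl_divergence(p, q, eps=1e-12):
    # KL(p || q) between two discrete distributions given as 1-D arrays.
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    return float(np.sum(p * (np.log(p + eps) - np.log(q + eps))))

def influence_reward(own_policy, other_model, state, taken_action, num_actions):
    # Influence of this agent on ONE other agent at a single timestep.
    #   own_policy(state)              -> probs over this agent's actions
    #   other_model(state, own_action) -> predicted probs over the other
    #                                     agent's actions
    # Both callables stand in for learned neural networks (assumed here).

    # p(a_other | a_self, s): the other agent's predicted behavior given
    # the action this agent actually took.
    conditional = np.asarray(other_model(state, taken_action), dtype=float)

    # p(a_other | s): average over the counterfactual actions this agent
    # could have taken, weighted by its own policy.
    own_probs = np.asarray(own_policy(state), dtype=float)
    marginal = np.zeros_like(conditional)
    for a in range(num_actions):
        marginal += own_probs[a] * np.asarray(other_model(state, a), dtype=float)

    # Reward the divergence between the two: how much the taken action
    # shifted the other agent's behavior away from its counterfactual
    # average. Summing over the other agents gives the full intrinsic
    # reward; its expectation recovers the mutual information above.
    return kl_divergence(conditional, marginal)

In the decentralized setting the abstract describes, other_model would be each
agent's own learned model of the other agents, trained from observed actions,
so no centralized controller is needed to compute the reward.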
@article{jaques2018social,
author = {Jaques, Natasha and Lazaridou, Angeliki and Hughes, Edward and Gulcehre, Caglar and Ortega, Pedro A. and Strouse, DJ and Leibo, Joel Z. and de Freitas, Nando},
keywords = {information reinforcement-learning},
note = {cite arxiv:1810.08647},
title = {Social Influence as Intrinsic Motivation for Multi-Agent Deep Reinforcement Learning},
url = {http://arxiv.org/abs/1810.08647},
year = 2018
}