We present a new approach for transfer of dynamic robot control policies such
as biped locomotion from simulation to real hardware. Key to our approach is to
perform system identification of the model parameters \mu of the hardware
(e.g. friction, center-of-mass) in two distinct stages, before policy learning
(pre-sysID) and after policy learning (post-sysID). Pre-sysID begins by
collecting trajectories from the physical hardware based on a set of generic
motion sequences. Because the trajectories may not be related to the task of
interest, pre-sysID does not attempt to accurately identify the true value of
\mu, but only to approximate the range of \mu to guide the policy learning.
Next, a Projected Universal Policy (PUP) is created by simultaneously training
a network that projects \mu to a low-dimensional latent variable \eta and a
family of policies that are conditioned on \eta. The second round of system
identification (post-sysID) is then carried out by deploying the PUP on the
robot hardware using task-relevant trajectories. We use Bayesian Optimization
to determine the values for \eta that optimize the performance of PUP on the
real hardware. We have used this approach to create three successful biped
locomotion controllers (walk forward, walk backwards, walk sideways) on the
Darwin OP2 robot.
Description
[1903.01390] Sim-to-Real Transfer for Biped Locomotion
%0 Journal Article
%1 yu2019simtoreal
%A Yu, Wenhao
%A Kumar, Visak CV
%A Turk, Greg
%A Liu, C. Karen
%D 2019
%K reinforcement-learning
%T Sim-to-Real Transfer for Biped Locomotion
%U http://arxiv.org/abs/1903.01390
%X We present a new approach for transfer of dynamic robot control policies such
as biped locomotion from simulation to real hardware. Key to our approach is to
perform system identification of the model parameters \mu of the hardware
(e.g. friction, center-of-mass) in two distinct stages, before policy learning
(pre-sysID) and after policy learning (post-sysID). Pre-sysID begins by
collecting trajectories from the physical hardware based on a set of generic
motion sequences. Because the trajectories may not be related to the task of
interest, pre-sysID does not attempt to accurately identify the true value of
\mu, but only to approximate the range of \mu to guide the policy learning.
Next, a Projected Universal Policy (PUP) is created by simultaneously training
a network that projects \mu to a low-dimensional latent variable \eta and a
family of policies that are conditioned on \eta. The second round of system
identification (post-sysID) is then carried out by deploying the PUP on the
robot hardware using task-relevant trajectories. We use Bayesian Optimization
to determine the values for \eta that optimize the performance of PUP on the
real hardware. We have used this approach to create three successful biped
locomotion controllers (walk forward, walk backwards, walk sideways) on the
Darwin OP2 robot.
@article{yu2019simtoreal,
  abstract      = {We present a new approach for transfer of dynamic robot control policies such
as biped locomotion from simulation to real hardware. Key to our approach is to
perform system identification of the model parameters {$\mu$} of the hardware
(e.g. friction, center-of-mass) in two distinct stages, before policy learning
(pre-sysID) and after policy learning (post-sysID). Pre-sysID begins by
collecting trajectories from the physical hardware based on a set of generic
motion sequences. Because the trajectories may not be related to the task of
interest, pre-sysID does not attempt to accurately identify the true value of
{$\mu$}, but only to approximate the range of {$\mu$} to guide the policy learning.
Next, a Projected Universal Policy (PUP) is created by simultaneously training
a network that projects {$\mu$} to a low-dimensional latent variable {$\eta$} and a
family of policies that are conditioned on {$\eta$}. The second round of system
identification (post-sysID) is then carried out by deploying the PUP on the
robot hardware using task-relevant trajectories. We use Bayesian Optimization
to determine the values for {$\eta$} that optimize the performance of PUP on the
real hardware. We have used this approach to create three successful biped
locomotion controllers (walk forward, walk backwards, walk sideways) on the
Darwin OP2 robot.},
  added-at      = {2019-03-08T01:13:51.000+0100},
  archiveprefix = {arXiv},
  author        = {Yu, Wenhao and Kumar, Visak C. V. and Turk, Greg and Liu, C. Karen},
  biburl        = {https://www.bibsonomy.org/bibtex/28586a3c1014a912c8ff922d70d8dba40/kirk86},
  description   = {[1903.01390] Sim-to-Real Transfer for Biped Locomotion},
  eprint        = {1903.01390},
  interhash     = {4523619443df120c455952f401b31116},
  intrahash     = {8586a3c1014a912c8ff922d70d8dba40},
  keywords      = {reinforcement-learning},
  note          = {cite arxiv:1903.01390},
  timestamp     = {2019-03-08T01:13:51.000+0100},
  title         = {{Sim-to-Real} Transfer for Biped Locomotion},
  url           = {http://arxiv.org/abs/1903.01390},
  year          = {2019}
}