We present a method for jointly training the estimation of depth, ego-motion,
and a dense 3D translation field of objects relative to the scene, with
monocular photometric consistency being the sole source of supervision. We show
that this apparently heavily underdetermined problem can be regularized by
imposing the following prior knowledge about 3D translation fields: they are
sparse, since most of the scene is static, and they tend to be constant for
rigid moving objects. We show that this regularization alone is sufficient to
train monocular depth prediction models that exceed the accuracy achieved in
prior work for dynamic scenes, including methods that require semantic input.
Code is at
https://github.com/google-research/google-research/tree/master/depth_and_motion_learning .
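
The abstract above names two priors on the residual 3D translation field: sparsity, because most of the scene is static, and piecewise constancy, because rigid objects translate as a whole. The exact loss terms are defined in the paper and the linked repository; the short Python sketch below only illustrates what regularizers expressing these two priors can look like. The function and parameter names (translation_field_regularizer, sparsity_weight, smoothness_weight) are assumptions made for this sketch, not the authors' implementation.

import numpy as np

def translation_field_regularizer(trans_field, sparsity_weight=1.0, smoothness_weight=1.0):
    """Illustrative regularizer for a dense per-pixel 3D translation field.

    trans_field: array of shape (H, W, 3) holding the 3D translation of each
    pixel relative to the scene, after ego-motion has been accounted for.
    This is a hypothetical stand-in for the losses described in the paper.
    """
    # Sparsity prior: penalize the magnitude of the residual translations,
    # pushing most of the field toward zero (most of the scene is static).
    sparsity = np.mean(np.abs(trans_field))

    # Constancy prior: penalize spatial gradients so the field is roughly
    # constant within each rigidly moving object.
    grad_y = np.abs(np.diff(trans_field, axis=0))
    grad_x = np.abs(np.diff(trans_field, axis=1))
    smoothness = np.mean(grad_y) + np.mean(grad_x)

    return sparsity_weight * sparsity + smoothness_weight * smoothness

# Example: a mostly static scene with one rigidly moving patch.
field = np.zeros((128, 416, 3), dtype=np.float32)
field[40:80, 100:200, :] = [0.05, 0.0, 0.3]
print(translation_field_regularizer(field))

In the actual training setup, terms of this kind would be added to the photometric consistency loss that supervises depth and ego-motion; see the repository linked above for the authors' implementation.
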
@inproceedings{2020-li,
author = {Li, Hanhan and Gordon, Ariel and Zhao, Hang and Casser, Vincent and Angelova, Anelia},
booktitle = {4th Conference on Robot Learning (CoRL)},
keywords = {3d casser depth estimation flow google li monocular motion object unsupervised},
title = {Unsupervised Monocular Depth Learning in Dynamic Scenes},
year = 2020
}