Scene flow estimation has been receiving increasing attention for 3D environment perception. Monocular scene flow estimation - obtaining 3D structure and 3D motion from two temporally consecutive images - is a highly ill-posed problem, and practical solutions are lacking to date. We propose a novel monocular scene flow method that yields competitive accuracy and real-time performance. By taking an inverse problem view, we design a single convolutional neural network (CNN) that successfully estimates depth and 3D motion simultaneously from a classical optical flow cost volume. We adopt self-supervised learning with 3D loss functions and occlusion reasoning to leverage unlabeled data. We validate our design choices, including the proxy loss and augmentation setup. Our model achieves state-of-the-art accuracy among unsupervised/self-supervised learning approaches to monocular scene flow, and yields competitive results for the optical flow and monocular depth estimation sub-tasks. Semi-supervised fine-tuning further improves the accuracy and yields promising results in real-time.
%0 Conference Paper
%1 2020-hur
%A Hur, Junhwa
%A Roth, Stefan
%B 2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)
%D 2020
%K depth estimation flow monocular scene
%P 7394-7403
%R 10.1109/CVPR42600.2020.00742
%T Self-Supervised Monocular Scene Flow Estimation
%U https://ieeexplore.ieee.org/document/9157146/
%X Scene flow estimation has been receiving increasing attention for 3D environment perception. Monocular scene flow estimation - obtaining 3D structure and 3D motion from two temporally consecutive images - is a highly ill-posed problem, and practical solutions are lacking to date. We propose a novel monocular scene flow method that yields competitive accuracy and real-time performance. By taking an inverse problem view, we design a single convolutional neural network (CNN) that successfully estimates depth and 3D motion simultaneously from a classical optical flow cost volume. We adopt self-supervised learning with 3D loss functions and occlusion reasoning to leverage unlabeled data. We validate our design choices, including the proxy loss and augmentation setup. Our model achieves state-of-the-art accuracy among unsupervised/self-supervised learning approaches to monocular scene flow, and yields competitive results for the optical flow and monocular depth estimation sub-tasks. Semi-supervised fine-tuning further improves the accuracy and yields promising results in real-time.
@inproceedings{2020-hur,
  abstract  = {Scene flow estimation has been receiving increasing attention for 3D environment perception. Monocular scene flow estimation - obtaining 3D structure and 3D motion from two temporally consecutive images - is a highly ill-posed problem, and practical solutions are lacking to date. We propose a novel monocular scene flow method that yields competitive accuracy and real-time performance. By taking an inverse problem view, we design a single convolutional neural network (CNN) that successfully estimates depth and 3D motion simultaneously from a classical optical flow cost volume. We adopt self-supervised learning with 3D loss functions and occlusion reasoning to leverage unlabeled data. We validate our design choices, including the proxy loss and augmentation setup. Our model achieves state-of-the-art accuracy among unsupervised/self-supervised learning approaches to monocular scene flow, and yields competitive results for the optical flow and monocular depth estimation sub-tasks. Semi-supervised fine-tuning further improves the accuracy and yields promising results in real-time.},
  added-at  = {2021-07-07T12:31:39.000+0200},
  author    = {Hur, Junhwa and Roth, Stefan},
  biburl    = {https://www.bibsonomy.org/bibtex/22840ad7361b4830c56f985a93db6f069/pkoch},
  booktitle = {2020 {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  doi       = {10.1109/CVPR42600.2020.00742},
  interhash = {3fb286a199511c09e4a4a25b48dac7d3},
  intrahash = {2840ad7361b4830c56f985a93db6f069},
  issn      = {2575-7075},
  keywords  = {depth estimation flow monocular scene},
  month     = jun,
  pages     = {7394--7403},
  timestamp = {2021-07-07T12:31:39.000+0200},
  title     = {Self-Supervised Monocular Scene Flow Estimation},
  url       = {https://ieeexplore.ieee.org/document/9157146/},
  year      = 2020,
}