This paper focuses on the problem of learning 6-DOF grasping with a
parallel-jaw gripper in simulation. We propose the notion of a geometry-aware
representation for grasping, based on the assumption that knowledge of 3D
geometry is at the heart of interaction. Our key idea is to constrain and
regularize the learning of grasping interaction through 3D geometry
prediction. Specifically, we formulate the learning of a deep geometry-aware
grasping model in two steps: First, we learn to build a mental geometry-aware
representation by reconstructing the scene (i.e., a 3D occupancy grid) from
RGBD input via generative 3D shape modeling. Second, we learn to predict the
grasping outcome from this internal geometry-aware representation. The learned
outcome-prediction model is then used to sequentially propose grasping
solutions via analysis-by-synthesis optimization. Our contributions are
fourfold: (1) to the best of our knowledge, we present the first method for
learning a 6-DOF grasping network from RGBD input; (2) we build a grasping
dataset from demonstrations in virtual reality with rich sensory and
interaction annotations; the dataset includes 101 everyday objects spread
across 7 categories, and we additionally propose a data augmentation strategy
for effective learning; (3) we demonstrate that the learned geometry-aware
representation yields roughly a 10% relative performance improvement over a
baseline CNN on grasping objects from our dataset; and (4) we further
demonstrate that the model generalizes to novel viewpoints and object
instances.
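
The abstract's two-step formulation lends itself to a compact illustration.
Below is a minimal PyTorch sketch of the idea: an encoder maps the RGBD image
to a latent code; a generative decoder reconstructs a 3D occupancy grid from
that code (step one); an outcome head scores a 6-DOF grasp pose against the
same geometry-aware code (step two); and a small analysis-by-synthesis loop
refines a candidate pose by gradient ascent on the predicted success. All
layer sizes, the names GeometryAwareGraspNet and propose_grasp, and the choice
of optimizer are assumptions made for illustration, not the authors'
implementation.

    # Hypothetical sketch of the two-step geometry-aware grasping model
    # described in the abstract; architecture details are illustrative guesses.
    import torch
    import torch.nn as nn

    class GeometryAwareGraspNet(nn.Module):
        def __init__(self, latent_dim=256, grid_size=32):
            super().__init__()
            self.grid_size = grid_size
            # Step 1a: 2D CNN encoder over the 4-channel RGBD image.
            self.encoder = nn.Sequential(
                nn.Conv2d(4, 32, 4, stride=2, padding=1), nn.ReLU(),
                nn.Conv2d(32, 64, 4, stride=2, padding=1), nn.ReLU(),
                nn.Conv2d(64, 128, 4, stride=2, padding=1), nn.ReLU(),
                nn.AdaptiveAvgPool2d(1), nn.Flatten(),
                nn.Linear(128, latent_dim),
            )
            # Step 1b: generative decoder to a dense 3D occupancy grid.
            self.occupancy_decoder = nn.Linear(latent_dim, grid_size ** 3)
            # Step 2: outcome head scoring a 6-DOF grasp (xyz + rpy, 6 numbers)
            # against the geometry-aware latent code.
            self.outcome_head = nn.Sequential(
                nn.Linear(latent_dim + 6, 128), nn.ReLU(),
                nn.Linear(128, 1),
            )

        def forward(self, rgbd, grasp_pose):
            z = self.encoder(rgbd)
            occupancy = torch.sigmoid(self.occupancy_decoder(z)).view(
                -1, self.grid_size, self.grid_size, self.grid_size)
            logit = self.outcome_head(torch.cat([z, grasp_pose], dim=-1))
            return occupancy, logit

    def propose_grasp(model, rgbd, init_pose, steps=100, lr=1e-2):
        """Analysis-by-synthesis: refine a candidate 6-DOF pose by gradient
        ascent on the predicted success logit (a sketch, not the paper's
        exact optimizer)."""
        for p in model.parameters():
            p.requires_grad_(False)  # optimize the pose only, not the network
        pose = init_pose.clone().requires_grad_(True)
        opt = torch.optim.Adam([pose], lr=lr)
        for _ in range(steps):
            opt.zero_grad()
            _, logit = model(rgbd, pose)
            (-logit.sum()).backward()  # maximize predicted grasp success
            opt.step()
        return pose.detach()

For example, calling propose_grasp(GeometryAwareGraspNet(),
torch.randn(1, 4, 128, 128), torch.zeros(1, 6)) refines an initial pose under
the (untrained) model; repeating this from several initial poses mirrors the
sequential proposal of grasping solutions mentioned in the abstract.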
@misc{yan2017learning,
added-at = {2018-05-28T05:24:53.000+0200},
author = {Yan, Xinchen and Hsu, Jasmine and Khansari, Mohi and Bai, Yunfei and Pathak, Arkanath and Gupta, Abhinav and Davidson, James and Lee, Honglak},
biburl = {https://www.bibsonomy.org/bibtex/2d10ee8aaaddeef3cdaefddf5b5b2a695/achakraborty},
description = {[1708.07303] Learning 6-DOF Grasping Interaction with Deep Geometry-aware 3D Representations},
keywords = {2017 arxiv deep-learning grasp robotics},
note = {cite arxiv:1708.07303. Comment: Deep Geometry-aware Grasping},
timestamp = {2018-05-28T05:24:53.000+0200},
title = {Learning 6-DOF Grasping Interaction with Deep Geometry-aware 3D Representations},
url = {http://arxiv.org/abs/1708.07303},
year = 2017
}