Recent years have witnessed rapid progress in detecting and recognizing
individual object instances. To understand the situation in a scene, however,
computers need to recognize how humans interact with surrounding objects. In
this paper, we tackle the challenging task of detecting human-object
interactions (HOI). Our core idea is that the appearance of a person or an
object instance contains informative cues on which relevant parts of an image
to attend to for facilitating interaction prediction. To exploit these cues, we
propose an instance-centric attention module that learns to dynamically
highlight regions in an image conditioned on the appearance of each instance.
Such an attention-based network allows us to selectively aggregate features
relevant for recognizing HOIs. We validate the efficacy of the proposed network
on the Verb in COCO and HICO-DET datasets and show that our approach compares
favorably with the state-of-the-arts.
%0 Generic
%1 citeulike:14643356
%A Gao, Chen
%A Zou, Yuliang
%A Huang, Jia-Bin
%D 2018
%K attention interaction loss
%T iCAN: Instance-Centric Attention Network for Human-Object Interaction Detection
%U http://arxiv.org/abs/1808.10437
%X Recent years have witnessed rapid progress in detecting and recognizing
individual object instances. To understand the situation in a scene, however,
computers need to recognize how humans interact with surrounding objects. In
this paper, we tackle the challenging task of detecting human-object
interactions (HOI). Our core idea is that the appearance of a person or an
object instance contains informative cues on which relevant parts of an image
to attend to for facilitating interaction prediction. To exploit these cues, we
propose an instance-centric attention module that learns to dynamically
highlight regions in an image conditioned on the appearance of each instance.
Such an attention-based network allows us to selectively aggregate features
relevant for recognizing HOIs. We validate the efficacy of the proposed network
on the Verb in COCO and HICO-DET datasets and show that our approach compares
favorably with the state-of-the-arts.
% iCAN preprint (arXiv 1808.10437); the entry carries CiteULike and BibSonomy
% export metadata, all of which is kept exactly as exported.
% NOTE(review): the export had the placeholder author "xxx". The author list
% and primaryclass below follow the arXiv listing for this eprint -- confirm
% against the listing before citing.
% Only the acronym in the title is brace-protected so that bibliography styles
% remain free to apply their own casing to the rest of the title.
% interhash/intrahash are left unchanged (the intrahash also appears in biburl).
@misc{citeulike:14643356,
  author               = {Gao, Chen and Zou, Yuliang and Huang, Jia-Bin},
  title                = {{iCAN}: Instance-Centric Attention Network for Human-Object Interaction Detection},
  year                 = 2018,
  month                = aug,
  day                  = 30,
  eprint               = {1808.10437},
  archiveprefix        = {arXiv},
  primaryclass         = {cs.CV},
  url                  = {http://arxiv.org/abs/1808.10437},
  keywords             = {attention interaction loss},
  abstract             = {Recent years have witnessed rapid progress in detecting and recognizing
individual object instances. To understand the situation in a scene, however,
computers need to recognize how humans interact with surrounding objects. In
this paper, we tackle the challenging task of detecting human-object
interactions (HOI). Our core idea is that the appearance of a person or an
object instance contains informative cues on which relevant parts of an image
to attend to for facilitating interaction prediction. To exploit these cues, we
propose an instance-centric attention module that learns to dynamically
highlight regions in an image conditioned on the appearance of each instance.
Such an attention-based network allows us to selectively aggregate features
relevant for recognizing HOIs. We validate the efficacy of the proposed network
on the Verb in COCO and HICO-DET datasets and show that our approach compares
favorably with the state-of-the-arts.},
  citeulike-article-id = {14643356},
  citeulike-linkout-0  = {http://arxiv.org/abs/1808.10437},
  citeulike-linkout-1  = {http://arxiv.org/pdf/1808.10437},
  posted-at            = {2018-10-05 12:25:24},
  priority             = {2},
  added-at             = {2019-02-27T22:23:29.000+0100},
  timestamp            = {2019-02-27T22:23:29.000+0100},
  biburl               = {https://www.bibsonomy.org/bibtex/23a6892450cb2c5da4793483847a08ff0/nmatsuk},
  interhash            = {1bdd488b672485838763bd459bcd471d},
  intrahash            = {3a6892450cb2c5da4793483847a08ff0},
}