The problem of tracking multiple objects in a video sequence poses several
challenging tasks. For tracking-by-detection these include, among others,
object re-identification, motion prediction and dealing with occlusions. We
present a tracker without bells and whistles that accomplishes tracking without
specifically targeting any of these tasks, in particular, we perform no
training or optimization on tracking data. To this end, we exploit the bounding
box regression of an object detector to predict an object's new position in the
next frame, thereby converting a detector into a Tracktor. We demonstrate the
extensibility of our Tracktor and provide a new state-of-the-art on three
multi-object tracking benchmarks by extending it with a straightforward
re-identification and camera motion compensation.
We then perform an analysis on the performance and failure cases of several
state-of-the-art tracking methods and our Tracktor. Surprisingly, none of the
dedicated tracking methods are considerably better in dealing with complex
tracking scenarios, namely, small and occluded objects or missing detections.
However, our approach tackles most of the easy tracking scenarios. Therefore,
we motivate our approach as a new tracking paradigm and point out promising
future research directions. Overall, we show that a cleverly exploited detector
can perform better tracking than any current tracking method and expose the
real tracking challenges which are still unsolved.
%0 Generic
%1 bergmann2019tracking
%A Bergmann, Philipp
%A Meinhardt, Tim
%A Leal-Taixé, Laura
%D 2019
%K arch detection rcnn tracking video
%T Tracking without bells and whistles
%U http://arxiv.org/abs/1903.05625
%X The problem of tracking multiple objects in a video sequence poses several
challenging tasks. For tracking-by-detection these include, among others,
object re-identification, motion prediction and dealing with occlusions. We
present a tracker without bells and whistles that accomplishes tracking without
specifically targeting any of these tasks, in particular, we perform no
training or optimization on tracking data. To this end, we exploit the bounding
box regression of an object detector to predict an object's new position in the
next frame, thereby converting a detector into a Tracktor. We demonstrate the
extensibility of our Tracktor and provide a new state-of-the-art on three
multi-object tracking benchmarks by extending it with a straightforward
re-identification and camera motion compensation.
We then perform an analysis on the performance and failure cases of several
state-of-the-art tracking methods and our Tracktor. Surprisingly, none of the
dedicated tracking methods are considerably better in dealing with complex
tracking scenarios, namely, small and occluded objects or missing detections.
However, our approach tackles most of the easy tracking scenarios. Therefore,
we motivate our approach as a new tracking paradigm and point out promising
future research directions. Overall, we show that a cleverly exploited detector
can perform better tracking than any current tracking method and expose the
real tracking challenges which are still unsolved.
% arXiv preprint entry (key: bergmann2019tracking).
% The arXiv identifier is stored in eprint/archiveprefix/primaryclass rather
% than in a free-text note, so eprint-aware styles can format and link it.
% The accented surname uses a BibTeX special character so it sorts correctly.
% added-at, biburl, description, interhash, intrahash and timestamp are
% bookkeeping fields that standard styles ignore; they are kept unchanged.
@misc{bergmann2019tracking,
  abstract      = {The problem of tracking multiple objects in a video sequence poses several
challenging tasks. For tracking-by-detection these include, among others,
object re-identification, motion prediction and dealing with occlusions. We
present a tracker without bells and whistles that accomplishes tracking without
specifically targeting any of these tasks, in particular, we perform no
training or optimization on tracking data. To this end, we exploit the bounding
box regression of an object detector to predict an object's new position in the
next frame, thereby converting a detector into a Tracktor. We demonstrate the
extensibility of our Tracktor and provide a new state-of-the-art on three
multi-object tracking benchmarks by extending it with a straightforward
re-identification and camera motion compensation.
We then perform an analysis on the performance and failure cases of several
state-of-the-art tracking methods and our Tracktor. Surprisingly, none of the
dedicated tracking methods are considerably better in dealing with complex
tracking scenarios, namely, small and occluded objects or missing detections.
However, our approach tackles most of the easy tracking scenarios. Therefore,
we motivate our approach as a new tracking paradigm and point out promising
future research directions. Overall, we show that a cleverly exploited detector
can perform better tracking than any current tracking method and expose the
real tracking challenges which are still unsolved.},
  added-at      = {2019-04-08T22:11:56.000+0200},
  author        = {Bergmann, Philipp and Meinhardt, Tim and Leal-Taix{\'e}, Laura},
  biburl        = {https://www.bibsonomy.org/bibtex/201bb2a2366ef257968be8ddbd99ebc79/nmatsuk},
  description   = {Tracking without bells and whistles},
  interhash     = {664a7ddd9d8171ce2d43e5f839ad5d4f},
  intrahash     = {01bb2a2366ef257968be8ddbd99ebc79},
  keywords      = {arch detection rcnn tracking video},
  timestamp     = {2019-04-08T22:11:56.000+0200},
  title         = {Tracking without bells and whistles},
  eprint        = {1903.05625},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  url           = {http://arxiv.org/abs/1903.05625},
  year          = {2019},
}