Large scale deep learning excels when labeled images are abundant, yet
data-efficient learning remains a longstanding challenge. While biological
vision is thought to leverage vast amounts of unlabeled data to solve
classification problems with limited supervision, computer vision has so far
not succeeded in this 'semi-supervised' regime. Our work tackles this challenge
with Contrastive Predictive Coding, an unsupervised objective which extracts
stable structure from still images. The result is a representation which,
equipped with a simple linear classifier, separates ImageNet categories better
than all competing methods, and surpasses the performance of a fully-supervised
AlexNet model. When given a small number of labeled images (as few as 13 per
class), this representation retains a strong classification performance,
outperforming state-of-the-art semi-supervised methods by 10% Top-5 accuracy
and supervised methods by 20%. Finally, we find our unsupervised representation
to serve as a useful substrate for image detection on the PASCAL-VOC 2007
dataset, approaching the performance of representations trained with a fully
annotated ImageNet dataset. We expect these results to open the door to
pipelines that use scalable unsupervised representations as a drop-in
replacement for supervised ones for real-world vision tasks where labels are
scarce.
Description
Data-Efficient Image Recognition with Contrastive Predictive Coding
%0 Generic
%1 henaff2019dataefficient
%A Hénaff, Olivier J.
%A Razavi, Ali
%A Doersch, Carl
%A Eslami, S. M. Ali
%A Oord, Aaron van den
%D 2019
%K backbone loss oneshot semisup unsup
%T Data-Efficient Image Recognition with Contrastive Predictive Coding
%U http://arxiv.org/abs/1905.09272
%X Large scale deep learning excels when labeled images are abundant, yet
data-efficient learning remains a longstanding challenge. While biological
vision is thought to leverage vast amounts of unlabeled data to solve
classification problems with limited supervision, computer vision has so far
not succeeded in this 'semi-supervised' regime. Our work tackles this challenge
with Contrastive Predictive Coding, an unsupervised objective which extracts
stable structure from still images. The result is a representation which,
equipped with a simple linear classifier, separates ImageNet categories better
than all competing methods, and surpasses the performance of a fully-supervised
AlexNet model. When given a small number of labeled images (as few as 13 per
class), this representation retains a strong classification performance,
outperforming state-of-the-art semi-supervised methods by 10% Top-5 accuracy
and supervised methods by 20%. Finally, we find our unsupervised representation
to serve as a useful substrate for image detection on the PASCAL-VOC 2007
dataset, approaching the performance of representations trained with a fully
annotated ImageNet dataset. We expect these results to open the door to
pipelines that use scalable unsupervised representations as a drop-in
replacement for supervised ones for real-world vision tasks where labels are
scarce.
@misc{henaff2019dataefficient,
  abstract      = {Large scale deep learning excels when labeled images are abundant, yet
data-efficient learning remains a longstanding challenge. While biological
vision is thought to leverage vast amounts of unlabeled data to solve
classification problems with limited supervision, computer vision has so far
not succeeded in this `semi-supervised' regime. Our work tackles this challenge
with Contrastive Predictive Coding, an unsupervised objective which extracts
stable structure from still images. The result is a representation which,
equipped with a simple linear classifier, separates ImageNet categories better
than all competing methods, and surpasses the performance of a fully-supervised
AlexNet model. When given a small number of labeled images (as few as 13 per
class), this representation retains a strong classification performance,
outperforming state-of-the-art semi-supervised methods by 10\% Top-5 accuracy
and supervised methods by 20\%. Finally, we find our unsupervised representation
to serve as a useful substrate for image detection on the PASCAL-VOC 2007
dataset, approaching the performance of representations trained with a fully
annotated ImageNet dataset. We expect these results to open the door to
pipelines that use scalable unsupervised representations as a drop-in
replacement for supervised ones for real-world vision tasks where labels are
scarce.},
  added-at      = {2019-07-08T21:44:06.000+0200},
  archiveprefix = {arXiv},
  author        = {H{\'e}naff, Olivier J. and Razavi, Ali and Doersch, Carl and Eslami, S. M. Ali and van den Oord, Aaron},
  biburl        = {https://www.bibsonomy.org/bibtex/2f958ff01a164209c1660e0594b6271ae/nmatsuk},
  description   = {Data-Efficient Image Recognition with Contrastive Predictive Coding},
  eprint        = {1905.09272},
  interhash     = {371800272aca6e446695b33670629095},
  intrahash     = {f958ff01a164209c1660e0594b6271ae},
  keywords      = {backbone loss oneshot semisup unsup},
  note          = {cite arxiv:1905.09272},
  timestamp     = {2019-07-08T21:44:06.000+0200},
  title         = {Data-Efficient Image Recognition with {Contrastive} {Predictive} {Coding}},
  url           = {http://arxiv.org/abs/1905.09272},
  year          = {2019}
}