We improve the recently-proposed "MixMatch" semi-supervised learning
algorithm by introducing two new techniques: distribution alignment and
augmentation anchoring. Distribution alignment encourages the marginal
distribution of predictions on unlabeled data to be close to the marginal
distribution of ground-truth labels. Augmentation anchoring feeds multiple
strongly augmented versions of an input into the model and encourages each
output to be close to the prediction for a weakly-augmented version of the same
input. To produce strong augmentations, we propose a variant of AutoAugment
which learns the augmentation policy while the model is being trained. Our new
algorithm, dubbed ReMixMatch, is significantly more data-efficient than prior
work, requiring between $5\times$ and $16\times$ less data to reach the same
accuracy. For example, on CIFAR-10 with 250 labeled examples we reach $93.73\%$
accuracy (compared to MixMatch's accuracy of $93.58\%$ with $4{,}000$ examples)
and a median accuracy of $84.92\%$ with just four labels per class. We make our
code and data open-source at https://github.com/google-research/remixmatch.
Description
[1911.09785] ReMixMatch: Semi-Supervised Learning with Distribution Alignment and Augmentation Anchoring
%0 Generic
%1 berthelot2019remixmatch
%A Berthelot, David
%A Carlini, Nicholas
%A Cubuk, Ekin D.
%A Kurakin, Alex
%A Sohn, Kihyuk
%A Zhang, Han
%A Raffel, Colin
%D 2019
%K 2019 2020 iclr machine-learning semi-supervised
%T ReMixMatch: Semi-Supervised Learning with Distribution Alignment and
Augmentation Anchoring
%U http://arxiv.org/abs/1911.09785
%X We improve the recently-proposed "MixMatch" semi-supervised learning
algorithm by introducing two new techniques: distribution alignment and
augmentation anchoring. Distribution alignment encourages the marginal
distribution of predictions on unlabeled data to be close to the marginal
distribution of ground-truth labels. Augmentation anchoring feeds multiple
strongly augmented versions of an input into the model and encourages each
output to be close to the prediction for a weakly-augmented version of the same
input. To produce strong augmentations, we propose a variant of AutoAugment
which learns the augmentation policy while the model is being trained. Our new
algorithm, dubbed ReMixMatch, is significantly more data-efficient than prior
work, requiring between $5\times$ and $16\times$ less data to reach the same
accuracy. For example, on CIFAR-10 with 250 labeled examples we reach $93.73\%$
accuracy (compared to MixMatch's accuracy of $93.58\%$ with $4{,}000$ examples)
and a median accuracy of $84.92\%$ with just four labels per class. We make our
code and data open-source at https://github.com/google-research/remixmatch.
@misc{berthelot2019remixmatch,
  abstract      = {We improve the recently-proposed "MixMatch" semi-supervised learning
algorithm by introducing two new techniques: distribution alignment and
augmentation anchoring. Distribution alignment encourages the marginal
distribution of predictions on unlabeled data to be close to the marginal
distribution of ground-truth labels. Augmentation anchoring feeds multiple
strongly augmented versions of an input into the model and encourages each
output to be close to the prediction for a weakly-augmented version of the same
input. To produce strong augmentations, we propose a variant of AutoAugment
which learns the augmentation policy while the model is being trained. Our new
algorithm, dubbed ReMixMatch, is significantly more data-efficient than prior
work, requiring between $5\times$ and $16\times$ less data to reach the same
accuracy. For example, on CIFAR-10 with 250 labeled examples we reach $93.73\%$
accuracy (compared to MixMatch's accuracy of $93.58\%$ with $4{,}000$ examples)
and a median accuracy of $84.92\%$ with just four labels per class. We make our
code and data open-source at https://github.com/google-research/remixmatch.},
  added-at      = {2020-01-14T15:33:24.000+0100},
  author        = {Berthelot, David and Carlini, Nicholas and Cubuk, Ekin D. and Kurakin, Alex and Sohn, Kihyuk and Zhang, Han and Raffel, Colin},
  biburl        = {https://www.bibsonomy.org/bibtex/2dba24cbb53cab59608dbd9b2b460cca5/analyst},
  description   = {[1911.09785] ReMixMatch: Semi-Supervised Learning with Distribution Alignment and Augmentation Anchoring},
  eprint        = {1911.09785},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  interhash     = {3f9ac204260ab72855a65103eacd022c},
  intrahash     = {dba24cbb53cab59608dbd9b2b460cca5},
  keywords      = {2019 2020 iclr machine-learning semi-supervised},
  timestamp     = {2020-01-14T15:33:24.000+0100},
  title         = {{ReMixMatch}: Semi-Supervised Learning with Distribution Alignment and
Augmentation Anchoring},
  url           = {http://arxiv.org/abs/1911.09785},
  year          = {2019},
}