Existing semi-supervised learning (SSL) algorithms use a single weight to
balance the loss of labeled and unlabeled examples, i.e., all unlabeled
examples are equally weighted. But not all unlabeled data are equal. In this
paper we study how to use a different weight for every unlabeled example.
Manual tuning of all those weights -- as done in prior work -- is no longer
possible. Instead, we adjust those weights via an algorithm based on the
influence function, a measure of a model's dependency on one training example.
To make the approach efficient, we propose a fast and effective approximation
of the influence function. We demonstrate that this technique outperforms
state-of-the-art methods on semi-supervised image and language classification
tasks.
Description
[2007.01293] Not All Unlabeled Data are Equal: Learning to Weight Data in Semi-supervised Learning
%0 Generic
%1 ren2020unlabeled
%A Ren, Zhongzheng
%A Yeh, Raymond A.
%A Schwing, Alexander G.
%D 2020
%K 2020 data dataset machine-learning semi-supervised
%T Not All Unlabeled Data are Equal: Learning to Weight Data in
Semi-supervised Learning
%U http://arxiv.org/abs/2007.01293
%X Existing semi-supervised learning (SSL) algorithms use a single weight to
balance the loss of labeled and unlabeled examples, i.e., all unlabeled
examples are equally weighted. But not all unlabeled data are equal. In this
paper we study how to use a different weight for every unlabeled example.
Manual tuning of all those weights -- as done in prior work -- is no longer
possible. Instead, we adjust those weights via an algorithm based on the
influence function, a measure of a model's dependency on one training example.
To make the approach efficient, we propose a fast and effective approximation
of the influence function. We demonstrate that this technique outperforms
state-of-the-art methods on semi-supervised image and language classification
tasks.
@misc{ren2020unlabeled,
  abstract      = {Existing semi-supervised learning (SSL) algorithms use a single weight to
balance the loss of labeled and unlabeled examples, i.e., all unlabeled
examples are equally weighted. But not all unlabeled data are equal. In this
paper we study how to use a different weight for every unlabeled example.
Manual tuning of all those weights -- as done in prior work -- is no longer
possible. Instead, we adjust those weights via an algorithm based on the
influence function, a measure of a model's dependency on one training example.
To make the approach efficient, we propose a fast and effective approximation
of the influence function. We demonstrate that this technique outperforms
state-of-the-art methods on semi-supervised image and language classification
tasks.},
  added-at      = {2020-07-04T20:28:25.000+0200},
  archiveprefix = {arXiv},
  author        = {Ren, Zhongzheng and Yeh, Raymond A. and Schwing, Alexander G.},
  biburl        = {https://www.bibsonomy.org/bibtex/215b3c1b8c2bcaf06532f99a82a3a81da/analyst},
  description   = {[2007.01293] Not All Unlabeled Data are Equal: Learning to Weight Data in Semi-supervised Learning},
  eprint        = {2007.01293},
  interhash     = {b5b3ca68de2b2e97a61b8caf39b985d8},
  intrahash     = {15b3c1b8c2bcaf06532f99a82a3a81da},
  keywords      = {2020 data dataset machine-learning semi-supervised},
  timestamp     = {2020-07-04T20:28:25.000+0200},
  title         = {Not All Unlabeled Data are Equal: Learning to Weight Data in Semi-supervised Learning},
  url           = {http://arxiv.org/abs/2007.01293},
  year          = {2020}
}