We prove that the empirical risk of most well-known loss functions factors
into the sum of a linear term that aggregates all labels and a term that is
label-free, and can further be expressed as sums of the loss. This holds even
for non-smooth, non-convex losses and in any RKHS. The first term is a
(kernel) mean operator, the focal quantity of this work, which we characterize
as the sufficient statistic for the labels. The result tightens known
generalization bounds and sheds new light on their interpretation.
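For intuition, the following is a minimal sketch of the factorization for a
linear model and binary labels; the "linear-odd" condition and the notation
below are our reconstruction of the flavor of the result, not the paper's
exact statement.

% Sketch: sample S = {(x_i, y_i)}, i = 1..n, with y_i in {-1,+1}, a linear
% model <theta, x>, and a loss satisfying the linear-odd condition
%   \ell(z) - \ell(-z) = a z   (e.g. the logistic loss, with a = -1).
\begin{align*}
\hat{R}_\ell(\theta)
  &= \frac{1}{n}\sum_{i=1}^{n} \ell\big(y_i \langle \theta, x_i \rangle\big) \\
  &= \underbrace{\frac{1}{2n}\sum_{i=1}^{n}\sum_{\sigma \in \{\pm 1\}}
       \ell\big(\sigma \langle \theta, x_i \rangle\big)}_{\text{label-free}}
   \;+\; \frac{a}{2}\Big\langle \theta,\,
       \underbrace{\frac{1}{n}\sum_{i=1}^{n} y_i x_i}_{\mu_S \text{ (mean operator)}}
     \Big\rangle.
\end{align*}

The identity follows from writing \ell(yz) = \tfrac{1}{2}[\ell(yz) + \ell(-yz)]
+ \tfrac{1}{2}[\ell(yz) - \ell(-yz)]: the first bracket is invariant to the
sign of y, and the second equals a\,yz by the linear-odd condition. All label
information is thus carried by \mu_S alone.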
Factorization has a direct application to weakly supervised learning. In
particular, we demonstrate that algorithms such as SGD and proximal methods
can be adapted with minimal effort to handle weak supervision, once the mean
operator has been estimated. We apply this idea to learning with asymmetric
noisy labels, connecting and extending prior work. Furthermore, we show that
most losses enjoy a data-dependent (through the mean operator) form of noise
robustness, in contrast with known negative results.
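To make the "minimal effort" claim concrete, here is a NumPy sketch of how an
SGD loop might use the factorization: the per-example gradient comes from the
label-free term, while all label information enters through an estimate of the
mean operator, which under asymmetric noise admits a linear correction. The
function names (sgd_factored_risk, denoise_mean_operator) and hyperparameters
are illustrative assumptions, not the paper's code.

import numpy as np

def d_logistic(z):
    # Derivative of the logistic loss log(1 + exp(-z)),
    # which is linear-odd with a = -1: ell(z) - ell(-z) = -z.
    return -1.0 / (1.0 + np.exp(z))

def denoise_mean_operator(mu_noisy, x_mean, p_plus, p_minus):
    # Under asymmetric label noise with flip rates p_plus, p_minus,
    # E[y~ x] = (1 - p_plus - p_minus) E[y x] + (p_minus - p_plus) E[x],
    # so an estimate of the clean mean operator is recovered by
    # inverting this affine map.
    return (mu_noisy - (p_minus - p_plus) * x_mean) / (1.0 - p_plus - p_minus)

def sgd_factored_risk(X, mu_hat, a=-1.0, lr=0.1, epochs=50, seed=0):
    # SGD on the factored risk: the labels enter only through mu_hat,
    # an estimate of the mean operator (1/n) sum_i y_i x_i.
    rng = np.random.default_rng(seed)
    theta = np.zeros(X.shape[1])
    for _ in range(epochs):
        for i in rng.permutation(X.shape[0]):
            z = X[i] @ theta
            # Label-free term: the loss averaged over both labels.
            grad = 0.5 * (d_logistic(z) - d_logistic(-z)) * X[i]
            # Linear label term: its gradient is the constant (a/2) mu_hat.
            grad += 0.5 * a * mu_hat
            theta -= lr * grad
    return theta

In a weakly supervised setting, mu_hat would be estimated from whatever label
information is available (for instance, noisy labels together with known flip
rates, via denoise_mean_operator), after which the training loop itself never
touches a label.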
@article{patrini2016factorization,
  author   = {Patrini, Giorgio and Nielsen, Frank and Nock, Richard and Carioni, Marcello},
  title    = {Loss factorization, weakly supervised learning and label noise robustness},
  year     = {2016},
  note     = {arXiv:1602.02450},
  keywords = {objectives robustness},
  url      = {http://arxiv.org/abs/1602.02450}
}