Optimal transport (OT) distances are increasingly used as loss functions for
statistical inference, notably in the learning of generative models or
supervised learning. Yet, the behavior of minimum Wasserstein estimators is
poorly understood, notably in high-dimensional regimes or under model
misspecification. In this work we adopt the viewpoint of projection robust (PR)
OT, which seeks to maximize the OT cost between two measures by choosing a
$k$-dimensional subspace onto which they can be projected. Our first
contribution is to establish several fundamental statistical properties of PR
Wasserstein distances, complementing and improving previous literature that has
been restricted to one-dimensional and well-specified cases. Next, we propose
the integral PR Wasserstein (IPRW) distance as an alternative to the PRW
distance, by averaging rather than optimizing on subspaces. Our complexity
bounds can help explain why both PRW and IPRW distances outperform Wasserstein
distances empirically in high-dimensional inference tasks. Finally, we consider
parametric inference using the PRW distance. We provide an asymptotic guarantee
of two types of minimum PRW estimators and formulate a central limit theorem
for max-sliced Wasserstein estimator under model misspecification. To enable
our analysis on PRW with projection dimension larger than one, we devise a
novel combination of variational analysis and statistical theory.
Description
[2006.12301] On Projection Robust Optimal Transport: Sample Complexity and Model Misspecification
%0 Journal Article
%1 lin2020projection
%A Lin, Tianyi
%A Zheng, Zeyu
%A Chen, Elynn Y.
%A Cuturi, Marco
%A Jordan, Michael I.
%D 2020
%K complexity generative-models misspecification optimal-transport readings robustness sampling
%T On Projection Robust Optimal Transport: Sample Complexity and Model
Misspecification
%U https://arxiv.org/abs/2006.12301
%X Optimal transport (OT) distances are increasingly used as loss functions for
statistical inference, notably in the learning of generative models or
supervised learning. Yet, the behavior of minimum Wasserstein estimators is
poorly understood, notably in high-dimensional regimes or under model
misspecification. In this work we adopt the viewpoint of projection robust (PR)
OT, which seeks to maximize the OT cost between two measures by choosing a
$k$-dimensional subspace onto which they can be projected. Our first
contribution is to establish several fundamental statistical properties of PR
Wasserstein distances, complementing and improving previous literature that has
been restricted to one-dimensional and well-specified cases. Next, we propose
the integral PR Wasserstein (IPRW) distance as an alternative to the PRW
distance, by averaging rather than optimizing on subspaces. Our complexity
bounds can help explain why both PRW and IPRW distances outperform Wasserstein
distances empirically in high-dimensional inference tasks. Finally, we consider
parametric inference using the PRW distance. We provide an asymptotic guarantee
of two types of minimum PRW estimators and formulate a central limit theorem
for max-sliced Wasserstein estimator under model misspecification. To enable
our analysis on PRW with projection dimension larger than one, we devise a
novel combination of variational analysis and statistical theory.
@article{lin2020projection,
  abstract      = {Optimal transport (OT) distances are increasingly used as loss functions for
statistical inference, notably in the learning of generative models or
supervised learning. Yet, the behavior of minimum Wasserstein estimators is
poorly understood, notably in high-dimensional regimes or under model
misspecification. In this work we adopt the viewpoint of projection robust (PR)
OT, which seeks to maximize the OT cost between two measures by choosing a
$k$-dimensional subspace onto which they can be projected. Our first
contribution is to establish several fundamental statistical properties of PR
Wasserstein distances, complementing and improving previous literature that has
been restricted to one-dimensional and well-specified cases. Next, we propose
the integral PR Wasserstein (IPRW) distance as an alternative to the PRW
distance, by averaging rather than optimizing on subspaces. Our complexity
bounds can help explain why both PRW and IPRW distances outperform Wasserstein
distances empirically in high-dimensional inference tasks. Finally, we consider
parametric inference using the PRW distance. We provide an asymptotic guarantee
of two types of minimum PRW estimators and formulate a central limit theorem
for max-sliced Wasserstein estimator under model misspecification. To enable
our analysis on PRW with projection dimension larger than one, we devise a
novel combination of variational analysis and statistical theory.},
  added-at      = {2020-07-16T12:39:13.000+0200},
  archiveprefix = {arXiv},
  author        = {Lin, Tianyi and Zheng, Zeyu and Chen, Elynn Y. and Cuturi, Marco and Jordan, Michael I.},
  biburl        = {https://www.bibsonomy.org/bibtex/243640662883173abde36d432e6f1ecee/kirk86},
  description   = {[2006.12301] On Projection Robust Optimal Transport: Sample Complexity and Model Misspecification},
  eprint        = {2006.12301},
  interhash     = {17c55b20ec9b78624e6c6f46f3b785d0},
  intrahash     = {43640662883173abde36d432e6f1ecee},
  keywords      = {complexity generative-models misspecification optimal-transport readings robustness sampling},
  note          = {arXiv preprint. Comment: corrected some typos; 46 pages, 41 figures},
  timestamp     = {2020-07-16T12:43:31.000+0200},
  title         = {On Projection Robust Optimal Transport: Sample Complexity and Model Misspecification},
  url           = {https://arxiv.org/abs/2006.12301},
  year          = {2020},
}