In distributed statistical learning, $N$ samples are split across $m$
machines and a learner wishes to use minimal communication to learn as well as
if the examples were on a single machine. This model has received substantial
interest in machine learning due to its scalability and potential for parallel
speedup. However, in high-dimensional settings, where the number of examples is
smaller than the number of features ("dimension"), the speedup afforded by
distributed learning may be overshadowed by the cost of communicating a single
example. This paper investigates the following question: When is it possible to
learn a $d$-dimensional model in the distributed setting with total
communication sublinear in $d$?
Starting with a negative result, we show that for learning $\ell_1$-bounded
or sparse linear models, no algorithm can obtain optimal error until
communication is linear in dimension. Our main result is that by slightly
relaxing the standard boundedness assumptions for linear models, we can obtain
distributed algorithms that enjoy optimal error with communication logarithmic
in dimension. This result is based on a family of algorithms that combine
mirror descent with randomized sparsification/quantization of iterates, and
extends to the general stochastic convex optimization model.
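The abstract's central algorithmic idea, mirror descent whose iterates are randomly sparsified before being communicated, can be illustrated with a short Python sketch. This is not the paper's algorithm: the names (sparsify, eg_pm_step, distributed_eg) and the machines objects with a stochastic_gradient method are hypothetical, and the entropic (exponentiated-gradient) mirror map over the l1 ball is assumed here only because it matches the l1-bounded setting the abstract discusses.

# Illustrative toy sketch, not the authors' method. Assumes NumPy; all names are hypothetical.
import numpy as np

def sparsify(w, k, rng):
    """Unbiased randomized sparsification: sample k coordinates (with replacement)
    with probability proportional to |w_i|, importance-weighted so E[sparsify(w)] = w.
    The result has at most k nonzeros, so it is cheap to communicate."""
    z = np.sum(np.abs(w))
    if z == 0.0:
        return np.zeros_like(w)
    p = np.abs(w) / z
    idx = rng.choice(len(w), size=k, p=p)
    out = np.zeros_like(w)
    for i in idx:
        out[i] += w[i] / (k * p[i])  # importance weighting keeps the estimate unbiased
    return out

def eg_pm_step(w_plus, w_minus, grad, eta, radius):
    """One exponentiated-gradient (EG+/-) step, i.e. entropic mirror descent over
    the l1 ball of the given radius; the iterate is w = w_plus - w_minus."""
    w_plus = w_plus * np.exp(-eta * grad)
    w_minus = w_minus * np.exp(eta * grad)
    scale = radius / (np.sum(w_plus) + np.sum(w_minus))
    return w_plus * scale, w_minus * scale

def distributed_eg(machines, d, radius=1.0, eta=0.1, k=10, rounds=100, seed=0):
    """Toy server loop: each round, broadcast a sparsified iterate, average the
    machines' stochastic gradients at that point, and take one mirror step.
    (A full sublinear-communication scheme would compress the gradient messages too.)"""
    rng = np.random.default_rng(seed)
    w_plus = np.full(d, radius / (2 * d))   # w = w_plus - w_minus starts at 0
    w_minus = np.full(d, radius / (2 * d))
    for _ in range(rounds):
        w_sparse = sparsify(w_plus - w_minus, k, rng)  # O(k) nonzeros to broadcast
        grads = [m.stochastic_gradient(w_sparse) for m in machines]  # hypothetical interface
        w_plus, w_minus = eg_pm_step(w_plus, w_minus, np.mean(grads, axis=0), eta, radius)
    return w_plus - w_minus

The importance-weighted sampling in sparsify keeps the broadcast iterate unbiased, which is what makes stochastic gradients evaluated at the sparse point still usable by the mirror-descent update; this is only a sketch of the general idea under the stated assumptions.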
@article{acharya2019distributed,
abstract = {In distributed statistical learning, $N$ samples are split across $m$
machines and a learner wishes to use minimal communication to learn as well as
if the examples were on a single machine. This model has received substantial
interest in machine learning due to its scalability and potential for parallel
speedup. However, in high-dimensional settings, where the number of examples is
smaller than the number of features ("dimension"), the speedup afforded by
distributed learning may be overshadowed by the cost of communicating a single
example. This paper investigates the following question: When is it possible to
learn a $d$-dimensional model in the distributed setting with total
communication sublinear in $d$?
Starting with a negative result, we show that for learning $\ell_1$-bounded
or sparse linear models, no algorithm can obtain optimal error until
communication is linear in dimension. Our main result is that by slightly
relaxing the standard boundedness assumptions for linear models, we can obtain
distributed algorithms that enjoy optimal error with communication logarithmic
in dimension. This result is based on a family of algorithms that combine
mirror descent with randomized sparsification/quantization of iterates, and
extends to the general stochastic convex optimization model.},
author = {Acharya, Jayadev and De Sa, Christopher and Foster, Dylan J. and Sridharan, Karthik},
keywords = {distributed learning},
note = {cite arxiv:1902.11259},
title = {Distributed Learning with Sublinear Communication},
url = {http://arxiv.org/abs/1902.11259},
year = 2019
}