A landmark result of non-smooth convex optimization is that gradient descent
is an optimal algorithm whenever the number of computed gradients is smaller
than the dimension $d$. In this paper we study the extension of this result to
the parallel optimization setting. Namely we consider optimization algorithms
interacting with a highly parallel gradient oracle, that is one that can answer
$\mathrm{poly}(d)$ gradient queries in parallel. We show that in this case
gradient descent is optimal only up to $\tilde{O}(\sqrt{d})$ rounds of
interactions with the oracle. The lower bound improves upon a decades old
construction by Nemirovski which proves optimality only up to $d^{1/3}$ rounds
(as recently observed by Balkanski and Singer), and the suboptimality of
gradient descent after $\sqrt{d}$ rounds was already observed by Duchi,
Bartlett and Wainwright. In the latter regime we propose a new method with
improved complexity, which we conjecture to be optimal. The analysis of this
new method is based upon a generalized version of the recent results on optimal
acceleration for highly smooth convex optimization.
Description
[1906.10655] Complexity of Highly Parallel Non-Smooth Convex Optimization
%0 Journal Article
%1 bubeck2019complexity
%A Bubeck, Sébastien
%A Jiang, Qijia
%A Lee, Yin Tat
%A Li, Yuanzhi
%A Sidford, Aaron
%D 2019
%K convex optimization
%T Complexity of Highly Parallel Non-Smooth Convex Optimization
%U http://arxiv.org/abs/1906.10655
%X A landmark result of non-smooth convex optimization is that gradient descent
is an optimal algorithm whenever the number of computed gradients is smaller
than the dimension $d$. In this paper we study the extension of this result to
the parallel optimization setting. Namely we consider optimization algorithms
interacting with a highly parallel gradient oracle, that is one that can answer
$\mathrm{poly}(d)$ gradient queries in parallel. We show that in this case
gradient descent is optimal only up to $\tilde{O}(\sqrt{d})$ rounds of
interactions with the oracle. The lower bound improves upon a decades old
construction by Nemirovski which proves optimality only up to $d^{1/3}$ rounds
(as recently observed by Balkanski and Singer), and the suboptimality of
gradient descent after $\sqrt{d}$ rounds was already observed by Duchi,
Bartlett and Wainwright. In the latter regime we propose a new method with
improved complexity, which we conjecture to be optimal. The analysis of this
new method is based upon a generalized version of the recent results on optimal
acceleration for highly smooth convex optimization.
@misc{bubeck2019complexity,
  abstract      = {A landmark result of non-smooth convex optimization is that gradient descent
is an optimal algorithm whenever the number of computed gradients is smaller
than the dimension $d$. In this paper we study the extension of this result to
the parallel optimization setting. Namely we consider optimization algorithms
interacting with a highly parallel gradient oracle, that is one that can answer
$\mathrm{poly}(d)$ gradient queries in parallel. We show that in this case
gradient descent is optimal only up to $\tilde{O}(\sqrt{d})$ rounds of
interactions with the oracle. The lower bound improves upon a decades old
construction by Nemirovski which proves optimality only up to $d^{1/3}$ rounds
(as recently observed by Balkanski and Singer), and the suboptimality of
gradient descent after $\sqrt{d}$ rounds was already observed by Duchi,
Bartlett and Wainwright. In the latter regime we propose a new method with
improved complexity, which we conjecture to be optimal. The analysis of this
new method is based upon a generalized version of the recent results on optimal
acceleration for highly smooth convex optimization.},
  added-at      = {2019-06-27T16:10:00.000+0200},
  archiveprefix = {arXiv},
  author        = {Bubeck, S{\'e}bastien and Jiang, Qijia and Lee, Yin Tat and Li, Yuanzhi and Sidford, Aaron},
  biburl        = {https://www.bibsonomy.org/bibtex/295ab0eeacbc1c519b4f00d5557f5f8e5/kirk86},
  description   = {[1906.10655] Complexity of Highly Parallel Non-Smooth Convex Optimization},
  eprint        = {1906.10655},
  interhash     = {53ca63d8abf2a0f8cd0b21b72b48194a},
  intrahash     = {95ab0eeacbc1c519b4f00d5557f5f8e5},
  keywords      = {convex optimization},
  timestamp     = {2019-06-27T16:10:00.000+0200},
  title         = {Complexity of Highly Parallel Non-Smooth Convex Optimization},
  url           = {https://arxiv.org/abs/1906.10655},
  year          = {2019},
}