t-distributed Stochastic Neighborhood Embedding (t-SNE), a clustering and
visualization method proposed by van der Maaten & Hinton in 2008, has rapidly
become a standard tool in a number of natural sciences. Despite its
overwhelming success, there is a distinct lack of mathematical foundations and
the inner workings of the algorithm are not well understood. The purpose of
this paper is to prove that t-SNE is able to recover well-separated clusters;
more precisely, we prove that t-SNE in the `early exaggeration' phase, an
optimization technique proposed by van der Maaten & Hinton (2008) and van der
Maaten (2014), can be rigorously analyzed. As a byproduct, the proof suggests
novel ways for setting the exaggeration parameter $\alpha$ and step size $h$.
Numerical examples illustrate the effectiveness of these rules: in particular,
the quality of embedding of topological structures (e.g. the swiss roll)
improves. We also discuss a connection to spectral clustering methods.
%0 Generic
%1 linderman2017clustering
%A Linderman, George C.
%A Steinerberger, Stefan
%D 2017
%K bio cluster clustering embedding sne t-sne tsne visualization
%T Clustering with t-SNE, provably
%U http://arxiv.org/abs/1706.02582
%X t-distributed Stochastic Neighborhood Embedding (t-SNE), a clustering and
visualization method proposed by van der Maaten & Hinton in 2008, has rapidly
become a standard tool in a number of natural sciences. Despite its
overwhelming success, there is a distinct lack of mathematical foundations and
the inner workings of the algorithm are not well understood. The purpose of
this paper is to prove that t-SNE is able to recover well-separated clusters;
more precisely, we prove that t-SNE in the `early exaggeration' phase, an
optimization technique proposed by van der Maaten & Hinton (2008) and van der
Maaten (2014), can be rigorously analyzed. As a byproduct, the proof suggests
novel ways for setting the exaggeration parameter $\alpha$ and step size $h$.
Numerical examples illustrate the effectiveness of these rules: in particular,
the quality of embedding of topological structures (e.g. the swiss roll)
improves. We also discuss a connection to spectral clustering methods.
@misc{linderman2017clustering,
abstract = {t-distributed Stochastic Neighborhood Embedding (t-SNE), a clustering and
visualization method proposed by van der Maaten & Hinton in 2008, has rapidly
become a standard tool in a number of natural sciences. Despite its
overwhelming success, there is a distinct lack of mathematical foundations and
the inner workings of the algorithm are not well understood. The purpose of
this paper is to prove that t-SNE is able to recover well-separated clusters;
more precisely, we prove that t-SNE in the `early exaggeration' phase, an
optimization technique proposed by van der Maaten & Hinton (2008) and van der
Maaten (2014), can be rigorously analyzed. As a byproduct, the proof suggests
novel ways for setting the exaggeration parameter $\alpha$ and step size $h$.
Numerical examples illustrate the effectiveness of these rules: in particular,
the quality of embedding of topological structures (e.g. the swiss roll)
improves. We also discuss a connection to spectral clustering methods.},
added-at = {2019-02-08T00:43:08.000+0100},
author = {Linderman, George C. and Steinerberger, Stefan},
biburl = {https://www.bibsonomy.org/bibtex/22e5548502142ab6b868197b210bcda81/becker},
description = {[1706.02582] Clustering with t-SNE, provably},
interhash = {ea31eaa1cdd0f42d9e00198600e93349},
intrahash = {2e5548502142ab6b868197b210bcda81},
keywords = {bio cluster clustering embedding sne t-sne tsne visualization},
note = {cite arxiv:1706.02582},
timestamp = {2019-02-08T00:43:08.000+0100},
title = {Clustering with t-SNE, provably},
url = {http://arxiv.org/abs/1706.02582},
year = 2017
}