We define the relevant information in a signal x ∈ X as
being the information that this signal provides about
another signal y ∈ Y. Examples include the information
that face images provide about the names of the people
portrayed, or the information that speech sounds
provide about the words spoken. Understanding the
signal x requires more than just predicting y; it also
requires specifying which features of X play a role in
the prediction. We formalize the problem as that of
finding a short code for X that preserves the maximum
information about Y. That is, we squeeze the
information that X provides about Y through a
'bottleneck' formed by a limited set of codewords X̃.
This constrained optimization problem can be seen as a
generalization of rate distortion theory in which the
distortion measure d(x, x̃) emerges from the joint
statistics of X and Y. The approach yields an exact set
of self-consistent equations for the coding rules
X → X̃ and X̃ → Y. Solutions to t...
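
The abstract breaks off mid-sentence; for reference, the variational
principle it summarizes can be written out in the notation standard in
the information bottleneck literature. One minimizes over stochastic
encoders p(x̃|x) the functional

    \mathcal{L}\big[p(\tilde{x} \mid x)\big] \;=\; I(X; \tilde{X}) \;-\; \beta\, I(\tilde{X}; Y),

where the multiplier β trades compression of X against preservation of
information about Y. The distortion measure that emerges from the joint
statistics, and the resulting self-consistent equations for the coding
rules, are

    d(x, \tilde{x}) \;=\; D_{\mathrm{KL}}\big[\, p(y \mid x) \,\big\|\, p(y \mid \tilde{x}) \,\big],

    p(\tilde{x} \mid x) \;=\; \frac{p(\tilde{x})}{Z(x, \beta)}\, \exp\big(-\beta\, d(x, \tilde{x})\big),

    p(y \mid \tilde{x}) \;=\; \frac{1}{p(\tilde{x})} \sum_{x} p(y \mid x)\, p(\tilde{x} \mid x)\, p(x),

    p(\tilde{x}) \;=\; \sum_{x} p(\tilde{x} \mid x)\, p(x),

with Z(x, β) a normalizing partition function.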
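The truncated final sentence points toward an algorithmic solution of
these equations. As an illustration only, here is a minimal NumPy sketch
of the alternating, Blahut-Arimoto-style re-estimation that the
self-consistent equations suggest; the function name, toy joint
distribution, and parameter values are illustrative choices of this
sketch, not code or data from the paper.

    import numpy as np

    def information_bottleneck(p_xy, n_clusters, beta, n_iter=200, seed=0):
        """Iterate the IB self-consistent equations for a discrete joint p(x, y).

        p_xy: (nx, ny) array whose entries sum to 1.
        Returns the encoder p(x~|x), decoder p(y|x~), and marginal p(x~).
        (Hypothetical helper for illustration; not from the paper.)
        """
        rng = np.random.default_rng(seed)
        nx, _ = p_xy.shape
        p_x = p_xy.sum(axis=1)                    # marginal p(x)
        p_y_given_x = p_xy / p_x[:, None]         # conditional p(y|x)
        eps = 1e-12

        # random soft initialization of the encoder p(x~|x)
        q = rng.random((nx, n_clusters))
        p_t_given_x = q / q.sum(axis=1, keepdims=True)

        for _ in range(n_iter):
            # p(x~) = sum_x p(x~|x) p(x)
            p_t = p_t_given_x.T @ p_x
            # p(y|x~) = (1/p(x~)) sum_x p(y|x) p(x~|x) p(x)
            p_y_given_t = (p_t_given_x * p_x[:, None]).T @ p_y_given_x
            p_y_given_t /= p_t[:, None] + eps
            # emergent distortion d(x, x~) = KL[p(y|x) || p(y|x~)]
            log_ratio = np.log(p_y_given_x[:, None, :] + eps) \
                      - np.log(p_y_given_t[None, :, :] + eps)
            d = (p_y_given_x[:, None, :] * log_ratio).sum(axis=2)
            # encoder update: p(x~|x) proportional to p(x~) exp(-beta d(x, x~))
            logits = np.log(p_t[None, :] + eps) - beta * d
            logits -= logits.max(axis=1, keepdims=True)  # numerical stability
            p_t_given_x = np.exp(logits)
            p_t_given_x /= p_t_given_x.sum(axis=1, keepdims=True)

        return p_t_given_x, p_y_given_t, p_t

    # Toy joint distribution (illustrative): the first two x-values favor
    # y = 0, the last two favor y = 1, so two codewords should recover
    # that split.
    p_xy = np.array([[0.20, 0.05],
                     [0.18, 0.07],
                     [0.05, 0.20],
                     [0.07, 0.18]])
    encoder, decoder, marginal = information_bottleneck(p_xy, n_clusters=2, beta=5.0)
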
@inproceedings{tishby-information-bottleneck-method-1999,
  author = {Tishby, Naftali and Pereira, Fernando C. and Bialek, William},
  booktitle = {Proceedings of the 37th Annual Allerton Conference on Communication, Control and Computing},
keywords = {clustering information_bottleneck},
pages = {368--377},
title = {The Information Bottleneck Method},
url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.39.9882},
year = 1999
}