The cascade model is a well-established model of user interaction with content. In this work, we propose cascading bandits, a learning variant of the model where the objective is to learn K most attractive items out of L ground items. We cast the problem as a stochastic combinatorial bandit with a non-linear reward function and partially observed weights of items. Both of these are challenging in the context of combinatorial bandits. We propose two computationally-efficient algorithms for our problem, CascadeUCB1 and CascadeKL-UCB, and prove gap-dependent upper bounds on their regret. We also derive a lower bound for cascading bandits and show that it matches the upper bound of CascadeKL-UCB up to a logarithmic factor. Finally, we evaluate our algorithms on synthetic problems. Our experiments demonstrate that the algorithms perform well and robustly even when our modeling assumptions are violated.
%0 Conference Paper
%1 KWASz15:Cascading
%A Kveton, Branislav
%A Wen, Zheng
%A Ashkan, Azin
%A Szepesvári, Csaba
%B ICML
%D 2015
%K bandits, cascading, information, learning, nonlinear, online, partial, stochastic, theory
%P 767--776
%T Cascading Bandits: Learning to Rank in the Cascade Model
%X The cascade model is a well-established model of user interaction with content. In this work, we propose cascading bandits, a learning variant of the model where the objective is to learn K most attractive items out of L ground items. We cast the problem as a stochastic combinatorial bandit with a non-linear reward function and partially observed weights of items. Both of these are challenging in the context of combinatorial bandits. We propose two computationally-efficient algorithms for our problem, CascadeUCB1 and CascadeKL-UCB, and prove gap-dependent upper bounds on their regret. We also derive a lower bound for cascading bandits and show that it matches the upper bound of CascadeKL-UCB up to a logarithmic factor. Finally, we evaluate our algorithms on synthetic problems. Our experiments demonstrate that the algorithms perform well and robustly even when our modeling assumptions are violated.
@inproceedings{KWASz15:Cascading,
  abstract      = {The cascade model is a well-established model of user interaction with content. In this work, we propose cascading bandits, a learning variant of the model where the objective is to learn K most attractive items out of L ground items. We cast the problem as a stochastic combinatorial bandit with a non-linear reward function and partially observed weights of items. Both of these are challenging in the context of combinatorial bandits. We propose two computationally-efficient algorithms for our problem, CascadeUCB1 and CascadeKL-UCB, and prove gap-dependent upper bounds on their regret. We also derive a lower bound for cascading bandits and show that it matches the upper bound of CascadeKL-UCB up to a logarithmic factor. Finally, we evaluate our algorithms on synthetic problems. Our experiments demonstrate that the algorithms perform well and robustly even when our modeling assumptions are violated.},
  added-at      = {2020-03-17T03:03:01.000+0100},
  author        = {Kveton, Branislav and Wen, Zheng and Ashkan, Azin and Szepesv{\'a}ri, Csaba},
  biburl        = {https://www.bibsonomy.org/bibtex/210e484c5f9caf190c17b08b904ded8ad/csaba},
  booktitle     = {Proceedings of the 32nd International Conference on Machine Learning ({ICML})},
  date-added    = {2015-04-25 18:20:55 +0000},
  date-modified = {2015-12-02 01:29:29 +0000},
  interhash     = {784fc83fc72ee31efdf48472533e400e},
  intrahash     = {10e484c5f9caf190c17b08b904ded8ad},
  keywords      = {bandits, cascading, information, learning, nonlinear, online, partial, stochastic, theory},
  pages         = {767--776},
  pdf           = {papers/ICML15-CascadingBandits.pdf},
  timestamp     = {2020-03-17T03:03:01.000+0100},
  title         = {Cascading Bandits: Learning to Rank in the Cascade Model},
  year          = {2015},
}