We consider learning to maximize reward in combinatorial cascading bandits, a new learning setting that unifies cascading and combinatorial bandits. The unification of these frameworks presents unique challenges in the analysis but allows for modeling a rich set of partial monitoring problems, such as learning to route in a communication network to minimize the probability of losing routed packets and recommending diverse items. We propose CombCascade, a computationally-efficient UCB-like algorithm for solving our problem; and derive gap-dependent and gap-free upper bounds on its regret. Our analysis builds on recent results in stochastic combinatorial semi-bandits but also addresses two novel challenges of our learning setting, a non-linear objective and partial observability. We evaluate CombCascade on two real-world problems and demonstrate that it performs well even when our modeling assumptions are violated. We also demonstrate that our setting requires new learning algorithms.
%0 Conference Paper
%1 KveWeAshSze15
%A Kveton, B.
%A Wen, Z.
%A Ashkan, A.
%A Szepesvári, Cs.
%B NIPS
%D 2015
%K cascading bandits, combinatorial bandits, online learning, partial information, nonlinear, stochastic, theory
%P 1450--1458
%T Combinatorial Cascading Bandits
%X We consider learning to maximize reward in combinatorial cascading bandits, a new learning setting that unifies cascading and combinatorial bandits. The unification of these frameworks presents unique challenges in the analysis but allows for modeling a rich set of partial monitoring problems, such as learning to route in a communication network to minimize the probability of losing routed packets and recommending diverse items. We propose CombCascade, a computationally-efficient UCB-like algorithm for solving our problem; and derive gap-dependent and gap-free upper bounds on its regret. Our analysis builds on recent results in stochastic combinatorial semi-bandits but also addresses two novel challenges of our learning setting, a non-linear objective and partial observability. We evaluate CombCascade on two real-world problems and demonstrate that it performs well even when our modeling assumptions are violated. We also demonstrate that our setting requires new learning algorithms.
@inproceedings{KveWeAshSze15,
  abstract      = {We consider learning to maximize reward in combinatorial cascading bandits, a new learning setting that unifies cascading and combinatorial bandits. The unification of these frameworks presents unique challenges in the analysis but allows for modeling a rich set of partial monitoring problems, such as learning to route in a communication network to minimize the probability of losing routed packets and recommending diverse items. We propose CombCascade, a computationally-efficient UCB-like algorithm for solving our problem; and derive gap-dependent and gap-free upper bounds on its regret. Our analysis builds on recent results in stochastic combinatorial semi-bandits but also addresses two novel challenges of our learning setting, a non-linear objective and partial observability. We evaluate CombCascade on two real-world problems and demonstrate that it performs well even when our modeling assumptions are violated. We also demonstrate that our setting requires new learning algorithms.},
  added-at      = {2020-03-17T03:03:01.000+0100},
  author        = {Kveton, Branislav and Wen, Zheng and Ashkan, Azin and Szepesv{\'a}ri, Csaba},
  biburl        = {https://www.bibsonomy.org/bibtex/2bb1bcea523d68b26b62470d7ea9d3a36/csaba},
  booktitle     = {Advances in Neural Information Processing Systems 28 ({NIPS})},
  date-added    = {2015-12-02 01:22:43 +0000},
  date-modified = {2016-08-01 03:14:33 +0000},
  interhash     = {59edb706b212acf485de0d539c82a638},
  intrahash     = {bb1bcea523d68b26b62470d7ea9d3a36},
  keywords      = {cascading bandits, combinatorial bandits, online learning, partial information, nonlinear, stochastic, theory},
  internal-note = {original record had month = {September}; NIPS 2015 took place in December -- verify the month before relying on it},
  month         = sep,
  pages         = {1450--1458},
  pdf           = {papers/NIPS15-CombCascadeBandit.pdf},
  timestamp     = {2020-03-17T03:03:01.000+0100},
  title         = {Combinatorial Cascading Bandits},
  year          = {2015}
}