Cumulative prospect theory (CPT) is a popular approach for modeling human preferences. It is based on probabilistic distortions and generalizes expected utility theory. We bring CPT to a stochastic optimization framework and propose algorithms for both estimation and optimization of CPT-value objectives. We propose an empirical distribution function-based scheme to estimate the CPT-value and then use this scheme in the inner loop of a CPT-value optimization procedure. We propose both gradient-based as well as gradient-free CPT-value optimization algorithms that are based on two well-known simulation optimization ideas: simultaneous perturbation stochastic approximation (SPSA) and model-based parameter search (MPS), respectively. We provide theoretical convergence guarantees for all the proposed algorithms and also illustrate the potential of CPT-based criteria in a traffic signal control application.
%0 Journal Article
%1 JiPrFuMaSz18
%A Jie, C.
%A Prashanth L.A.,
%A Fu, M.C.
%A Marcus, S.
%A Szepesvári, Cs.
%D 2018
%J IEEE Transactions on Automatic Control
%K cumulative prospect theory, Markov Decision Processes, reinforcement learning, risk-sensitive criteria, SPSA, stochastic optimization
%N 9
%P 2867--2882
%T Stochastic Optimization in a Cumulative Prospect Theory Framework
%U http://ieeexplore.ieee.org/document/8014469/
%V 63
%X Cumulative prospect theory (CPT) is a popular approach for modeling human preferences. It is based on probabilistic distortions and generalizes expected utility theory. We bring CPT to a stochastic optimization framework and propose algorithms for both estimation and optimization of CPT-value objectives. We propose an empirical distribution function-based scheme to estimate the CPT-value and then use this scheme in the inner loop of a CPT-value optimization procedure. We propose both gradient-based as well as gradient-free CPT-value optimization algorithms that are based on two well-known simulation optimization ideas: simultaneous perturbation stochastic approximation (SPSA) and model-based parameter search (MPS), respectively. We provide theoretical convergence guarantees for all the proposed algorithms and also illustrate the potential of CPT-based criteria in a traffic signal control application.
% Journal article: "Stochastic Optimization in a Cumulative Prospect Theory
% Framework", IEEE Transactions on Automatic Control 63(9), 2018.
% Notes for maintainers:
%  - doi is the bare identifier taken from the resolver link in bdsk-url-2.
%  - {Prashanth L.A.} is braced so BibTeX keeps it as one indivisible name;
%    presumably intentional -- confirm before splitting into Last, First.
%  - {Sz} and {Cs} are braced, presumably so the Hungarian digraphs survive
%    as single letters when a style abbreviates names -- confirm with owner.
%  - keywords were regrouped from a token-sorted export into phrases; the set
%    of words is unchanged.
%  - bdsk-*, date-*, rating, read, interhash, intrahash, biburl, added-at and
%    timestamp are tool metadata (BibDesk / BibSonomy) ignored by styles.
@article{JiPrFuMaSz18,
  abstract = {Cumulative prospect theory (CPT) is a popular approach for modeling human preferences. It is based on probabilistic distortions and generalizes expected utility theory. We bring CPT to a stochastic optimization framework and propose algorithms for both estimation and optimization of CPT-value objectives. We propose an empirical distribution function-based scheme to estimate the CPT-value and then use this scheme in the inner loop of a CPT-value optimization procedure. We propose both gradient-based as well as gradient-free CPT-value optimization algorithms that are based on two well-known simulation optimization ideas: simultaneous perturbation stochastic approximation (SPSA) and model-based parameter search (MPS), respectively. We provide theoretical convergence guarantees for all the proposed algorithms and also illustrate the potential of CPT-based criteria in a traffic signal control application.},
  added-at = {2020-03-17T03:03:01.000+0100},
  author = {Jie, C. and {Prashanth L.A.} and Fu, M. C. and Marcus, S. and {Sz}epesv{\'a}ri, {Cs}.},
  bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxBNLi4vLi4vLi4vLi4vTGlicmFyeS5wYXBlcnMzL0ZpbGVzL0Q5L0Q5QkEzMkVGLTMyQ0QtNERBRS04MkQzLTgyNjY2QTgyOUY0Ri5wZGZPEQHaAAAAAAHaAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAAAAAAAAQkQAAf////8fRDlCQTMyRUYtMzJDRC00REFFI0ZGRkZGRkZGLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////wAAAAAAAAAAAAAAAAAEAAQAAAogY3UAAAAAAAAAAAAAAAAAAkQ5AAIAWS86VXNlcnM6Y3NhYmE6RG9jdW1lbnRzOkxpYnJhcnkucGFwZXJzMzpGaWxlczpEOTpEOUJBMzJFRi0zMkNELTREQUUtODJEMy04MjY2NkE4MjlGNEYucGRmAAAOAFIAKABEADkAQgBBADMAMgBFAEYALQAzADIAQwBEAC0ANABEAEEARQAtADgAMgBEADMALQA4ADIANgA2ADYAQQA4ADIAOQBGADQARgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAV1VzZXJzL2NzYWJhL0RvY3VtZW50cy9MaWJyYXJ5LnBhcGVyczMvRmlsZXMvRDkvRDlCQTMyRUYtMzJDRC00REFFLTgyRDMtODI2NjZBODI5RjRGLnBkZgAAEwABLwAAFQACAAz//wAAAAgADQAaACQAdAAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAJS},
  bdsk-url-1 = {http://ieeexplore.ieee.org/document/8014469/},
  bdsk-url-2 = {https://dx.doi.org/10.1109/TAC.2017.2743163},
  biburl = {https://www.bibsonomy.org/bibtex/2a4a65a3d2e7627184cd06bdbdf801676/csaba},
  date-added = {2018-03-11 17:24:12 +0000},
  date-modified = {2019-07-20 10:15:20 -0600},
  doi = {10.1109/TAC.2017.2743163},
  interhash = {c6028afaff1ae3b44a92d2ac48a64558},
  intrahash = {a4a65a3d2e7627184cd06bdbdf801676},
  journal = {IEEE Transactions on Automatic Control},
  keywords = {cumulative prospect theory, Markov Decision Processes, reinforcement learning, risk-sensitive criteria, SPSA, stochastic optimization},
  number = {9},
  pages = {2867--2882},
  pdf = {papers/2018-cpt-rl-tac.pdf},
  rating = {0},
  read = {Yes},
  timestamp = {2020-03-17T03:03:01.000+0100},
  title = {Stochastic Optimization in a Cumulative Prospect Theory Framework},
  url = {http://ieeexplore.ieee.org/document/8014469/},
  volume = {63},
  year = {2018}
}