% arXiv preprint 2402.14228 (listed under CoRR); record exported from dblp via BibSonomy.
% The acronym in the title is brace-protected so sentence-casing styles keep it
% upper-case, and the title carries no trailing period because styles add their own.
% The ee, biburl, interhash, intrahash, added-at and timestamp fields are exporter
% metadata that standard styles ignore.
@article{journals/corr/abs-2402-14228,
  author        = {Zhang, Han and Gui, Lin and Lei, Yu and Zhai, Yuanzhao and Zhang, Yehong and He, Yulan and Wang, Hui and Yu, Yue and Wong, Kam-Fai and Liang, Bin and Xu, Ruifeng},
  title         = {{COPR}: Continual Human Preference Learning via Optimal Policy Regularization},
  journal       = {CoRR},
  volume        = {abs/2402.14228},
  year          = {2024},
  eprint        = {2402.14228},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2402.14228},
  url           = {http://dblp.uni-trier.de/db/journals/corr/corr2402.html#abs-2402-14228},
  ee            = {https://doi.org/10.48550/arXiv.2402.14228},
  keywords      = {dblp},
  biburl        = {https://www.bibsonomy.org/bibtex/282994937c7b07d326662ba84713c108e/dblp},
  interhash     = {c5525a30ac0fd90aefa21059d02f0eda},
  intrahash     = {82994937c7b07d326662ba84713c108e},
  added-at      = {2024-07-17T00:00:00.000+0200},
  timestamp     = {2024-07-22T07:09:43.000+0200},
}