The results show that fine-tuning with human feedback is a promising direction for aligning language models with human intent: the fine-tuned models show improvements in truthfulness and reductions in toxic output generation, with minimal performance regressions on public NLP datasets.
%0 Journal Article
%1 Ouyang2022TrainingLM
%A Ouyang, Long
%A Wu, Jeff
%A Jiang, Xu
%A Almeida, Diogo
%A Wainwright, Carroll L.
%A Mishkin, Pamela
%A Zhang, Chong
%A Agarwal, Sandhini
%A Slama, Katarina
%A Ray, Alex
%A Schulman, J.
%A Hilton, Jacob
%A Kelton, Fraser
%A Miller, Luke E.
%A Simens, Maddie
%A Askell, Amanda
%A Welinder, P.
%A Christiano, P.
%A Leike, J.
%A Lowe, Ryan J.
%D 2022
%J ArXiv
%K deep-learning machine-learning NLP AI posted_with_chatgpt
%T Training language models to follow instructions with human feedback
%U https://www.semanticscholar.org/paper/d766bffc357127e0dc86dd69561d5aeb520d6f4c
@comment{
  Ouyang et al. (2022), arXiv preprint. Record imported from BibSonomy
  (user tomvoelker); the url field points at the Semantic Scholar page.
  Field notes:
    - author: given names are stored in full where known so the bibliography
      style, not the database, decides how to abbreviate them.
    - eprint / archiveprefix / primaryclass: identify the arXiv preprint;
      journal is kept because the article entry type requires it.
    - added-at, biburl, description, interhash, intrahash, keywords,
      timestamp: BibSonomy bookkeeping, ignored by standard styles.
}
@article{Ouyang2022TrainingLM,
  author        = {Ouyang, Long and Wu, Jeff and Jiang, Xu and Almeida, Diogo and Wainwright, Carroll L. and Mishkin, Pamela and Zhang, Chong and Agarwal, Sandhini and Slama, Katarina and Ray, Alex and Schulman, John and Hilton, Jacob and Kelton, Fraser and Miller, Luke E. and Simens, Maddie and Askell, Amanda and Welinder, Peter and Christiano, Paul and Leike, Jan and Lowe, Ryan J.},
  title         = {Training language models to follow instructions with human feedback},
  journal       = {ArXiv},
  year          = {2022},
  eprint        = {2203.02155},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://www.semanticscholar.org/paper/d766bffc357127e0dc86dd69561d5aeb520d6f4c},
  keywords      = {deep-learning machine-learning NLP AI posted_with_chatgpt},
  description   = {The results show that fine-tuning with human feedback is a promising direction for aligning language models with human intent and showing improvements in truthfulness and reductions in toxic output generation while having minimal performance regressions on public NLP datasets.},
  added-at      = {2023-07-26T00:06:34.000+0200},
  timestamp     = {2023-07-26T00:06:34.000+0200},
  biburl        = {https://www.bibsonomy.org/bibtex/24c8089e40722eb6a56cd05b6f9d1fe2c/tomvoelker},
  interhash     = {f7d728e71594758edda91ecd8c84ee49},
  intrahash     = {4c8089e40722eb6a56cd05b6f9d1fe2c},
}