This survey reviews works in which language models (LMs) are augmented with
reasoning skills and the ability to use tools. The former is defined as
decomposing a potentially complex task into simpler subtasks while the latter
consists in calling external modules such as a code interpreter. LMs can
leverage these augmentations separately or in combination via heuristics, or
learn to do so from demonstrations. While adhering to a standard missing tokens
prediction objective, such augmented LMs can use various, possibly
non-parametric external modules to expand their context processing ability,
thus departing from the pure language modeling paradigm. We therefore refer to
them as Augmented Language Models (ALMs). The missing token objective allows
ALMs to learn to reason, use tools, and even act, while still performing
standard natural language tasks and even outperforming most regular LMs on
several benchmarks. In this work, after reviewing current advance in ALMs, we
conclude that this new research direction has the potential to address common
limitations of traditional LMs such as interpretability, consistency, and
scalability issues.
%0 Generic
%1 mialon2023augmented
%A Mialon, Grégoire
%A Dessì, Roberto
%A Lomeli, Maria
%A Nalmpantis, Christoforos
%A Pasunuru, Ram
%A Raileanu, Roberta
%A Rozière, Baptiste
%A Schick, Timo
%A Dwivedi-Yu, Jane
%A Celikyilmaz, Asli
%A Grave, Edouard
%A LeCun, Yann
%A Scialom, Thomas
%D 2023
%K machinelearn
%T Augmented Language Models: a Survey
%U http://arxiv.org/abs/2302.07842
%X This survey reviews works in which language models (LMs) are augmented with
reasoning skills and the ability to use tools. The former is defined as
decomposing a potentially complex task into simpler subtasks while the latter
consists in calling external modules such as a code interpreter. LMs can
leverage these augmentations separately or in combination via heuristics, or
learn to do so from demonstrations. While adhering to a standard missing tokens
prediction objective, such augmented LMs can use various, possibly
non-parametric external modules to expand their context processing ability,
thus departing from the pure language modeling paradigm. We therefore refer to
them as Augmented Language Models (ALMs). The missing token objective allows
ALMs to learn to reason, use tools, and even act, while still performing
standard natural language tasks and even outperforming most regular LMs on
several benchmarks. In this work, after reviewing current advance in ALMs, we
conclude that this new research direction has the potential to address common
limitations of traditional LMs such as interpretability, consistency, and
scalability issues.
@comment{Review: arXiv identifier moved from free-text note into structured
  eprint/archiveprefix/primaryclass fields; DOI added (arXiv-registered DOIs
  follow the 10.48550/arXiv.<id> scheme); title acronym brace-protected
  against style recasing. All original BibSonomy metadata fields retained.}
@misc{mialon2023augmented,
  abstract      = {This survey reviews works in which language models (LMs) are augmented with
reasoning skills and the ability to use tools. The former is defined as
decomposing a potentially complex task into simpler subtasks while the latter
consists in calling external modules such as a code interpreter. LMs can
leverage these augmentations separately or in combination via heuristics, or
learn to do so from demonstrations. While adhering to a standard missing tokens
prediction objective, such augmented LMs can use various, possibly
non-parametric external modules to expand their context processing ability,
thus departing from the pure language modeling paradigm. We therefore refer to
them as Augmented Language Models (ALMs). The missing token objective allows
ALMs to learn to reason, use tools, and even act, while still performing
standard natural language tasks and even outperforming most regular LMs on
several benchmarks. In this work, after reviewing current advance in ALMs, we
conclude that this new research direction has the potential to address common
limitations of traditional LMs such as interpretability, consistency, and
scalability issues.},
  added-at      = {2023-02-19T21:54:07.000+0100},
  archiveprefix = {arXiv},
  author        = {Mialon, Grégoire and Dessì, Roberto and Lomeli, Maria and Nalmpantis, Christoforos and Pasunuru, Ram and Raileanu, Roberta and Rozière, Baptiste and Schick, Timo and Dwivedi-Yu, Jane and Celikyilmaz, Asli and Grave, Edouard and LeCun, Yann and Scialom, Thomas},
  biburl        = {https://www.bibsonomy.org/bibtex/2ec6b3fa7640bc90919cfb7c0b7ce84f5/cmcneile},
  description   = {Augmented Language Models: a Survey},
  doi           = {10.48550/arXiv.2302.07842},
  eprint        = {2302.07842},
  interhash     = {da3dbeab5df8414fd24bd37797a880c8},
  intrahash     = {ec6b3fa7640bc90919cfb7c0b7ce84f5},
  keywords      = {machinelearn},
  primaryclass  = {cs.CL},
  timestamp     = {2023-02-19T21:54:07.000+0100},
  title         = {{Augmented Language Models}: a Survey},
  url           = {http://arxiv.org/abs/2302.07842},
  year          = {2023},
}