We propose the Gaussian Error Linear Unit (GELU), a high-performing neural
network activation function. The GELU nonlinearity is the expected
transformation of a stochastic regularizer which randomly applies the identity
or zero map to a neuron's input. The GELU nonlinearity weights inputs by their
value, rather than gating them by their sign as in ReLUs. We perform an
empirical evaluation of the GELU nonlinearity against the ReLU and ELU
activations and find performance improvements across all considered computer
vision, natural language processing, and speech tasks.
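
For reference, the paper defines the GELU as GELU(x) = x * Phi(x), where Phi
is the standard Gaussian CDF, and also gives a tanh-based approximation. A
minimal sketch in plain Python (the function names are ours, not the paper's):

    import math

    def gelu(x: float) -> float:
        # Exact form: x * Phi(x), with Phi the standard normal CDF,
        # written here via the error function.
        return 0.5 * x * (1.0 + math.erf(x / math.sqrt(2.0)))

    def gelu_tanh(x: float) -> float:
        # Tanh approximation given in the paper:
        # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x**3)))
        return 0.5 * x * (1.0 + math.tanh(
            math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))

Note how negative inputs are shrunk smoothly toward zero rather than clipped
as in a ReLU, which is the "weighting by value" the abstract describes.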
@misc{hendrycks2016gaussian,
author = {Hendrycks, Dan and Gimpel, Kevin},
title = {Gaussian Error Linear Units (GELUs)},
url = {http://arxiv.org/abs/1606.08415},
year = 2016
}