@jonaskaiser 4 years ago
(last updated 4 years ago)
In der Ausarbeitung genutzt, um die Add-&-Norm-Schicht des Transformers zu erläutern. Untersucht die Bedeutung der Layer Normalization im Kontext des Transformers.
References
Bookmarks
deleting review
Please log in to take part in the discussion (add own reviews or comments).
% arXiv preprint, recorded the DBLP way (journal = CoRR, volume = abs/<id>).
% The citation key is unchanged so existing citations keep resolving.
% eprint/archiveprefix carry the arXiv identifier for eprint-aware styles;
% ee, biburl, interhash, intrahash, added-at and timestamp are DBLP/BibSonomy
% bookkeeping fields that standard styles ignore.
@article{journals/corr/abs-2002-04745,
  author        = {Xiong, Ruibin and Yang, Yunchang and He, Di and Zheng, Kai and Zheng, Shuxin and Xing, Chen and Zhang, Huishuai and Lan, Yanyan and Wang, Liwei and Liu, Tie-Yan},
  title         = {On Layer Normalization in the {Transformer} Architecture},
  journal       = {CoRR},
  volume        = {abs/2002.04745},
  year          = {2020},
  eprint        = {2002.04745},
  archiveprefix = {arXiv},
  url           = {http://dblp.uni-trier.de/db/journals/corr/corr2002.html#abs-2002-04745},
  ee            = {https://arxiv.org/abs/2002.04745},
  biburl        = {https://www.bibsonomy.org/bibtex/235129deea8818cf5eac7967b0dbb4609/jonaskaiser},
  interhash     = {4c0ba8cd34fcfaaad43dd0bb6c7c72ac},
  intrahash     = {35129deea8818cf5eac7967b0dbb4609},
  added-at      = {2020-07-14T17:47:18.000+0200},
  timestamp     = {2020-07-14T17:47:18.000+0200},
}