The paper describes a named entity recognition system for Amharic, an under-resourced language, using a recurrent neural network, a bi-directional long short term memory model to identify and classify tokens into six predefined classes: Person, Location, Organization, Time, Title, and Other (non-named entity tokens). Word vectors based on semantic information are built for all tokens using an unsupervised learning algorithm, word2vec. The word vectors were merged with a set of specifically developed language independent features and together fed to the neural network model to predict the classes of the words. When evaluated by 10-fold cross-validation, the created Amharic named entity recogniser achieved good average precision (77.2%), but did worse on recall (63.4%), for a 69.7% F1-score.
Beschreibung
Named entity recognition for Amharic using deep learning - IEEE Conference Publication
%0 Conference Paper
%1 8102402
%A Gambäck, B.
%A Sikdar, U. K.
%B 2017 IST-Africa Week Conference (IST-Africa)
%D 2017
%K NER amharic deepLearning ethiopic
%P 1-8
%R 10.23919/ISTAFRICA.2017.8102402
%T Named entity recognition for Amharic using deep learning
%U http://ieeexplore.ieee.org/abstract/document/8102402/
%X The paper describes a named entity recognition system for Amharic, an under-resourced language, using a recurrent neural network, a bi-directional long short term memory model to identify and classify tokens into six predefined classes: Person, Location, Organization, Time, Title, and Other (non-named entity tokens). Word vectors based on semantic information are built for all tokens using an unsupervised learning algorithm, word2vec. The word vectors were merged with a set of specifically developed language independent features and together fed to the neural network model to predict the classes of the words. When evaluated by 10-fold cross-validation, the created Amharic named entity recogniser achieved good average precision (77.2%), but did worse on recall (63.4%), for a 69.7% F1-score.
% Conference paper record exported from BibSonomy (see biburl).
% Percent signs in the abstract are escaped as \% so the field stays valid
% when a style or Biber writes it into LaTeX output; the page range uses
% "--"; proper nouns are brace-protected against style recasing.
@inproceedings{8102402,
  author      = {Gamb{\"a}ck, B. and Sikdar, U. K.},
  title       = {Named entity recognition for {Amharic} using deep learning},
  booktitle   = {2017 {IST-Africa} Week Conference ({IST-Africa})},
  year        = 2017,
  month       = may,
  pages       = {1--8},
  doi         = {10.23919/ISTAFRICA.2017.8102402},
  url         = {http://ieeexplore.ieee.org/abstract/document/8102402/},
  abstract    = {The paper describes a named entity recognition system for Amharic, an under-resourced language, using a recurrent neural network, a bi-directional long short term memory model to identify and classify tokens into six predefined classes: Person, Location, Organization, Time, Title, and Other (non-named entity tokens). Word vectors based on semantic information are built for all tokens using an unsupervised learning algorithm, word2vec. The word vectors were merged with a set of specifically developed language independent features and together fed to the neural network model to predict the classes of the words. When evaluated by 10-fold cross-validation, the created Amharic named entity recogniser achieved good average precision (77.2\%), but did worse on recall (63.4\%), for a 69.7\% F1-score.},
  keywords    = {NER amharic deepLearning ethiopic},
  description = {Named entity recognition for Amharic using deep learning - IEEE Conference Publication},
  added-at    = {2018-02-26T09:43:28.000+0100},
  timestamp   = {2018-02-26T09:43:28.000+0100},
  biburl      = {https://www.bibsonomy.org/bibtex/2901e1863f970a171d26409f697b4b3ba/asmelash},
  interhash   = {237f27f659f405ec4106ac1e2c2ef8ee},
  intrahash   = {901e1863f970a171d26409f697b4b3ba},
}