A monitoring system is proposed to detect violent content in Arabic social media. This is a new and challenging task due to the presence of various Arabic dialects in social media and the non-violent contexts in which violent words might be used. We propose to use a probabilistic nonlinear dimensionality reduction technique called the sparse Gaussian process latent variable model (SGPLVM), followed by k-means, to separate violent from non-violent content. This framework does not require any labelled corpora for training. We show that violent and non-violent Arabic tweets are not separable using k-means in the original high-dimensional space; however, better results are achieved by clustering in the low-dimensional latent space of the SGPLVM.
%0 Journal Article
%1 dhinaharannagamalai2017unsupervised
%A Abdelfatah, Kareem E.
%A Terejanu, Gabriel
%A Alhelbawy, Ayman A.
%D 2017
%E Nagamalai, Dhinaharan
%E Meghanathan, Natarajan
%J Computer Science & Information Technology (CS & IT)
%K Arabic Social Media, Dimensionality Reduction, SGPLVM, Unsupervised learning
%N 4
%T Unsupervised Detection of Violent Content in Arabic Social Media
%U http://airccse.org/V7N66.html
%V 7
%X A monitoring system is proposed to detect violent content in Arabic social media. This is a new and challenging task due to the presence of various Arabic dialects in social media and the non-violent contexts in which violent words might be used. We propose to use a probabilistic nonlinear dimensionality reduction technique called the sparse Gaussian process latent variable model (SGPLVM), followed by k-means, to separate violent from non-violent content. This framework does not require any labelled corpora for training. We show that violent and non-violent Arabic tweets are not separable using k-means in the original high-dimensional space; however, better results are achieved by clustering in the low-dimensional latent space of the SGPLVM.
@comment{Review notes for dhinaharannagamalai2017unsupervised:
  - author/editor lists now use " and " separators in "Last, First" form;
    the exported value used commas and a stray brace group, which BibTeX
    parses as a single garbled name.
  - The exported record had month = {1-7}, which is not a month. It is
    assumed to be the page range and is stored as pages = {1--7}.
    TODO: confirm against the published paper.
  - keywords were regrouped into the multi-word terms the exporter had
    scrambled; ampersands in the journal name are escaped for LaTeX.
  - The citation key and entry type are unchanged so existing citations
    keep working.}
@article{dhinaharannagamalai2017unsupervised,
  author    = {Abdelfatah, Kareem E. and Terejanu, Gabriel and Alhelbawy, Ayman A.},
  title     = {Unsupervised Detection of Violent Content in {Arabic} Social Media},
  journal   = {Computer Science \& Information Technology (CS \& IT)},
  editor    = {Nagamalai, Dhinaharan and Meghanathan, Natarajan},
  volume    = {7},
  number    = {4},
  pages     = {1--7},
  year      = {2017},
  url       = {http://airccse.org/V7N66.html},
  keywords  = {Arabic Social Media, Dimensionality Reduction, SGPLVM, Unsupervised learning},
  abstract  = {A monitoring system is proposed to detect violent content in Arabic social media. This is a new and challenging task due to the presence of various Arabic dialects in social media and the non-violent contexts in which violent words might be used. We propose to use a probabilistic nonlinear dimensionality reduction technique called the sparse Gaussian process latent variable model (SGPLVM), followed by k-means, to separate violent from non-violent content. This framework does not require any labelled corpora for training. We show that violent and non-violent Arabic tweets are not separable using k-means in the original high-dimensional space; however, better results are achieved by clustering in the low-dimensional latent space of the SGPLVM.},
  added-at  = {2017-03-21T00:36:03.000+0100},
  timestamp = {2017-03-21T00:36:03.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/23a77af13aa5f86a41d3815b46979fb79/laimbee},
  interhash = {b861cb02e6d1145d0e74baecd1b5a251},
  intrahash = {3a77af13aa5f86a41d3815b46979fb79},
}