@conference{Adouane2020,
title = {Identifying Sentiments in Algerian Code-switched User-generated Comments},
author = {Wafia Adouane and Samia Touileb and Jean-Philippe Bernardy},
url = {https://www.aclweb.org/anthology/2020.lrec-1.328.pdf},
year = {2020},
date = {2020-05-06},
journal = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},
pages = {2698–2705},
abstract = {We present in this paper our work on Algerian language, an under-resourced North African colloquial Arabic variety, for which we
built a comparably large corpus of more than 36,000 code-switched user-generated comments annotated for sentiments. We opted
for this data domain because Algerian is a colloquial language with no existing freely available corpora. Moreover, we compiled
sentiment lexicons of positive and negative unigrams and bigrams reflecting the code-switches present in the language. We compare
the performance of four models on the task of identifying sentiments, and the results indicate that a CNN model trained end-to-end fits
better our unedited code-switched and unbalanced data across the predefined sentiment classes. Additionally, injecting the lexicons as
background knowledge to the model boosts its performance on the minority class with a gain of 10.54 points on the F-score. The results
of our experiments can be used as a baseline for future research for Algerian sentiment analysis.
},
note = {Pre SFI},
keywords = {Algerian Arabic, code-switching, sentiment analysis, under-resourced colloquial languages, user-generated data, WP5: Norwegian Language Technologies},
pubstate = {published},
tppubtype = {conference}
}