2020
|
Touileb, Samia; Øvrelid, Lilja; Velldal, Erik Gender and sentiment, critics and authors: a dataset of Norwegian book reviews Journal Article Gender Bias in Natural Language Processing. Association for Computational Linguistics, 2020, (Pre SFI). Abstract | Links | BibTeX @article{Touileb2020,
title = {Gender and sentiment, critics and authors: a dataset of Norwegian book reviews},
author = {Samia Touileb and Lilja Øvrelid and Erik Velldal},
url = {https://www.aclweb.org/anthology/2020.gebnlp-1.11.pdf},
year = {2020},
date = {2020-12-01},
journal = {Gender Bias in Natural Language Processing},
publisher = {Association for Computational Linguistics},
abstract = {Gender bias in models and datasets is widely studied in NLP. The focus has usually been on analysing how females and males express themselves, or how females and males are described. However, a less studied aspect is the combination of these two perspectives, how female and male describe the same or opposite gender. In this paper, we present a new gender annotated sentiment dataset of critics reviewing the works of female and male authors. We investigate if this newly annotated dataset contains differences in how the works of male and female authors are critiqued, in particular in terms of positive and negative sentiment. We also explore the differences in how this is done by male and female critics. We show that there are differences in how critics assess the works of authors of the same or opposite gender. For example, male critics rate crime novels written by females, and romantic and sentimental works written by males, more negatively.},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Gender bias in models and datasets is widely studied in NLP. The focus has usually been on analysing how females and males express themselves, or how females and males are described. However, a less studied aspect is the combination of these two perspectives, how female and male describe the same or opposite gender. In this paper, we present a new gender annotated sentiment dataset of critics reviewing the works of female and male authors. We investigate if this newly annotated dataset contains differences in how the works of male and female authors are critiqued, in particular in terms of positive and negative sentiment. We also explore the differences in how this is done by male and female critics. We show that there are differences in how critics assess the works of authors of the same or opposite gender. For example, male critics rate crime novels written by females, and romantic and sentimental works written by males, more negatively. |
Barnes, J; Velldal, Erik; Øvrelid, Lilja Improving sentiment analysis with multi-task learning of negation Journal Article 2020, (Pre SFI). Links | BibTeX @article{Barnes2020,
title = {Improving sentiment analysis with multi-task learning of negation},
author = {Jeremy Barnes and Erik Velldal and Lilja Øvrelid},
url = {https://www.cambridge.org/core/journals/natural-language-engineering/article/abs/improving-sentiment-analysis-with-multitask-learning-of-negation/14EF2B829EC4B8EC29E7C0C5C77B95B0},
year = {2020},
date = {2020-11-11},
journal = {Natural Language Engineering},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
|
Barnes, J; Øvrelid, Lilja; Velldal, Erik Sentiment analysis is not solved! Assessing and probing sentiment classification Proceeding 2020, (Pre SFI). Links | BibTeX @inproceedings{Barnes2020b,
title = {Sentiment analysis is not solved! Assessing and probing sentiment classification},
author = {Jeremy Barnes and Lilja Øvrelid and Erik Velldal},
url = {https://www.aclweb.org/anthology/W19-4802/},
year = {2020},
date = {2020-08-01},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Adouane, Wafia; Touileb, Samia; Bernardy, Jean-Philippe Identifying Sentiments in Algerian Code-switched User-generated Comments Conference 2020, (Pre SFI). Abstract | Links | BibTeX @conference{Adouane2020,
title = {Identifying Sentiments in Algerian Code-switched User-generated Comments},
author = {Wafia Adouane and Samia Touileb and Jean-Philippe Bernardy},
url = {https://www.aclweb.org/anthology/2020.lrec-1.328.pdf},
year = {2020},
date = {2020-05-06},
booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},
pages = {2698--2705},
abstract = {We present in this paper our work on Algerian language, an under-resourced North African colloquial Arabic variety, for which we
built a comparably large corpus of more than 36,000 code-switched user-generated comments annotated for sentiments. We opted
for this data domain because Algerian is a colloquial language with no existing freely available corpora. Moreover, we compiled
sentiment lexicons of positive and negative unigrams and bigrams reflecting the code-switches present in the language. We compare
the performance of four models on the task of identifying sentiments, and the results indicate that a CNN model trained end-to-end fits
better our unedited code-switched and unbalanced data across the predefined sentiment classes. Additionally, injecting the lexicons as
background knowledge to the model boosts its performance on the minority class with a gain of 10.54 points on the F-score. The results
of our experiments can be used as a baseline for future research for Algerian sentiment analysis.
},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
We present in this paper our work on Algerian language, an under-resourced North African colloquial Arabic variety, for which we
built a comparably large corpus of more than 36,000 code-switched user-generated comments annotated for sentiments. We opted
for this data domain because Algerian is a colloquial language with no existing freely available corpora. Moreover, we compiled
sentiment lexicons of positive and negative unigrams and bigrams reflecting the code-switches present in the language. We compare
the performance of four models on the task of identifying sentiments, and the results indicate that a CNN model trained end-to-end fits
better our unedited code-switched and unbalanced data across the predefined sentiment classes. Additionally, injecting the lexicons as
background knowledge to the model boosts its performance on the minority class with a gain of 10.54 points on the F-score. The results
of our experiments can be used as a baseline for future research for Algerian sentiment analysis.
|
Meurer, P; Rosén, V; Smedt, Koenraad De Interactive Visualizations in INESS Book Chapter Butt, M; Hautli-Janisz, A; (Eds.), Lyding V (Ed.): 2020, (Pre SFI). Links | BibTeX @inbook{Meurer2020,
title = {Interactive Visualizations in INESS},
author = {P Meurer and V Rosén and De Smedt, Koenraad},
editor = {M. Butt and A. Hautli-Janisz and V. Lyding},
url = {https://web.stanford.edu/group/cslipublications/cslipublications/site/9781684000333.shtml},
year = {2020},
date = {2020-05-01},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {inbook}
}
|
Øvrelid, Lilja; Mæhlum, P; Barnes, J; Velldal, Erik A Fine-Grained Sentiment Dataset for Norwegian Proceeding 2020, (Pre SFI). Links | BibTeX @inproceedings{Øvrelid2020,
title = {A Fine-Grained Sentiment Dataset for Norwegian},
author = {Lilja Øvrelid and P Mæhlum and Jeremy Barnes and Erik Velldal},
url = {https://www.aclweb.org/anthology/2020.lrec-1.618/},
year = {2020},
date = {2020-05-01},
booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Jørgensen, F; Aasmoe, T; Husevåg, ASR; Øvrelid, Lilja; Velldal, Erik (Ed.) NorNE: Annotating Named Entities for Norwegian Proceeding 2020, (Pre SFI). Links | BibTeX @inproceedings{Jørgensen2020,
title = {NorNE: Annotating Named Entities for Norwegian},
author = {F Jørgensen and T Aasmoe and ASR Husevåg and Lilja Øvrelid and Erik Velldal},
url = {https://oda.oslomet.no/handle/10642/8830},
year = {2020},
date = {2020-05-01},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Lison, Pierre; Hubin, Aliaksandr; Barnes, Jeremy; Touileb, Samia Named Entity Recognition without Labelled Data: A Weak Supervision Approach Journal Article Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 1518–1533, 2020, (Pre SFI). Abstract | Links | BibTeX @inproceedings{Lison2020,
title = {Named Entity Recognition without Labelled Data: A Weak Supervision Approach},
author = {Pierre Lison and Aliaksandr Hubin and Jeremy Barnes and Samia Touileb},
url = {https://arxiv.org/pdf/2004.14723.pdf},
year = {2020},
date = {2020-04-30},
booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
pages = {1518--1533},
abstract = {Named Entity Recognition (NER) performance often degrades rapidly when applied to target domains that differ from the texts observed during training. When in-domain labelled data is available, transfer learning techniques can be used to adapt existing NER models to the target domain. But what should one do when there is no hand-labelled data for the target domain? This paper presents a simple but powerful approach to learn NER models in the absence of labelled data through weak supervision. The approach relies on a broad spectrum of labelling functions to automatically annotate texts from the target domain. These annotations are then merged together using a hidden Markov model which captures the varying accuracies and confusions of the labelling functions. A sequence labelling model can finally be trained on the basis of this unified annotation. We evaluate the approach on two English datasets (CoNLL 2003 and news articles from Reuters and Bloomberg) and demonstrate an improvement of about 7 percentage points in entity-level F1 scores compared to an out-of-domain neural NER model.},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Named Entity Recognition (NER) performance often degrades rapidly when applied to target domains that differ from the texts observed during training. When in-domain labelled data is available, transfer learning techniques can be used to adapt existing NER models to the target domain. But what should one do when there is no hand-labelled data for the target domain? This paper presents a simple but powerful approach to learn NER models in the absence of labelled data through weak supervision. The approach relies on a broad spectrum of labelling functions to automatically annotate texts from the target domain. These annotations are then merged together using a hidden Markov model which captures the varying accuracies and confusions of the labelling functions. A sequence labelling model can finally be trained on the basis of this unified annotation. We evaluate the approach on two English datasets (CoNLL 2003 and news articles from Reuters and Bloomberg) and demonstrate an improvement of about 7 percentage points in entity-level F1 scores compared to an out-of-domain neural NER model. |
de Smedt, Koenraad; Koureas, D; Wittenberg, P FAIR Digital Objects for Science: From Data Pieces to Actionable Knowledge Units Journal Article 2020, (Pre SFI). Links | BibTeX @article{deSmedt2020,
title = {FAIR Digital Objects for Science: From Data Pieces to Actionable Knowledge Units},
author = {De Smedt, Koenraad and D Koureas and P Wittenberg},
url = {https://ideas.repec.org/a/gam/jpubli/v8y2020i2p21-d344422.html},
year = {2020},
date = {2020-04-01},
journal = {Publications},
volume = {8},
number = {2},
pages = {21},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
|
2019
|
Barnes, Jeremy; Touileb, Samia; Øvrelid, Lilja; Velldal, Erik Lexicon information in neural sentiment analysis: a multi-task learning approach Conference Linköping University Electronic Press, 2019, (Pre SFI). Abstract | Links | BibTeX @conference{Barnes2019,
title = {Lexicon information in neural sentiment analysis: a multi-task learning approach},
author = {Jeremy Barnes and Samia Touileb and Lilja Øvrelid and Erik Velldal},
url = {https://www.aclweb.org/anthology/W19-6119.pdf},
year = {2019},
date = {2019-10-01},
booktitle = {Proceedings of the 22nd Nordic Conference on Computational Linguistics (NoDaLiDa)},
pages = {175--186},
publisher = {Linköping University Electronic Press},
abstract = {This paper explores the use of multi-task learning (MTL) for incorporating external knowledge in neural models. Specifically, we show how MTL can enable a BiLSTM sentiment classifier to incorporate information from sentiment lexicons. Our MTL set-up is shown to improve model performance (compared to a single-task set-up) on both English and Norwegian sentence-level sentiment datasets. The paper also introduces a new sentiment lexicon for Norwegian.},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
This paper explores the use of multi-task learning (MTL) for incorporating external knowledge in neural models. Specifically, we show how MTL can enable a BiLSTM sentiment classifier to incorporate information from sentiment lexicons. Our MTL set-up is shown to improve model performance (compared to a single-task set-up) on both English and Norwegian sentence-level sentiment datasets. The paper also introduces a new sentiment lexicon for Norwegian. |
2018
|
Kutuzov, A; Øvrelid, Lilja; Szymanski, T; Velldal, Erik Diachronic word embeddings and semantic shifts: a survey Proceeding 2018, (Pre SFI). Links | BibTeX @inproceedings{Kutuzov2018,
title = {Diachronic word embeddings and semantic shifts: a survey},
author = {A Kutuzov and Lilja Øvrelid and T Szymanski and Erik Velldal},
url = {https://www.aclweb.org/anthology/C18-1117/},
year = {2018},
date = {2018-08-01},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Velldal, Erik; Øvrelid, Lilja; Bergem, Eivind Alexander; Stadsnes, Cathrine; Touileb, Samia; Jørgensen, Fredrik NoReC: The Norwegian Review Corpus Proceeding 2018, (Pre SFI). Abstract | BibTeX @inproceedings{Velldal2018,
title = {NoReC: The Norwegian Review Corpus},
author = {Erik Velldal and Lilja Øvrelid and Eivind Alexander Bergem and Cathrine Stadsnes and Samia Touileb and Fredrik Jørgensen},
url = {https://repo.clarino.uib.no/xmlui/handle/11509/124},
year = {2018},
date = {2018-05-12},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
https://repo.clarino.uib.no/xmlui/handle/11509/124 |
2017
|
Touileb, Samia; Pedersen, Truls; Sjøvaag, Helle Automatic identification of unknown names with specific roles Journal Article Proceedings of the Second Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature, pp. 150-158, 2017, (Pre SFI). Abstract | Links | BibTeX @inproceedings{Touileb2017,
title = {Automatic identification of unknown names with specific roles},
author = {Samia Touileb and Truls Pedersen and Helle Sjøvaag},
url = {https://www.aclweb.org/anthology/W18-4517.pdf},
year = {2017},
date = {2017-08-01},
booktitle = {Proceedings of the Second Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature},
pages = {150--158},
abstract = {Automatically identifying persons in a particular role within a large corpus can be a difficult task, especially if you don’t know who you are actually looking for. Resources compiling names of persons can be available, but no exhaustive lists exist. However, such lists usually contain known names that are “visible” in the national public sphere, and tend to ignore the marginal and international ones. In this article we propose a method for automatically generating suggestions of names found in a corpus of Norwegian news articles, and which “naturally” belong to a given initial list of members, and that were not known (compiled in a list) beforehand. The approach is based, in part, on the assumption that surface level syntactic features reveal parts of the underlying semantic content and can help uncover the structure of the language.},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Automatically identifying persons in a particular role within a large corpus can be a difficult task, especially if you don’t know who you are actually looking for. Resources compiling names of persons can be available, but no exhaustive lists exist. However, such lists usually contain known names that are “visible” in the national public sphere, and tend to ignore the marginal and international ones. In this article we propose a method for automatically generating suggestions of names found in a corpus of Norwegian news articles, and which “naturally” belong to a given initial list of members, and that were not known (compiled in a list) beforehand. The approach is based, in part, on the assumption that surface level syntactic features reveal parts of the underlying semantic content and can help uncover the structure of the language. |
Fares, M; Kutuzov, A; Oepen, S; Velldal, Erik Word vectors, reuse, and replicability: Towards a community repository of large-text resources Proceeding 2017, (Pre SFI). Links | BibTeX @inproceedings{Fares2017,
title = {Word vectors, reuse, and replicability: Towards a community repository of large-text resources},
author = {M Fares and A Kutuzov and S Oepen and Erik Velldal},
url = {https://www.duo.uio.no/handle/10852/65205},
year = {2017},
date = {2017-05-22},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
2016
|
Rosén, V; Thunes, M; Haugereid, P; Losnegaard, GS; Dyvik, H; Meurer, P; Lyse, G; Smedt, Koenraad De The enrichment of lexical resources through incremental parsebanking Journal Article 2016, (Pre SFI). Links | BibTeX @article{Rosén2016,
title = {The enrichment of lexical resources through incremental parsebanking},
author = {V Rosén and M Thunes and P Haugereid and GS Losnegaard and H Dyvik and P Meurer and G Lyse and De Smedt, Koenraad},
url = {https://bora.uib.no/bora-xmlui/handle/1956/15680},
year = {2016},
date = {2016-06-01},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
|
Dyvik, H; Meurer, P; Rosén, V; Smedt, Koenraad De; Haugereid, P; Losnegaard, GS; Lyse, G; Thunes, M NorGramBank: A 'Deep' Treebank for Norwegian.Proceedings of LREC Proceeding 2016, (Pre SFI). Links | BibTeX @inproceedings{Dyvik2016,
title = {NorGramBank: A 'Deep' Treebank for Norwegian},
author = {H Dyvik and P Meurer and V Rosén and De Smedt, Koenraad and P Haugereid and GS Losnegaard and G Lyse and M Thunes},
url = {https://www.aclweb.org/anthology/L16-1565.pdf},
year = {2016},
date = {2016-05-16},
booktitle = {Proceedings of LREC},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Rosén, V; Smedt, Koenraad De; Losnegaard, GS; Bejcek, E; Savary, A; Osenova, P MWEs in Treebanks: From Survey to Guidelines Proceeding 2016, (Pre SFI). Links | BibTeX @inproceedings{Rosén2016b,
title = {MWEs in Treebanks: From Survey to Guidelines},
author = {V Rosén and De Smedt, Koenraad and GS Losnegaard and E Bejcek and A Savary and P Osenova},
url = {https://www.aclweb.org/anthology/L16-1368.pdf},
year = {2016},
date = {2016-05-01},
booktitle = {Proceedings of LREC},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Øvrelid, Lilja; Hohle, P Universal dependencies for Norwegian Proceeding 2016, (Pre SFI). Links | BibTeX @inproceedings{Øvrelid2016,
title = {Universal dependencies for Norwegian},
author = {Lilja Øvrelid and P Hohle},
url = {https://www.aclweb.org/anthology/L16-1250/},
year = {2016},
date = {2016-05-01},
booktitle = {Proceedings of LREC},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
2012
|
Lapponi, E; Read, J; Øvrelid, Lilja Representing and resolving negation for sentiment analysis Proceeding 2012, (Pre SFI). Links | BibTeX @inproceedings{Lapponi2012,
title = {Representing and resolving negation for sentiment analysis},
author = {E Lapponi and J Read and Lilja Øvrelid},
url = {https://ieeexplore.ieee.org/document/6406506},
year = {2012},
date = {2012-12-10},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Velldal, Erik; Øvrelid, Lilja; Read, J; Oepen, S Speculation and negation: Rules, rankers, and the role of syntax Journal Article 2012, (Pre SFI). Links | BibTeX @article{Velldal2012,
title = {Speculation and negation: Rules, rankers, and the role of syntax},
author = {Erik Velldal and Lilja Øvrelid and J Read and S Oepen},
url = {https://www.mitpressjournals.org/doi/pdf/10.1162/COLI_a_00126},
doi = {10.1162/COLI_a_00126},
year = {2012},
date = {2012-01-01},
journal = {Computational Linguistics},
note = {Pre SFI},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
|