Unsupervised vector-based approaches to semantics can model rich lexical meanings, but they largely fail to capture sentiment information that is central to many word meanings and important for a wide range of NLP tasks. We present a model that uses a mix of unsupervised and supervised techniques to learn word vectors capturing semantic term--document information as well as rich sentiment content. The proposed model can leverage both continuous and multi-dimensional sentiment information as well as non-sentiment annotations. We instantiate the model to utilize the document-level sentiment polarity annotations present in many online documents (e.g. star ratings). We evaluate the model using small, widely used sentiment and subjectivity corpora and find it out-performs several previously introduced methods for sentiment classification. We also introduce a large dataset of movie reviews to serve as a more robust benchmark for work in this area.
Description
This paper focuses on learning word vectors for sentiment analysis of IMDB movie reviews, providing insights into the application of machine learning in analyzing sentiments in the domain of movie reviews.
%0 Generic
%1 Andrew2011
%A Maas, Andrew L.
%A Daly, Raymond E.
%A Pham, Peter T.
%A Huang, Dan
%A Ng, Andrew Y.
%A Potts, Christopher
%D 2011
%K sentiment-analysis machine-learning IMDB movie-reviews related_works AI related_works_benchmark posted_with_chatgpt
%P 142-150
%T Learning Word Vectors for Sentiment Analysis
%U https://www.semanticscholar.org/paper/649d03490ef72c5274e3bccd03d7a299d2f8da91
%X Unsupervised vector-based approaches to semantics can model rich lexical meanings, but they largely fail to capture sentiment information that is central to many word meanings and important for a wide range of NLP tasks. We present a model that uses a mix of unsupervised and supervised techniques to learn word vectors capturing semantic term--document information as well as rich sentiment content. The proposed model can leverage both continuous and multi-dimensional sentiment information as well as non-sentiment annotations. We instantiate the model to utilize the document-level sentiment polarity annotations present in many online documents (e.g. star ratings). We evaluate the model using small, widely used sentiment and subjectivity corpora and find it out-performs several previously introduced methods for sentiment classification. We also introduce a large dataset of movie reviews to serve as a more robust benchmark for work in this area.
@comment{Maas et al. 2011, conference paper from the ACL-HLT 2011 proceedings. The citation key is kept exactly as exported so that existing citations still resolve. The fields abstract, description, keywords, day, added-at, timestamp, biburl, interhash and intrahash are bookmark-export metadata; they are not standard bibliography fields and are retained only for provenance.}
@inproceedings{Andrew2011,
  author      = {Maas, Andrew L. and Daly, Raymond E. and Pham, Peter T. and Huang, Dan and Ng, Andrew Y. and Potts, Christopher},
  title       = {Learning Word Vectors for Sentiment Analysis},
  booktitle   = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},
  publisher   = {Association for Computational Linguistics},
  pages       = {142--150},
  year        = {2011},
  month       = jun,
  day         = {19},
  url         = {https://www.semanticscholar.org/paper/649d03490ef72c5274e3bccd03d7a299d2f8da91},
  abstract    = {Unsupervised vector-based approaches to semantics can model rich lexical meanings, but they largely fail to capture sentiment information that is central to many word meanings and important for a wide range of NLP tasks. We present a model that uses a mix of unsupervised and supervised techniques to learn word vectors capturing semantic term--document information as well as rich sentiment content. The proposed model can leverage both continuous and multi-dimensional sentiment information as well as non-sentiment annotations. We instantiate the model to utilize the document-level sentiment polarity annotations present in many online documents (e.g. star ratings). We evaluate the model using small, widely used sentiment and subjectivity corpora and find it out-performs several previously introduced methods for sentiment classification. We also introduce a large dataset of movie reviews to serve as a more robust benchmark for work in this area.},
  description = {This paper focuses on learning word vectors for sentiment analysis of IMDB movie reviews, providing insights into the application of machine learning in analyzing sentiments in the domain of movie reviews.},
  keywords    = {sentiment-analysis machine-learning IMDB movie-reviews related_works AI related_works_benchmark posted_with_chatgpt},
  added-at    = {2023-09-22T12:17:58.000+0200},
  timestamp   = {2023-09-22T12:17:58.000+0200},
  biburl      = {https://www.bibsonomy.org/bibtex/24e03f895cbdab96d0f3056ed8d4347b2/tomvoelker},
  interhash   = {fdf6f17a910c1ba04efcfdc8b951fc9d},
  intrahash   = {4e03f895cbdab96d0f3056ed8d4347b2},
}