Module auto_osint_v.sentiment_analyser

This module's primary role is to analyse the semantics of the intelligence statement.

This module will likely be reused/modified within source aggregation.

Expand source code
"""This module's primary role is to analyse the semantics of the intelligence statement.

This module will likely be reused/modified within source aggregation.
"""

from transformers import pipeline


class SentimentAnalyser:
    """Runs sentiment analysis over a document and records the outcome.

    The document is usually the intelligence statement, but any other text can be analysed.
    """

    def __init__(self, read_statement, statement_title, file_handler_object):
        """Stores the statement and its collaborators and builds the sentiment pipeline.

        Args:
            read_statement: The statement text, as input in __main__.py and read by
                            FileHandler.read_file()
            statement_title: The source title or filename; used to label the evidence entry.
            file_handler_object: The file handler object, kept for reuse when writing evidence.
        """
        self.statement = read_statement
        self.file_name = statement_title
        self.file_handler = file_handler_object
        # A model with three labels (+ve, -ve and neutral) is wanted here: intelligence
        # statements should be neutral rather than strongly +ve or -ve.
        self.sentiment_analysis = pipeline("sentiment-analysis",
                                           model="Souvikcmsa/BERT_sentiment_analysis")

    def set_statement(self, new_statement):
        """Replaces the statement that statement_analyser will analyse."""
        self.statement = new_statement

    def statement_analyser(self, threshold=0.9):
        """Analyses the overall sentiment of the stored statement and writes it as evidence.

        The top classification is written through the file handler. When that
        classification is not 'neutral' and its score is above the threshold, a warning that
        the statement is likely biased or sensational is written with it,
        see Calvo et al. (2021).

        Args:
            threshold: Score above which a non-neutral classification triggers the warning.
                       Defaults to 0.9; pass a different value per source type if needed.

        Returns:
            Nothing - outputs to file
        """
        # truncation=True asks the pipeline's tokenizer to cut the text down to the model's
        # maximum input length, so an over-long statement is classified on its leading part
        # instead of making the pipeline raise.
        classification = self.sentiment_analysis(self.statement, truncation=True)
        top_result = classification[0]
        label = top_result["label"]
        score = top_result["score"]

        # The evidence type embeds the file name, thought to offer better readability to user
        evidence_row = ["sentiment-analysis-of-" + self.file_name,
                        label + ' sentiment, score: ' + str(score)]
        if label != 'neutral' and score > threshold:
            # confident non-neutral result: add the warning as extra info
            evidence_row.append("Warning: analysis of your statement indicates it is likely "
                                "biased or sensational")
        self.file_handler.open_evidence_file(evidence_row)

    def headline_analyser(self, headline):
        """Runs sentiment analysis on a given headline.

        Args:
            headline: The headline text; surrounding whitespace is stripped before analysis.

        Returns:
            the sentiment label (positive, negative, neutral) and the confidence score.
        """
        top_result = self.sentiment_analysis(headline.strip())[0]
        return top_result["label"], top_result["score"]

Classes

class SentimentAnalyser (read_statement, statement_title, file_handler_object)

This class provides methods for conducting sentiment analysis on a given document.

This document could be the intelligence statement or a different document.

Initialises variables to be used in this object.

Args

read_statement
This is the statement input in __main__.py and read by FileHandler.read_file()
statement_title
This is the source title or filename.
file_handler_object
The file handler object passed to the class for reuse
Expand source code
class SentimentAnalyser:
    """Runs sentiment analysis over a document and records the outcome.

    The document is usually the intelligence statement, but any other text can be analysed.
    """

    def __init__(self, read_statement, statement_title, file_handler_object):
        """Stores the statement and its collaborators and builds the sentiment pipeline.

        Args:
            read_statement: The statement text, as input in __main__.py and read by
                            FileHandler.read_file()
            statement_title: The source title or filename; used to label the evidence entry.
            file_handler_object: The file handler object, kept for reuse when writing evidence.
        """
        self.statement = read_statement
        self.file_name = statement_title
        self.file_handler = file_handler_object
        # A model with three labels (+ve, -ve and neutral) is wanted here: intelligence
        # statements should be neutral rather than strongly +ve or -ve.
        self.sentiment_analysis = pipeline("sentiment-analysis",
                                           model="Souvikcmsa/BERT_sentiment_analysis")

    def set_statement(self, new_statement):
        """Replaces the statement that statement_analyser will analyse."""
        self.statement = new_statement

    def statement_analyser(self, threshold=0.9):
        """Analyses the overall sentiment of the stored statement and writes it as evidence.

        The top classification is written through the file handler. When that
        classification is not 'neutral' and its score is above the threshold, a warning that
        the statement is likely biased or sensational is written with it,
        see Calvo et al. (2021).

        Args:
            threshold: Score above which a non-neutral classification triggers the warning.
                       Defaults to 0.9; pass a different value per source type if needed.

        Returns:
            Nothing - outputs to file
        """
        # truncation=True asks the pipeline's tokenizer to cut the text down to the model's
        # maximum input length, so an over-long statement is classified on its leading part
        # instead of making the pipeline raise.
        classification = self.sentiment_analysis(self.statement, truncation=True)
        top_result = classification[0]
        label = top_result["label"]
        score = top_result["score"]

        # The evidence type embeds the file name, thought to offer better readability to user
        evidence_row = ["sentiment-analysis-of-" + self.file_name,
                        label + ' sentiment, score: ' + str(score)]
        if label != 'neutral' and score > threshold:
            # confident non-neutral result: add the warning as extra info
            evidence_row.append("Warning: analysis of your statement indicates it is likely "
                                "biased or sensational")
        self.file_handler.open_evidence_file(evidence_row)

    def headline_analyser(self, headline):
        """Runs sentiment analysis on a given headline.

        Args:
            headline: The headline text; surrounding whitespace is stripped before analysis.

        Returns:
            the sentiment label (positive, negative, neutral) and the confidence score.
        """
        top_result = self.sentiment_analysis(headline.strip())[0]
        return top_result["label"], top_result["score"]

Methods

def headline_analyser(self, headline)

Runs sentiment analysis on a given headline.

Returns

the sentiment label (positive, negative, neutral) and the confidence score.

Expand source code
def headline_analyser(self, headline):
    """Classifies the sentiment of a single headline.

    Surrounding whitespace is stripped from the headline before it is handed to the
    sentiment pipeline; only the first classification returned is used.

    Returns:
        the sentiment label (positive, negative, neutral) and the confidence score.
    """
    top_result = self.sentiment_analysis(headline.strip())[0]
    return top_result["label"], top_result["score"]
def set_statement(self, new_statement)

Setter for the self.statement initial variable

Expand source code
def set_statement(self, new_statement):
    """Replaces the stored statement (self.statement) with new_statement."""
    self.statement = new_statement
def statement_analyser(self)

This method analyses the overall sentiment of the intelligence statement.

(it could be used to analyse other statements) This code is all from the Hugging Face documentation.

Returns

Nothing - outputs to file

Expand source code
def statement_analyser(self, threshold=0.9):
    """Analyses the overall sentiment of the stored statement and writes it as evidence.

    The top classification is written through the file handler. When that classification
    is not 'neutral' and its score is above the threshold, a warning that the statement is
    likely biased or sensational is written with it, see Calvo et al. (2021).

    Args:
        threshold: Score above which a non-neutral classification triggers the warning.
                   Defaults to 0.9; pass a different value per source type if needed.

    Returns:
        Nothing - outputs to file
    """
    # truncation=True asks the pipeline's tokenizer to cut the text down to the model's
    # maximum input length, so an over-long statement is classified on its leading part
    # instead of making the pipeline raise.
    classification = self.sentiment_analysis(self.statement, truncation=True)
    top_result = classification[0]
    label = top_result["label"]
    score = top_result["score"]

    # The evidence type embeds the file name, thought to offer better readability to user
    evidence_row = ["sentiment-analysis-of-" + self.file_name,
                    label + ' sentiment, score: ' + str(score)]
    if label != 'neutral' and score > threshold:
        # confident non-neutral result: add the warning as extra info
        evidence_row.append("Warning: analysis of your statement indicates it is likely "
                            "biased or sensational")
    self.file_handler.open_evidence_file(evidence_row)