from itertools import chain
from random import choice
from typing import Dict, List

import evaluate
from datasets import Features, Sequence, Value
from sklearn.metrics import accuracy_score
_CITATION = """ | |
""" | |
_DESCRIPTION = """ | |
This module provides evaluation metrics for Aspect-Based Sentiment Analysis (ABSA). | |
The metrics include precision, recall, and F1 score for both aspect terms and category detection. | |
Additionally it calculates de accuracy for polarities from aspect terms and category detection. | |
ABSA evaluates the capability of a model to identify and correctly classify the sentiment of specific aspects within a text. | |
""" | |
_KWARGS_DESCRIPTION = """ | |
Computes precision, recall, and F1 score for aspect terms and category detection in Aspect-Based Sentiment Analysis (ABSA). Also calculates de accuracy for polarities on each task. | |
Args: | |
predictions: List of ABSA predictions with the following structure: | |
- 'aspects': Sequence of aspect annotations, each with the following keys: | |
- 'term': Aspect term | |
- 'polarity': Polarity of the aspect term | |
- 'category': Sequence of category annotations, each with the following keys: | |
- 'category': Category | |
- 'polarity': polarity of the category | |
references: List of ABSA references with the same structure as predictions. | |
Returns: | |
term_extraction_results: f1 score, precision and recall for aspect terms | |
term_polarity_results_accuracy: accuracy for polarities on aspect terms | |
category_detection_results: f1 score, precision and recall for category detection | |
category_polarity_results_accuracy: accuracy for polarities on categories | |
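
Examples:
    A minimal, illustrative call; the load path below is a placeholder for wherever
    this metric is stored, not a published metric id, and the toy data is hypothetical.

    >>> absa_metric = evaluate.load("path/to/absa_evaluator")  # placeholder path
    >>> predictions = [
    ...     {
    ...         "aspects": {"term": ["battery"], "polarity": ["positive"]},
    ...         "category": {"category": ["power"], "polarity": ["positive"]},
    ...     }
    ... ]
    >>> references = [
    ...     {
    ...         "aspects": {"term": ["battery"], "polarity": ["positive"]},
    ...         "category": {"category": ["power"], "polarity": ["positive"]},
    ...     }
    ... ]
    >>> results = absa_metric.compute(predictions=predictions, references=references)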
""" | |


class AbsaEvaluator(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=Features(
                {
                    "predictions": {
                        "aspects": {
                            "term": Sequence(Value("string")),
                            "polarity": Sequence(Value("string")),
                        },
                        "category": {
                            "category": Sequence(Value("string")),
                            "polarity": Sequence(Value("string")),
                        },
                    },
                    "references": {
                        "aspects": {
                            "term": Sequence(Value("string")),
                            "polarity": Sequence(Value("string")),
                        },
                        "category": {
                            "category": Sequence(Value("string")),
                            "polarity": Sequence(Value("string")),
                        },
                    },
                }
            ),
        )
    def _compute(self, predictions, references):
        # Preprocess the aspect-term subtask.
        (
            truth_aspect_terms,
            pred_aspect_terms,
            truth_term_polarities,
            pred_term_polarities,
        ) = absa_term_preprocess(
            references=references,
            predictions=predictions,
            subtask_key="aspects",
            subtask_value="term",
        )
        # Evaluate aspect term extraction and polarity accuracy.
        term_results = self.semeval_metric(truth_aspect_terms, pred_aspect_terms)
        term_polarity_acc = accuracy_score(truth_term_polarities, pred_term_polarities)

        # Preprocess the category-detection subtask.
        (
            truth_categories,
            pred_categories,
            truth_cat_polarities,
            pred_cat_polarities,
        ) = absa_term_preprocess(
            references=references,
            predictions=predictions,
            subtask_key="category",
            subtask_value="category",
        )
        # Evaluate category detection and polarity accuracy.
        category_results = self.semeval_metric(truth_categories, pred_categories)
        cat_polarity_acc = accuracy_score(truth_cat_polarities, pred_cat_polarities)

        return {
            "term_extraction_results": term_results,
            "term_polarity_results_accuracy": term_polarity_acc,
            "category_detection_results": category_results,
            "category_polarity_results_accuracy": cat_polarity_acc,
        }
    def semeval_metric(
        self, truths: List[List[str]], preds: List[List[str]]
    ) -> Dict[str, float]:
        """
        Implements evaluation for extraction tasks using micro-averaged precision, recall, and F1 score.

        Parameters:
        - truths: List of lists, where each inner list contains the ground-truth labels for a sample.
        - preds: List of lists, where each inner list contains the predicted labels for a sample.

        Returns:
        - A dictionary containing the precision, recall, F1 score, and the counts of common, retrieved, and relevant items.

        Adapted from: https://github.com/davidsbatista/Aspect-Based-Sentiment-Analysis/blob/1d9c8ec1131993d924e96676fa212db6b53cb870/libraries/baselines.py#L387
        """
        b = 1  # F-beta parameter; b = 1 yields the standard F1 score.
        common, relevant, retrieved = 0.0, 0.0, 0.0
        for truth, pred in zip(truths, preds):
            common += len([a for a in pred if a in truth])
            retrieved += len(pred)
            relevant += len(truth)
        precision = common / retrieved if retrieved > 0 else 0.0
        recall = common / relevant if relevant > 0 else 0.0
        f1 = (
            (1 + (b**2)) * precision * recall / ((precision * b**2) + recall)
            if precision > 0 and recall > 0
            else 0.0
        )
        return {
            "precision": precision,
            "recall": recall,
            "f1_score": f1,
            "common": common,
            "retrieved": retrieved,
            "relevant": relevant,
        }
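
# A worked example of the micro-averaged scores above (illustrative values, not
# part of the metric): with truths = [["a", "b"], ["c"]] and
# preds = [["a"], ["c", "d"]], we get common = 2, retrieved = 3, relevant = 3,
# so precision = recall = 2/3 and f1 = 2/3.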


def adjust_predictions(refs, preds, choices):
    """Adjust predictions to match the length of references with either a special token or a random choice.

    Predictions shorter than their reference are padded; predictions that are
    already as long as (or longer than) their reference are left unchanged.
    """
    adjusted_preds = []
    for ref, pred in zip(refs, preds):
        if len(pred) < len(ref):
            missing_count = len(ref) - len(pred)
            # Pad with random picks from `choices` (a single special token for
            # terms, the sentiment label set for polarities).
            pred.extend([choice(choices) for _ in range(missing_count)])
        adjusted_preds.append(pred)
    return adjusted_preds
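
# Illustrative behavior (hypothetical data): adjust_predictions([["a", "b"]], [["a"]], ["NONE"])
# pads the short prediction and returns [["a", "NONE"]]. Note that the input
# prediction lists are extended in place.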


def extract_aspects(data, specific_key, specific_val):
    """Extract and return a list of the specified annotation details from the nested 'aspects' or 'category' data."""
    return [item[specific_key][specific_val] for item in data]
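
# Illustrative example (hypothetical record): with
# data = [{"aspects": {"term": ["battery"], "polarity": ["positive"]}}],
# extract_aspects(data, "aspects", "term") returns [["battery"]].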


def absa_term_preprocess(references, predictions, subtask_key, subtask_value):
    """
    Preprocess the terms and polarities for aspect-based sentiment analysis.

    Args:
        references (List[Dict]): A list of dictionaries containing the gold terms and polarities.
        predictions (List[Dict]): A list of dictionaries containing the predicted terms and polarities.
        subtask_key (str): Top-level key of the subtask to evaluate ('aspects' or 'category').
        subtask_value (str): Key of the annotation to extract ('term' or 'category').

    Returns:
        Tuple[List[str], List[str], List[str], List[str]]: A tuple containing flattened lists of true terms,
        adjusted predicted terms, true polarities, and adjusted predicted polarities.
    """
    # Extract terms/categories and their polarities.
    truth_aspect_terms = extract_aspects(references, subtask_key, subtask_value)
    pred_aspect_terms = extract_aspects(predictions, subtask_key, subtask_value)
    truth_polarities = extract_aspects(references, subtask_key, "polarity")
    pred_polarities = extract_aspects(predictions, subtask_key, "polarity")

    # Define adjustment parameters.
    special_token = "NONE"  # For missing aspect terms
    sentiment_choices = [
        "positive",
        "negative",
        "neutral",
        "conflict",
    ]  # For missing polarities

    # Adjust the predictions to match the length of the references.
    adjusted_pred_terms = adjust_predictions(
        truth_aspect_terms, pred_aspect_terms, [special_token]
    )
    adjusted_pred_polarities = adjust_predictions(
        truth_polarities, pred_polarities, sentiment_choices
    )

    return (
        flatten_list(truth_aspect_terms),
        flatten_list(adjusted_pred_terms),
        flatten_list(truth_polarities),
        flatten_list(adjusted_pred_polarities),
    )


def flatten_list(nested_list):
    """Flatten a nested list into a single-level list."""
    return list(chain.from_iterable(nested_list))
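
# E.g., flatten_list([["a"], ["b", "c"]]) returns ["a", "b", "c"].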


def extract_pred_terms(
    all_predictions: List[Dict[str, Dict[str, str]]]
) -> List[List[str]]:
    """Extract and organize predicted terms from raw, category-keyed sentiment analysis results."""
    pred_aspect_terms = []
    for pred in all_predictions:
        terms = [term for cat in pred.values() for term in cat.keys()]
        pred_aspect_terms.append(terms)
    return pred_aspect_terms
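
# Illustrative example (hypothetical raw output, keyed category -> {term: polarity}):
# extract_pred_terms([{"food": {"pizza": "positive"}, "service": {"waiter": "negative"}}])
# returns [["pizza", "waiter"]].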


def merge_aspects_and_categories(aspects, categories):
    """Merge per-sample aspect and category annotations into the prediction/reference format expected by the metric."""
    result = []
    # Assuming both lists are of the same length and corresponding indices match.
    for aspect, category in zip(aspects, categories):
        combined_entry = {
            "aspects": {"term": [], "polarity": []},
            "category": {"category": [], "polarity": []},
        }

        # Process aspect entries.
        for cat_key, terms_dict in aspect.items():
            for term, polarity in terms_dict.items():
                combined_entry["aspects"]["term"].append(term)
                combined_entry["aspects"]["polarity"].append(polarity)

            # Add category details based on the aspect's key, if present in categories.
            if cat_key in category:
                combined_entry["category"]["category"].append(cat_key)
                combined_entry["category"]["polarity"].append(category[cat_key])

        # Ensure all keys in category are accounted for.
        for cat_key, polarity in category.items():
            if cat_key not in combined_entry["category"]["category"]:
                combined_entry["category"]["category"].append(cat_key)
                combined_entry["category"]["polarity"].append(polarity)

        result.append(combined_entry)
    return result
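
# Illustrative round trip (hypothetical data): with
# aspects = [{"food": {"pizza": "positive"}}] and
# categories = [{"food": "positive", "service": "negative"}],
# merge_aspects_and_categories(aspects, categories) returns
# [{"aspects": {"term": ["pizza"], "polarity": ["positive"]},
#   "category": {"category": ["food", "service"],
#                "polarity": ["positive", "negative"]}}]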