from typing import Dict, List import evaluate from datasets import Features, Sequence, Value from sklearn.metrics import accuracy_score from itertools import chain from random import choice from typing import Any, Dict, List, Optional, Tuple _CITATION = """ """ _DESCRIPTION = """ Evaluation metrics for Aspect-Based Sentiment Analysis (ABSA) including precision, recall, and F1 score for aspect terms and polarities. """ _KWARGS_DESCRIPTION = """ Computes precision, recall, and F1 score for aspect terms and polarities in Aspect-Based Sentiment Analysis (ABSA). Args: predictions: List of ABSA predictions with the following structure: - 'aspects': Sequence of aspect annotations, each with the following keys: - 'term': Aspect term - 'polarity': Polarity of the aspect term references: List of ABSA references with the same structure as predictions. Returns: aspect_precision: Precision score for aspect terms aspect_recall: Recall score for aspect terms aspect_f1: F1 score for aspect terms polarity_precision: Precision score for aspect polarities polarity_recall: Recall score for aspect polarities polarity_f1: F1 score for aspect polarities """ class AbsaEvaluatorTest(evaluate.Metric): def _info(self): return evaluate.MetricInfo( description=_DESCRIPTION, citation=_CITATION, inputs_description=_KWARGS_DESCRIPTION, features=Features( { "predictions": Features( { "aspects": Features( { "term": Sequence(Value("string")), "polarity": Sequence(Value("string")), } ), "category": Features( { "category": Sequence(Value("string")), "polarity": Sequence(Value("string")), } ), } ), "references": Features( { "aspects": Features( { "term": Sequence(Value("string")), "polarity": Sequence(Value("string")), } ), "category": Features( { "category": Sequence(Value("string")), "polarity": Sequence(Value("string")), } ), } ), } ), ) def _compute(self, predictions, references): # preprocess aspect term ( truth_aspect_terms, pred_aspect_terms, truth_term_polarities, pred_term_polarities, ) = absa_term_preprocess( references=references, predictions=predictions, subtask_key="aspects", subtask_value="term", ) # evaluate term_results = self.semeval_metric( truth_aspect_terms, pred_aspect_terms ) term_polarity_acc = accuracy_score( truth_term_polarities, pred_term_polarities ) # preprocess category detection ( truth_categories, pred_categories, truth_cat_polarities, pred_cat_polarities, ) = absa_term_preprocess( references=references, predictions=predictions, subtask_key="category", subtask_value="category", ) # evaluate category_results = self.semeval_metric( truth_categories, pred_categories ) cat_polarity_acc = accuracy_score( truth_cat_polarities, pred_cat_polarities ) return { "term_extraction_results": term_results, "term_polarity_results_accuracy": term_polarity_acc, "category_detection_results": category_results, "category_polarity_results_accuracy": cat_polarity_acc, } def semeval_metric( self, truths: List[List[str]], preds: List[List[str]] ) -> Dict[str, float]: """ Implements evaluation for extraction tasks using precision, recall, and F1 score. Parameters: - truths: List of lists, where each list contains the ground truth labels for a sample. - preds: List of lists, where each list contains the predicted labels for a sample. Returns: - A dictionary containing the precision, recall, F1 score, and counts of common, retrieved, and relevant. link for code: link for this code: https://github.com/davidsbatista/Aspect-Based-Sentiment-Analysis/blob/1d9c8ec1131993d924e96676fa212db6b53cb870/libraries/baselines.py#L387 """ b = 1 common, relevant, retrieved = 0.0, 0.0, 0.0 for truth, pred in zip(truths, preds): common += len([a for a in pred if a in truth]) retrieved += len(pred) relevant += len(truth) precision = common / retrieved if retrieved > 0 else 0.0 recall = common / relevant if relevant > 0 else 0.0 f1 = ( (1 + (b**2)) * precision * recall / ((precision * b**2) + recall) if precision > 0 and recall > 0 else 0.0 ) return { "precision": precision, "recall": recall, "f1_score": f1, "common": common, "retrieved": retrieved, "relevant": relevant, } def adjust_predictions(refs, preds, choices): """Adjust predictions to match the length of references with either a special token or random choice.""" adjusted_preds = [] for ref, pred in zip(refs, preds): if len(pred) < len(ref): missing_count = len(ref) - len(pred) pred.extend([choice(choices) for _ in range(missing_count)]) adjusted_preds.append(pred) return adjusted_preds def extract_aspects(data, specific_key, specific_val): """Extracts and returns a list of specified aspect details from the nested 'aspects' data.""" return [item[specific_key][specific_val] for item in data] def absa_term_preprocess(references, predictions, subtask_key, subtask_value): """ Preprocess the terms and polarities for aspect-based sentiment analysis. Args: references (List[Dict]): A list of dictionaries containing the actual terms and polarities under 'aspects'. predictions (List[Dict]): A list of dictionaries containing predicted aspect categories to terms and their sentiments. Returns: Tuple[List[str], List[str], List[str], List[str]]: A tuple containing lists of true aspect terms, adjusted predicted aspect terms, true polarities, and adjusted predicted polarities. """ # Extract aspect terms and polarities truth_aspect_terms = extract_aspects(references, subtask_key, subtask_value) pred_aspect_terms = extract_aspects(predictions, subtask_key, subtask_value) truth_polarities = extract_aspects(references, subtask_key, "polarity") pred_polarities = extract_aspects(predictions, subtask_key, "polarity") # Define adjustment parameters special_token = "NONE" # For missing aspect terms sentiment_choices = [ "positive", "negative", "neutral", "conflict", ] # For missing polarities # Adjust the predictions to match the length of references adjusted_pred_terms = adjust_predictions( truth_aspect_terms, pred_aspect_terms, [special_token] ) adjusted_pred_polarities = adjust_predictions( truth_polarities, pred_polarities, sentiment_choices ) return ( flatten_list(truth_aspect_terms), flatten_list(adjusted_pred_terms), flatten_list(truth_polarities), flatten_list(adjusted_pred_polarities), ) def flatten_list(nested_list): """Flatten a nested list into a single-level list.""" return list(chain.from_iterable(nested_list)) def extract_pred_terms( all_predictions: List[Dict[str, Dict[str, str]]] ) -> List[List]: """Extract and organize predicted terms from the sentiment analysis results.""" pred_aspect_terms = [] for pred in all_predictions: terms = [term for cat in pred.values() for term in cat.keys()] pred_aspect_terms.append(terms) return pred_aspect_terms def merge_aspects_and_categories(aspects, categories): result = [] # Assuming both lists are of the same length and corresponding indices match for aspect, category in zip(aspects, categories): combined_entry = { "aspects": {"term": [], "polarity": []}, "category": {"category": [], "polarity": []}, } # Process aspect entries for cat_key, terms_dict in aspect.items(): for term, polarity in terms_dict.items(): combined_entry["aspects"]["term"].append(term) combined_entry["aspects"]["polarity"].append(polarity) # Add category details based on the aspect's key if available in categories if cat_key in category: combined_entry["category"]["category"].append(cat_key) combined_entry["category"]["polarity"].append( category[cat_key] ) # Ensure all keys in category are accounted for for cat_key, polarity in category.items(): if cat_key not in combined_entry["category"]["category"]: combined_entry["category"]["category"].append(cat_key) combined_entry["category"]["polarity"].append(polarity) result.append(combined_entry) return result