Spaces:
Sleeping
Sleeping
File size: 4,376 Bytes
22875cb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
from itertools import chain
from random import choice
from typing import Any, Dict, List, Optional, Tuple
from datasets import Dataset
def adjust_predictions(refs, preds, choices):
    """Pad each prediction list so it is at least as long as its reference.

    References and predictions are paired by position with ``zip``, so any
    surplus entries in the longer of the two inputs are ignored.

    Args:
        refs: Sequence of reference label sequences, one per sample.
        preds: Sequence of predicted label sequences, one per sample.
        choices: Sequence of filler values. One value is drawn with
            ``random.choice`` for every missing position.

    Returns:
        A list of new lists, one per (reference, prediction) pair. A
        prediction shorter than its reference is padded up to the reference
        length; a prediction of equal or greater length is copied unchanged
        (it is not truncated).

    Raises:
        IndexError: If padding is required and ``choices`` is empty.
    """
    adjusted_preds = []
    for ref, pred in zip(refs, preds):
        # Pad a copy: extending ``pred`` itself would silently modify the
        # prediction lists owned by the caller.
        padded = list(pred)
        missing_count = len(ref) - len(padded)
        # range() of a non-positive count is empty, so predictions that are
        # already long enough pass through without any filler.
        padded.extend(choice(choices) for _ in range(missing_count))
        adjusted_preds.append(padded)
    return adjusted_preds
def extract_aspects(data, specific_key, specific_val):
    """Collect ``item[specific_key][specific_val]`` from every item of ``data``, in order."""
    collected = []
    for entry in data:
        group = entry[specific_key]
        collected.append(group[specific_val])
    return collected
def absa_term_preprocess(references, predictions, subtask_key, subtask_value):
    """
    Build flattened gold and predicted label lists for one ABSA subtask.

    Args:
        references (List[Dict]): Gold records. Each record is read as
            ``record[subtask_key][subtask_value]`` and
            ``record[subtask_key]["polarity"]``.
        predictions (List[Dict]): Predicted records, read with the same keys.
        subtask_key: Outer key selecting the subtask section of each record.
        subtask_value: Inner key selecting the label list inside that section.

    Returns:
        Tuple of four flat lists: gold labels, padded predicted labels,
        gold polarities, and padded predicted polarities. Predictions shorter
        than their reference are padded with "NONE" for labels and with a
        randomly chosen sentiment for polarities.
    """
    polarity_field = "polarity"

    # Pull the per-sample label and polarity lists out of both inputs.
    gold_terms = extract_aspects(references, subtask_key, subtask_value)
    predicted_terms = extract_aspects(predictions, subtask_key, subtask_value)
    gold_polarities = extract_aspects(references, subtask_key, polarity_field)
    predicted_polarities = extract_aspects(predictions, subtask_key, polarity_field)

    # Fillers used where a prediction has fewer entries than its reference.
    term_fillers = ["NONE"]
    polarity_fillers = ["positive", "negative", "neutral", "conflict"]

    # Labels are padded before polarities so random draws happen in a fixed order.
    padded_terms = adjust_predictions(gold_terms, predicted_terms, term_fillers)
    padded_polarities = adjust_predictions(
        gold_polarities, predicted_polarities, polarity_fillers
    )

    per_sample_lists = (
        gold_terms,
        padded_terms,
        gold_polarities,
        padded_polarities,
    )
    return tuple(map(flatten_list, per_sample_lists))
def flatten_list(nested_list):
    """Concatenate the items of every inner iterable into one flat list."""
    flat = []
    for inner in nested_list:
        flat.extend(inner)
    return flat
def extract_pred_terms(
    all_predictions: List[Dict[str, Dict[str, str]]]
) -> List[List]:
    """Return, for each prediction, the keys of all of its inner dictionaries.

    Each prediction maps an outer key to an inner dictionary; the inner keys
    are gathered in iteration order into one list per prediction.
    """
    collected = []
    for prediction in all_predictions:
        terms = []
        for term_map in prediction.values():
            terms.extend(term_map.keys())
        collected.append(terms)
    return collected
def merge_aspects_and_categories(aspects, categories):
    """Combine per-sample aspect and category predictions into one record each.

    Args:
        aspects: Sequence of dicts mapping a category key to a dict of
            ``{term: polarity}``.
        categories: Sequence of dicts mapping a category key to a polarity.
            Paired with ``aspects`` by position via ``zip``.

    Returns:
        A list with one dict per pair, shaped as
        ``{"aspects": {"term": [...], "polarity": [...]},
        "category": {"category": [...], "polarity": [...]}}``.
        Category keys shared with the aspect dict come first (in aspect
        order), followed by the remaining category keys.
    """
    merged = []
    for aspect_map, category_map in zip(aspects, categories):
        terms, term_polarities = [], []
        category_names, category_polarities = [], []

        for category_name, term_map in aspect_map.items():
            for term, term_polarity in term_map.items():
                terms.append(term)
                term_polarities.append(term_polarity)
            # Record the category once per aspect key, when it was predicted.
            if category_name in category_map:
                category_names.append(category_name)
                category_polarities.append(category_map[category_name])

        # Append categories that had no matching aspect key.
        for category_name, category_polarity in category_map.items():
            if category_name in category_names:
                continue
            category_names.append(category_name)
            category_polarities.append(category_polarity)

        merged.append(
            {
                "aspects": {"term": terms, "polarity": term_polarities},
                "category": {
                    "category": category_names,
                    "polarity": category_polarities,
                },
            }
        )
    return merged
|