HalteroXHunter committed on
Commit
22875cb
1 Parent(s): 22bd812

add extras

Browse files
Files changed (3) hide show
  1. absa_evaluator.py +1 -1
  2. app.py +6 -0
  3. preprocessing.py +115 -0
absa_evaluator.py CHANGED
@@ -4,7 +4,7 @@ import evaluate
4
  from datasets import Features, Sequence, Value
5
  from sklearn.metrics import accuracy_score
6
 
7
- from research_eval.utils.preprocessing import absa_term_preprocess
8
 
9
  _CITATION = """
10
  """
 
4
  from datasets import Features, Sequence, Value
5
  from sklearn.metrics import accuracy_score
6
 
7
+ from preprocessing import absa_term_preprocess
8
 
9
  _CITATION = """
10
  """
app.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Entry-point script: loads the evaluation module registered under the name
# "absa_evaluator" and hands it to the Gradio widget launcher from `evaluate`.
import evaluate
from evaluate.utils import launch_gradio_widget


# NOTE(review): the module is resolved by the bare name "absa_evaluator";
# presumably this resolves relative to the working directory or the Hub --
# confirm against the deployment layout.
module = evaluate.load("absa_evaluator")
launch_gradio_widget(module)
preprocessing.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from itertools import chain
2
+ from random import choice
3
+ from typing import Any, Dict, List, Optional, Tuple
4
+
5
+ from datasets import Dataset
6
+
7
+
8
def adjust_predictions(refs, preds, choices):
    """Pad each prediction list so it is at least as long as its reference.

    References and predictions are paired positionally with ``zip``, so
    pairing stops at the shorter of the two outer sequences.

    Args:
        refs: Sequence of reference label lists, one per example.
        preds: Sequence of predicted label lists, aligned with ``refs``.
        choices: Sequence of filler labels. Every missing slot is filled
            with ``random.choice(choices)``; with a single-element sequence
            the filler is deterministic. Must be non-empty whenever padding
            is actually needed.

    Returns:
        A new list containing one new list per (ref, pred) pair. Predictions
        shorter than their reference are padded at the end; predictions of
        equal or greater length are copied unchanged (never truncated).
    """
    adjusted_preds = []
    for ref, pred in zip(refs, preds):
        # Work on a copy: the original implementation extended ``pred`` in
        # place, silently modifying the caller's prediction data.
        padded = list(pred)
        missing_count = len(ref) - len(padded)
        # range() of a non-positive count is empty, so no padding happens
        # when the prediction is already long enough.
        padded.extend(choice(choices) for _ in range(missing_count))
        adjusted_preds.append(padded)
    return adjusted_preds
17
+
18
+
19
def extract_aspects(data, specific_key, specific_val):
    """Collect ``item[specific_key][specific_val]`` for every item in ``data``.

    Args:
        data: Iterable of nested mappings (one per example).
        specific_key: Outer key to look up in each item.
        specific_val: Inner key to look up under ``specific_key``.

    Returns:
        A list with one extracted value per item, in input order. The values
        are returned as-is (not copied), so they still reference the objects
        stored inside ``data``.

    Raises:
        KeyError: If an item lacks ``specific_key`` or the nested mapping
            lacks ``specific_val``.
    """
    return [item[specific_key][specific_val] for item in data]
22
+
23
+
24
def absa_term_preprocess(references, predictions, subtask_key, subtask_value):
    """Preprocess labels and polarities for aspect-based sentiment analysis.

    Both inputs are read as ``item[subtask_key][subtask_value]`` for the
    labels and ``item[subtask_key]["polarity"]`` for the polarities.

    Args:
        references (List[Dict]): Ground-truth entries, one per example.
        predictions (List[Dict]): Predicted entries with the same nested
            layout as ``references``.
        subtask_key (str): Outer key selecting the subtask section of each
            entry.
        subtask_value (str): Inner key selecting the label list under
            ``subtask_key``.

    Returns:
        Tuple[List, List, List, List]: Flattened true labels, flattened
        padded predicted labels, flattened true polarities, and flattened
        padded predicted polarities.

    Note:
        Missing predicted labels are filled with ``"NONE"``; missing
        predicted polarities are filled with a random sentiment, so the
        polarity output is not deterministic when padding occurs.
    """
    # Extract labels and polarities. The predicted lists are copied so that
    # padding them below can never modify the caller's ``predictions``.
    truth_aspect_terms = extract_aspects(
        references, subtask_key, subtask_value
    )
    pred_aspect_terms = [
        list(labels)
        for labels in extract_aspects(predictions, subtask_key, subtask_value)
    ]
    truth_polarities = extract_aspects(references, subtask_key, "polarity")
    pred_polarities = [
        list(labels)
        for labels in extract_aspects(predictions, subtask_key, "polarity")
    ]

    # Define adjustment parameters
    special_token = "NONE"  # For missing aspect terms
    sentiment_choices = [
        "positive",
        "negative",
        "neutral",
        "conflict",
    ]  # For missing polarities

    # Pad the predictions so each is at least as long as its reference
    adjusted_pred_terms = adjust_predictions(
        truth_aspect_terms, pred_aspect_terms, [special_token]
    )
    adjusted_pred_polarities = adjust_predictions(
        truth_polarities, pred_polarities, sentiment_choices
    )

    return (
        flatten_list(truth_aspect_terms),
        flatten_list(adjusted_pred_terms),
        flatten_list(truth_polarities),
        flatten_list(adjusted_pred_polarities),
    )
66
+
67
+
68
def flatten_list(nested_list):
    """Flatten one level of nesting into a single list.

    Args:
        nested_list: Iterable of iterables.

    Returns:
        A new list with the elements of every inner iterable, in order.
        Only the outermost level is flattened; deeper nesting is preserved.
    """
    return list(chain.from_iterable(nested_list))
71
+
72
+
73
def extract_pred_terms(
    all_predictions: List[Dict[str, Dict[str, str]]]
) -> List[List[str]]:
    """Extract the predicted terms of every prediction.

    Args:
        all_predictions: One mapping per example; each maps an outer key to
            an inner mapping whose keys are the predicted terms.

    Returns:
        One list per prediction containing the keys of all of its inner
        mappings, in iteration order.
    """
    return [
        [term for terms_by_category in pred.values() for term in terms_by_category]
        for pred in all_predictions
    ]
82
+
83
+
84
def merge_aspects_and_categories(aspects, categories):
    """Merge per-example aspect and category mappings into combined entries.

    Args:
        aspects: List of mappings, one per example, each mapping a category
            key to a ``{term: polarity}`` mapping.
        categories: List of mappings, one per example, each mapping a
            category key to its polarity. Paired with ``aspects`` by
            position; pairing stops at the shorter list.

    Returns:
        A list of dictionaries shaped as::

            {
                "aspects": {"term": [...], "polarity": [...]},
                "category": {"category": [...], "polarity": [...]},
            }

        Category keys that appear in the example's aspect mapping come
        first (in aspect order), followed by the remaining category keys.
        Each category key is listed exactly once.
    """
    result = []

    for aspect, category in zip(aspects, categories):
        terms = []
        term_polarities = []
        category_names = []
        category_polarities = []
        seen_categories = set()

        # Process aspect entries
        for cat_key, terms_dict in aspect.items():
            for term, polarity in terms_dict.items():
                terms.append(term)
                term_polarities.append(polarity)

            # Record the category once per aspect key (not once per term),
            # so a category with several terms is never duplicated.
            if cat_key in category:
                category_names.append(cat_key)
                category_polarities.append(category[cat_key])
                seen_categories.add(cat_key)

        # Ensure all keys in category are accounted for
        for cat_key, polarity in category.items():
            if cat_key not in seen_categories:
                category_names.append(cat_key)
                category_polarities.append(polarity)
                seen_categories.add(cat_key)

        result.append(
            {
                "aspects": {"term": terms, "polarity": term_polarities},
                "category": {
                    "category": category_names,
                    "polarity": category_polarities,
                },
            }
        )

    return result