|
import torch |
|
|
|
def load_words_from_file(file_path):
    """Load a word list from a UTF-8 text file, one word per line.

    Leading and trailing whitespace is stripped from every line, and lines
    that are empty after stripping are skipped. This keeps blank separator
    lines and stray trailing spaces in the file from producing entries that
    can never match a token.

    Args:
        file_path: Path of the text file to read.

    Returns:
        list[str]: The words, in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # "utf-8-sig" decodes plain UTF-8 exactly like "utf-8" but also drops a
    # leading byte-order mark, which would otherwise be glued to the first
    # word and make it unmatchable.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        raw_lines = file.read().splitlines()

    stripped_lines = (line.strip() for line in raw_lines)
    return [word for word in stripped_lines if word]
|
|
|
def preprocess_with_negation_v2(text, negation_words=None, emotion_words=None):
    """Tag emotion words that have a negation word shortly after them.

    The text is split on whitespace. For every token that is a negation
    word, the up-to-three tokens immediately before it are inspected,
    nearest first. The first one that is an emotion word gets the suffix
    " (Negative context)" appended. Matching is exact: it is case-sensitive
    and punctuation attached to a token is not removed.

    Args:
        text: Input string to process.
        negation_words: Optional iterable of negation words. When ``None``
            the list is read from './model/stopwords/negation_words.txt'.
        emotion_words: Optional iterable of emotion words. When ``None``
            the list is read from './model/stopwords/emotion_words.txt'.
            Callers that process many texts can load both lists once and
            pass them in to avoid re-reading the files on every call.

    Returns:
        str: The tokens re-joined with single spaces, with matching emotion
        words tagged. Runs of whitespace in the input are therefore
        collapsed to one space.
    """
    # load_words_from_file is defined in this same module, so it is called
    # directly instead of being re-imported from the module by name.
    if negation_words is None:
        negation_words = load_words_from_file('./model/stopwords/negation_words.txt')
    if emotion_words is None:
        emotion_words = load_words_from_file('./model/stopwords/emotion_words.txt')

    # Build the lookups once so each membership test below is O(1) instead
    # of a linear scan over the word list for every token.
    negation_lookup = frozenset(negation_words)
    emotion_lookup = frozenset(emotion_words)

    words = text.split()
    # Tags are written into a copy so the look-back always compares against
    # the original, untagged tokens.
    modified_words = words[:]

    for i, word in enumerate(words):
        if word not in negation_lookup:
            continue
        # NOTE(review): the window looks *backwards* from the negation word;
        # confirm this matches the word order of the target language.
        for j in range(1, 4):
            if i - j >= 0 and words[i - j] in emotion_lookup:
                modified_words[i - j] = f"{words[i - j]} (Negative context)"
                # Only the nearest emotion word is tagged per negation word.
                break

    return " ".join(modified_words)
|
|
|
|
|
def predict(text, model, tokenizer):
    """Predict the sentiment for a given text with advanced negation handling.

    The text is first passed through ``preprocess_with_negation_v2``, then
    tokenized and run through the model without gradient tracking. If the
    preprocessing tagged anything with "(Negative context)", the predicted
    label name is remapped through a small fixed table.

    Args:
        text: A single input string (the argmax is converted with
            ``.item()``, which requires exactly one prediction).
        model: Model that is callable with the tokenizer output, returns an
            object with a ``logits`` attribute, and exposes
            ``config.id2label``. It is moved to the selected device.
            (Presumably a Hugging Face sequence-classification model;
            confirm against the caller.)
        tokenizer: Callable that accepts the text plus ``padding``,
            ``truncation``, ``max_length`` and ``return_tensors`` and
            returns an object with a ``.to(device)`` method.

    Returns:
        tuple: ``(probs, pred_label_idx, pred_label)`` where ``probs`` is
        the softmax over the last dimension of the logits,
        ``pred_label_idx`` is the argmax index as a Python int, and
        ``pred_label`` is the label name. When the negation remap applies,
        only ``pred_label`` changes; ``probs`` and ``pred_label_idx`` still
        describe the raw model output.
    """
    # Prefer the GPU when one is available; the model and inputs must live
    # on the same device.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # preprocess_with_negation_v2 is defined in this same module, so it is
    # called directly instead of being re-imported from the module by name.
    processed_text = preprocess_with_negation_v2(text)

    inputs = tokenizer(
        processed_text,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    ).to(device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    pred_label_idx = probs.argmax(dim=-1).item()
    pred_label = model.config.id2label[pred_label_idx]

    # Labels to swap when the text contains a negated emotion word; labels
    # not listed here are left unchanged.
    negation_map = {
        "Sadness": "Optimistic",
        "Optimistic": "Sadness",
        "Anger": "Optimistic",
    }
    if "(Negative context)" in processed_text:
        pred_label = negation_map.get(pred_label, pred_label)

    return probs, pred_label_idx, pred_label
|
|