sumonta056
committed on
Commit
•
88ba180
1
Parent(s):
d8cdd17
feat: working with negative emotion
Browse files- __pycache__/emotion_utils.cpython-312.pyc +0 -0
- app.py +5 -6
- emotion_utils.py +80 -0
- model/README.md +0 -13
- model/stopwords/emotion_words.txt +74 -0
- model/stopwords/negation_words.txt +4 -0
__pycache__/emotion_utils.cpython-312.pyc
ADDED
Binary file (2.87 kB). View file
|
|
app.py
CHANGED
@@ -1,18 +1,17 @@
|
|
1 |
import streamlit as st
|
2 |
-
from transformers import BertForSequenceClassification, BertTokenizerFast
|
|
|
3 |
|
4 |
# Load the BERT model and tokenizer
|
5 |
model_path = "./model/"
|
6 |
model = BertForSequenceClassification.from_pretrained(model_path)
|
7 |
tokenizer = BertTokenizerFast.from_pretrained(model_path)
|
8 |
-
nlp = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
|
9 |
|
10 |
# Function to update sentiment analysis
|
11 |
def analyze_sentiment(text):
|
12 |
if text.strip():
|
13 |
-
|
14 |
-
|
15 |
-
score = result[0]['score']
|
16 |
return label, score
|
17 |
else:
|
18 |
return None, None
|
@@ -69,7 +68,7 @@ st.markdown("""
|
|
69 |
# Title and description
|
70 |
st.title("🌟 G-Bert: Emotion Analysis")
|
71 |
st.markdown("""
|
72 |
-
G-Bert is a
|
73 |
It can detect emotions like Anger, Astonished, Optimistic, and Sadness with a confidence score.
|
74 |
""")
|
75 |
# Text input
|
|
|
1 |
import streamlit as st
|
2 |
+
from transformers import BertForSequenceClassification, BertTokenizerFast
|
3 |
+
from emotion_utils import predict # Custom module for prediction
|
4 |
|
5 |
# Load the BERT model and tokenizer.
# Streamlit re-executes this script on every user interaction, so the load is
# wrapped in st.cache_resource: the weights are read from disk once and the
# same objects are reused on later reruns.
model_path = "./model/"


@st.cache_resource
def _load_model_and_tokenizer(path):
    """Load the sequence-classification model and its tokenizer from ``path``.

    Returns:
        A ``(model, tokenizer)`` tuple built with ``from_pretrained(path)``.
    """
    return (
        BertForSequenceClassification.from_pretrained(path),
        BertTokenizerFast.from_pretrained(path),
    )


model, tokenizer = _load_model_and_tokenizer(model_path)
|
|
|
9 |
|
10 |
# Function to update sentiment analysis
|
11 |
def analyze_sentiment(text):
    """Classify the emotion of ``text`` and report a confidence score.

    Args:
        text: The user-supplied text. ``None``, an empty string, or a
            whitespace-only string means there is nothing to analyze.

    Returns:
        ``(label, score)`` where ``label`` is the label returned by
        ``predict`` and ``score`` is the highest class probability as a
        float, or ``(None, None)`` when there is nothing to analyze.
    """
    # Guard first: a missing value must not reach ``.strip()``.
    if not text or not text.strip():
        return None, None

    probs, _, label = predict(text, model, tokenizer)
    # NOTE(review): ``predict`` may remap the label for negated text while
    # ``probs`` stays as the raw model output, so in that case this score is
    # the probability of the model's original top class.
    score = probs.max().item()  # Get the highest probability score
    return label, score
|
|
|
68 |
# Title and description
|
69 |
st.title("🌟 G-Bert: Emotion Analysis")
|
70 |
st.markdown("""
|
71 |
+
G-Bert is a Bangla sentiment analysis tool that uses a pre-trained BERT model to analyze the emotion of any Bengali or religious (Gita) text.
|
72 |
It can detect emotions like Anger, Astonished, Optimistic, and Sadness with a confidence score.
|
73 |
""")
|
74 |
# Text input
|
emotion_utils.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
|
3 |
+
def load_words_from_file(file_path):
    """Load a word list from a UTF-8 text file, one word per line.

    Surrounding whitespace is removed from every line and blank lines are
    skipped, so the result can be used directly for exact-match lookups.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of the non-empty, stripped lines in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # 'utf-8-sig' reads plain UTF-8 and also drops a leading byte-order mark,
    # which would otherwise stay attached to the first word and stop it from
    # ever matching.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        stripped_lines = (line.strip() for line in file)
        return [word for word in stripped_lines if word]
|
11 |
+
|
12 |
+
def preprocess_with_negation_v2(text, negation_words=None, emotion_words=None):
    """Tag emotion words that are closely followed by a negation word.

    The text is split on whitespace. For every token that is a negation
    word, the up-to-three tokens before it are scanned from nearest to
    farthest; the first one that is an emotion word gets the suffix
    `` (Negative context)`` appended. Tokens are re-joined with single
    spaces.

    Args:
        text: The sentence to preprocess.
        negation_words: Optional iterable of negation words. When ``None``
            the list is read from ``./model/stopwords/negation_words.txt``.
        emotion_words: Optional iterable of emotion words. When ``None``
            the list is read from ``./model/stopwords/emotion_words.txt``.

    Returns:
        The re-joined text with negated emotion words tagged.
    """
    # Punctuation ignored at token edges when matching, so that a
    # sentence-final token such as "নেই।" still matches "নেই". The original
    # token, punctuation included, is what appears in the output.
    edge_punctuation = "।॥,.;:!?\"'()[]{}"

    if negation_words is None:
        negation_words = load_words_from_file('./model/stopwords/negation_words.txt')
    if emotion_words is None:
        emotion_words = load_words_from_file('./model/stopwords/emotion_words.txt')

    # Sets give constant-time membership tests; the empty string is dropped so
    # a punctuation-only token (which strips to "") can never match.
    negation_lookup = set(negation_words) - {""}
    emotion_lookup = set(emotion_words) - {""}

    # Tokenize the sentence into words
    words = text.split()
    match_keys = [word.strip(edge_punctuation) for word in words]
    modified_words = list(words)  # Copy so the originals stay available

    # Detect negation-emotion pairs
    for i, key in enumerate(match_keys):
        if key not in negation_lookup:
            continue
        # Check the previous 3 words, nearest first, for an emotion word
        for back in range(1, 4):
            target = i - back
            if target < 0:
                break
            if match_keys[target] in emotion_lookup:
                # Mark the detected emotion with a negation label
                modified_words[target] = f"{words[target]} (Negative context)"
                break

    # Reconstruct the text
    return " ".join(modified_words)
|
34 |
+
|
35 |
+
|
36 |
+
def predict(text, model, tokenizer):
    """Predict the emotion label for ``text`` with negation handling.

    The text is first passed through ``preprocess_with_negation_v2``, which
    tags negated emotion words with ``(Negative context)``. The tagged text
    is tokenized (truncated to 512 tokens) and classified by ``model``. If
    the tag is present in the processed text, the predicted label is
    remapped through a fixed table (Sadness -> Optimistic,
    Optimistic -> Sadness, Anger -> Optimistic); other labels are unchanged.

    Args:
        text: The input sentence.
        model: A sequence-classification model exposing ``config.id2label``.
            It is moved to CUDA when available, otherwise to the CPU.
        tokenizer: The tokenizer matching ``model``.

    Returns:
        A ``(probs, pred_label_idx, pred_label)`` tuple: the softmax
        probabilities tensor, the index of the highest-probability class,
        and the (possibly remapped) label string.

        NOTE(review): when the label is remapped, ``probs`` and
        ``pred_label_idx`` still describe the model's raw prediction, so they
        may not correspond to ``pred_label``.
    """
    # Ensure the model is on the correct device
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # Preprocess the text for advanced negation handling
    processed_text = preprocess_with_negation_v2(text)

    # Tokenize the text
    inputs = tokenizer(
        processed_text,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    ).to(device)

    # Perform inference without tracking gradients
    with torch.no_grad():
        outputs = model(**inputs)

    # Compute probabilities
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

    # Get the class with the highest probability
    pred_label_idx = probs.argmax(dim=-1).item()

    # Map the index to the label
    pred_label = model.config.id2label[pred_label_idx]

    # Adjust prediction for negation context
    negation_map = {
        "Sadness": "Optimistic",
        "Optimistic": "Sadness",
        "Anger": "Optimistic",
    }
    if "(Negative context)" in processed_text:
        pred_label = negation_map.get(pred_label, pred_label)

    return probs, pred_label_idx, pred_label
|
model/README.md
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Gita Bengali Sentiment Prediction
|
3 |
-
emoji: 🦀
|
4 |
-
colorFrom: green
|
5 |
-
colorTo: blue
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.40.1
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
license: apache-2.0
|
11 |
-
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model/stopwords/emotion_words.txt
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ভালবাসা
|
2 |
+
ভালোবাসি
|
3 |
+
ভালোবাসো
|
4 |
+
ভালোবাসে
|
5 |
+
ভালো
|
6 |
+
খারাপ
|
7 |
+
দুঃখ
|
8 |
+
দুঃখী
|
9 |
+
আনন্দ
|
10 |
+
রাগ
|
11 |
+
হতাশ
|
12 |
+
হতাশা
|
13 |
+
অপমান
|
14 |
+
অপমানিত
|
15 |
+
হেলা
|
16 |
+
অবহেলা
|
17 |
+
অবহেলিত
|
18 |
+
হিংসা
|
19 |
+
শান্ত
|
20 |
+
শান্তি
|
21 |
+
প্রেম
|
22 |
+
ভয়
|
23 |
+
ঘৃণা
|
24 |
+
কষ্ট
|
25 |
+
বিজয়
|
26 |
+
সুখ
|
27 |
+
সুখী
|
28 |
+
হাসি
|
29 |
+
যত্ন
|
30 |
+
মমতা
|
31 |
+
স্বপ্ন
|
32 |
+
খুশী
|
33 |
+
খুশি
|
34 |
+
সন্তুষ্ট
|
35 |
+
অসন্তুষ্ট
|
36 |
+
আশা
|
37 |
+
আশাবাদ
|
38 |
+
প্রেরণা
|
39 |
+
উন্নতি
|
40 |
+
সাফল্য
|
41 |
+
সম্ভাবনা
|
42 |
+
বিজয়
|
43 |
+
নির্ভরতা
|
44 |
+
সাহস
|
45 |
+
আত্মবিশ্বাস
|
46 |
+
আনন্দদায়ক
|
47 |
+
পরিতোষ
|
48 |
+
অর্জন
|
49 |
+
সৌভাগ্য
|
50 |
+
খুশিময়
|
51 |
+
উদ্যম
|
52 |
+
উদ্ভাবনী
|
53 |
+
আনন্দময়
|
54 |
+
ভরসা
|
55 |
+
নিরাশা
|
56 |
+
হতাশাজনক
|
57 |
+
কষ্টকর
|
58 |
+
দুঃখজনক
|
59 |
+
অভিমান
|
60 |
+
ব্যথা
|
61 |
+
বিচ্ছেদ
|
62 |
+
নীরবতা
|
63 |
+
বেদনা
|
64 |
+
শূন্যতা
|
65 |
+
হাহাকার
|
66 |
+
বিষাদ
|
67 |
+
অশ্রু
|
68 |
+
পীড়া
|
69 |
+
ক্ষতি
|
70 |
+
বিপর্যয়
|
71 |
+
অনুশোচনা
|
72 |
+
দুঃসহ
|
73 |
+
অস্বস্তি
|
74 |
+
ভারাক্রান্ত
|
model/stopwords/negation_words.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
না
|
2 |
+
নেই
|
3 |
+
নয়
|
4 |
+
নাই
|