sumonta056 committed on
Commit
88ba180
1 Parent(s): d8cdd17

feat: working with negative emotion

Browse files
__pycache__/emotion_utils.cpython-312.pyc ADDED
Binary file (2.87 kB). View file
 
app.py CHANGED
@@ -1,18 +1,17 @@
1
  import streamlit as st
2
- from transformers import BertForSequenceClassification, BertTokenizerFast, pipeline
 
3
 
4
  # Load the BERT model and tokenizer
5
  model_path = "./model/"
6
  model = BertForSequenceClassification.from_pretrained(model_path)
7
  tokenizer = BertTokenizerFast.from_pretrained(model_path)
8
- nlp = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
9
 
10
  # Function to update sentiment analysis
11
  def analyze_sentiment(text):
12
  if text.strip():
13
- result = nlp(text)
14
- label = result[0]['label']
15
- score = result[0]['score']
16
  return label, score
17
  else:
18
  return None, None
@@ -69,7 +68,7 @@ st.markdown("""
69
  # Title and description
70
  st.title("🌟 G-Bert: Emotion Analysis")
71
  st.markdown("""
72
- G-Bert is a bangla sentiment analysis tool that uses a pre-trained BERT model to analyze the emotion of any bengali or religious (gita) text.
73
  It can detect emotions like Anger, Astonished, Optimistic, and Sadness with a confidence score.
74
  """)
75
  # Text input
 
1
  import streamlit as st
2
+ from transformers import BertForSequenceClassification, BertTokenizerFast
3
+ from emotion_utils import predict # Custom module for prediction
4
 
5
  # Load the BERT model and tokenizer
6
  model_path = "./model/"
7
  model = BertForSequenceClassification.from_pretrained(model_path)
8
  tokenizer = BertTokenizerFast.from_pretrained(model_path)
 
9
 
10
  # Function to update sentiment analysis
11
  def analyze_sentiment(text):
12
  if text.strip():
13
+ probs, _, label = predict(text, model, tokenizer)
14
+ score = probs.max().item() # Get the highest probability score
 
15
  return label, score
16
  else:
17
  return None, None
 
68
  # Title and description
69
  st.title("🌟 G-Bert: Emotion Analysis")
70
  st.markdown("""
71
+ G-Bert is a Bangla sentiment analysis tool that uses a pre-trained BERT model to analyze the emotion of any Bengali or religious (Gita) text.
72
  It can detect emotions like Anger, Astonished, Optimistic, and Sadness with a confidence score.
73
  """)
74
  # Text input
emotion_utils.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
def load_words_from_file(file_path):
    """Load a word list from a UTF-8 text file, one word per line.

    Surrounding whitespace is stripped from every line and blank lines are
    skipped, so trailing spaces or empty lines in the file cannot produce
    entries that never match a token. A leading byte-order mark, if present,
    is ignored.

    Args:
        file_path: Path of the text file to read.

    Returns:
        list: The words, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # "utf-8-sig" reads plain UTF-8 too, but drops a BOM if an editor added one.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        stripped_lines = (line.strip() for line in file)
        return [word for word in stripped_lines if word]
11
+
12
def preprocess_with_negation_v2(text, negation_words=None, emotion_words=None):
    """Tag emotion words that are followed closely by a negation word.

    The text is split on whitespace. For every token that is a negation
    word, the three tokens before it are searched (nearest first) for an
    emotion word; the first one found gets `` (Negative context)`` appended
    to it. At most one emotion word is tagged per negation word.

    Args:
        text: The sentence to process.
        negation_words: Optional iterable of negation words. When ``None``,
            the list is read from ``./model/stopwords/negation_words.txt``.
        emotion_words: Optional iterable of emotion words. When ``None``,
            the list is read from ``./model/stopwords/emotion_words.txt``.

    Returns:
        str: The tokens re-joined with single spaces, with tagged emotion
        words where a negation was detected.
    """
    if negation_words is None:
        negation_words = load_words_from_file('./model/stopwords/negation_words.txt')
    if emotion_words is None:
        emotion_words = load_words_from_file('./model/stopwords/emotion_words.txt')

    # Sets give O(1) membership tests inside the nested loop below.
    negations = frozenset(negation_words)
    emotions = frozenset(emotion_words)

    # Tokenize on whitespace; matching is exact, so a token with attached
    # punctuation will not match a vocabulary entry.
    words = text.split()
    modified_words = list(words)  # copy, so lookups always see the originals

    for i, word in enumerate(words):
        if word not in negations:
            continue
        # Walk backwards over at most three preceding tokens, nearest first.
        for k in range(i - 1, max(i - 4, -1), -1):
            if words[k] in emotions:
                modified_words[k] = f"{words[k]} (Negative context)"
                break

    return " ".join(modified_words)
34
+
35
+
36
def predict(text, model, tokenizer):
    """Predict the emotion label for *text* with rule-based negation handling.

    The text is first passed through ``preprocess_with_negation_v2``, then
    tokenized and classified. If the preprocessed text contains the
    ``(Negative context)`` marker, the predicted label is remapped through a
    fixed table; labels not in that table are left unchanged.

    Args:
        text: The text to classify. The use of ``.item()`` on the argmax
            means a single input is expected, not a batch.
        model: Classification model; it is moved to CUDA when available,
            otherwise to the CPU. Its ``config.id2label`` maps class indices
            to label names.
        tokenizer: Tokenizer callable matching ``model``.

    Returns:
        tuple: ``(probs, pred_label_idx, pred_label)`` where ``probs`` is the
        softmax over the model's logits, ``pred_label_idx`` is the model's
        raw argmax index, and ``pred_label`` is the label after the negation
        adjustment. When the label is remapped, ``pred_label_idx`` and
        ``probs`` still describe the model's original prediction.
    """
    # Ensure the model is on the correct device
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)

    # Tag emotion words that appear in a negated context
    processed_text = preprocess_with_negation_v2(text)

    # Tokenize the text; inputs longer than 512 tokens are truncated
    inputs = tokenizer(
        processed_text,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    ).to(device)

    # Perform inference without tracking gradients
    with torch.no_grad():
        outputs = model(**inputs)

    # Compute probabilities and pick the most likely class
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    pred_label_idx = probs.argmax(dim=-1).item()
    pred_label = model.config.id2label[pred_label_idx]

    # Adjust prediction for negation context
    negation_map = {
        "Sadness": "Optimistic",
        "Optimistic": "Sadness",
        "Anger": "Optimistic",
    }
    if "(Negative context)" in processed_text:
        pred_label = negation_map.get(pred_label, pred_label)

    return probs, pred_label_idx, pred_label
model/README.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: Gita Bengali Sentiment Predicition
3
- emoji: 🦀
4
- colorFrom: green
5
- colorTo: blue
6
- sdk: streamlit
7
- sdk_version: 1.40.1
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model/stopwords/emotion_words.txt ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ভালবাসা
2
+ ভালোবাসি
3
+ ভালোবাসো
4
+ ভালোবাসে
5
+ ভালো
6
+ খারাপ
7
+ দুঃখ
8
+ দুঃখী
9
+ আনন্দ
10
+ রাগ
11
+ হতাশ
12
+ হতাশা
13
+ অপমান
14
+ অপমানিত
15
+ হেলা
16
+ অবহেলা
17
+ অবহেলিত
18
+ হিংসা
19
+ শান্ত
20
+ শান্তি
21
+ প্রেম
22
+ ভয়
23
+ ঘৃণা
24
+ কষ্ট
25
+ বিজয়
26
+ সুখ
27
+ সুখী
28
+ হাসি
29
+ যত্ন
30
+ মমতা
31
+ স্বপ্ন
32
+ খুশী
33
+ খুশি
34
+ সন্তুষ্ট
35
+ অসন্তুষ্ট
36
+ আশা
37
+ আশাবাদ
38
+ প্রেরণা
39
+ উন্নতি
40
+ সাফল্য
41
+ সম্ভাবনা
42
+ বিজয়
43
+ নির্ভরতা
44
+ সাহস
45
+ আত্মবিশ্বাস
46
+ আনন্দদায়ক
47
+ পরিতোষ
48
+ অর্জন
49
+ সৌভাগ্য
50
+ খুশিময়
51
+ উদ্যম
52
+ উদ্ভাবনী
53
+ আনন্দময়
54
+ ভরসা
55
+ নিরাশা
56
+ হতাশাজনক
57
+ কষ্টকর
58
+ দুঃখজনক
59
+ অভিমান
60
+ ব্যথা
61
+ বিচ্ছেদ
62
+ নীরবতা
63
+ বেদনা
64
+ শূন্যতা
65
+ হাহাকার
66
+ বিষাদ
67
+ অশ্রু
68
+ পীড়া
69
+ ক্ষতি
70
+ বিপর্যয়
71
+ অনুশোচনা
72
+ দুঃসহ
73
+ অস্বস্তি
74
+ ভারাক্রান্ত
model/stopwords/negation_words.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ না
2
+ নেই
3
+ নয়
4
+ নাই