BenjiELCA committed on
Commit
556b7be
1 Parent(s): d7a4161

move the sentiment detection to OCR file

Browse files
modules/OCR.py CHANGED
@@ -20,6 +20,35 @@ VISION_ENDPOINT = os.getenv("VISION_ENDPOINT")
20
 
21
  VISION_KEY = json_data["VISION_KEY"]
22
  VISION_ENDPOINT = json_data["VISION_ENDPOINT"]"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  def sample_ocr_image_file(image_data):
25
  # Set the values of your computer vision endpoint and computer vision key
 
20
 
21
  VISION_KEY = json_data["VISION_KEY"]
22
  VISION_ENDPOINT = json_data["VISION_ENDPOINT"]"""
23
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
24
+ import torch
25
+ import logging
26
+
27
+ # Suppress specific warnings from transformers
28
+ logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
29
+
30
+ # Function to initialize the model and tokenizer
31
+ def initialize_model():
32
+ tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
33
+ model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
34
+ return tokenizer, model
35
+
36
+ # Initialize model and tokenizer
37
+ tokenizer, emotion_model = initialize_model()
38
+
39
+ # Function to perform sentiment analysis and return the highest scoring emotion and its score between positive and negative
40
+ def analyze_sentiment(sentence, tokenizer=tokenizer, model=emotion_model):
41
+ inputs = tokenizer(sentence, return_tensors="pt")
42
+ outputs = model(**inputs)
43
+ probs = torch.nn.functional.softmax(outputs.logits, dim=-1).squeeze().tolist()
44
+ labels = ["negative", "neutral", "positive"]
45
+ results = dict(zip(labels, probs))
46
+
47
+ # Filter out the neutral score and get the highest score between positive and negative
48
+ relevant_results = {k: results[k] for k in ["positive", "negative"]}
49
+ highest_emotion = max(relevant_results, key=relevant_results.get)
50
+ highest_score = relevant_results[highest_emotion]
51
+ return highest_emotion, highest_score
52
 
53
  def sample_ocr_image_file(image_data):
54
  # Set the values of your computer vision endpoint and computer vision key
modules/streamlit_utils.py CHANGED
@@ -33,8 +33,6 @@ import time
33
  from modules.toXML import get_size_elements
34
 
35
 
36
-
37
-
38
  def get_memory_usage():
39
  process = psutil.Process()
40
  mem_info = process.memory_info()
@@ -52,8 +50,6 @@ def read_xml_file(filepath):
52
  return file.read()
53
 
54
 
55
-
56
-
57
  # Suppress the symlink warning
58
  os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'
59
 
 
33
  from modules.toXML import get_size_elements
34
 
35
 
 
 
36
  def get_memory_usage():
37
  process = psutil.Process()
38
  mem_info = process.memory_info()
 
50
  return file.read()
51
 
52
 
 
 
53
  # Suppress the symlink warning
54
  os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'
55
 
modules/toWizard.py CHANGED
@@ -2,37 +2,8 @@ import xml.etree.ElementTree as ET
2
  from modules.utils import class_dict
3
  from xml.dom import minidom
4
  from modules.utils import error
5
- from transformers import pipeline
6
 
7
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
- import torch
9
- import logging
10
-
11
- # Suppress specific warnings from transformers
12
- logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
13
-
14
- # Function to initialize the model and tokenizer
15
- def initialize_model():
16
- tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
17
- model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
18
- return tokenizer, model
19
-
20
- # Function to perform sentiment analysis and return the highest scoring emotion and its score between positive and negative
21
- def analyze_sentiment(sentence, tokenizer, model):
22
- inputs = tokenizer(sentence, return_tensors="pt")
23
- outputs = model(**inputs)
24
- probs = torch.nn.functional.softmax(outputs.logits, dim=-1).squeeze().tolist()
25
- labels = ["negative", "neutral", "positive"]
26
- results = dict(zip(labels, probs))
27
-
28
- # Filter out the neutral score and get the highest score between positive and negative
29
- relevant_results = {k: results[k] for k in ["positive", "negative"]}
30
- highest_emotion = max(relevant_results, key=relevant_results.get)
31
- highest_score = relevant_results[highest_emotion]
32
- return highest_emotion, highest_score
33
-
34
- # Initialize model and tokenizer
35
- tokenizer, model = initialize_model()
36
 
37
  def rescale(scale, boxes):
38
  for i in range(len(boxes)):
@@ -149,7 +120,7 @@ def find_positive_end(bpmn_ids, links, text_mapping):
149
  continue
150
  if check_end(links[idx]) and (bpmn_id.split('_')[0] in ['event', 'message']):
151
  # Perform sentiment analysis and get the highest scoring emotion and its score between positive and negative
152
- highest_emotion, highest_score = analyze_sentiment(text_mapping[bpmn_id], tokenizer, model)
153
  emotion_data.append((bpmn_id, highest_emotion, highest_score))
154
 
155
  # Sort by emotion label with 'positive' first and 'negative' second,
@@ -161,7 +132,7 @@ def find_positive_end(bpmn_ids, links, text_mapping):
161
  def find_best_direction(texts_list):
162
  emotion_data = []
163
  for text in texts_list:
164
- highest_emotion, highest_score = analyze_sentiment(text, tokenizer, model)
165
  emotion_data.append((text, highest_emotion, highest_score))
166
 
167
  # Sort by emotion label with 'positive' first and 'negative' second,
 
2
  from modules.utils import class_dict
3
  from xml.dom import minidom
4
  from modules.utils import error
5
+ from modules.OCR import analyze_sentiment
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  def rescale(scale, boxes):
9
  for i in range(len(boxes)):
 
120
  continue
121
  if check_end(links[idx]) and (bpmn_id.split('_')[0] in ['event', 'message']):
122
  # Perform sentiment analysis and get the highest scoring emotion and its score between positive and negative
123
+ highest_emotion, highest_score = analyze_sentiment(text_mapping[bpmn_id])
124
  emotion_data.append((bpmn_id, highest_emotion, highest_score))
125
 
126
  # Sort by emotion label with 'positive' first and 'negative' second,
 
132
  def find_best_direction(texts_list):
133
  emotion_data = []
134
  for text in texts_list:
135
+ highest_emotion, highest_score = analyze_sentiment(text)
136
  emotion_data.append((text, highest_emotion, highest_score))
137
 
138
  # Sort by emotion label with 'positive' first and 'negative' second,