Spaces:

ELCA-SA
/

sketch-to-BPMN

Running

App Files Community

BenjiELCA committed on Jul 23

Commit

556b7be

•

1 Parent(s): d7a4161

move the sentiment detection to OCR file

Browse files

Files changed (3) hide show

modules/OCR.py +29 -0
modules/streamlit_utils.py +0 -4
modules/toWizard.py +3 -32

modules/OCR.py CHANGED Viewed

@@ -20,6 +20,35 @@ VISION_ENDPOINT = os.getenv("VISION_ENDPOINT")
 VISION_KEY = json_data["VISION_KEY"]
 VISION_ENDPOINT = json_data["VISION_ENDPOINT"]"""
 def sample_ocr_image_file(image_data):
     # Set the values of your computer vision endpoint and computer vision key

 VISION_KEY = json_data["VISION_KEY"]
 VISION_ENDPOINT = json_data["VISION_ENDPOINT"]"""
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import logging
+# Suppress specific warnings from transformers
+logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
+# Function to initialize the model and tokenizer
+def initialize_model():
+    tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
+    model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
+    return tokenizer, model
+# Initialize model and tokenizer
+tokenizer, emotion_model = initialize_model()
+# Function to perform sentiment analysis and return the highest scoring emotion and its score between positive and negative
+def analyze_sentiment(sentence, tokenizer=tokenizer, model=emotion_model):
+    inputs = tokenizer(sentence, return_tensors="pt")
+    outputs = model(**inputs)
+    probs = torch.nn.functional.softmax(outputs.logits, dim=-1).squeeze().tolist()
+    labels = ["negative", "neutral", "positive"]
+    results = dict(zip(labels, probs))
+    # Filter out the neutral score and get the highest score between positive and negative
+    relevant_results = {k: results[k] for k in ["positive", "negative"]}
+    highest_emotion = max(relevant_results, key=relevant_results.get)
+    highest_score = relevant_results[highest_emotion]
+    return highest_emotion, highest_score
 def sample_ocr_image_file(image_data):
     # Set the values of your computer vision endpoint and computer vision key

modules/streamlit_utils.py CHANGED Viewed

@@ -33,8 +33,6 @@ import time
 from modules.toXML import get_size_elements
 def get_memory_usage():
     process = psutil.Process()
     mem_info = process.memory_info()
@@ -52,8 +50,6 @@ def read_xml_file(filepath):
         return file.read()
 # Suppress the symlink warning
 os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'

 from modules.toXML import get_size_elements
 def get_memory_usage():
     process = psutil.Process()
     mem_info = process.memory_info()
         return file.read()
 # Suppress the symlink warning
 os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'

modules/toWizard.py CHANGED Viewed

@@ -2,37 +2,8 @@ import xml.etree.ElementTree as ET
 from modules.utils import class_dict
 from xml.dom import minidom
 from modules.utils import error
-from transformers import pipeline
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import torch
-import logging
-# Suppress specific warnings from transformers
-logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
-# Function to initialize the model and tokenizer
-def initialize_model():
-    tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
-    model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
-    return tokenizer, model
-# Function to perform sentiment analysis and return the highest scoring emotion and its score between positive and negative
-def analyze_sentiment(sentence, tokenizer, model):
-    inputs = tokenizer(sentence, return_tensors="pt")
-    outputs = model(**inputs)
-    probs = torch.nn.functional.softmax(outputs.logits, dim=-1).squeeze().tolist()
-    labels = ["negative", "neutral", "positive"]
-    results = dict(zip(labels, probs))
-    # Filter out the neutral score and get the highest score between positive and negative
-    relevant_results = {k: results[k] for k in ["positive", "negative"]}
-    highest_emotion = max(relevant_results, key=relevant_results.get)
-    highest_score = relevant_results[highest_emotion]
-    return highest_emotion, highest_score
-# Initialize model and tokenizer
-tokenizer, model = initialize_model()
 def rescale(scale, boxes):
     for i in range(len(boxes)):
@@ -149,7 +120,7 @@ def find_positive_end(bpmn_ids, links, text_mapping):
             continue
         if check_end(links[idx]) and (bpmn_id.split('_')[0] in ['event', 'message']):
             # Perform sentiment analysis and get the highest scoring emotion and its score between positive and negative
-            highest_emotion, highest_score = analyze_sentiment(text_mapping[bpmn_id], tokenizer, model)
             emotion_data.append((bpmn_id, highest_emotion, highest_score))
     # Sort by emotion label with 'positive' first and 'negative' second,
@@ -161,7 +132,7 @@ def find_positive_end(bpmn_ids, links, text_mapping):
 def find_best_direction(texts_list):
     emotion_data = []
     for text in texts_list:
-        highest_emotion, highest_score = analyze_sentiment(text, tokenizer, model)
         emotion_data.append((text, highest_emotion, highest_score))
     # Sort by emotion label with 'positive' first and 'negative' second,

 from modules.utils import class_dict
 from xml.dom import minidom
 from modules.utils import error
+from modules.OCR import analyze_sentiment
 def rescale(scale, boxes):
     for i in range(len(boxes)):
             continue
         if check_end(links[idx]) and (bpmn_id.split('_')[0] in ['event', 'message']):
             # Perform sentiment analysis and get the highest scoring emotion and its score between positive and negative
+            highest_emotion, highest_score = analyze_sentiment(text_mapping[bpmn_id])
             emotion_data.append((bpmn_id, highest_emotion, highest_score))
     # Sort by emotion label with 'positive' first and 'negative' second,
 def find_best_direction(texts_list):
     emotion_data = []
     for text in texts_list:
+        highest_emotion, highest_score = analyze_sentiment(text)
         emotion_data.append((text, highest_emotion, highest_score))
     # Sort by emotion label with 'positive' first and 'negative' second,