Spaces:

miesnerjacob
/

Multi-task-NLP

Running

App Files Files Community

miesnerjacob committed on Jul 25, 2022

Commit

a00f9ba

•

1 Parent(s): 59fcc9f

added class and method docstrings

Browse files

Files changed (5) hide show

emotion_detection.py +12 -10
keyword_extraction.py +52 -49
named_entity_recognition.py +10 -7
part_of_speech_tagging.py +5 -5
sentiment_analysis.py +16 -14

emotion_detection.py CHANGED Viewed

@@ -5,7 +5,8 @@ import pandas as pd
 class EmotionDetection:
-    """ Emotion Detection on text data
     Attributes:
         tokenizer: An instance of Hugging Face Tokenizer
@@ -21,13 +22,13 @@ class EmotionDetection:
     def justify(self, text):
         """
-        The function to add two Complex Numbers.
         Parameters:
-            num (ComplexNumber): The complex number to be added.
         Returns:
-            ComplexNumber: A complex number which contains the sum.
         """
         word_attributions = self.explainer(text)
@@ -37,13 +38,13 @@ class EmotionDetection:
     def classify(self, text):
         """
-        The function to add two Complex Numbers.
         Parameters:
-            num (ComplexNumber): The complex number to be added.
         Returns:
-            ComplexNumber: A complex number which contains the sum.
         """
         tokens = self.tokenizer.encode_plus(text, add_special_tokens=False, return_tensors='pt')
@@ -57,13 +58,14 @@ class EmotionDetection:
     def run(self, text):
         """
-        The function to add two Complex Numbers.
         Parameters:
-            num (ComplexNumber): The complex number to be added.
         Returns:
-            ComplexNumber: A complex number which contains the sum.
         """
         preds = self.classify(text)

 class EmotionDetection:
+    """
+    Emotion Detection on text data.
     Attributes:
         tokenizer: An instance of Hugging Face Tokenizer
     def justify(self, text):
         """
+        Get html annotation for displaying emotion justification over text.
         Parameters:
+            text (str): The user input string to generate emotion justification for
         Returns:
+            html (html): html object for plotting emotion prediction justification
         """
         word_attributions = self.explainer(text)
     def classify(self, text):
         """
+        Recognize Emotion in text.
         Parameters:
+            text (str): The user input string to perform emotion classification on
         Returns:
+            predictions (str): The predicted probabilities for emotion classes
         """
         tokens = self.tokenizer.encode_plus(text, add_special_tokens=False, return_tensors='pt')
     def run(self, text):
         """
+        Classify and Justify Emotion in text.
         Parameters:
+            text (str): The user input string to perform emotion classification on
         Returns:
+            predictions (str): The predicted probabilities for emotion classes
+            html (html): html object for plotting emotion prediction justification
         """
         preds = self.classify(text)

keyword_extraction.py CHANGED Viewed

@@ -1,12 +1,11 @@
-import spacy
-import pytextrank
 import re
 from operator import itemgetter
 import en_core_web_sm
 class KeywordExtractor:
-    """ Keyword Extraction on text data
     Attributes:
         nlp: An instance of the spaCy English pipeline optimized for CPU
@@ -18,13 +17,13 @@ class KeywordExtractor:
     def get_keywords(self, text, max_keywords):
         """
-        The function to add two Complex Numbers.
         Parameters:
-            num (ComplexNumber): The complex number to be added.
         Returns:
-            ComplexNumber: A complex number which contains the sum.
         """
         doc = self.nlp(text)
@@ -33,41 +32,43 @@ class KeywordExtractor:
         return kws
-    def get_keyword_indicies(self, string_list, text):
         """
-        The function to add two Complex Numbers.
         Parameters:
-            num (ComplexNumber): The complex number to be added.
         Returns:
-            ComplexNumber: A complex number which contains the sum.
         """
-        out = []
-        for s in string_list:
-            indicies = [[m.start(), m.end()] for m in re.finditer(re.escape(s), text)]
-            out.extend(indicies)
-        return out
-    def merge_overlapping_indicies(self, indicies):
         """
-        The function to add two Complex Numbers.
         Parameters:
-            num (ComplexNumber): The complex number to be added.
         Returns:
-            ComplexNumber: A complex number which contains the sum.
         """
         # Sort the array on the basis of start values of intervals.
-        indicies.sort()
         stack = []
         # insert first interval into stack
-        stack.append(indicies[0])
-        for i in indicies[1:]:
             # Check for overlapping interval,
             # if interval overlap
             if (stack[-1][0] <= i[0] <= stack[-1][-1]) or (stack[-1][-1] == i[0]-1):
@@ -76,69 +77,71 @@ class KeywordExtractor:
                 stack.append(i)
         return stack
-    def merge_until_finished(self, indicies):
         """
-        The function to add two Complex Numbers.
         Parameters:
-            num (ComplexNumber): The complex number to be added.
         Returns:
-            ComplexNumber: A complex number which contains the sum.
         """
-        len_indicies = 0
         while True:
-            merged = self.merge_overlapping_indicies(indicies)
-            if len_indicies == len(merged):
-                out_indicies = sorted(merged, key=itemgetter(0))
-                return out_indicies
             else:
-                len_indicies = len(merged)
-    def get_annotation(self, text, indicies, kws):
         """
-        The function to add two Complex Numbers.
         Parameters:
-            num (ComplexNumber): The complex number to be added.
         Returns:
-            ComplexNumber: A complex number which contains the sum.
         """
         arr = list(text)
-        for idx in sorted(indicies, reverse=True):
             arr.insert(idx[0], "<kw>")
             arr.insert(idx[1]+1, "XXXxxxXXXxxxXXX <kw>")
-        annotation = ''.join(arr)
-        split = annotation.split('<kw>')
-        final_annotation = [(x.replace('XXXxxxXXXxxxXXX ', ''), "KEY", "#26aaef") if "XXXxxxXXXxxxXXX" in x else x for x in split]
         kws_check = []
-        for i in final_annotation:
             if type(i) is tuple:
                 kws_check.append(i[0])
-        return final_annotation
     def generate(self, text, max_keywords):
         """
-        The function to add two Complex Numbers.
         Parameters:
-            num (ComplexNumber): The complex number to be added.
         Returns:
-            ComplexNumber: A complex number which contains the sum.
         """
         kws = self.get_keywords(text, max_keywords)
-        indicies = list(self.get_keyword_indicies(kws, text))
-        if indicies:
-            indicies_merged = self.merge_until_finished(indicies)
-            annotation = self.get_annotation(text, indicies_merged, kws)
         else:
             annotation = None

 import re
 from operator import itemgetter
 import en_core_web_sm
 class KeywordExtractor:
+    """
+    Keyword Extraction on text data
     Attributes:
         nlp: An instance of the spaCy English pipeline optimized for CPU
     def get_keywords(self, text, max_keywords):
         """
+        Extract keywords from text.
         Parameters:
+            text (str): The user input string to extract keywords from
+            max_keywords (int): Limit on number of keywords to generate
         Returns:
+            kws (list): list of extracted keywords
         """
         doc = self.nlp(text)
         return kws
+    def get_keyword_indices(self, kws, text):
         """
+        Find the start and end indices of every keyword occurrence in text.
         Parameters:
+            kws (list): list of extracted keywords
+            text (str): The user input string to extract keywords from
         Returns:
+            keyword_indices (list): list of indices for keyword boundaries in text
         """
+        keyword_indices = []
+        for s in kws:
+            indices = [[m.start(), m.end()] for m in re.finditer(re.escape(s), text)]
+            keyword_indices.extend(indices)
+        return keyword_indices
+    def merge_overlapping_indices(self, keyword_indices):
         """
+        Merge overlapping keyword indices.
         Parameters:
+            keyword_indices (list): list of indices for keyword boundaries in text
         Returns:
+            keyword_indices (list): list of indices for keyword boundaries in text with overlapping ranges combined
         """
         # Sort the array on the basis of start values of intervals.
+        keyword_indices.sort()
         stack = []
         # insert first interval into stack
+        stack.append(keyword_indices[0])
+        for i in keyword_indices[1:]:
             # Check for overlapping interval,
             # if interval overlap
             if (stack[-1][0] <= i[0] <= stack[-1][-1]) or (stack[-1][-1] == i[0]-1):
                 stack.append(i)
         return stack
+    def merge_until_finished(self, keyword_indices):
         """
+        Loop until no overlapping keyword indices left.
         Parameters:
+            keyword_indices (list): list of indices for keyword boundaries in text
         Returns:
+            keyword_indices (list): list of indices for keyword boundaries in text with overlapping ranges combined
         """
+        len_indices = 0
         while True:
+            merged = self.merge_overlapping_indices(keyword_indices)
+            if len_indices == len(merged):
+                out_indices = sorted(merged, key=itemgetter(0))
+                return out_indices
             else:
+                len_indices = len(merged)
+    def get_annotation(self, text, keyword_indices):
         """
+        Create text annotation for extracted keywords.
         Parameters:
+            text (str): The user input string to annotate
+            keyword_indices (list): list of indices for keyword boundaries in text
         Returns:
+            annotation (list): list of tuples for generating html
         """
         arr = list(text)
+        for idx in sorted(keyword_indices, reverse=True):
             arr.insert(idx[0], "<kw>")
             arr.insert(idx[1]+1, "XXXxxxXXXxxxXXX <kw>")
+        joined_annotation = ''.join(arr)
+        split = joined_annotation.split('<kw>')
+        annotation = [(x.replace('XXXxxxXXXxxxXXX ', ''), "KEY", "#26aaef") if "XXXxxxXXXxxxXXX" in x else x for x in split]
         kws_check = []
+        for i in annotation:
             if type(i) is tuple:
                 kws_check.append(i[0])
+        return annotation
     def generate(self, text, max_keywords):
         """
+        Extract keywords from text and create text annotation for them.
         Parameters:
+            text (str): The user input string to extract keywords from
+            max_keywords (int): Limit on number of keywords to generate
         Returns:
+            annotation (list): list of tuples for generating html
+            kws (list): list of extracted keywords
         """
         kws = self.get_keywords(text, max_keywords)
+        indices = list(self.get_keyword_indices(kws, text))
+        if indices:
+            indices_merged = self.merge_until_finished(indices)
+            annotation = self.get_annotation(text, indices_merged, kws)
         else:
             annotation = None

named_entity_recognition.py CHANGED Viewed

@@ -3,7 +3,8 @@ from transformers import pipeline
 class NamedEntityRecognition:
-    """ Named Entity Recognition on text data
     Attributes:
         tokenizer: An instance of Hugging Face Tokenizer
@@ -18,13 +19,14 @@ class NamedEntityRecognition:
     def get_annotation(self, preds, text):
         """
-        The function to add two Complex Numbers.
         Parameters:
-            num (ComplexNumber): The complex number to be added.
         Returns:
-            ComplexNumber: A complex number which contains the sum.
         """
         splits = [0]
@@ -48,13 +50,14 @@ class NamedEntityRecognition:
     def classify(self, text):
         """
-        The function to add two Complex Numbers.
         Parameters:
-            num (ComplexNumber): The complex number to be added.
         Returns:
-            ComplexNumber: A complex number which contains the sum.
         """
         preds = self.nlp(text)

 class NamedEntityRecognition:
+    """
+    Named Entity Recognition on text data.
     Attributes:
         tokenizer: An instance of Hugging Face Tokenizer
     def get_annotation(self, preds, text):
         """
+        Get html annotation for displaying entities over text.
         Parameters:
+            preds (dict): List of entities and their associated metadata
+            text (str): The user input string to generate entity tags for
         Returns:
+            final_annotation (list): List of tuples to pass to text annotation html creator
         """
         splits = [0]
     def classify(self, text):
         """
+        Recognize Named Entities in text.
         Parameters:
+            text (str): The user input string to generate entity tags for
         Returns:
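+            predictions: The entity predictions produced by the NER pipeline for the input text
+            ner_annotation (list): List of tuples to pass to text annotation html creator (presumably the output of get_annotation; confirm against the full method body)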
         """
         preds = self.nlp(text)

part_of_speech_tagging.py CHANGED Viewed

@@ -12,15 +12,15 @@ class POSTagging:
     def classify(self, text):
         """
-        The function to add two Complex Numbers.
         Parameters:
-            num (ComplexNumber): The complex number to be added.
         Returns:
-            ComplexNumber: A complex number which contains the sum.
         """
         text = word_tokenize(text)
-        preds = nltk.pos_tag(text)
-        return preds

     def classify(self, text):
         """
+        Generate Part of Speech tags.
         Parameters:
+            text (str): The user input string to generate tags for
         Returns:
+            predictions (list): list of tuples containing words and their respective tags
         """
         text = word_tokenize(text)
+        predictions = nltk.pos_tag(text)
+        return predictions

sentiment_analysis.py CHANGED Viewed

@@ -5,7 +5,8 @@ import pandas as pd
 class SentimentAnalysis:
-    """ Sentiment on text data
     Attributes:
         tokenizer: An instance of Hugging Face Tokenizer
@@ -32,13 +33,13 @@ class SentimentAnalysis:
     def justify(self, text):
         """
-        The function to add two Complex Numbers.
         Parameters:
-            num (ComplexNumber): The complex number to be added.
         Returns:
-            ComplexNumber: A complex number which contains the sum.
         """
         word_attributions = self.explainer(text)
@@ -48,35 +49,36 @@ class SentimentAnalysis:
     def classify(self, text):
         """
-        The function to add two Complex Numbers.
         Parameters:
-            num (ComplexNumber): The complex number to be added.
         Returns:
-            ComplexNumber: A complex number which contains the sum.
         """
         tokens = self.tokenizer.encode_plus(text, add_special_tokens=False, return_tensors='pt')
         outputs = self.model(**tokens)
         probs = torch.nn.functional.softmax(outputs[0], dim=-1)
         probs = probs.mean(dim=0).detach().numpy()
-        preds = pd.Series(probs, index=["Negative", "Neutral", "Positive"], name='Predicted Probability')
-        return preds
     def run(self, text):
         """
-        The function to add two Complex Numbers.
         Parameters:
-            num (ComplexNumber): The complex number to be added.
         Returns:
-            ComplexNumber: A complex number which contains the sum.
         """
-        preds = self.classify(text)
         html = self.justify(text)
-        return preds, html

 class SentimentAnalysis:
+    """
+    Sentiment Analysis on text data.
     Attributes:
         tokenizer: An instance of Hugging Face Tokenizer
     def justify(self, text):
         """
+        Get html annotation for displaying sentiment justification over text.
         Parameters:
+            text (str): The user input string to generate sentiment justification for
         Returns:
+            html (html): html object for plotting sentiment prediction justification
         """
         word_attributions = self.explainer(text)
     def classify(self, text):
         """
+        Recognize Sentiment in text.
         Parameters:
+            text (str): The user input string to perform sentiment classification on
         Returns:
+            predictions (pd.Series): The predicted probabilities for sentiment classes
         """
         tokens = self.tokenizer.encode_plus(text, add_special_tokens=False, return_tensors='pt')
         outputs = self.model(**tokens)
         probs = torch.nn.functional.softmax(outputs[0], dim=-1)
         probs = probs.mean(dim=0).detach().numpy()
+        predictions = pd.Series(probs, index=["Negative", "Neutral", "Positive"], name='Predicted Probability')
+        return predictions
     def run(self, text):
         """
+        Classify and Justify Sentiment in text.
         Parameters:
+            text (str): The user input string to perform sentiment classification on
         Returns:
+            predictions (pd.Series): The predicted probabilities for sentiment classes
+            html (html): html object for plotting sentiment prediction justification
         """
+        predictions = self.classify(text)
         html = self.justify(text)
+        return predictions, html