miesnerjacob committed
Commit a00f9ba
1 Parent(s): 59fcc9f

added class and method docstrings

emotion_detection.py CHANGED
@@ -5,7 +5,8 @@ import pandas as pd
 
 
 class EmotionDetection:
-    """ Emotion Detection on text data
+    """
+    Emotion Detection on text data.
 
     Attributes:
         tokenizer: An instance of Hugging Face Tokenizer
@@ -21,13 +22,13 @@ class EmotionDetection:
 
     def justify(self, text):
         """
-        The function to add two Complex Numbers.
+        Get html annotation for displaying emotion justification over text.
 
         Parameters:
-            num (ComplexNumber): The complex number to be added.
+            text (str): The user input string for emotion justification
 
         Returns:
-            ComplexNumber: A complex number which contains the sum.
+            html (html): html object for plotting emotion prediction justification
         """
 
         word_attributions = self.explainer(text)
@@ -37,13 +38,13 @@ class EmotionDetection:
 
     def classify(self, text):
         """
-        The function to add two Complex Numbers.
+        Recognize Emotion in text.
 
         Parameters:
-            num (ComplexNumber): The complex number to be added.
+            text (str): The user input string to perform emotion classification on
 
         Returns:
-            ComplexNumber: A complex number which contains the sum.
+            predictions (str): The predicted probabilities for emotion classes
         """
 
         tokens = self.tokenizer.encode_plus(text, add_special_tokens=False, return_tensors='pt')
@@ -57,13 +58,14 @@ class EmotionDetection:
 
     def run(self, text):
         """
-        The function to add two Complex Numbers.
+        Classify and Justify Emotion in text.
 
         Parameters:
-            num (ComplexNumber): The complex number to be added.
+            text (str): The user input string to perform emotion classification on
 
         Returns:
-            ComplexNumber: A complex number which contains the sum.
+            predictions (str): The predicted probabilities for emotion classes
+            html (html): html object for plotting emotion prediction justification
         """
 
         preds = self.classify(text)
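
As a quick reference for the API these docstrings now describe, a minimal usage sketch follows; the constructor and model setup are not part of this diff, so the no-argument EmotionDetection() below is an assumption.

    # Hypothetical usage sketch; EmotionDetection.__init__ (tokenizer, model,
    # explainer setup) is not shown in this diff, so a no-argument constructor
    # is an assumption.
    from emotion_detection import EmotionDetection

    emotion = EmotionDetection()

    # run() is documented above as returning the class probabilities and the
    # justification html in a single call.
    predictions, html = emotion.run("I am thrilled with how this project turned out!")
    print(predictions)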
keyword_extraction.py CHANGED
@@ -1,12 +1,11 @@
-import spacy
-import pytextrank
 import re
 from operator import itemgetter
 import en_core_web_sm
 
 
 class KeywordExtractor:
-    """ Keyword Extraction on text data
+    """
+    Keyword Extraction on text data.
 
     Attributes:
         nlp: An instance English pipeline optimized for CPU for spacy
@@ -18,13 +17,13 @@ class KeywordExtractor:
 
     def get_keywords(self, text, max_keywords):
         """
-        The function to add two Complex Numbers.
+        Extract keywords from text.
 
         Parameters:
-            num (ComplexNumber): The complex number to be added.
+            text (str): The user input string to extract keywords from
 
         Returns:
-            ComplexNumber: A complex number which contains the sum.
+            kws (list): list of extracted keywords
         """
 
         doc = self.nlp(text)
@@ -33,41 +32,43 @@ class KeywordExtractor:
 
         return kws
 
-    def get_keyword_indicies(self, string_list, text):
+    def get_keyword_indices(self, kws, text):
         """
-        The function to add two Complex Numbers.
+        Get the indices of keyword occurrences in text.
 
         Parameters:
-            num (ComplexNumber): The complex number to be added.
+            kws (list): list of extracted keywords
+            text (str): The user input string to extract keywords from
 
         Returns:
-            ComplexNumber: A complex number which contains the sum.
+            keyword_indices (list): list of indices for keyword boundaries in text
         """
 
-        out = []
-        for s in string_list:
-            indicies = [[m.start(), m.end()] for m in re.finditer(re.escape(s), text)]
-            out.extend(indicies)
+        keyword_indices = []
+        for s in kws:
+            indices = [[m.start(), m.end()] for m in re.finditer(re.escape(s), text)]
+            keyword_indices.extend(indices)
 
-        return out
+        return keyword_indices
 
-    def merge_overlapping_indicies(self, indicies):
+    def merge_overlapping_indices(self, keyword_indices):
         """
-        The function to add two Complex Numbers.
+        Merge overlapping keyword indices.
 
         Parameters:
-            num (ComplexNumber): The complex number to be added.
+            keyword_indices (list): list of indices for keyword boundaries in text
 
         Returns:
-            ComplexNumber: A complex number which contains the sum.
+            keyword_indices (list): list of indices for keyword boundaries with overlapping indices merged
         """
 
         # Sort the array on the basis of start values of intervals.
-        indicies.sort()
+        keyword_indices.sort()
+
         stack = []
         # insert first interval into stack
-        stack.append(indicies[0])
-        for i in indicies[1:]:
+        stack.append(keyword_indices[0])
+        for i in keyword_indices[1:]:
            # Check for overlapping interval,
            # if interval overlap
            if (stack[-1][0] <= i[0] <= stack[-1][-1]) or (stack[-1][-1] == i[0]-1):
@@ -76,69 +77,71 @@ class KeywordExtractor:
                stack.append(i)
        return stack
 
-    def merge_until_finished(self, indicies):
+    def merge_until_finished(self, keyword_indices):
         """
-        The function to add two Complex Numbers.
+        Loop until no overlapping keyword indices are left.
 
         Parameters:
-            num (ComplexNumber): The complex number to be added.
+            keyword_indices (list): list of indices for keyword boundaries in text
 
         Returns:
-            ComplexNumber: A complex number which contains the sum.
+            keyword_indices (list): list of indices for keyword boundaries with overlapping indices merged
         """
 
-        len_indicies = 0
+        len_indices = 0
         while True:
-            merged = self.merge_overlapping_indicies(indicies)
-            if len_indicies == len(merged):
-                out_indicies = sorted(merged, key=itemgetter(0))
-                return out_indicies
+            merged = self.merge_overlapping_indices(keyword_indices)
+            if len_indices == len(merged):
+                out_indices = sorted(merged, key=itemgetter(0))
+                return out_indices
            else:
-                len_indicies = len(merged)
+                len_indices = len(merged)
 
-    def get_annotation(self, text, indicies, kws):
+    def get_annotation(self, text, keyword_indices):
         """
-        The function to add two Complex Numbers.
+        Create text annotation for extracted keywords.
 
         Parameters:
-            num (ComplexNumber): The complex number to be added.
+            keyword_indices (list): list of indices for keyword boundaries in text
 
         Returns:
-            ComplexNumber: A complex number which contains the sum.
+            annotation (list): list of tuples for generating html
         """
 
        arr = list(text)
-        for idx in sorted(indicies, reverse=True):
+        for idx in sorted(keyword_indices, reverse=True):
            arr.insert(idx[0], "<kw>")
            arr.insert(idx[1]+1, "XXXxxxXXXxxxXXX <kw>")
-        annotation = ''.join(arr)
-        split = annotation.split('<kw>')
-        final_annotation = [(x.replace('XXXxxxXXXxxxXXX ', ''), "KEY", "#26aaef") if "XXXxxxXXXxxxXXX" in x else x for x in split]
+        joined_annotation = ''.join(arr)
+        split = joined_annotation.split('<kw>')
+        annotation = [(x.replace('XXXxxxXXXxxxXXX ', ''), "KEY", "#26aaef") if "XXXxxxXXXxxxXXX" in x else x for x in split]
 
        kws_check = []
-        for i in final_annotation:
+        for i in annotation:
            if type(i) is tuple:
                kws_check.append(i[0])
 
-        return final_annotation
+        return annotation
 
     def generate(self, text, max_keywords):
         """
-        The function to add two Complex Numbers.
+        Extract keywords and create text annotation.
 
         Parameters:
-            num (ComplexNumber): The complex number to be added.
+            text (str): The user input string to extract keywords from
+            max_keywords (int): Limit on number of keywords to generate
 
         Returns:
-            ComplexNumber: A complex number which contains the sum.
+            annotation (list): list of tuples for generating html
+            kws (list): list of extracted keywords
         """
 
        kws = self.get_keywords(text, max_keywords)
 
-        indicies = list(self.get_keyword_indicies(kws, text))
-        if indicies:
-            indicies_merged = self.merge_until_finished(indicies)
-            annotation = self.get_annotation(text, indicies_merged, kws)
+        indices = list(self.get_keyword_indices(kws, text))
+        if indices:
+            indices_merged = self.merge_until_finished(indices)
+            annotation = self.get_annotation(text, indices_merged)
        else:
            annotation = None
 
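The span-merging logic above is the core of this module: keyword match spans are folded together whenever they overlap or sit directly next to each other, and the merged spans drive the (keyword, "KEY", "#26aaef") annotation tuples. Below is a standalone sketch of that merge step; the body of the if-branch is not visible in the hunks, so the max() update is an assumption.

    def merge_overlapping_indices(keyword_indices):
        # Sort spans by start position, then fold overlapping or adjacent spans
        # together on a stack, mirroring the method above. The if-branch body is
        # outside the visible hunks, so the max() update here is an assumption.
        keyword_indices.sort()
        stack = [keyword_indices[0]]
        for span in keyword_indices[1:]:
            if (stack[-1][0] <= span[0] <= stack[-1][-1]) or (stack[-1][-1] == span[0] - 1):
                stack[-1][-1] = max(stack[-1][-1], span[-1])
            else:
                stack.append(span)
        return stack

    # Example: [0, 5] and [4, 9] overlap, and [10, 18] starts right after 9,
    # so all three collapse into a single span.
    print(merge_overlapping_indices([[10, 18], [0, 5], [4, 9], [25, 30]]))
    # -> [[0, 18], [25, 30]]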
 
named_entity_recognition.py CHANGED
@@ -3,7 +3,8 @@ from transformers import pipeline
 
 
 class NamedEntityRecognition:
-    """ Named Entity Recognition on text data
+    """
+    Named Entity Recognition on text data.
 
     Attributes:
         tokenizer: An instance of Hugging Face Tokenizer
@@ -18,13 +19,14 @@ class NamedEntityRecognition:
 
     def get_annotation(self, preds, text):
         """
-        The function to add two Complex Numbers.
+        Get html annotation for displaying entities over text.
 
         Parameters:
-            num (ComplexNumber): The complex number to be added.
+            preds (list): List of entities and their associated metadata
+            text (str): The user input string to generate entity tags for
 
         Returns:
-            ComplexNumber: A complex number which contains the sum.
+            final_annotation (list): List of tuples to pass to text annotation html creator
         """
 
         splits = [0]
@@ -48,13 +50,14 @@ class NamedEntityRecognition:
 
     def classify(self, text):
         """
-        The function to add two Complex Numbers.
+        Recognize Named Entities in text.
 
         Parameters:
-            num (ComplexNumber): The complex number to be added.
+            text (str): The user input string to generate entity tags for
 
         Returns:
-            ComplexNumber: A complex number which contains the sum.
+            predictions (list): List of recognized entities and their associated metadata
+            ner_annotation (list): List of tuples to pass to text annotation html creator
         """
 
         preds = self.nlp(text)
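
get_annotation consumes the raw predictions produced by self.nlp. A hedged sketch of what that input looks like when self.nlp is a Hugging Face token-classification pipeline (the pipeline construction itself is outside this diff):

    from transformers import pipeline

    # Assumes self.nlp is a token-classification pipeline; the checkpoint used
    # by NamedEntityRecognition is not named in this diff, so the library
    # default is used here.
    ner = pipeline("ner")
    preds = ner("Hugging Face is based in New York City")
    # Each element of preds is a dict along the lines of:
    # {'entity': 'I-ORG', 'score': 0.98, 'index': 1, 'word': 'Hu', 'start': 0, 'end': 2}
    # and get_annotation(preds, text) turns these into annotation tuples.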
part_of_speech_tagging.py CHANGED
@@ -12,15 +12,15 @@ class POSTagging:
 
     def classify(self, text):
         """
-        The function to add two Complex Numbers.
+        Generate Part of Speech tags.
 
         Parameters:
-            num (ComplexNumber): The complex number to be added.
+            text (str): The user input string to generate tags for
 
         Returns:
-            ComplexNumber: A complex number which contains the sum.
+            predictions (list): list of tuples containing words and their respective tags
         """
 
         text = word_tokenize(text)
-        preds = nltk.pos_tag(text)
-        return preds
+        predictions = nltk.pos_tag(text)
+        return predictions
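
classify is a thin wrapper over two NLTK calls, so the documented behaviour can be reproduced standalone; the nltk.download() lines are an assumption about which data packages a fresh environment still needs.

    import nltk
    from nltk.tokenize import word_tokenize

    # Tokenizer and tagger data; assumed missing in a fresh environment.
    nltk.download("punkt")
    nltk.download("averaged_perceptron_tagger")

    tokens = word_tokenize("The quick brown fox jumps over the lazy dog")
    predictions = nltk.pos_tag(tokens)
    print(predictions)  # e.g. [('The', 'DT'), ('quick', 'JJ'), ...]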
sentiment_analysis.py CHANGED
@@ -5,7 +5,8 @@ import pandas as pd
 
 
 class SentimentAnalysis:
-    """ Sentiment on text data
+    """
+    Sentiment Analysis on text data.
 
     Attributes:
         tokenizer: An instance of Hugging Face Tokenizer
@@ -32,13 +33,13 @@ class SentimentAnalysis:
 
     def justify(self, text):
         """
-        The function to add two Complex Numbers.
+        Get html annotation for displaying sentiment justification over text.
 
         Parameters:
-            num (ComplexNumber): The complex number to be added.
+            text (str): The user input string for sentiment justification
 
         Returns:
-            ComplexNumber: A complex number which contains the sum.
+            html (html): html object for plotting sentiment prediction justification
         """
 
         word_attributions = self.explainer(text)
@@ -48,35 +49,36 @@ class SentimentAnalysis:
 
     def classify(self, text):
         """
-        The function to add two Complex Numbers.
+        Recognize Sentiment in text.
 
         Parameters:
-            num (ComplexNumber): The complex number to be added.
+            text (str): The user input string to perform sentiment classification on
 
         Returns:
-            ComplexNumber: A complex number which contains the sum.
+            predictions (str): The predicted probabilities for sentiment classes
         """
 
         tokens = self.tokenizer.encode_plus(text, add_special_tokens=False, return_tensors='pt')
         outputs = self.model(**tokens)
         probs = torch.nn.functional.softmax(outputs[0], dim=-1)
         probs = probs.mean(dim=0).detach().numpy()
-        preds = pd.Series(probs, index=["Negative", "Neutral", "Positive"], name='Predicted Probability')
+        predictions = pd.Series(probs, index=["Negative", "Neutral", "Positive"], name='Predicted Probability')
 
-        return preds
+        return predictions
 
     def run(self, text):
         """
-        The function to add two Complex Numbers.
+        Classify and Justify Sentiment in text.
 
         Parameters:
-            num (ComplexNumber): The complex number to be added.
+            text (str): The user input string to perform sentiment classification on
 
         Returns:
-            ComplexNumber: A complex number which contains the sum.
+            predictions (str): The predicted probabilities for sentiment classes
+            html (html): html object for plotting sentiment prediction justification
         """
 
-        preds = self.classify(text)
+        predictions = self.classify(text)
         html = self.justify(text)
 
-        return preds, html
+        return predictions, html
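
The classify body shown above follows the standard transformers sequence-classification flow. Here is a standalone sketch of that flow; the checkpoint is an assumption picked only because it exposes the same Negative/Neutral/Positive classes, since the model actually wired into SentimentAnalysis is not part of this diff.

    import torch
    import pandas as pd
    from transformers import AutoTokenizer, AutoModelForSequenceClassification

    # Assumed three-class checkpoint; not taken from the diff.
    checkpoint = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

    # Same steps as classify(): tokenize, forward pass, softmax over logits,
    # then label the probabilities as a pandas Series.
    tokens = tokenizer.encode_plus("This update made the app much easier to use!",
                                   add_special_tokens=False, return_tensors='pt')
    outputs = model(**tokens)
    probs = torch.nn.functional.softmax(outputs[0], dim=-1)
    probs = probs.mean(dim=0).detach().numpy()
    predictions = pd.Series(probs, index=["Negative", "Neutral", "Positive"],
                            name='Predicted Probability')
    print(predictions)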