Files changed (3)
  1. handler.py +68 -64
  2. requirements.txt +1 -0
  3. utils.py +513 -410
handler.py CHANGED
@@ -1,5 +1,6 @@
 from typing import Dict, List, Any
 from scipy.special import softmax
+from collections import Counter
 import numpy as np
 import weakref
 import re
@@ -9,7 +10,7 @@ nltk.download('stopwords')

 from utils import clean_str, clean_str_nopunct
 import torch
-from utils import MultiHeadModel, BertInputBuilder, get_num_words, MATH_PREFIXES, MATH_WORDS
+from utils import MultiHeadModel, BertInputBuilder, get_num_words, MATH_PREFIXES, MATH_WORDS, plural_to_singular

 import transformers
 from transformers import BertTokenizer, BertForSequenceClassification
@@ -94,7 +95,6 @@ class Utterance:
                f"text='{self.text}', uid={self.uid}," \
                f"starttime={self.starttime}, endtime={self.endtime}, props={self.props})"

-
 class Transcript:
     def __init__(self, **kwargs):
         self.utterances = []
@@ -152,45 +152,42 @@ class Transcript:
         return {'teacher': teacher_percentage, 'student': student_percentage}, {'teacher': avg_teacher_length, 'student': avg_student_length}

     def get_word_clouds(self):
-        teacher_dict = {}
-        student_dict = {}
-        uptake_teacher_dict = {}
+        # Initialize dictionaries
+        teacher_dict = Counter()
+        student_dict = Counter()
+        uptake_teacher_dict = Counter()
         stop_words = stopwords.words('english')
+
+        # Go through the utterances
         for utt in self.utterances:
-            words = (utt.get_clean_text(remove_punct=True)).split(' ')
-            for word in words:
-                if word in stop_words or word in ['inaudible', 'crosstalk']: continue
-                # handle uptake case
-                if utt.role == 'teacher':
-                    if utt.uptake == 1:
-                        if word not in uptake_teacher_dict:
-                            uptake_teacher_dict[word] = 0
-                        uptake_teacher_dict[word] += 1
-                # ignore math words so they don't get tagged as general
-                if any(math_word in word for math_word in utt.math_terms): continue
-                if utt.role == 'teacher':
-                    if word not in teacher_dict:
-                        teacher_dict[word] = 0
-                    teacher_dict[word] += 1
-                else:
-                    if word not in student_dict:
-                        student_dict[word] = 0
-                    student_dict[word] += 1
-        dict_list = []
-        uptake_dict_list = []
-        teacher_dict_list = []
-        student_dict_list = []
-        for word in uptake_teacher_dict.keys():
-            uptake_dict_list.append({'text': word, 'value': uptake_teacher_dict[word], 'category': 'teacher'})
-        for word in teacher_dict.keys():
-            teacher_dict_list.append(
-                {'text': word, 'value': teacher_dict[word], 'category': 'general'})
-            dict_list.append({'text': word, 'value': teacher_dict[word], 'category': 'general'})
-        for word in student_dict.keys():
-            student_dict_list.append(
-                {'text': word, 'value': student_dict[word], 'category': 'general'})
-            dict_list.append({'text': word, 'value': student_dict[word], 'category': 'general'})
+            # Get clean text
+            clean_text = utt.get_clean_text(remove_punct=True)
+            words = clean_text.split()
+            words = [word for word in words if word not in stop_words and word not in ['inaudible', 'crosstalk']]
+
+            # Handle uptake case
+            if utt.role == 'teacher' and utt.uptake == 1:
+                uptake_teacher_dict.update(words)
+
+            general_text = ' '.join(words)
+            # Replace math terms with empty strings
+            for math_term in utt.math_terms:
+                general_text = general_text.replace(math_term, '')
+            general_text = general_text.replace('  ', ' ')
+
+            general_words = general_text.split()
+            # Update the appropriate dictionary
+            if utt.role == 'teacher':
+                teacher_dict.update(general_words)
+            else:
+                student_dict.update(general_words)
+
+        # Sorting and trimming dictionaries
+        dict_list = dict_to_list(teacher_dict, 'general') + dict_to_list(student_dict, 'general')
+        uptake_dict_list = dict_to_list(uptake_teacher_dict, 'teacher')
+        teacher_dict_list = dict_to_list(teacher_dict, 'general')
+        student_dict_list = dict_to_list(student_dict, 'general')

         sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
         sorted_uptake_dict_list = sorted(uptake_dict_list, key=lambda x: x['value'], reverse=True)
         sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
@@ -219,7 +216,6 @@ class Transcript:
     def __repr__(self):
         return f"Transcript(utterances={self.utterances}, custom_params={self.params})"

-
 class QuestionModel:
     def __init__(self, device, tokenizer, input_builder, max_length=300, path=QUESTION_MODEL):
         print("Loading models...")
@@ -260,7 +256,6 @@ class QuestionModel:
                             return_pooler_output=False)
         return output

-
 class ReasoningModel:
     def __init__(self, device, tokenizer, input_builder, max_length=128, path=REASONING_MODEL):
         print("Loading models...")
@@ -294,7 +289,6 @@ class ReasoningModel:
                             token_type_ids=instance["token_type_ids"])
         return output

-
 class UptakeModel:
     def __init__(self, device, tokenizer, input_builder, max_length=120, path=UPTAKE_MODEL):
         print("Loading models...")
@@ -373,16 +367,24 @@ class FocusingQuestionModel:
                             token_type_ids=instance["token_type_ids"])
         return output

+def dict_to_list(d, category):
+    combined_dict = Counter()
+    for word, count in d.items():
+        singular_word = plural_to_singular(word)
+        combined_dict[singular_word] += count
+    return [{'text': word, 'value': count, 'category': category} for word, count in combined_dict.items()]
+
 def load_math_terms():
     math_regexes = []
     math_terms_dict = {}
+    for term in MATH_PREFIXES:
+        math_terms_dict[rf"\b{term}(s|es|d|ed)?\b"] = term
+        math_regexes.append(rf"\b{term}(s|es|d|ed)?\b")
+
     for term in MATH_WORDS:
-        if term in MATH_PREFIXES:
-            math_terms_dict[rf"\b{term}(s|es|d|ed)?\b"] = term
-            math_regexes.append(rf"\b{term}(s|es|d|ed)?\b")
-        else:
-            math_regexes.append(rf"\b{term}\b")
+        if not term in MATH_PREFIXES:
             math_terms_dict[rf"\b{term}\b"] = term
+            math_regexes.append(rf"\b{term}\b")
     return math_regexes, math_terms_dict

 def run_math_density(transcript):
@@ -390,16 +392,16 @@ def run_math_density(transcript):
     sorted_regexes = sorted(math_regexes, key=len, reverse=True)
     teacher_math_word_cloud = {}
     student_math_word_cloud = {}
+
     for i, utt in enumerate(transcript.utterances):
         text = utt.get_clean_text(remove_punct=True)
         num_matches = 0
         matched_positions = set()
-        match_list = []
+        match_list = set()
         for regex in sorted_regexes:
             matches = list(re.finditer(regex, text, re.IGNORECASE))
             # Filter out matches that share positions with longer terms
             matches = [match for match in matches if not any(match.start() in range(existing[0], existing[1]) for existing in matched_positions)]
-            # matched_text = [match.group(0) for match in matches]
             if len(matches) > 0:
                 if utt.role == "teacher":
                     if math_terms_dict[regex] not in teacher_math_word_cloud:
@@ -409,30 +411,32 @@ def run_math_density(transcript):
                     if math_terms_dict[regex] not in student_math_word_cloud:
                         student_math_word_cloud[math_terms_dict[regex]] = 0
                     student_math_word_cloud[math_terms_dict[regex]] += len(matches)
-                match_list.append(math_terms_dict[regex])
-            # Update matched positions
-            matched_positions.update((match.start(), match.end()) for match in matches)
+                for match in matches:
+                    match_list.add(match.group())
+                    matched_positions.add((match.start(), match.end()))
             num_matches += len(matches)
-            # print("match group list: ", [match.group(0) for match in matches])
         utt.num_math_terms = num_matches
-        utt.math_terms = match_list
-        # utt.math_match_positions = list(matched_positions)
-        # utt.math_terms_raw = [text[start:end] for start, end in matched_positions]
+        utt.math_terms = list(match_list)
+
+    # Initialize lists
     teacher_dict_list = []
     student_dict_list = []
     dict_list = []
-    for word in teacher_math_word_cloud.keys():
-        teacher_dict_list.append(
-            {'text': word, 'value': teacher_math_word_cloud[word], 'category': "math"})
-        dict_list.append({'text': word, 'value': teacher_math_word_cloud[word], 'category': "math"})
-    for word in student_math_word_cloud.keys():
-        student_dict_list.append(
-            {'text': word, 'value': student_math_word_cloud[word], 'category': "math"})
-        dict_list.append({'text': word, 'value': student_math_word_cloud[word], 'category': "math"})
+
+    # Process teacher_math_word_cloud
+    teacher_dict_list = dict_to_list(teacher_math_word_cloud, 'math')
+    dict_list.extend(teacher_dict_list)
+
+    # Process student_math_word_cloud
+    student_dict_list = dict_to_list(student_math_word_cloud, 'math')
+    dict_list.extend(student_dict_list)
+
+    # Sort the lists
     sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
     sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
     sorted_student_dict_list = sorted(student_dict_list, key=lambda x: x['value'], reverse=True)
-    # return sorted_dict_list[:50]
+
+    # Return the sorted lists
     return sorted_dict_list[:50], sorted_teacher_dict_list[:50], sorted_student_dict_list[:50]

 class EndpointHandler():
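Reviewer note: the snippet below is a minimal, self-contained sketch of the new `get_word_clouds` / `dict_to_list` counting flow, using plain dicts as stand-in utterances and a local `plural_to_singular` fallback; the real helpers live in utils.py, and the handler strips math terms by substring replacement rather than the whole-word filter used here.

```python
# Sketch only: approximates the Counter-based word cloud counting with stand-in data.
from collections import Counter

try:
    import inflect
    _p = inflect.engine()

    def plural_to_singular(word):
        # singular_noun() returns False when the word is already singular.
        return _p.singular_noun(word) or word
except ImportError:
    def plural_to_singular(word):
        return word  # fallback: leave the word unchanged

def dict_to_list(d, category):
    # Fold plural and singular forms of the same word into a single entry.
    combined = Counter()
    for word, count in d.items():
        combined[plural_to_singular(word)] += count
    return [{'text': w, 'value': c, 'category': category} for w, c in combined.items()]

utterances = [
    {'role': 'teacher', 'text': 'triangles have three angles', 'math_terms': ['triangles', 'angles']},
    {'role': 'student', 'text': 'my triangle looks like a kite', 'math_terms': ['triangle', 'kite']},
]

teacher_dict, student_dict = Counter(), Counter()
for utt in utterances:
    words = utt['text'].split()
    # Drop math terms so they end up in the math word cloud instead of the general one.
    general_words = [w for w in words if w not in utt['math_terms']]
    (teacher_dict if utt['role'] == 'teacher' else student_dict).update(general_words)

print(dict_to_list(teacher_dict, 'general'))
print(dict_to_list(student_dict, 'general'))
```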
requirements.txt CHANGED
@@ -5,3 +5,4 @@ scipy==1.9.2
 torch==2.3.1
 transformers==4.46.1
 nltk==3.9.1
+inflect==7.5.0
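The new inflect pin backs the `singular_to_plural` / `plural_to_singular` helpers added in utils.py. A quick sanity check of the two calls they rely on (assuming the pinned package is installed):

```python
import inflect

p = inflect.engine()
print(p.plural("angle"))           # angles
# singular_noun() returns False for words it already considers singular,
# hence the `or word` fallback used in utils.py.
print(p.singular_noun("angles"))   # angle
print(p.singular_noun("angle"))    # False
```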
utils.py CHANGED
@@ -7,6 +7,7 @@ from cleantext import clean
7
  from num2words import num2words
8
  import re
9
  import string
 
10
 
11
  punct_chars = list((set(string.punctuation) | {'’', '‘', '–', '—', '~', '|', '“', '”', '…', "'", "`", '_'}))
12
  punct_chars.sort()
@@ -34,530 +35,708 @@ MATH_PREFIXES = [
34
  "median",
35
  "ratio",
36
  "area",
37
- ]
38
 
39
- MATH_WORDS = [
40
- "absolute value",
41
- "algebra",
42
- "area",
43
- "average",
44
- "base of",
45
- "box plot",
46
- "categorical",
47
- "coefficient",
48
- "common factor",
49
- "common multiple",
50
- "compose",
51
- "coordinate",
52
- "cubed",
53
- "decompose",
54
- "dependent variable",
55
- "distribution",
56
- "dot plot",
57
- "double number line diagram",
58
- "equivalent",
59
- "equivalent expression",
60
- "ratio",
61
- "exponent",
62
- "frequency",
63
- "greatest common factor",
64
- "gcd",
65
- "height of",
66
- "histogram",
67
- "independent variable",
68
- "integer",
69
- "interquartile range",
70
- "iqr",
71
- "least common multiple",
72
- "long division",
73
- "mean absolute deviation",
74
- "median",
75
- "negative number",
76
- "opposite vertex",
77
- "parallelogram",
78
- "percent",
79
- "polygon",
80
- "polyhedron",
81
- "positive number",
82
- "prism",
83
- "pyramid",
84
- "quadrant",
85
- "quadrilateral",
86
- "quartile",
87
- "rational number",
88
- "reciprocal",
89
- "equality",
90
- "inequality",
91
- "squared",
92
- "statistic",
93
- "surface area",
94
- "identity property",
95
- "addend",
96
- "unit",
97
- "number sentence",
98
- "make ten",
99
- "take from ten",
100
- "number bond",
101
- "total",
102
- "estimate",
103
- "hashmark",
104
- "meter",
105
- "number line",
106
- "ruler",
107
- "centimeter",
108
- "base ten",
109
- "expanded form",
110
- "hundred",
111
- "thousand",
112
- "place value",
113
- "number disk",
114
- "standard form",
115
- "unit form",
116
- "word form",
117
- "tens place",
118
- "algorithm",
119
- "equation",
120
- "simplif",
121
- "addition",
122
- "subtract",
123
- "array",
124
- "even number",
125
- "odd number",
126
- "repeated addition",
127
- "tessellat",
128
- "whole number",
129
- "number path",
130
- "rectangle",
131
- "square",
132
- "bar graph",
133
- "data",
134
- "degree",
135
- "line plot",
136
- "picture graph",
137
- "scale",
138
- "survey",
139
- "thermometer",
140
- "estimat",
141
- "tape diagram",
142
- "value",
143
- "analog",
144
- "angle",
145
- "parallel",
146
- "partition",
147
- "pentagon",
148
- "right angle",
149
- "cube",
150
- "digital",
151
- "quarter of",
152
- "tangram",
153
- "circle",
154
- "hexagon",
155
- "half circle",
156
- "half-circle",
157
- "quarter circle",
158
- "quarter-circle",
159
- "semicircle",
160
- "semi-circle",
161
- "rectang",
162
- "rhombus",
163
- "trapezoid",
164
- "triangle",
165
- "commutative",
166
- "equal group",
167
- "distributive",
168
- "divide",
169
- "division",
170
  "multipl",
171
- "parentheses",
172
- "quotient",
173
- "rotate",
174
- "unknown",
175
- "add",
176
- "capacity",
177
- "continuous",
178
- "endpoint",
179
- "gram",
180
- "interval",
181
- "kilogram",
182
- "volume",
183
- "liter",
184
- "milliliter",
185
- "approximate",
186
- "area model",
187
- "square unit",
188
- "unit square",
189
  "geometr",
190
- "equivalent fraction",
191
- "fraction form",
192
- "fractional unit",
193
- "unit fraction",
194
- "unit interval",
195
- "measur",
196
- "graph",
197
- "scaled graph",
198
- "diagonal",
199
- "perimeter",
200
- "regular polygon",
201
- "tessellate",
202
- "tetromino",
203
- "heptagon",
204
- "octagon",
205
- "digit",
206
- "expression",
207
- "sum",
208
- "kilometer",
209
- "mass",
210
- "mixed unit",
211
- "length",
212
  "measure",
213
- "simplify",
214
- "associative",
215
- "composite",
216
- "divisible",
217
- "divisor",
218
- "partial product",
219
- "prime number",
220
- "remainder",
221
- "acute",
222
- "arc",
223
- "collinear",
224
- "equilateral",
225
- "intersect",
226
- "isosceles",
227
- "symmetry",
228
- "line segment",
229
- "line",
230
- "obtuse",
231
- "perpendicular",
232
- "protractor",
233
- "scalene",
234
- "straight angle",
235
- "supplementary angle",
236
- "vertex",
237
- "common denominator",
238
- "denominator",
239
- "fraction",
240
- "mixed number",
241
- "numerator",
242
- "whole",
243
- "decimal expanded form",
244
- "decimal",
245
- "hundredth",
246
- "tenth",
247
- "customary system of measurement",
248
- "customary unit",
249
- "gallon",
250
- "metric",
251
- "metric unit",
252
- "ounce",
253
- "pint",
254
- "quart",
255
- "convert",
256
- "distance",
257
- "millimeter",
258
- "thousandth",
259
- "hundredths",
260
- "conversion factor",
261
- "decimal fraction",
262
- "multiplier",
263
- "equivalence",
264
- "multiple",
265
- "product",
266
- "benchmark fraction",
267
- "cup",
268
- "pound",
269
- "yard",
270
- "whole unit",
271
- "decimal divisor",
272
- "factors",
273
- "bisect",
274
- "cubic units",
275
- "hierarchy",
276
- "unit cube",
277
- "attribute",
278
- "kite",
279
- "bisector",
280
- "solid figure",
281
- "square units",
282
- "dimension",
283
- "axis",
284
- "ordered pair",
285
- "angle measure",
286
- "horizontal",
287
- "vertical",
288
- "categorical data",
289
- "lcm",
290
- "measure of center",
291
- "meters per second",
292
- "numerical",
293
- "solution",
294
- "unit price",
295
- "unit rate",
296
- "variability",
297
- "variable",
298
  "abundant number",
299
  "accurate",
300
  "acre",
 
 
 
301
  "addition fact",
 
 
 
 
 
302
  "algebraic",
 
 
303
  "altitude",
 
 
 
 
304
  "apex",
305
- "arithmetic facts",
 
 
 
 
 
 
306
  "associative property",
 
307
  "astronomical unit",
 
 
 
 
 
 
 
 
 
 
 
308
  "base",
309
  "baseline",
 
310
  "billion",
 
 
 
 
 
 
 
 
 
311
  "celsius",
312
  "census",
313
  "cent",
314
  "center of a circle",
 
315
  "center of a sphere",
 
 
 
 
 
316
  "chance",
 
317
  "circle graph",
 
 
 
 
 
 
 
318
  "column",
 
319
  "combine",
 
 
320
  "common fraction",
 
 
 
321
  "comparison diagram",
322
  "comparison story",
323
  "compass",
324
  "complement",
 
 
 
325
  "concave polygon",
326
  "concentric circles",
 
 
 
327
  "consecutive",
 
328
  "constant",
329
  "continuous model of area",
330
  "continuous model of volume",
 
331
  "contour",
332
  "conversion fact",
 
 
 
333
  "convex polygon",
 
 
 
 
334
  "counting numbers",
335
  "counting up subtraction",
 
 
336
  "cover-up method",
337
  "cross multiplication",
 
 
 
 
 
 
 
338
  "cubic",
339
  "cubit",
 
340
  "curved surface",
 
 
341
  "cylinder",
 
 
342
  "decagon",
 
 
 
 
 
343
  "decimeter",
 
344
  "deficient number",
 
 
 
345
  "density",
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  "discrete model",
347
  "displacement method",
 
 
 
 
 
348
  "divisibility test",
349
  "divisible by",
 
 
 
350
  "dodecahedron",
 
 
351
  "double stem plot",
352
  "doubles fact",
 
353
  "egyptian multiplication",
354
  "elevation",
355
  "embed figure",
356
  "end point",
 
357
  "enlarge",
 
 
358
  "equal",
359
- "equal groups",
360
- "equal parts",
361
- "equidistant marks",
362
  "equilateral polygon",
363
- "equivalent fractions",
 
 
 
 
 
 
 
 
 
364
  "european subtraction",
 
 
 
 
365
  "expanded notation",
366
  "expected outcome",
367
- "exponential",
368
- "extended facts",
 
 
 
 
 
369
  "fact power",
370
  "fact triangle",
371
  "factor",
372
- "factors of numbers",
 
373
  "fahrenheit",
374
  "false number sentence",
375
- "figurate numbers",
376
  "flowchart",
377
  "fluid ounce",
 
 
 
378
  "fractional part",
 
 
379
  "fulcrum",
380
  "function machine",
 
381
  "furlong",
 
 
382
  "genus",
383
  "geoboard",
 
384
  "geometric solid",
385
  "geometry template",
386
  "girth",
387
  "golden ratio",
388
  "golden rectangle",
 
389
  "graph key",
 
 
 
390
  "grouping symbol",
 
 
 
 
 
 
391
  "hemisphere",
 
 
 
 
 
 
 
 
 
 
 
 
 
392
  "icosahedron",
 
 
 
 
393
  "improper fraction",
394
  "inch",
395
- "index of locations",
 
 
 
 
396
  "indirect measurement",
 
 
397
  "input",
 
398
  "inscribed polygon",
399
  "instance of a pattern",
 
 
 
 
400
  "interior of a figure",
401
  "interpolate",
 
 
 
 
 
 
 
 
402
  "irrational",
403
  "isometry transformation",
404
  "isosceles trapezoid",
 
 
 
 
405
  "juxtapose",
406
  "key sequence",
 
 
 
407
  "label",
408
  "landmark",
409
  "latitude",
410
  "lattice multiplication",
 
 
 
411
  "left to right subtraction",
412
  "leg of a right triangle",
 
 
 
413
  "like terms",
414
  "line graph",
415
  "line of reflection",
416
  "line of symmetry",
 
 
417
  "line symmetry",
 
 
418
  "lines of latitude",
419
  "lines of longitude",
 
 
 
 
 
 
 
 
 
420
  "longitude",
 
421
  "magnitude estimate",
 
422
  "map legend",
423
  "map scale",
 
424
  "maximum",
 
 
 
 
 
425
  "measurement division",
 
426
  "measurement unit",
 
427
  "meridian bar",
 
 
428
  "metric system",
 
 
429
  "midpoint",
430
  "mile",
 
 
431
  "millisecond",
432
  "minimum",
433
  "minuend",
434
  "mirror image",
 
 
435
  "mobius",
436
  "modal",
 
 
 
437
  "multiplication counting principle",
438
  "multiplication diagram",
439
  "multiplication fact",
440
- "multiplication symbols",
441
  "multiplication use class",
442
- "negative rational numbers",
 
 
 
 
 
 
 
 
443
  "nested parentheses",
444
  "net score",
445
  "net weight",
 
446
  "nonagon",
447
  "nonconvex polygon",
 
 
448
  "normal span",
 
 
 
449
  "number grid",
 
 
 
450
  "number sequence",
451
  "numeral",
452
  "numeration",
 
 
 
 
 
 
453
  "octahedron",
 
454
  "open proportion",
455
- "operation",
456
  "operation symbol",
 
457
  "opposite angle",
458
  "opposite change rule",
459
  "opposite of a number",
460
  "opposite side",
 
 
461
  "order of magnitude",
462
  "order of operations",
463
  "order of rotation symmetry",
 
 
 
464
  "ordinal number",
 
 
 
 
465
  "pan balance",
466
  "parabola",
467
  "parallel lines",
468
- "parallel planes",
 
 
 
469
  "part to part ratio",
470
  "part to whole ratio",
471
  "part whole fraction",
472
  "partial differences subtraction",
 
473
  "partial products multiplication",
474
  "partial quotients division",
475
  "partial sums addition",
 
476
  "partitive division",
477
  "parts and total diagram",
 
 
478
  "per capita",
479
  "per unit rate",
 
480
  "percent circle",
 
 
481
  "perfect number",
 
 
 
 
 
482
  "perpetual calendar",
 
 
483
  "pie graph",
484
- "plane",
 
 
485
  "plane figure",
 
486
  "point symmetry",
 
 
 
 
 
487
  "population density",
 
 
 
 
 
488
  "precise",
489
  "predict",
490
  "prediction line",
491
  "preimage",
 
492
  "prime factorization",
493
  "prime meridian",
494
- "probability",
 
495
  "probability meter",
496
  "probability tree diagram",
 
 
497
  "proper factor",
498
  "proper fraction",
499
  "property",
 
 
 
 
 
 
500
  "quadrangle",
 
 
 
 
 
 
 
 
501
  "quick common denominator",
 
502
  "quotitive division",
 
 
 
503
  "random draw",
504
  "random experiment",
505
  "random number",
506
  "random sample",
 
 
507
  "rank",
508
  "rate diagram",
509
  "rate multiplication ",
 
510
  "rate unit",
 
 
 
 
 
 
 
511
  "recall survey",
 
 
 
512
  "rectangular array",
513
  "rectangular coordinate grid",
514
  "rectangular prism",
515
  "rectangular pyramid",
 
516
  "rectilinear figure",
517
  "reflection",
518
  "reflex angle",
 
 
519
  "regular polyhedron",
520
  "regular tessellation",
521
  "relation symbol",
 
 
 
 
 
522
  "revolution",
 
 
523
  "right cone",
524
  "right cylinder",
525
  "right prism",
526
  "right pyramid",
527
  "right triangle",
 
528
  "roman numerals",
 
 
529
  "rotation symmetry",
 
 
 
 
530
  "same change rule for subtraction",
 
 
 
531
  "scale model",
532
  "scale of a map",
533
  "scale of a number line",
 
 
 
 
 
 
 
534
  "sector",
535
  "segment",
 
 
536
  "sequence",
537
- "significant digits",
 
 
 
538
  "similar figures",
 
539
  "simpler form",
 
 
540
  "situtation diagram",
541
- "skew lines",
542
  "slanted",
543
  "slide rule",
 
 
 
544
  "span",
 
 
 
 
 
 
545
  "stacked bar graph",
 
546
  "standard unit",
 
547
  "stem and leaf plot",
548
  "step graph",
 
549
  "straightedge",
 
550
  "substitute",
 
551
  "subtrahend",
 
 
 
 
552
  "surface",
 
553
  "symmetric",
 
 
 
 
 
554
  "tally",
 
555
  "tangent",
556
- "tangent circles",
 
557
  "temperature",
558
  "template",
 
 
 
 
 
 
 
559
  "tetrahedron",
 
560
  "theorem",
 
 
 
561
  "tile",
562
  "tiling",
563
  "time graph",
@@ -565,159 +744,83 @@ MATH_WORDS = [
565
  "top heavy fraction",
566
  "topological",
567
  "topology",
 
 
 
 
568
  "trade first subtraction",
 
 
 
 
569
  "tree diagram",
 
570
  "triangular",
571
  "true number sentence",
572
  "truncate",
573
- "twin primes",
574
- "unlike denominators",
575
- "unlike fractions",
 
 
 
 
 
 
 
 
 
 
 
576
  "vanishing ",
 
 
 
577
  "venn diagram",
578
  "vernal equinox",
 
 
 
 
579
  "weight",
 
 
 
580
  "width",
581
- "base of a prism",
582
- "base of a pyramid",
583
- "face",
584
- "numerical data",
585
- "opposite",
586
- "pace",
587
- "per",
588
- "region",
589
- "sign",
590
- "alternate interior angles",
591
- "base of an exponent",
592
- "cone",
593
- "congruent",
594
- "counterclockwise",
595
- "cube root",
596
- "hypotenuse",
597
- "irrational number",
598
- "linear relationship",
599
- "positive association",
600
- "rate of change",
601
- "translation",
602
- "transversal",
603
- "circumference",
604
- "corresponding",
605
- "expand",
606
- "population",
607
- "proportion",
608
- "radius",
609
- "random",
610
- "repeating decimal",
611
- "representative",
612
- "scaled",
613
  "withdrawal",
614
- "center",
615
- "edge",
616
- "height of a parallelogram or triangle",
617
- "net",
618
- "speed",
619
- "table",
620
- "term",
621
- "adjacent",
622
- "complementary",
623
- "cross-section",
624
- "cross section",
625
- "deposit",
626
- "event",
627
- "measurement error",
628
- "proportional",
629
- "simulation",
630
- "center of a dilation",
631
- "clockwise",
632
- "dilation",
633
- "function",
634
- "negative association",
635
- "pythagorean theorem",
636
- "relative frequency",
637
- "rigid transformation",
638
- "scale factor",
639
- "scatter plot",
640
- "similar",
641
- "sphere",
642
- "two-way table",
643
- "additive identity",
644
- "additive inverse",
645
- "box and whisker plot",
646
- "cartesian coordinates",
647
- "central angle",
648
- "chord",
649
- "combination",
650
- "commutative property",
651
- "coplanar",
652
- "cross product",
653
- "dependent events",
654
- "difference",
655
- "dividend",
656
- "equilateral triangle",
657
- "error of measurement",
658
- "factorial",
659
- "formula",
660
- "identity property of",
661
- "independent events",
662
- "infinity",
663
- "inscribed angle",
664
- "intercept",
665
- "intercepted arc",
666
- "inverse",
667
- "inverse operations",
668
- "isosceles triangle",
669
- "least common denominator",
670
- "like fractions",
671
- "locus",
672
- "logic",
673
- "lowest terms",
674
- "mode",
675
- "multiplicative identity",
676
- "multiplicative inverse",
677
- "mutually exclusive events",
678
- "natural numbers",
679
- "normal",
680
- "permutation",
681
- "pi",
682
- "point",
683
- "power",
684
- "range",
685
- "rate",
686
- "ray",
687
- "real numbers",
688
- "rectangular",
689
- "root",
690
- "rotation",
691
- "scalene triangle",
692
- "scattergram",
693
- "set",
694
- "statistics",
695
- "terminating decimal",
696
- "transformation",
697
  "x intercept",
 
698
  "x-axis",
699
- "x-intercept",
 
700
  "y intercept",
 
701
  "y-axis",
702
  "y-intercept",
703
- "zero",
704
  "zero property of multiplication",
705
- "base of a parallelogram",
706
- "base of a triangle",
707
- "height",
708
- "chance experiment",
709
- "diameter",
710
- "mean",
711
- "percentage",
712
- "sample",
713
- "legs",
714
- "outlier",
715
- "slope",
716
- "square root",
717
- "system of equations",
718
- "tessellation",
719
  ]
720
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
721
  def get_num_words(text):
722
  if not isinstance(text, str):
723
  print("%s is not a string" % text)
 
  from num2words import num2words
  import re
  import string
+ import inflect

  punct_chars = list((set(string.punctuation) | {'’', '‘', '–', '—', '~', '|', '“', '”', '…', "'", "`", '_'}))
  punct_chars.sort()
 
35
  "median",
36
  "ratio",
37
  "area",
 
38
 
39
+ # added
40
  "multipl",
41
+ "divid",
42
+ "subtrac",
43
+ "logarit",
44
+ "algebr",
45
+ "calcul",
46
+ "matri",
47
+ "vect",
 
 
 
 
 
 
 
 
 
 
 
48
  "geometr",
49
+ "statist",
50
+ "probabli",
51
+ "coeffi",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  "measure",
53
+ "simplif"
54
+ ]
55
+
56
+ MATH_WORDS = [
57
+ "absolute deviation",
58
+ "absolute value",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  "abundant number",
60
  "accurate",
61
  "acre",
62
+ "acute",
63
+ "add",
64
+ "addend",
65
  "addition fact",
66
+ "addition",
67
+ "additive identity",
68
+ "additive inverse",
69
+ "adjacent",
70
+ "algebra",
71
  "algebraic",
72
+ "algorithm",
73
+ "alternate interior angle",
74
  "altitude",
75
+ "analog",
76
+ "angle measure",
77
+ "angle",
78
+ "angular",
79
  "apex",
80
+ "approximate",
81
+ "arc",
82
+ "area model",
83
+ "area",
84
+ "arithmetic fact",
85
+ "arithmetic",
86
+ "array",
87
  "associative property",
88
+ "associative",
89
  "astronomical unit",
90
+ "attribute",
91
+ "average",
92
+ "axis",
93
+ "bar graph",
94
+ "base of a parallelogram",
95
+ "base of a prism",
96
+ "base of a pyramid",
97
+ "base of a triangle",
98
+ "base of an exponent",
99
+ "base of",
100
+ "base ten",
101
  "base",
102
  "baseline",
103
+ "benchmark fraction",
104
  "billion",
105
+ "binomial",
106
+ "bisect",
107
+ "bisector",
108
+ "box and whisker plot",
109
+ "box plot",
110
+ "capacity",
111
+ "cartesian coordinate",
112
+ "categorical data",
113
+ "categorical",
114
  "celsius",
115
  "census",
116
  "cent",
117
  "center of a circle",
118
+ "center of a dilation",
119
  "center of a sphere",
120
+ "center",
121
+ "centimeter",
122
+ "central angle",
123
+ "centroid",
124
+ "chance experiment",
125
  "chance",
126
+ "chord",
127
  "circle graph",
128
+ "circle",
129
+ "circular",
130
+ "circumference",
131
+ "clockwise",
132
+ "coefficient",
133
+ "collinear",
134
+ "column matrix"
135
  "column",
136
+ "combination",
137
  "combine",
138
+ "common denominator",
139
+ "common factor",
140
  "common fraction",
141
+ "common multiple",
142
+ "commutative property",
143
+ "commutative",
144
  "comparison diagram",
145
  "comparison story",
146
  "compass",
147
  "complement",
148
+ "complementary",
149
+ "compose",
150
+ "composite",
151
  "concave polygon",
152
  "concentric circles",
153
+ "concentric",
154
+ "cone",
155
+ "congruent",
156
  "consecutive",
157
+ "constant function",
158
  "constant",
159
  "continuous model of area",
160
  "continuous model of volume",
161
+ "continuous",
162
  "contour",
163
  "conversion fact",
164
+ "conversion factor",
165
+ "convert",
166
+ "convex function",
167
  "convex polygon",
168
+ "coordinate",
169
+ "coplanar",
170
+ "corresponding",
171
+ "counterclockwise",
172
  "counting numbers",
173
  "counting up subtraction",
174
+ "covariance",
175
+ "covariate",
176
  "cover-up method",
177
  "cross multiplication",
178
+ "cross product",
179
+ "cross section",
180
+ "cross-section",
181
+ "cube root",
182
+ "cube",
183
+ "cubed",
184
+ "cubic unit",
185
  "cubic",
186
  "cubit",
187
+ "cup",
188
  "curved surface",
189
+ "customary system of measurement",
190
+ "customary unit",
191
  "cylinder",
192
+ "cylindrical",
193
+ "data",
194
  "decagon",
195
+ "decimal divisor",
196
+ "decimal expanded form",
197
+ "decimal fraction",
198
+ "decimal point",
199
+ "decimal",
200
  "decimeter",
201
+ "decompose",
202
  "deficient number",
203
+ "degree",
204
+ "delta",
205
+ "denominator",
206
  "density",
207
+ "dependent event",
208
+ "dependent variable",
209
+ "deposit",
210
+ "derivative",
211
+ "determinant",
212
+ "diagonal",
213
+ "diameter",
214
+ "difference",
215
+ "differential"
216
+ "digit",
217
+ "digital",
218
+ "dilation",
219
+ "dimension",
220
  "discrete model",
221
  "displacement method",
222
+ "distance",
223
+ "distribution",
224
+ "distributive",
225
+ "divide",
226
+ "dividend",
227
  "divisibility test",
228
  "divisible by",
229
+ "divisible",
230
+ "division",
231
+ "divisor",
232
  "dodecahedron",
233
+ "dot plot",
234
+ "double number line diagram",
235
  "double stem plot",
236
  "doubles fact",
237
+ "edge",
238
  "egyptian multiplication",
239
  "elevation",
240
  "embed figure",
241
  "end point",
242
+ "endpoint",
243
  "enlarge",
244
+ "equal group",
245
+ "equal part",
246
  "equal",
247
+ "equality",
248
+ "equation",
249
+ "equidistant mark",
250
  "equilateral polygon",
251
+ "equilateral triangle",
252
+ "equilateral",
253
+ "equivalence",
254
+ "equivalent expression",
255
+ "equivalent fraction",
256
+ "equivalent",
257
+ "error bound",
258
+ "error of measurement",
259
+ "estimat",
260
+ "estimate",
261
  "european subtraction",
262
+ "even number",
263
+ "event",
264
+ "expand",
265
+ "expanded form",
266
  "expanded notation",
267
  "expected outcome",
268
+ "expected value",
269
+ "exponent",
270
+ "exponential function",
271
+ "exponential growth",
272
+ "expression",
273
+ "extended fact",
274
+ "face",
275
  "fact power",
276
  "fact triangle",
277
  "factor",
278
+ "factorial",
279
+ "factors of number",
280
  "fahrenheit",
281
  "false number sentence",
282
+ "figurate number",
283
  "flowchart",
284
  "fluid ounce",
285
+ "formula",
286
+ "fraction form",
287
+ "fraction",
288
  "fractional part",
289
+ "fractional unit",
290
+ "frequency",
291
  "fulcrum",
292
  "function machine",
293
+ "function",
294
  "furlong",
295
+ "gallon",
296
+ "gcd",
297
  "genus",
298
  "geoboard",
299
+ "geometr",
300
  "geometric solid",
301
  "geometry template",
302
  "girth",
303
  "golden ratio",
304
  "golden rectangle",
305
+ "gram",
306
  "graph key",
307
+ "graph",
308
+ "greatest common divisor"
309
+ "greatest common factor",
310
  "grouping symbol",
311
+ "half circle",
312
+ "half-circle",
313
+ "hashmark",
314
+ "height of a parallelogram or triangle",
315
+ "height of",
316
+ "height",
317
  "hemisphere",
318
+ "heptagon",
319
+ "heptagonal",
320
+ "hexagon",
321
+ "hexagonal",
322
+ "hierarchy",
323
+ "histogram",
324
+ "horizontal shift",
325
+ "horizontal stretch",
326
+ "horizontal",
327
+ "hundred",
328
+ "hundredth",
329
+ "hypotenuse",
330
+ "hypothesis",
331
  "icosahedron",
332
+ "identity function",
333
+ "identity matrix",
334
+ "identity property of",
335
+ "identity property",
336
  "improper fraction",
337
  "inch",
338
+ "incircle",
339
+ "indefinite integral",
340
+ "independent event",
341
+ "independent variable",
342
+ "index of location",
343
  "indirect measurement",
344
+ "inequality",
345
+ "infinity",
346
  "input",
347
+ "inscribed angle",
348
  "inscribed polygon",
349
  "instance of a pattern",
350
+ "integer",
351
+ "intercept",
352
+ "intercepted arc",
353
+ "interior angle",
354
  "interior of a figure",
355
  "interpolate",
356
+ "interquartile range",
357
+ "intersect",
358
+ "interval",
359
+ "inverse operation",
360
+ "inverse",
361
+ "iqr",
362
+ "irrational number",
363
+ "irrational root",
364
  "irrational",
365
  "isometry transformation",
366
  "isosceles trapezoid",
367
+ "isosceles triangle",
368
+ "isosceles",
369
+ "joint probability",
370
+ "joint variation",
371
  "juxtapose",
372
  "key sequence",
373
+ "kilogram",
374
+ "kilometer",
375
+ "kite",
376
  "label",
377
  "landmark",
378
  "latitude",
379
  "lattice multiplication",
380
+ "lcm",
381
+ "least common denominator",
382
+ "least common multiple",
383
  "left to right subtraction",
384
  "leg of a right triangle",
385
+ "legs",
386
+ "length",
387
+ "like fraction",
388
  "like terms",
389
  "line graph",
390
  "line of reflection",
391
  "line of symmetry",
392
+ "line plot",
393
+ "line segment",
394
  "line symmetry",
395
+ "line",
396
+ "linear relationship",
397
  "lines of latitude",
398
  "lines of longitude",
399
+ "liter",
400
+ "local maximum",
401
+ "local minimum",
402
+ "locus",
403
+ "logarithm",
404
+ "logarithmic function",
405
+ "logarithmic scale",
406
+ "logic",
407
+ "long division",
408
  "longitude",
409
+ "lowest term",
410
  "magnitude estimate",
411
+ "make ten",
412
  "map legend",
413
  "map scale",
414
+ "mass",
415
  "maximum",
416
+ "mean absolute deviation",
417
+ "mean value",
418
+ "mean",
419
+ "measure of center",
420
+ "measure",
421
  "measurement division",
422
+ "measurement error",
423
  "measurement unit",
424
+ "median",
425
  "meridian bar",
426
+ "meter",
427
+ "meters per second",
428
  "metric system",
429
+ "metric unit",
430
+ "metric",
431
  "midpoint",
432
  "mile",
433
+ "milliliter",
434
+ "millimeter",
435
  "millisecond",
436
  "minimum",
437
  "minuend",
438
  "mirror image",
439
+ "mixed number",
440
+ "mixed unit",
441
  "mobius",
442
  "modal",
443
+ "mode",
444
+ "multipl",
445
+ "multiple",
446
  "multiplication counting principle",
447
  "multiplication diagram",
448
  "multiplication fact",
449
+ "multiplication symbol",
450
  "multiplication use class",
451
+ "multiplicative identity",
452
+ "multiplicative inverse",
453
+ "multiplier",
454
+ "mutually exclusive event",
455
+ "natural number",
456
+ "negative association",
457
+ "negative exponent",
458
+ "negative number",
459
+ "negative rational number",
460
  "nested parentheses",
461
  "net score",
462
  "net weight",
463
+ "net",
464
  "nonagon",
465
  "nonconvex polygon",
466
+ "nonlinear",
467
+ "normal distribution",
468
  "normal span",
469
+ "normal",
470
+ "number bond",
471
+ "number disk",
472
  "number grid",
473
+ "number line",
474
+ "number path",
475
+ "number sentence",
476
  "number sequence",
477
  "numeral",
478
  "numeration",
479
+ "numerator",
480
+ "numerical data",
481
+ "numerical",
482
+ "obtuse",
483
+ "octagon",
484
+ "octagonal",
485
  "octahedron",
486
+ "odd number",
487
  "open proportion",
 
488
  "operation symbol",
489
+ "operational",
490
  "opposite angle",
491
  "opposite change rule",
492
  "opposite of a number",
493
  "opposite side",
494
+ "opposite vertex",
495
+ "opposite",
496
  "order of magnitude",
497
  "order of operations",
498
  "order of rotation symmetry",
499
+ "order of",
500
+ "ordered pair",
501
+ "ordered",
502
  "ordinal number",
503
+ "orthogonal",
504
+ "ounce",
505
+ "outlier",
506
+ "pace",
507
  "pan balance",
508
  "parabola",
509
  "parallel lines",
510
+ "parallel plane",
511
+ "parallel",
512
+ "parallelogram",
513
+ "parentheses",
514
  "part to part ratio",
515
  "part to whole ratio",
516
  "part whole fraction",
517
  "partial differences subtraction",
518
+ "partial product",
519
  "partial products multiplication",
520
  "partial quotients division",
521
  "partial sums addition",
522
+ "partition",
523
  "partitive division",
524
  "parts and total diagram",
525
+ "pentagon",
526
+ "pentagonal",
527
  "per capita",
528
  "per unit rate",
529
+ "per",
530
  "percent circle",
531
+ "percent",
532
+ "percentage",
533
  "perfect number",
534
+ "perfect square",
535
+ "perfect triangle",
536
+ "perimeter",
537
+ "permutation",
538
+ "perpendicular",
539
  "perpetual calendar",
540
+ "pi",
541
+ "picture graph",
542
  "pie graph",
543
+ "pint",
544
+ "pivot",
545
+ "place value",
546
  "plane figure",
547
+ "plane",
548
  "point symmetry",
549
+ "point",
550
+ "polar coordinate",
551
+ "polygon",
552
+ "polyhedron",
553
+ "polynominal"
554
  "population density",
555
+ "population",
556
+ "positive association",
557
+ "positive number",
558
+ "pound",
559
+ "power",
560
  "precise",
561
  "predict",
562
  "prediction line",
563
  "preimage",
564
+ "prime factor",
565
  "prime factorization",
566
  "prime meridian",
567
+ "prime number",
568
+ "prism",
569
  "probability meter",
570
  "probability tree diagram",
571
+ "probability",
572
+ "product",
573
  "proper factor",
574
  "proper fraction",
575
  "property",
576
+ "proportion",
577
+ "proportional",
578
+ "proportionality",
579
+ "protractor",
580
+ "pyramid",
581
+ "pythagorean theorem",
582
  "quadrangle",
583
+ "quadrant",
584
+ "quadratic",
585
+ "quadrilateral",
586
+ "quart",
587
+ "quarter circle",
588
+ "quarter of",
589
+ "quarter-circle",
590
+ "quartile",
591
  "quick common denominator",
592
+ "quotient",
593
  "quotitive division",
594
+ "radian",
595
+ "radius of"
596
+ "radius",
597
  "random draw",
598
  "random experiment",
599
  "random number",
600
  "random sample",
601
+ "random",
602
+ "range",
603
  "rank",
604
  "rate diagram",
605
  "rate multiplication ",
606
+ "rate of change",
607
  "rate unit",
608
+ "rate",
609
+ "ratio of",
610
+ "ratio",
611
+ "rational equation",
612
+ "rational number",
613
+ "ray",
614
+ "real number",
615
  "recall survey",
616
+ "reciprocal",
617
+ "rectang",
618
+ "rectangle",
619
  "rectangular array",
620
  "rectangular coordinate grid",
621
  "rectangular prism",
622
  "rectangular pyramid",
623
+ "rectangular",
624
  "rectilinear figure",
625
  "reflection",
626
  "reflex angle",
627
+ "region",
628
+ "regular polygon",
629
  "regular polyhedron",
630
  "regular tessellation",
631
  "relation symbol",
632
+ "relative frequency",
633
+ "remainder",
634
+ "repeated addition",
635
+ "repeating decimal",
636
+ "representative",
637
  "revolution",
638
+ "rhombus",
639
+ "right angle",
640
  "right cone",
641
  "right cylinder",
642
  "right prism",
643
  "right pyramid",
644
  "right triangle",
645
+ "rigid transformation",
646
  "roman numerals",
647
+ "root",
648
+ "rotate",
649
  "rotation symmetry",
650
+ "rotation",
651
+ "round off",
652
+ "round-off",
653
+ "ruler",
654
  "same change rule for subtraction",
655
+ "sample",
656
+ "scalar",
657
+ "scale factor",
658
  "scale model",
659
  "scale of a map",
660
  "scale of a number line",
661
+ "scale",
662
+ "scaled graph",
663
+ "scaled",
664
+ "scalene triangle",
665
+ "scalene",
666
+ "scatter plot",
667
+ "scattergram",
668
  "sector",
669
  "segment",
670
+ "semi-circle",
671
+ "semicircle",
672
  "sequence",
673
+ "set",
674
+ "sign",
675
+ "significant digit",
676
+ "significant figure",
677
  "similar figures",
678
+ "similar",
679
  "simpler form",
680
+ "simplify",
681
+ "simulation",
682
  "situtation diagram",
683
+ "skew line",
684
  "slanted",
685
  "slide rule",
686
+ "slope",
687
+ "solid figure",
688
+ "solution",
689
  "span",
690
+ "speed",
691
+ "sphere",
692
+ "square root",
693
+ "square unit",
694
+ "square",
695
+ "squared",
696
  "stacked bar graph",
697
+ "standard form",
698
  "standard unit",
699
+ "statistic",
700
  "stem and leaf plot",
701
  "step graph",
702
+ "straight angle",
703
  "straightedge",
704
+ "subset of"
705
  "substitute",
706
+ "subtract",
707
  "subtrahend",
708
+ "sum of",
709
+ "sum",
710
+ "supplementary angle",
711
+ "surface area",
712
  "surface",
713
+ "survey",
714
  "symmetric",
715
+ "symmetry",
716
+ "system of equation",
717
+ "system of",
718
+ "table",
719
+ "take from ten",
720
  "tally",
721
+ "tangent circle",
722
  "tangent",
723
+ "tangram",
724
+ "tape diagram",
725
  "temperature",
726
  "template",
727
+ "tens place",
728
+ "tenth",
729
+ "term",
730
+ "terminating decimal",
731
+ "tessellat",
732
+ "tessellate",
733
+ "tessellation",
734
  "tetrahedron",
735
+ "tetromino",
736
  "theorem",
737
+ "thermometer",
738
+ "thousand",
739
+ "thousandth",
740
  "tile",
741
  "tiling",
742
  "time graph",
 
744
  "top heavy fraction",
745
  "topological",
746
  "topology",
747
+ "total area",
748
+ "total of",
749
+ "total surface",
750
+ "total volume",
751
  "trade first subtraction",
752
+ "transformation",
753
+ "translation",
754
+ "transversal",
755
+ "trapezoid",
756
  "tree diagram",
757
+ "triangle",
758
  "triangular",
759
  "true number sentence",
760
  "truncate",
761
+ "twin prime",
762
+ "two-way table",
763
+ "unit cube",
764
+ "unit form",
765
+ "unit fraction",
766
+ "unit interval",
767
+ "unit price",
768
+ "unit rate",
769
+ "unit square",
770
+ "unit",
771
+ "unknown",
772
+ "unlike denominator",
773
+ "unlike fraction",
774
+ "value",
775
  "vanishing ",
776
+ "variability",
777
+ "variable",
778
+ "velocity",
779
  "venn diagram",
780
  "vernal equinox",
781
+ "vertex",
782
+ "vertical",
783
+ "volume of",
784
+ "volume",
785
  "weight",
786
+ "whole number",
787
+ "whole unit",
788
+ "whole",
789
  "width",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
790
  "withdrawal",
791
+ "word form",
792
+ "x axes",
793
+ "x axis",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
794
  "x intercept",
795
+ "x-axes",
796
  "x-axis",
797
+ "y axes",
798
+ "y axis",
799
  "y intercept",
800
+ "y-axes",
801
  "y-axis",
802
  "y-intercept",
803
+ "yard",
804
  "zero property of multiplication",
805
+ "zero",
 
 
 
 
 
 
 
 
 
 
 
 
 
  ]

+ p = inflect.engine()
+
+ def singular_to_plural(word):
+     """Convert singular words to plural using inflect."""
+     plural = p.plural(word)
+     return plural or word
+
+ def plural_to_singular(word):
+     """Convert plural word to singular using inflect."""
+     return p.singular_noun(word) or word
+
+ plural_MATH_WORDS = [singular_to_plural(word) for word in MATH_WORDS]
+
+ MATH_WORDS += plural_MATH_WORDS
+
  def get_num_words(text):
      if not isinstance(text, str):
          print("%s is not a string" % text)