ikarasz committed on
Commit
3f3a914
·
1 Parent(s): 50279c7

update math words

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -0
  2. utils.py +509 -410
requirements.txt CHANGED
@@ -5,3 +5,4 @@ scipy==1.9.2
5
  torch==2.3.1
6
  transformers==4.46.1
7
  nltk==3.9.1
 
 
5
  torch==2.3.1
6
  transformers==4.46.1
7
  nltk==3.9.1
8
+ inflect==7.5.0
utils.py CHANGED
@@ -7,6 +7,7 @@ from cleantext import clean
7
  from num2words import num2words
8
  import re
9
  import string
 
10
 
11
  punct_chars = list((set(string.punctuation) | {'’', '‘', '–', '—', '~', '|', '“', '”', '…', "'", "`", '_'}))
12
  punct_chars.sort()
@@ -34,530 +35,708 @@ MATH_PREFIXES = [
34
  "median",
35
  "ratio",
36
  "area",
37
- ]
38
 
39
- MATH_WORDS = [
40
- "absolute value",
41
- "algebra",
42
- "area",
43
- "average",
44
- "base of",
45
- "box plot",
46
- "categorical",
47
- "coefficient",
48
- "common factor",
49
- "common multiple",
50
- "compose",
51
- "coordinate",
52
- "cubed",
53
- "decompose",
54
- "dependent variable",
55
- "distribution",
56
- "dot plot",
57
- "double number line diagram",
58
- "equivalent",
59
- "equivalent expression",
60
- "ratio",
61
- "exponent",
62
- "frequency",
63
- "greatest common factor",
64
- "gcd",
65
- "height of",
66
- "histogram",
67
- "independent variable",
68
- "integer",
69
- "interquartile range",
70
- "iqr",
71
- "least common multiple",
72
- "long division",
73
- "mean absolute deviation",
74
- "median",
75
- "negative number",
76
- "opposite vertex",
77
- "parallelogram",
78
- "percent",
79
- "polygon",
80
- "polyhedron",
81
- "positive number",
82
- "prism",
83
- "pyramid",
84
- "quadrant",
85
- "quadrilateral",
86
- "quartile",
87
- "rational number",
88
- "reciprocal",
89
- "equality",
90
- "inequality",
91
- "squared",
92
- "statistic",
93
- "surface area",
94
- "identity property",
95
- "addend",
96
- "unit",
97
- "number sentence",
98
- "make ten",
99
- "take from ten",
100
- "number bond",
101
- "total",
102
- "estimate",
103
- "hashmark",
104
- "meter",
105
- "number line",
106
- "ruler",
107
- "centimeter",
108
- "base ten",
109
- "expanded form",
110
- "hundred",
111
- "thousand",
112
- "place value",
113
- "number disk",
114
- "standard form",
115
- "unit form",
116
- "word form",
117
- "tens place",
118
- "algorithm",
119
- "equation",
120
- "simplif",
121
- "addition",
122
- "subtract",
123
- "array",
124
- "even number",
125
- "odd number",
126
- "repeated addition",
127
- "tessellat",
128
- "whole number",
129
- "number path",
130
- "rectangle",
131
- "square",
132
- "bar graph",
133
- "data",
134
- "degree",
135
- "line plot",
136
- "picture graph",
137
- "scale",
138
- "survey",
139
- "thermometer",
140
- "estimat",
141
- "tape diagram",
142
- "value",
143
- "analog",
144
- "angle",
145
- "parallel",
146
- "partition",
147
- "pentagon",
148
- "right angle",
149
- "cube",
150
- "digital",
151
- "quarter of",
152
- "tangram",
153
- "circle",
154
- "hexagon",
155
- "half circle",
156
- "half-circle",
157
- "quarter circle",
158
- "quarter-circle",
159
- "semicircle",
160
- "semi-circle",
161
- "rectang",
162
- "rhombus",
163
- "trapezoid",
164
- "triangle",
165
- "commutative",
166
- "equal group",
167
- "distributive",
168
- "divide",
169
- "division",
170
  "multipl",
171
- "parentheses",
172
- "quotient",
173
- "rotate",
174
- "unknown",
175
- "add",
176
- "capacity",
177
- "continuous",
178
- "endpoint",
179
- "gram",
180
- "interval",
181
- "kilogram",
182
- "volume",
183
- "liter",
184
- "milliliter",
185
- "approximate",
186
- "area model",
187
- "square unit",
188
- "unit square",
189
  "geometr",
190
- "equivalent fraction",
191
- "fraction form",
192
- "fractional unit",
193
- "unit fraction",
194
- "unit interval",
195
- "measur",
196
- "graph",
197
- "scaled graph",
198
- "diagonal",
199
- "perimeter",
200
- "regular polygon",
201
- "tessellate",
202
- "tetromino",
203
- "heptagon",
204
- "octagon",
205
- "digit",
206
- "expression",
207
- "sum",
208
- "kilometer",
209
- "mass",
210
- "mixed unit",
211
- "length",
212
  "measure",
213
- "simplify",
214
- "associative",
215
- "composite",
216
- "divisible",
217
- "divisor",
218
- "partial product",
219
- "prime number",
220
- "remainder",
221
- "acute",
222
- "arc",
223
- "collinear",
224
- "equilateral",
225
- "intersect",
226
- "isosceles",
227
- "symmetry",
228
- "line segment",
229
- "line",
230
- "obtuse",
231
- "perpendicular",
232
- "protractor",
233
- "scalene",
234
- "straight angle",
235
- "supplementary angle",
236
- "vertex",
237
- "common denominator",
238
- "denominator",
239
- "fraction",
240
- "mixed number",
241
- "numerator",
242
- "whole",
243
- "decimal expanded form",
244
- "decimal",
245
- "hundredth",
246
- "tenth",
247
- "customary system of measurement",
248
- "customary unit",
249
- "gallon",
250
- "metric",
251
- "metric unit",
252
- "ounce",
253
- "pint",
254
- "quart",
255
- "convert",
256
- "distance",
257
- "millimeter",
258
- "thousandth",
259
- "hundredths",
260
- "conversion factor",
261
- "decimal fraction",
262
- "multiplier",
263
- "equivalence",
264
- "multiple",
265
- "product",
266
- "benchmark fraction",
267
- "cup",
268
- "pound",
269
- "yard",
270
- "whole unit",
271
- "decimal divisor",
272
- "factors",
273
- "bisect",
274
- "cubic units",
275
- "hierarchy",
276
- "unit cube",
277
- "attribute",
278
- "kite",
279
- "bisector",
280
- "solid figure",
281
- "square units",
282
- "dimension",
283
- "axis",
284
- "ordered pair",
285
- "angle measure",
286
- "horizontal",
287
- "vertical",
288
- "categorical data",
289
- "lcm",
290
- "measure of center",
291
- "meters per second",
292
- "numerical",
293
- "solution",
294
- "unit price",
295
- "unit rate",
296
- "variability",
297
- "variable",
298
  "abundant number",
299
  "accurate",
300
  "acre",
 
 
 
301
  "addition fact",
 
 
 
 
 
302
  "algebraic",
 
 
303
  "altitude",
 
 
 
 
304
  "apex",
305
- "arithmetic facts",
 
 
 
 
 
 
306
  "associative property",
 
307
  "astronomical unit",
 
 
 
 
 
 
 
 
 
 
 
308
  "base",
309
  "baseline",
 
310
  "billion",
 
 
 
 
 
 
 
 
 
311
  "celsius",
312
  "census",
313
  "cent",
314
  "center of a circle",
 
315
  "center of a sphere",
 
 
 
 
 
316
  "chance",
 
317
  "circle graph",
 
 
 
 
 
 
 
318
  "column",
 
319
  "combine",
 
 
320
  "common fraction",
 
 
 
321
  "comparison diagram",
322
  "comparison story",
323
  "compass",
324
  "complement",
 
 
 
325
  "concave polygon",
326
  "concentric circles",
 
 
 
327
  "consecutive",
 
328
  "constant",
329
  "continuous model of area",
330
  "continuous model of volume",
 
331
  "contour",
332
  "conversion fact",
 
 
 
333
  "convex polygon",
 
 
 
 
334
  "counting numbers",
335
  "counting up subtraction",
 
 
336
  "cover-up method",
337
  "cross multiplication",
 
 
 
 
 
 
 
338
  "cubic",
339
  "cubit",
 
340
  "curved surface",
 
 
341
  "cylinder",
 
 
342
  "decagon",
 
 
 
 
 
343
  "decimeter",
 
344
  "deficient number",
 
 
 
345
  "density",
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  "discrete model",
347
  "displacement method",
 
 
 
 
 
348
  "divisibility test",
349
  "divisible by",
 
 
 
350
  "dodecahedron",
 
 
351
  "double stem plot",
352
  "doubles fact",
 
353
  "egyptian multiplication",
354
  "elevation",
355
  "embed figure",
356
  "end point",
 
357
  "enlarge",
 
 
358
  "equal",
359
- "equal groups",
360
- "equal parts",
361
- "equidistant marks",
362
  "equilateral polygon",
363
- "equivalent fractions",
 
 
 
 
 
 
 
 
 
364
  "european subtraction",
 
 
 
 
365
  "expanded notation",
366
  "expected outcome",
367
- "exponential",
368
- "extended facts",
 
 
 
 
 
369
  "fact power",
370
  "fact triangle",
371
  "factor",
372
- "factors of numbers",
 
373
  "fahrenheit",
374
  "false number sentence",
375
- "figurate numbers",
376
  "flowchart",
377
  "fluid ounce",
 
 
 
378
  "fractional part",
 
 
379
  "fulcrum",
380
  "function machine",
 
381
  "furlong",
 
 
382
  "genus",
383
  "geoboard",
 
384
  "geometric solid",
385
  "geometry template",
386
  "girth",
387
  "golden ratio",
388
  "golden rectangle",
 
389
  "graph key",
 
 
 
390
  "grouping symbol",
 
 
 
 
 
 
391
  "hemisphere",
 
 
 
 
 
 
 
 
 
 
 
 
 
392
  "icosahedron",
 
 
 
 
393
  "improper fraction",
394
  "inch",
395
- "index of locations",
 
 
 
 
396
  "indirect measurement",
 
 
397
  "input",
 
398
  "inscribed polygon",
399
  "instance of a pattern",
 
 
 
 
400
  "interior of a figure",
401
  "interpolate",
 
 
 
 
 
 
 
 
402
  "irrational",
403
  "isometry transformation",
404
  "isosceles trapezoid",
 
 
 
 
405
  "juxtapose",
406
  "key sequence",
 
 
 
407
  "label",
408
  "landmark",
409
  "latitude",
410
  "lattice multiplication",
 
 
 
411
  "left to right subtraction",
412
  "leg of a right triangle",
 
 
 
413
  "like terms",
414
  "line graph",
415
  "line of reflection",
416
  "line of symmetry",
 
 
417
  "line symmetry",
 
 
418
  "lines of latitude",
419
  "lines of longitude",
 
 
 
 
 
 
 
 
 
420
  "longitude",
 
421
  "magnitude estimate",
 
422
  "map legend",
423
  "map scale",
 
424
  "maximum",
 
 
 
 
 
425
  "measurement division",
 
426
  "measurement unit",
 
427
  "meridian bar",
 
 
428
  "metric system",
 
 
429
  "midpoint",
430
  "mile",
 
 
431
  "millisecond",
432
  "minimum",
433
  "minuend",
434
  "mirror image",
 
 
435
  "mobius",
436
  "modal",
 
 
 
437
  "multiplication counting principle",
438
  "multiplication diagram",
439
  "multiplication fact",
440
- "multiplication symbols",
441
  "multiplication use class",
442
- "negative rational numbers",
 
 
 
 
 
 
 
 
443
  "nested parentheses",
444
  "net score",
445
  "net weight",
 
446
  "nonagon",
447
  "nonconvex polygon",
 
 
448
  "normal span",
 
 
 
449
  "number grid",
 
 
 
450
  "number sequence",
451
  "numeral",
452
  "numeration",
 
 
 
 
 
 
453
  "octahedron",
 
454
  "open proportion",
455
- "operation",
456
  "operation symbol",
 
457
  "opposite angle",
458
  "opposite change rule",
459
  "opposite of a number",
460
  "opposite side",
 
 
461
  "order of magnitude",
462
  "order of operations",
463
  "order of rotation symmetry",
 
 
 
464
  "ordinal number",
 
 
 
 
465
  "pan balance",
466
  "parabola",
467
  "parallel lines",
468
- "parallel planes",
 
 
 
469
  "part to part ratio",
470
  "part to whole ratio",
471
  "part whole fraction",
472
  "partial differences subtraction",
 
473
  "partial products multiplication",
474
  "partial quotients division",
475
  "partial sums addition",
 
476
  "partitive division",
477
  "parts and total diagram",
 
 
478
  "per capita",
479
  "per unit rate",
 
480
  "percent circle",
 
 
481
  "perfect number",
 
 
 
 
 
482
  "perpetual calendar",
 
 
483
  "pie graph",
484
- "plane",
 
 
485
  "plane figure",
 
486
  "point symmetry",
 
 
 
 
 
487
  "population density",
 
 
 
 
 
488
  "precise",
489
  "predict",
490
  "prediction line",
491
  "preimage",
 
492
  "prime factorization",
493
  "prime meridian",
494
- "probability",
 
495
  "probability meter",
496
  "probability tree diagram",
 
 
497
  "proper factor",
498
  "proper fraction",
499
  "property",
 
 
 
 
 
 
500
  "quadrangle",
 
 
 
 
 
 
 
 
501
  "quick common denominator",
 
502
  "quotitive division",
 
 
 
503
  "random draw",
504
  "random experiment",
505
  "random number",
506
  "random sample",
 
 
507
  "rank",
508
  "rate diagram",
509
  "rate multiplication ",
 
510
  "rate unit",
 
 
 
 
 
 
 
511
  "recall survey",
 
 
 
512
  "rectangular array",
513
  "rectangular coordinate grid",
514
  "rectangular prism",
515
  "rectangular pyramid",
 
516
  "rectilinear figure",
517
  "reflection",
518
  "reflex angle",
 
 
519
  "regular polyhedron",
520
  "regular tessellation",
521
  "relation symbol",
 
 
 
 
 
522
  "revolution",
 
 
523
  "right cone",
524
  "right cylinder",
525
  "right prism",
526
  "right pyramid",
527
  "right triangle",
 
528
  "roman numerals",
 
 
529
  "rotation symmetry",
 
 
 
 
530
  "same change rule for subtraction",
 
 
 
531
  "scale model",
532
  "scale of a map",
533
  "scale of a number line",
 
 
 
 
 
 
 
534
  "sector",
535
  "segment",
 
 
536
  "sequence",
537
- "significant digits",
 
 
 
538
  "similar figures",
 
539
  "simpler form",
 
 
540
  "situtation diagram",
541
- "skew lines",
542
  "slanted",
543
  "slide rule",
 
 
 
544
  "span",
 
 
 
 
 
 
545
  "stacked bar graph",
 
546
  "standard unit",
 
547
  "stem and leaf plot",
548
  "step graph",
 
549
  "straightedge",
 
550
  "substitute",
 
551
  "subtrahend",
 
 
 
 
552
  "surface",
 
553
  "symmetric",
 
 
 
 
 
554
  "tally",
 
555
  "tangent",
556
- "tangent circles",
 
557
  "temperature",
558
  "template",
 
 
 
 
 
 
 
559
  "tetrahedron",
 
560
  "theorem",
 
 
 
561
  "tile",
562
  "tiling",
563
  "time graph",
@@ -565,159 +744,79 @@ MATH_WORDS = [
565
  "top heavy fraction",
566
  "topological",
567
  "topology",
 
 
 
 
568
  "trade first subtraction",
 
 
 
 
569
  "tree diagram",
 
570
  "triangular",
571
  "true number sentence",
572
  "truncate",
573
- "twin primes",
574
- "unlike denominators",
575
- "unlike fractions",
 
 
 
 
 
 
 
 
 
 
 
576
  "vanishing ",
 
 
 
577
  "venn diagram",
578
  "vernal equinox",
 
 
 
 
579
  "weight",
 
 
 
580
  "width",
581
- "base of a prism",
582
- "base of a pyramid",
583
- "face",
584
- "numerical data",
585
- "opposite",
586
- "pace",
587
- "per",
588
- "region",
589
- "sign",
590
- "alternate interior angles",
591
- "base of an exponent",
592
- "cone",
593
- "congruent",
594
- "counterclockwise",
595
- "cube root",
596
- "hypotenuse",
597
- "irrational number",
598
- "linear relationship",
599
- "positive association",
600
- "rate of change",
601
- "translation",
602
- "transversal",
603
- "circumference",
604
- "corresponding",
605
- "expand",
606
- "population",
607
- "proportion",
608
- "radius",
609
- "random",
610
- "repeating decimal",
611
- "representative",
612
- "scaled",
613
  "withdrawal",
614
- "center",
615
- "edge",
616
- "height of a parallelogram or triangle",
617
- "net",
618
- "speed",
619
- "table",
620
- "term",
621
- "adjacent",
622
- "complementary",
623
- "cross-section",
624
- "cross section",
625
- "deposit",
626
- "event",
627
- "measurement error",
628
- "proportional",
629
- "simulation",
630
- "center of a dilation",
631
- "clockwise",
632
- "dilation",
633
- "function",
634
- "negative association",
635
- "pythagorean theorem",
636
- "relative frequency",
637
- "rigid transformation",
638
- "scale factor",
639
- "scatter plot",
640
- "similar",
641
- "sphere",
642
- "two-way table",
643
- "additive identity",
644
- "additive inverse",
645
- "box and whisker plot",
646
- "cartesian coordinates",
647
- "central angle",
648
- "chord",
649
- "combination",
650
- "commutative property",
651
- "coplanar",
652
- "cross product",
653
- "dependent events",
654
- "difference",
655
- "dividend",
656
- "equilateral triangle",
657
- "error of measurement",
658
- "factorial",
659
- "formula",
660
- "identity property of",
661
- "independent events",
662
- "infinity",
663
- "inscribed angle",
664
- "intercept",
665
- "intercepted arc",
666
- "inverse",
667
- "inverse operations",
668
- "isosceles triangle",
669
- "least common denominator",
670
- "like fractions",
671
- "locus",
672
- "logic",
673
- "lowest terms",
674
- "mode",
675
- "multiplicative identity",
676
- "multiplicative inverse",
677
- "mutually exclusive events",
678
- "natural numbers",
679
- "normal",
680
- "permutation",
681
- "pi",
682
- "point",
683
- "power",
684
- "range",
685
- "rate",
686
- "ray",
687
- "real numbers",
688
- "rectangular",
689
- "root",
690
- "rotation",
691
- "scalene triangle",
692
- "scattergram",
693
- "set",
694
- "statistics",
695
- "terminating decimal",
696
- "transformation",
697
  "x intercept",
 
698
  "x-axis",
699
- "x-intercept",
 
700
  "y intercept",
 
701
  "y-axis",
702
  "y-intercept",
703
- "zero",
704
  "zero property of multiplication",
705
- "base of a parallelogram",
706
- "base of a triangle",
707
- "height",
708
- "chance experiment",
709
- "diameter",
710
- "mean",
711
- "percentage",
712
- "sample",
713
- "legs",
714
- "outlier",
715
- "slope",
716
- "square root",
717
- "system of equations",
718
- "tessellation",
719
  ]
720
 
 
 
 
 
 
 
 
 
 
 
 
 
721
  def get_num_words(text):
722
  if not isinstance(text, str):
723
  print("%s is not a string" % text)
 
7
  from num2words import num2words
8
  import re
9
  import string
10
+ import inflect
11
 
12
  punct_chars = list((set(string.punctuation) | {'’', '‘', '–', '—', '~', '|', '“', '”', '…', "'", "`", '_'}))
13
  punct_chars.sort()
 
35
  "median",
36
  "ratio",
37
  "area",
 
38
 
39
+ # added
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  "multipl",
41
+ "divid",
42
+ "subtrac",
43
+ "logarit",
44
+ "algebr",
45
+ "calcul",
46
+ "matri",
47
+ "vect",
 
 
 
 
 
 
 
 
 
 
 
48
  "geometr",
49
+ "statist",
50
+ "probabil",
51
+ "coeffi",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  "measure",
53
+ "simplif"
54
+ ]
55
+
56
+ MATH_WORDS = [
57
+ "absolute deviation",
58
+ "absolute value",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  "abundant number",
60
  "accurate",
61
  "acre",
62
+ "acute",
63
+ "add",
64
+ "addend",
65
  "addition fact",
66
+ "addition",
67
+ "additive identity",
68
+ "additive inverse",
69
+ "adjacent",
70
+ "algebra",
71
  "algebraic",
72
+ "algorithm",
73
+ "alternate interior angle",
74
  "altitude",
75
+ "analog",
76
+ "angle measure",
77
+ "angle",
78
+ "angular",
79
  "apex",
80
+ "approximate",
81
+ "arc",
82
+ "area model",
83
+ "area",
84
+ "arithmetic fact",
85
+ "arithmetic",
86
+ "array",
87
  "associative property",
88
+ "associative",
89
  "astronomical unit",
90
+ "attribute",
91
+ "average",
92
+ "axis",
93
+ "bar graph",
94
+ "base of a parallelogram",
95
+ "base of a prism",
96
+ "base of a pyramid",
97
+ "base of a triangle",
98
+ "base of an exponent",
99
+ "base of",
100
+ "base ten",
101
  "base",
102
  "baseline",
103
+ "benchmark fraction",
104
  "billion",
105
+ "binomial",
106
+ "bisect",
107
+ "bisector",
108
+ "box and whisker plot",
109
+ "box plot",
110
+ "capacity",
111
+ "cartesian coordinate",
112
+ "categorical data",
113
+ "categorical",
114
  "celsius",
115
  "census",
116
  "cent",
117
  "center of a circle",
118
+ "center of a dilation",
119
  "center of a sphere",
120
+ "center",
121
+ "centimeter",
122
+ "central angle",
123
+ "centroid",
124
+ "chance experiment",
125
  "chance",
126
+ "chord",
127
  "circle graph",
128
+ "circle",
129
+ "circular",
130
+ "circumference",
131
+ "clockwise",
132
+ "coefficient",
133
+ "collinear",
134
+ "column matrix",
135
  "column",
136
+ "combination",
137
  "combine",
138
+ "common denominator",
139
+ "common factor",
140
  "common fraction",
141
+ "common multiple",
142
+ "commutative property",
143
+ "commutative",
144
  "comparison diagram",
145
  "comparison story",
146
  "compass",
147
  "complement",
148
+ "complementary",
149
+ "compose",
150
+ "composite",
151
  "concave polygon",
152
  "concentric circles",
153
+ "concentric",
154
+ "cone",
155
+ "congruent",
156
  "consecutive",
157
+ "constant function",
158
  "constant",
159
  "continuous model of area",
160
  "continuous model of volume",
161
+ "continuous",
162
  "contour",
163
  "conversion fact",
164
+ "conversion factor",
165
+ "convert",
166
+ "convex function",
167
  "convex polygon",
168
+ "coordinate",
169
+ "coplanar",
170
+ "corresponding",
171
+ "counterclockwise",
172
  "counting numbers",
173
  "counting up subtraction",
174
+ "covariance",
175
+ "covariate",
176
  "cover-up method",
177
  "cross multiplication",
178
+ "cross product",
179
+ "cross section",
180
+ "cross-section",
181
+ "cube root",
182
+ "cube",
183
+ "cubed",
184
+ "cubic unit",
185
  "cubic",
186
  "cubit",
187
+ "cup",
188
  "curved surface",
189
+ "customary system of measurement",
190
+ "customary unit",
191
  "cylinder",
192
+ "cylindrical",
193
+ "data",
194
  "decagon",
195
+ "decimal divisor",
196
+ "decimal expanded form",
197
+ "decimal fraction",
198
+ "decimal point",
199
+ "decimal",
200
  "decimeter",
201
+ "decompose",
202
  "deficient number",
203
+ "degree",
204
+ "delta",
205
+ "denominator",
206
  "density",
207
+ "dependent event",
208
+ "dependent variable",
209
+ "deposit",
210
+ "derivative",
211
+ "determinant",
212
+ "diagonal",
213
+ "diameter",
214
+ "difference",
215
+ "differential",
216
+ "digit",
217
+ "digital",
218
+ "dilation",
219
+ "dimension",
220
  "discrete model",
221
  "displacement method",
222
+ "distance",
223
+ "distribution",
224
+ "distributive",
225
+ "divide",
226
+ "dividend",
227
  "divisibility test",
228
  "divisible by",
229
+ "divisible",
230
+ "division",
231
+ "divisor",
232
  "dodecahedron",
233
+ "dot plot",
234
+ "double number line diagram",
235
  "double stem plot",
236
  "doubles fact",
237
+ "edge",
238
  "egyptian multiplication",
239
  "elevation",
240
  "embed figure",
241
  "end point",
242
+ "endpoint",
243
  "enlarge",
244
+ "equal group",
245
+ "equal part",
246
  "equal",
247
+ "equality",
248
+ "equation",
249
+ "equidistant mark",
250
  "equilateral polygon",
251
+ "equilateral triangle",
252
+ "equilateral",
253
+ "equivalence",
254
+ "equivalent expression",
255
+ "equivalent fraction",
256
+ "equivalent",
257
+ "error bound",
258
+ "error of measurement",
259
+ "estimat",
260
+ "estimate",
261
  "european subtraction",
262
+ "even number",
263
+ "event",
264
+ "expand",
265
+ "expanded form",
266
  "expanded notation",
267
  "expected outcome",
268
+ "expected value",
269
+ "exponent",
270
+ "exponential function",
271
+ "exponential growth",
272
+ "expression",
273
+ "extended fact",
274
+ "face",
275
  "fact power",
276
  "fact triangle",
277
  "factor",
278
+ "factorial",
279
+ "factors of number",
280
  "fahrenheit",
281
  "false number sentence",
282
+ "figurate number",
283
  "flowchart",
284
  "fluid ounce",
285
+ "formula",
286
+ "fraction form",
287
+ "fraction",
288
  "fractional part",
289
+ "fractional unit",
290
+ "frequency",
291
  "fulcrum",
292
  "function machine",
293
+ "function",
294
  "furlong",
295
+ "gallon",
296
+ "gcd",
297
  "genus",
298
  "geoboard",
299
+ "geometr",
300
  "geometric solid",
301
  "geometry template",
302
  "girth",
303
  "golden ratio",
304
  "golden rectangle",
305
+ "gram",
306
  "graph key",
307
+ "graph",
308
+ "greatest common divisor",
309
+ "greatest common factor",
310
  "grouping symbol",
311
+ "half circle",
312
+ "half-circle",
313
+ "hashmark",
314
+ "height of a parallelogram or triangle",
315
+ "height of",
316
+ "height",
317
  "hemisphere",
318
+ "heptagon",
319
+ "heptagonal",
320
+ "hexagon",
321
+ "hexagonal",
322
+ "hierarchy",
323
+ "histogram",
324
+ "horizontal shift",
325
+ "horizontal stretch",
326
+ "horizontal",
327
+ "hundred",
328
+ "hundredth",
329
+ "hypotenuse",
330
+ "hypothesis",
331
  "icosahedron",
332
+ "identity function",
333
+ "identity matrix",
334
+ "identity property of",
335
+ "identity property",
336
  "improper fraction",
337
  "inch",
338
+ "incircle",
339
+ "indefinite integral",
340
+ "independent event",
341
+ "independent variable",
342
+ "index of location",
343
  "indirect measurement",
344
+ "inequality",
345
+ "infinity",
346
  "input",
347
+ "inscribed angle",
348
  "inscribed polygon",
349
  "instance of a pattern",
350
+ "integer",
351
+ "intercept",
352
+ "intercepted arc",
353
+ "interior angle",
354
  "interior of a figure",
355
  "interpolate",
356
+ "interquartile range",
357
+ "intersect",
358
+ "interval",
359
+ "inverse operation",
360
+ "inverse",
361
+ "iqr",
362
+ "irrational number",
363
+ "irrational root",
364
  "irrational",
365
  "isometry transformation",
366
  "isosceles trapezoid",
367
+ "isosceles triangle",
368
+ "isosceles",
369
+ "joint probability",
370
+ "joint variation",
371
  "juxtapose",
372
  "key sequence",
373
+ "kilogram",
374
+ "kilometer",
375
+ "kite",
376
  "label",
377
  "landmark",
378
  "latitude",
379
  "lattice multiplication",
380
+ "lcm",
381
+ "least common denominator",
382
+ "least common multiple",
383
  "left to right subtraction",
384
  "leg of a right triangle",
385
+ "legs",
386
+ "length",
387
+ "like fraction",
388
  "like terms",
389
  "line graph",
390
  "line of reflection",
391
  "line of symmetry",
392
+ "line plot",
393
+ "line segment",
394
  "line symmetry",
395
+ "line",
396
+ "linear relationship",
397
  "lines of latitude",
398
  "lines of longitude",
399
+ "liter",
400
+ "local maximum",
401
+ "local minimum",
402
+ "locus",
403
+ "logarithm",
404
+ "logarithmic function",
405
+ "logarithmic scale",
406
+ "logic",
407
+ "long division",
408
  "longitude",
409
+ "lowest term",
410
  "magnitude estimate",
411
+ "make ten",
412
  "map legend",
413
  "map scale",
414
+ "mass",
415
  "maximum",
416
+ "mean absolute deviation",
417
+ "mean value",
418
+ "mean",
419
+ "measure of center",
420
+ "measure",
421
  "measurement division",
422
+ "measurement error",
423
  "measurement unit",
424
+ "median",
425
  "meridian bar",
426
+ "meter",
427
+ "meters per second",
428
  "metric system",
429
+ "metric unit",
430
+ "metric",
431
  "midpoint",
432
  "mile",
433
+ "milliliter",
434
+ "millimeter",
435
  "millisecond",
436
  "minimum",
437
  "minuend",
438
  "mirror image",
439
+ "mixed number",
440
+ "mixed unit",
441
  "mobius",
442
  "modal",
443
+ "mode",
444
+ "multipl",
445
+ "multiple",
446
  "multiplication counting principle",
447
  "multiplication diagram",
448
  "multiplication fact",
449
+ "multiplication symbol",
450
  "multiplication use class",
451
+ "multiplicative identity",
452
+ "multiplicative inverse",
453
+ "multiplier",
454
+ "mutually exclusive event",
455
+ "natural number",
456
+ "negative association",
457
+ "negative exponent",
458
+ "negative number",
459
+ "negative rational number",
460
  "nested parentheses",
461
  "net score",
462
  "net weight",
463
+ "net",
464
  "nonagon",
465
  "nonconvex polygon",
466
+ "nonlinear",
467
+ "normal distribution",
468
  "normal span",
469
+ "normal",
470
+ "number bond",
471
+ "number disk",
472
  "number grid",
473
+ "number line",
474
+ "number path",
475
+ "number sentence",
476
  "number sequence",
477
  "numeral",
478
  "numeration",
479
+ "numerator",
480
+ "numerical data",
481
+ "numerical",
482
+ "obtuse",
483
+ "octagon",
484
+ "octagonal",
485
  "octahedron",
486
+ "odd number",
487
  "open proportion",
 
488
  "operation symbol",
489
+ "operational",
490
  "opposite angle",
491
  "opposite change rule",
492
  "opposite of a number",
493
  "opposite side",
494
+ "opposite vertex",
495
+ "opposite",
496
  "order of magnitude",
497
  "order of operations",
498
  "order of rotation symmetry",
499
+ "order of",
500
+ "ordered pair",
501
+ "ordered",
502
  "ordinal number",
503
+ "orthogonal",
504
+ "ounce",
505
+ "outlier",
506
+ "pace",
507
  "pan balance",
508
  "parabola",
509
  "parallel lines",
510
+ "parallel plane",
511
+ "parallel",
512
+ "parallelogram",
513
+ "parentheses",
514
  "part to part ratio",
515
  "part to whole ratio",
516
  "part whole fraction",
517
  "partial differences subtraction",
518
+ "partial product",
519
  "partial products multiplication",
520
  "partial quotients division",
521
  "partial sums addition",
522
+ "partition",
523
  "partitive division",
524
  "parts and total diagram",
525
+ "pentagon",
526
+ "pentagonal",
527
  "per capita",
528
  "per unit rate",
529
+ "per",
530
  "percent circle",
531
+ "percent",
532
+ "percentage",
533
  "perfect number",
534
+ "perfect square",
535
+ "perfect triangle",
536
+ "perimeter",
537
+ "permutation",
538
+ "perpendicular",
539
  "perpetual calendar",
540
+ "pi",
541
+ "picture graph",
542
  "pie graph",
543
+ "pint",
544
+ "pivot",
545
+ "place value",
546
  "plane figure",
547
+ "plane",
548
  "point symmetry",
549
+ "point",
550
+ "polar coordinate",
551
+ "polygon",
552
+ "polyhedron",
553
+ "polynomial",
554
  "population density",
555
+ "population",
556
+ "positive association",
557
+ "positive number",
558
+ "pound",
559
+ "power",
560
  "precise",
561
  "predict",
562
  "prediction line",
563
  "preimage",
564
+ "prime factor",
565
  "prime factorization",
566
  "prime meridian",
567
+ "prime number",
568
+ "prism",
569
  "probability meter",
570
  "probability tree diagram",
571
+ "probability",
572
+ "product",
573
  "proper factor",
574
  "proper fraction",
575
  "property",
576
+ "proportion",
577
+ "proportional",
578
+ "proportionality",
579
+ "protractor",
580
+ "pyramid",
581
+ "pythagorean theorem",
582
  "quadrangle",
583
+ "quadrant",
584
+ "quadratic",
585
+ "quadrilateral",
586
+ "quart",
587
+ "quarter circle",
588
+ "quarter of",
589
+ "quarter-circle",
590
+ "quartile",
591
  "quick common denominator",
592
+ "quotient",
593
  "quotitive division",
594
+ "radian",
595
+ "radius of",
596
+ "radius",
597
  "random draw",
598
  "random experiment",
599
  "random number",
600
  "random sample",
601
+ "random",
602
+ "range",
603
  "rank",
604
  "rate diagram",
605
  "rate multiplication ",
606
+ "rate of change",
607
  "rate unit",
608
+ "rate",
609
+ "ratio of",
610
+ "ratio",
611
+ "rational equation",
612
+ "rational number",
613
+ "ray",
614
+ "real number",
615
  "recall survey",
616
+ "reciprocal",
617
+ "rectang",
618
+ "rectangle",
619
  "rectangular array",
620
  "rectangular coordinate grid",
621
  "rectangular prism",
622
  "rectangular pyramid",
623
+ "rectangular",
624
  "rectilinear figure",
625
  "reflection",
626
  "reflex angle",
627
+ "region",
628
+ "regular polygon",
629
  "regular polyhedron",
630
  "regular tessellation",
631
  "relation symbol",
632
+ "relative frequency",
633
+ "remainder",
634
+ "repeated addition",
635
+ "repeating decimal",
636
+ "representative",
637
  "revolution",
638
+ "rhombus",
639
+ "right angle",
640
  "right cone",
641
  "right cylinder",
642
  "right prism",
643
  "right pyramid",
644
  "right triangle",
645
+ "rigid transformation",
646
  "roman numerals",
647
+ "root",
648
+ "rotate",
649
  "rotation symmetry",
650
+ "rotation",
651
+ "round off",
652
+ "round-off",
653
+ "ruler",
654
  "same change rule for subtraction",
655
+ "sample",
656
+ "scalar",
657
+ "scale factor",
658
  "scale model",
659
  "scale of a map",
660
  "scale of a number line",
661
+ "scale",
662
+ "scaled graph",
663
+ "scaled",
664
+ "scalene triangle",
665
+ "scalene",
666
+ "scatter plot",
667
+ "scattergram",
668
  "sector",
669
  "segment",
670
+ "semi-circle",
671
+ "semicircle",
672
  "sequence",
673
+ "set",
674
+ "sign",
675
+ "significant digit",
676
+ "significant figure",
677
  "similar figures",
678
+ "similar",
679
  "simpler form",
680
+ "simplify",
681
+ "simulation",
682
  "situtation diagram",
683
+ "skew line",
684
  "slanted",
685
  "slide rule",
686
+ "slope",
687
+ "solid figure",
688
+ "solution",
689
  "span",
690
+ "speed",
691
+ "sphere",
692
+ "square root",
693
+ "square unit",
694
+ "square",
695
+ "squared",
696
  "stacked bar graph",
697
+ "standard form",
698
  "standard unit",
699
+ "statistic",
700
  "stem and leaf plot",
701
  "step graph",
702
+ "straight angle",
703
  "straightedge",
704
+ "subset of",
705
  "substitute",
706
+ "subtract",
707
  "subtrahend",
708
+ "sum of",
709
+ "sum",
710
+ "supplementary angle",
711
+ "surface area",
712
  "surface",
713
+ "survey",
714
  "symmetric",
715
+ "symmetry",
716
+ "system of equation",
717
+ "system of",
718
+ "table",
719
+ "take from ten",
720
  "tally",
721
+ "tangent circle",
722
  "tangent",
723
+ "tangram",
724
+ "tape diagram",
725
  "temperature",
726
  "template",
727
+ "tens place",
728
+ "tenth",
729
+ "term",
730
+ "terminating decimal",
731
+ "tessellat",
732
+ "tessellate",
733
+ "tessellation",
734
  "tetrahedron",
735
+ "tetromino",
736
  "theorem",
737
+ "thermometer",
738
+ "thousand",
739
+ "thousandth",
740
  "tile",
741
  "tiling",
742
  "time graph",
 
744
  "top heavy fraction",
745
  "topological",
746
  "topology",
747
+ "total area",
748
+ "total of",
749
+ "total surface",
750
+ "total volume",
751
  "trade first subtraction",
752
+ "transformation",
753
+ "translation",
754
+ "transversal",
755
+ "trapezoid",
756
  "tree diagram",
757
+ "triangle",
758
  "triangular",
759
  "true number sentence",
760
  "truncate",
761
+ "twin prime",
762
+ "two-way table",
763
+ "unit cube",
764
+ "unit form",
765
+ "unit fraction",
766
+ "unit interval",
767
+ "unit price",
768
+ "unit rate",
769
+ "unit square",
770
+ "unit",
771
+ "unknown",
772
+ "unlike denominator",
773
+ "unlike fraction",
774
+ "value",
775
  "vanishing ",
776
+ "variability",
777
+ "variable",
778
+ "velocity",
779
  "venn diagram",
780
  "vernal equinox",
781
+ "vertex",
782
+ "vertical",
783
+ "volume of",
784
+ "volume",
785
  "weight",
786
+ "whole number",
787
+ "whole unit",
788
+ "whole",
789
  "width",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
790
  "withdrawal",
791
+ "word form",
792
+ "x axes",
793
+ "x axis",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
794
  "x intercept",
795
+ "x-axes",
796
  "x-axis",
797
+ "y axes",
798
+ "y axis",
799
  "y intercept",
800
+ "y-axes",
801
  "y-axis",
802
  "y-intercept",
803
+ "yard",
804
  "zero property of multiplication",
805
+ "zero",
 
 
 
 
 
 
 
 
 
 
 
 
 
806
  ]
807
 
808
+
809
+ p = inflect.engine()
810
+
811
+ def singular_to_plural(word):
812
+ """Convert singular words to plural using inflect."""
813
+ plural = p.plural(word)
814
+ return plural if plural else word
815
+
816
+ plural_MATH_WORDS = [singular_to_plural(word) for word in MATH_WORDS]
817
+
818
+ MATH_WORDS += plural_MATH_WORDS
819
+
820
  def get_num_words(text):
821
  if not isinstance(text, str):
822
  print("%s is not a string" % text)