Group singular and plural forms in word clouds
Browse files- handler.py +15 -17
- utils.py +5 -1
handler.py
CHANGED
@@ -10,7 +10,7 @@ nltk.download('stopwords')
|
|
10 |
|
11 |
from utils import clean_str, clean_str_nopunct
|
12 |
import torch
|
13 |
-
from utils import MultiHeadModel, BertInputBuilder, get_num_words, MATH_PREFIXES, MATH_WORDS
|
14 |
|
15 |
import transformers
|
16 |
from transformers import BertTokenizer, BertForSequenceClassification
|
@@ -182,20 +182,20 @@ class Transcript:
|
|
182 |
teacher_dict.update(general_words)
|
183 |
else:
|
184 |
student_dict.update(general_words)
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
|
200 |
sorted_uptake_dict_list = sorted(uptake_dict_list, key=lambda x: x['value'], reverse=True)
|
201 |
sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
|
@@ -265,7 +265,6 @@ class QuestionModel:
|
|
265 |
return_pooler_output=False)
|
266 |
return output
|
267 |
|
268 |
-
|
269 |
class ReasoningModel:
|
270 |
def __init__(self, device, tokenizer, input_builder, max_length=128, path=REASONING_MODEL):
|
271 |
print("Loading models...")
|
@@ -299,7 +298,6 @@ class ReasoningModel:
|
|
299 |
token_type_ids=instance["token_type_ids"])
|
300 |
return output
|
301 |
|
302 |
-
|
303 |
class UptakeModel:
|
304 |
def __init__(self, device, tokenizer, input_builder, max_length=120, path=UPTAKE_MODEL):
|
305 |
print("Loading models...")
|
|
|
10 |
|
11 |
from utils import clean_str, clean_str_nopunct
|
12 |
import torch
|
13 |
+
from utils import MultiHeadModel, BertInputBuilder, get_num_words, MATH_PREFIXES, MATH_WORDS, plural_to_singular
|
14 |
|
15 |
import transformers
|
16 |
from transformers import BertTokenizer, BertForSequenceClassification
|
|
|
182 |
teacher_dict.update(general_words)
|
183 |
else:
|
184 |
student_dict.update(general_words)
|
185 |
+
|
186 |
+
def dict_to_list(d, category):
    """Collapse plural keys of ``d`` onto their singular form and list the totals.

    Every key of ``d`` is passed through ``plural_to_singular``; the counts of
    keys that map to the same result are added together.

    Args:
        d: mapping of word -> count.
        category: value stored under the ``'category'`` key of every entry.

    Returns:
        A list of ``{'text': ..., 'value': ..., 'category': ...}`` dicts, one
        per merged word.
    """
    merged = Counter()
    for key in d:
        # Counter yields 0 for unseen keys, so the first occurrence needs no special case.
        merged[plural_to_singular(key)] += d[key]

    entries = []
    for text, total in merged.items():
        entries.append({'text': text, 'value': total, 'category': category})
    return entries
|
192 |
+
|
193 |
+
# Sorting and trimming dictionaries
|
194 |
+
dict_list = dict_to_list(teacher_dict, 'general') + dict_to_list(student_dict, 'general')
|
195 |
+
uptake_dict_list = dict_to_list(uptake_teacher_dict, 'teacher')
|
196 |
+
teacher_dict_list = dict_to_list(teacher_dict, 'general')
|
197 |
+
student_dict_list = dict_to_list(student_dict, 'general')
|
198 |
+
|
199 |
sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
|
200 |
sorted_uptake_dict_list = sorted(uptake_dict_list, key=lambda x: x['value'], reverse=True)
|
201 |
sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
|
|
|
265 |
return_pooler_output=False)
|
266 |
return output
|
267 |
|
|
|
268 |
class ReasoningModel:
|
269 |
def __init__(self, device, tokenizer, input_builder, max_length=128, path=REASONING_MODEL):
|
270 |
print("Loading models...")
|
|
|
298 |
token_type_ids=instance["token_type_ids"])
|
299 |
return output
|
300 |
|
|
|
301 |
class UptakeModel:
|
302 |
def __init__(self, device, tokenizer, input_builder, max_length=120, path=UPTAKE_MODEL):
|
303 |
print("Loading models...")
|
utils.py
CHANGED
@@ -811,7 +811,11 @@ p = inflect.engine()
|
|
811 |
def singular_to_plural(word):
    """Return the plural of ``word`` as produced by the shared inflect engine ``p``."""
    return p.plural(word)
|
|
|
|
|
|
|
|
|
815 |
|
816 |
plural_MATH_WORDS = [singular_to_plural(word) for word in MATH_WORDS]
|
817 |
|
|
|
811 |
def singular_to_plural(word):
    """Return the inflect plural of ``word``.

    Falls back to ``word`` unchanged when the inflect engine ``p`` yields a
    falsy result (for example an empty string).
    """
    return p.plural(word) or word
|
815 |
+
|
816 |
+
def plural_to_singular(word):
    """Return the singular form of ``word`` using the inflect engine ``p``.

    ``word`` is returned unchanged when ``p.singular_noun`` gives a falsy
    result (inflect documents ``False`` for words it does not treat as plural
    nouns).
    """
    singular = p.singular_noun(word)
    if not singular:
        return word
    return singular
|
819 |
|
820 |
plural_MATH_WORDS = [singular_to_plural(word) for word in MATH_WORDS]
|
821 |
|