handle plurals of math words
Browse files- handler.py +22 -18
handler.py
CHANGED
@@ -95,7 +95,6 @@ class Utterance:
|
|
95 |
f"text='{self.text}', uid={self.uid}," \
|
96 |
f"starttime={self.starttime}, endtime={self.endtime}, props={self.props})"
|
97 |
|
98 |
-
|
99 |
class Transcript:
|
100 |
def __init__(self, **kwargs):
|
101 |
self.utterances = []
|
@@ -183,13 +182,6 @@ class Transcript:
|
|
183 |
else:
|
184 |
student_dict.update(general_words)
|
185 |
|
186 |
-
def dict_to_list(d, category):
|
187 |
-
combined_dict = Counter()
|
188 |
-
for word, count in d.items():
|
189 |
-
singular_word = plural_to_singular(word)
|
190 |
-
combined_dict[singular_word] += count
|
191 |
-
return [{'text': word, 'value': count, 'category': category} for word, count in combined_dict.items()]
|
192 |
-
|
193 |
# Sorting and trimming dictionaries
|
194 |
dict_list = dict_to_list(teacher_dict, 'general') + dict_to_list(student_dict, 'general')
|
195 |
uptake_dict_list = dict_to_list(uptake_teacher_dict, 'teacher')
|
@@ -224,7 +216,6 @@ class Transcript:
|
|
224 |
def __repr__(self):
|
225 |
return f"Transcript(utterances={self.utterances}, custom_params={self.params})"
|
226 |
|
227 |
-
|
228 |
class QuestionModel:
|
229 |
def __init__(self, device, tokenizer, input_builder, max_length=300, path=QUESTION_MODEL):
|
230 |
print("Loading models...")
|
@@ -376,6 +367,13 @@ class FocusingQuestionModel:
|
|
376 |
token_type_ids=instance["token_type_ids"])
|
377 |
return output
|
378 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
379 |
def load_math_terms():
|
380 |
math_regexes = []
|
381 |
math_terms_dict = {}
|
@@ -394,6 +392,7 @@ def run_math_density(transcript):
|
|
394 |
sorted_regexes = sorted(math_regexes, key=len, reverse=True)
|
395 |
teacher_math_word_cloud = {}
|
396 |
student_math_word_cloud = {}
|
|
|
397 |
for i, utt in enumerate(transcript.utterances):
|
398 |
text = utt.get_clean_text(remove_punct=True)
|
399 |
num_matches = 0
|
@@ -418,21 +417,26 @@ def run_math_density(transcript):
|
|
418 |
num_matches += len(matches)
|
419 |
utt.num_math_terms = num_matches
|
420 |
utt.math_terms = list(match_list)
|
|
|
|
|
421 |
teacher_dict_list = []
|
422 |
student_dict_list = []
|
423 |
dict_list = []
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
|
|
|
|
432 |
sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
|
433 |
sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
|
434 |
sorted_student_dict_list = sorted(student_dict_list, key=lambda x: x['value'], reverse=True)
|
435 |
-
|
|
|
436 |
return sorted_dict_list[:50], sorted_teacher_dict_list[:50], sorted_student_dict_list[:50]
|
437 |
|
438 |
class EndpointHandler():
|
|
|
95 |
f"text='{self.text}', uid={self.uid}," \
|
96 |
f"starttime={self.starttime}, endtime={self.endtime}, props={self.props})"
|
97 |
|
|
|
98 |
class Transcript:
|
99 |
def __init__(self, **kwargs):
|
100 |
self.utterances = []
|
|
|
182 |
else:
|
183 |
student_dict.update(general_words)
|
184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
# Sorting and trimming dictionaries
|
186 |
dict_list = dict_to_list(teacher_dict, 'general') + dict_to_list(student_dict, 'general')
|
187 |
uptake_dict_list = dict_to_list(uptake_teacher_dict, 'teacher')
|
|
|
216 |
def __repr__(self):
|
217 |
return f"Transcript(utterances={self.utterances}, custom_params={self.params})"
|
218 |
|
|
|
219 |
class QuestionModel:
|
220 |
def __init__(self, device, tokenizer, input_builder, max_length=300, path=QUESTION_MODEL):
|
221 |
print("Loading models...")
|
|
|
367 |
token_type_ids=instance["token_type_ids"])
|
368 |
return output
|
369 |
|
370 |
+
def dict_to_list(d, category):
    """Merge word counts by normalized word and return them as tagged entries.

    Each key of ``d`` is passed through ``plural_to_singular`` (defined
    elsewhere in this file; presumably maps a plural to its singular form --
    confirm against that helper). Counts of keys that normalize to the same
    word are added together.

    Args:
        d: Mapping of word -> count.
        category: Value stored under the ``'category'`` key of every entry.

    Returns:
        A list of ``{'text': word, 'value': count, 'category': category}``
        dicts, one per distinct normalized word, in first-seen order.
    """
    totals = Counter()
    for term, occurrences in d.items():
        # Distinct keys can normalize to the same word; accumulate them.
        totals[plural_to_singular(term)] += occurrences

    entries = []
    for term, total in totals.items():
        entries.append({'text': term, 'value': total, 'category': category})
    return entries
|
376 |
+
|
377 |
def load_math_terms():
|
378 |
math_regexes = []
|
379 |
math_terms_dict = {}
|
|
|
392 |
sorted_regexes = sorted(math_regexes, key=len, reverse=True)
|
393 |
teacher_math_word_cloud = {}
|
394 |
student_math_word_cloud = {}
|
395 |
+
|
396 |
for i, utt in enumerate(transcript.utterances):
|
397 |
text = utt.get_clean_text(remove_punct=True)
|
398 |
num_matches = 0
|
|
|
417 |
num_matches += len(matches)
|
418 |
utt.num_math_terms = num_matches
|
419 |
utt.math_terms = list(match_list)
|
420 |
+
|
421 |
+
# Initialize lists
|
422 |
teacher_dict_list = []
|
423 |
student_dict_list = []
|
424 |
dict_list = []
|
425 |
+
|
426 |
+
# Process teacher_math_word_cloud
|
427 |
+
teacher_dict_list = dict_to_list(teacher_math_word_cloud, 'math')
|
428 |
+
dict_list.extend(teacher_dict_list)
|
429 |
+
|
430 |
+
# Process student_math_word_cloud
|
431 |
+
student_dict_list = dict_to_list(student_math_word_cloud, 'math')
|
432 |
+
dict_list.extend(student_dict_list)
|
433 |
+
|
434 |
+
# Sort the lists
|
435 |
sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
|
436 |
sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
|
437 |
sorted_student_dict_list = sorted(student_dict_list, key=lambda x: x['value'], reverse=True)
|
438 |
+
|
439 |
+
# Return the sorted lists
|
440 |
return sorted_dict_list[:50], sorted_teacher_dict_list[:50], sorted_student_dict_list[:50]
|
441 |
|
442 |
class EndpointHandler():
|