ikarasz committed on
Commit
9b16b70
·
1 Parent(s): 199c4b9

handle plurals of math words

Browse files
Files changed (1) hide show
  1. handler.py +22 -18
handler.py CHANGED
@@ -95,7 +95,6 @@ class Utterance:
95
  f"text='{self.text}', uid={self.uid}," \
96
  f"starttime={self.starttime}, endtime={self.endtime}, props={self.props})"
97
 
98
-
99
  class Transcript:
100
  def __init__(self, **kwargs):
101
  self.utterances = []
@@ -183,13 +182,6 @@ class Transcript:
183
  else:
184
  student_dict.update(general_words)
185
 
186
- def dict_to_list(d, category):
187
- combined_dict = Counter()
188
- for word, count in d.items():
189
- singular_word = plural_to_singular(word)
190
- combined_dict[singular_word] += count
191
- return [{'text': word, 'value': count, 'category': category} for word, count in combined_dict.items()]
192
-
193
  # Sorting and trimming dictionaries
194
  dict_list = dict_to_list(teacher_dict, 'general') + dict_to_list(student_dict, 'general')
195
  uptake_dict_list = dict_to_list(uptake_teacher_dict, 'teacher')
@@ -224,7 +216,6 @@ class Transcript:
224
  def __repr__(self):
225
  return f"Transcript(utterances={self.utterances}, custom_params={self.params})"
226
 
227
-
228
  class QuestionModel:
229
  def __init__(self, device, tokenizer, input_builder, max_length=300, path=QUESTION_MODEL):
230
  print("Loading models...")
@@ -376,6 +367,13 @@ class FocusingQuestionModel:
376
  token_type_ids=instance["token_type_ids"])
377
  return output
378
 
 
 
 
 
 
 
 
379
  def load_math_terms():
380
  math_regexes = []
381
  math_terms_dict = {}
@@ -394,6 +392,7 @@ def run_math_density(transcript):
394
  sorted_regexes = sorted(math_regexes, key=len, reverse=True)
395
  teacher_math_word_cloud = {}
396
  student_math_word_cloud = {}
 
397
  for i, utt in enumerate(transcript.utterances):
398
  text = utt.get_clean_text(remove_punct=True)
399
  num_matches = 0
@@ -418,21 +417,26 @@ def run_math_density(transcript):
418
  num_matches += len(matches)
419
  utt.num_math_terms = num_matches
420
  utt.math_terms = list(match_list)
 
 
421
  teacher_dict_list = []
422
  student_dict_list = []
423
  dict_list = []
424
- for word in teacher_math_word_cloud.keys():
425
- teacher_dict_list.append(
426
- {'text': word, 'value': teacher_math_word_cloud[word], 'category': "math"})
427
- dict_list.append({'text': word, 'value': teacher_math_word_cloud[word], 'category': "math"})
428
- for word in student_math_word_cloud.keys():
429
- student_dict_list.append(
430
- {'text': word, 'value': student_math_word_cloud[word], 'category': "math"})
431
- dict_list.append({'text': word, 'value': student_math_word_cloud[word], 'category': "math"})
 
 
432
  sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
433
  sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
434
  sorted_student_dict_list = sorted(student_dict_list, key=lambda x: x['value'], reverse=True)
435
- # return sorted_dict_list[:50]
 
436
  return sorted_dict_list[:50], sorted_teacher_dict_list[:50], sorted_student_dict_list[:50]
437
 
438
  class EndpointHandler():
 
95
  f"text='{self.text}', uid={self.uid}," \
96
  f"starttime={self.starttime}, endtime={self.endtime}, props={self.props})"
97
 
 
98
  class Transcript:
99
  def __init__(self, **kwargs):
100
  self.utterances = []
 
182
  else:
183
  student_dict.update(general_words)
184
 
 
 
 
 
 
 
 
185
  # Sorting and trimming dictionaries
186
  dict_list = dict_to_list(teacher_dict, 'general') + dict_to_list(student_dict, 'general')
187
  uptake_dict_list = dict_to_list(uptake_teacher_dict, 'teacher')
 
216
  def __repr__(self):
217
  return f"Transcript(utterances={self.utterances}, custom_params={self.params})"
218
 
 
219
  class QuestionModel:
220
  def __init__(self, device, tokenizer, input_builder, max_length=300, path=QUESTION_MODEL):
221
  print("Loading models...")
 
367
  token_type_ids=instance["token_type_ids"])
368
  return output
369
 
370
+ def dict_to_list(d, category):
371
+ combined_dict = Counter()
372
+ for word, count in d.items():
373
+ singular_word = plural_to_singular(word)
374
+ combined_dict[singular_word] += count
375
+ return [{'text': word, 'value': count, 'category': category} for word, count in combined_dict.items()]
376
+
377
  def load_math_terms():
378
  math_regexes = []
379
  math_terms_dict = {}
 
392
  sorted_regexes = sorted(math_regexes, key=len, reverse=True)
393
  teacher_math_word_cloud = {}
394
  student_math_word_cloud = {}
395
+
396
  for i, utt in enumerate(transcript.utterances):
397
  text = utt.get_clean_text(remove_punct=True)
398
  num_matches = 0
 
417
  num_matches += len(matches)
418
  utt.num_math_terms = num_matches
419
  utt.math_terms = list(match_list)
420
+
421
+ # Initialize lists
422
  teacher_dict_list = []
423
  student_dict_list = []
424
  dict_list = []
425
+
426
+ # Process teacher_math_word_cloud
427
+ teacher_dict_list = dict_to_list(teacher_math_word_cloud, 'math')
428
+ dict_list.extend(teacher_dict_list)
429
+
430
+ # Process student_math_word_cloud
431
+ student_dict_list = dict_to_list(student_math_word_cloud, 'math')
432
+ dict_list.extend(student_dict_list)
433
+
434
+ # Sort the lists
435
  sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
436
  sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
437
  sorted_student_dict_list = sorted(student_dict_list, key=lambda x: x['value'], reverse=True)
438
+
439
+ # Return the sorted lists
440
  return sorted_dict_list[:50], sorted_teacher_dict_list[:50], sorted_student_dict_list[:50]
441
 
442
  class EndpointHandler():