ikarasz committed on
Commit
199c4b9
·
1 Parent(s): f0d3fb6

group singular and plural forms in clouds

Browse files
Files changed (2) hide show
  1. handler.py +15 -17
  2. utils.py +5 -1
handler.py CHANGED
@@ -10,7 +10,7 @@ nltk.download('stopwords')
10
 
11
  from utils import clean_str, clean_str_nopunct
12
  import torch
13
- from utils import MultiHeadModel, BertInputBuilder, get_num_words, MATH_PREFIXES, MATH_WORDS
14
 
15
  import transformers
16
  from transformers import BertTokenizer, BertForSequenceClassification
@@ -182,20 +182,20 @@ class Transcript:
182
  teacher_dict.update(general_words)
183
  else:
184
  student_dict.update(general_words)
185
- dict_list = []
186
- uptake_dict_list = []
187
- teacher_dict_list = []
188
- student_dict_list = []
189
- for word in uptake_teacher_dict.keys():
190
- uptake_dict_list.append({'text': word, 'value': uptake_teacher_dict[word], 'category': 'teacher'})
191
- for word in teacher_dict.keys():
192
- teacher_dict_list.append(
193
- {'text': word, 'value': teacher_dict[word], 'category': 'general'})
194
- dict_list.append({'text': word, 'value': teacher_dict[word], 'category': 'general'})
195
- for word in student_dict.keys():
196
- student_dict_list.append(
197
- {'text': word, 'value': student_dict[word], 'category': 'general'})
198
- dict_list.append({'text': word, 'value': student_dict[word], 'category': 'general'})
199
  sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
200
  sorted_uptake_dict_list = sorted(uptake_dict_list, key=lambda x: x['value'], reverse=True)
201
  sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
@@ -265,7 +265,6 @@ class QuestionModel:
265
  return_pooler_output=False)
266
  return output
267
 
268
-
269
  class ReasoningModel:
270
  def __init__(self, device, tokenizer, input_builder, max_length=128, path=REASONING_MODEL):
271
  print("Loading models...")
@@ -299,7 +298,6 @@ class ReasoningModel:
299
  token_type_ids=instance["token_type_ids"])
300
  return output
301
 
302
-
303
  class UptakeModel:
304
  def __init__(self, device, tokenizer, input_builder, max_length=120, path=UPTAKE_MODEL):
305
  print("Loading models...")
 
10
 
11
  from utils import clean_str, clean_str_nopunct
12
  import torch
13
+ from utils import MultiHeadModel, BertInputBuilder, get_num_words, MATH_PREFIXES, MATH_WORDS, plural_to_singular
14
 
15
  import transformers
16
  from transformers import BertTokenizer, BertForSequenceClassification
 
182
  teacher_dict.update(general_words)
183
  else:
184
  student_dict.update(general_words)
185
+
186
+ def dict_to_list(d, category):
187
+ combined_dict = Counter()
188
+ for word, count in d.items():
189
+ singular_word = plural_to_singular(word)
190
+ combined_dict[singular_word] += count
191
+ return [{'text': word, 'value': count, 'category': category} for word, count in combined_dict.items()]
192
+
193
+ # Build the word lists (plural counts merged into their singular form) ahead of sorting and trimming
194
+ dict_list = dict_to_list(teacher_dict, 'general') + dict_to_list(student_dict, 'general')
195
+ uptake_dict_list = dict_to_list(uptake_teacher_dict, 'teacher')
196
+ teacher_dict_list = dict_to_list(teacher_dict, 'general')
197
+ student_dict_list = dict_to_list(student_dict, 'general')
198
+
199
  sorted_dict_list = sorted(dict_list, key=lambda x: x['value'], reverse=True)
200
  sorted_uptake_dict_list = sorted(uptake_dict_list, key=lambda x: x['value'], reverse=True)
201
  sorted_teacher_dict_list = sorted(teacher_dict_list, key=lambda x: x['value'], reverse=True)
 
265
  return_pooler_output=False)
266
  return output
267
 
 
268
  class ReasoningModel:
269
  def __init__(self, device, tokenizer, input_builder, max_length=128, path=REASONING_MODEL):
270
  print("Loading models...")
 
298
  token_type_ids=instance["token_type_ids"])
299
  return output
300
 
 
301
  class UptakeModel:
302
  def __init__(self, device, tokenizer, input_builder, max_length=120, path=UPTAKE_MODEL):
303
  print("Loading models...")
utils.py CHANGED
@@ -811,7 +811,11 @@ p = inflect.engine()
811
  def singular_to_plural(word):
812
  """Convert singular words to plural using inflect."""
813
  plural = p.plural(word)
814
- return plural if plural else word
 
 
 
 
815
 
816
  plural_MATH_WORDS = [singular_to_plural(word) for word in MATH_WORDS]
817
 
 
811
  def singular_to_plural(word):
812
  """Convert singular words to plural using inflect."""
813
  plural = p.plural(word)
814
+ return plural or word
815
+
816
+ def plural_to_singular(word):
817
+ """Convert plural word to singular using inflect."""
818
+ return p.singular_noun(word) or word
819
 
820
  plural_MATH_WORDS = [singular_to_plural(word) for word in MATH_WORDS]
821