alessandro trinca tornidor committed on
Commit
bc44985
1 Parent(s): d0f407b

test: add missing test cases for get_selected_word(), fix wrong use of regex, remove some unused functions

Browse files
.coveragerc ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ [run]
2
+ source = samgis
3
+ omit = ./venv/*,__version__.py,*tests*,*apps.py,*manage.py,*__init__.py,*migrations*,*asgi*,*wsgi*,*admin.py,*urls.py,./tests/*,aip_trainer/lambdas/js.py
4
+
5
+ [report]
6
+ omit = ./venv/*,*tests*,*apps.py,*manage.py,*__init__.py,*migrations*,*asgi*,*wsgi*,*admin.py,*urls.py,./tests/*,aip_trainer/lambdas/js.py
7
+
8
+ exclude_lines =
9
+ if __name__ == .__main__.:
aip_trainer/WordMatching.py CHANGED
@@ -157,20 +157,6 @@ def get_best_mapped_words(words_estimated: list, words_real: list) -> tuple[list
157
  return mapped_words, mapped_words_indices
158
 
159
 
160
- # Faster, but not optimal
161
- def get_best_mapped_words_dtw(words_estimated: list, words_real: list) -> list:
162
-
163
- from dtwalign import dtw_from_distance_matrix
164
- word_distance_matrix = get_word_distance_matrix(
165
- words_estimated, words_real)
166
- mapped_indices = dtw_from_distance_matrix(
167
- word_distance_matrix).path[:-1, 0]
168
-
169
- mapped_words, mapped_words_indices = get_resulting_string(
170
- mapped_indices, words_estimated, words_real)
171
- return mapped_words, mapped_words_indices
172
-
173
-
174
  def getWhichLettersWereTranscribedCorrectly(real_word, transcribed_word):
175
  is_leter_correct = [None]*len(real_word)
176
  for idx, letter in enumerate(real_word):
@@ -179,17 +165,3 @@ def getWhichLettersWereTranscribedCorrectly(real_word, transcribed_word):
179
  else:
180
  is_leter_correct[idx] = 0
181
  return is_leter_correct
182
-
183
-
184
- def parseLetterErrorsToHTML(word_real, is_leter_correct):
185
- word_colored = ''
186
- correct_color_start = '*'
187
- correct_color_end = '*'
188
- wrong_color_start = '-'
189
- wrong_color_end = '-'
190
- for idx, letter in enumerate(word_real):
191
- if is_leter_correct[idx] == 1:
192
- word_colored += correct_color_start + letter+correct_color_end
193
- else:
194
- word_colored += wrong_color_start + letter+wrong_color_end
195
- return word_colored
 
157
  return mapped_words, mapped_words_indices
158
 
159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  def getWhichLettersWereTranscribedCorrectly(real_word, transcribed_word):
161
  is_leter_correct = [None]*len(real_word)
162
  for idx, letter in enumerate(real_word):
 
165
  else:
166
  is_leter_correct[idx] = 0
167
  return is_leter_correct
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aip_trainer/utils/middlewares.py DELETED
File without changes
tests/lambdas/test_lambdaSpeechToScore.py CHANGED
@@ -9,7 +9,11 @@ from aip_trainer.lambdas import lambdaSpeechToScore
9
  from tests import EVENTS_FOLDER
10
 
11
 
12
- text_dict = {"de": "Ich bin Alex, wer bist du?", "en": "Hi there, how are you?"}
 
 
 
 
13
  expected_output = {
14
  "de": {
15
  "real_transcript": text_dict["de"],
@@ -55,12 +59,12 @@ def assert_raises_get_speech_to_score_dict(self, real_text, file_bytes_or_audiot
55
 
56
  def check_value_by_field(value, match):
57
  import re
58
-
59
  assert len(value.strip()) > 0
60
  for word in value.lstrip().rstrip().split(" "):
61
  word_check = re.findall(match, word.strip())
62
  assert len(word_check) == 1
63
  assert word_check[0] == word.strip()
 
64
 
65
 
66
  def check_output_by_field(output, key, match, expected_output):
@@ -80,9 +84,10 @@ def check_output(self, output, expected_output, check_audio_files=False):
80
  output = check_output_by_field(
81
  output, "is_letter_correct_all_words", "[01]+", expected_output
82
  )
83
- output = check_output_by_field(output, "end_time", "\d+\.\d+", expected_output)
 
84
  output = check_output_by_field(
85
- output, "start_time", "\d+\.\d+", expected_output
86
  )
87
  pronunciation_accuracy = output["pronunciation_accuracy"]
88
  assert isinstance(pronunciation_accuracy, float)
@@ -291,6 +296,87 @@ class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
291
  def test_get_speech_to_score_dict__empty_language(self):
292
  assert_raises_get_speech_to_score_dict(self, "text fake", "fake_file", "", NotImplementedError, "Not tested/supported with '' language...")
293
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
295
  if __name__ == "__main__":
296
  unittest.main()
 
9
  from tests import EVENTS_FOLDER
10
 
11
 
12
+ text_dict = {
13
+ "de": "Ich bin Alex, wer bist du?",
14
+ "de1": "Hallo, wie geht es dir?",
15
+ "en": "Hi there, how are you?",
16
+ }
17
  expected_output = {
18
  "de": {
19
  "real_transcript": text_dict["de"],
 
59
 
60
  def check_value_by_field(value, match):
61
  import re
 
62
  assert len(value.strip()) > 0
63
  for word in value.lstrip().rstrip().split(" "):
64
  word_check = re.findall(match, word.strip())
65
  assert len(word_check) == 1
66
  assert word_check[0] == word.strip()
67
+ print("ok")
68
 
69
 
70
  def check_output_by_field(output, key, match, expected_output):
 
84
  output = check_output_by_field(
85
  output, "is_letter_correct_all_words", "[01]+", expected_output
86
  )
87
+ match_numbers = "\\d+\\.\\d+"
88
+ output = check_output_by_field(output, "end_time", match_numbers, expected_output)
89
  output = check_output_by_field(
90
+ output, "start_time", match_numbers, expected_output
91
  )
92
  pronunciation_accuracy = output["pronunciation_accuracy"]
93
  assert isinstance(pronunciation_accuracy, float)
 
296
  def test_get_speech_to_score_dict__empty_language(self):
297
  assert_raises_get_speech_to_score_dict(self, "text fake", "fake_file", "", NotImplementedError, "Not tested/supported with '' language...")
298
 
299
+ def test_get_selected_word_valid_index_de_ok(self):
300
+ language = "de"
301
+ path = EVENTS_FOLDER / f"test_{language}_easy.wav"
302
+ input_text = text_dict["de1"]
303
+ _, _, _, _, _, _, _, output_json = lambdaSpeechToScore.get_speech_to_score_tuple(
304
+ input_text,
305
+ str(path), language,
306
+ False
307
+ )
308
+ idx_recorded_word = 2
309
+ output_loaded = json.loads(output_json)
310
+ audio_file, word, duration = lambdaSpeechToScore.get_selected_word(idx_recorded_word, output_json)
311
+ audio_file_path = Path(audio_file)
312
+ assert audio_file_path.exists() and audio_file_path.is_file()
313
+ assert duration > 0
314
+ words_list = text_dict["de1"].split()
315
+ assert word == words_list[idx_recorded_word]
316
+ for file_to_del in output_loaded["audio_files"]:
317
+ Path(file_to_del).unlink()
318
+
319
+ def test_get_selected_word_valid_index_en_ok(self):
320
+ language = "en"
321
+ path = EVENTS_FOLDER / f"test_{language}_easy.wav"
322
+ _, _, _, _, _, _, _, output_json = lambdaSpeechToScore.get_speech_to_score_tuple(
323
+ text_dict[language],
324
+ str(path), language,
325
+ False
326
+ )
327
+ idx_recorded_word = 2
328
+ output_loaded = json.loads(output_json)
329
+ audio_file, word, duration = lambdaSpeechToScore.get_selected_word(idx_recorded_word, output_json)
330
+ audio_file_path = Path(audio_file)
331
+ assert audio_file_path.exists() and audio_file_path.is_file()
332
+ assert duration > 0
333
+ words_list = text_dict[language].split()
334
+ assert word == words_list[idx_recorded_word]
335
+ for file_to_del in output_loaded["audio_files"]:
336
+ Path(file_to_del).unlink()
337
+
338
+ def test_get_selected_word_invalid_index_de(self):
339
+ language = "de"
340
+ path = EVENTS_FOLDER / f"test_{language}_easy.wav"
341
+ _, _, _, _, _, _, _, output_json = lambdaSpeechToScore.get_speech_to_score_tuple(
342
+ text_dict["de1"],
343
+ str(path), language,
344
+ False
345
+ )
346
+ with self.assertRaises(IndexError):
347
+ try:
348
+ lambdaSpeechToScore.get_selected_word(120, output_json)
349
+ except IndexError as ie:
350
+ msg = str(ie)
351
+ assert msg == 'list index out of range'
352
+ raise ie
353
+
354
+ def test_get_selected_word_invalid_index_en(self):
355
+ language = "en"
356
+ path = EVENTS_FOLDER / f"test_{language}_easy.wav"
357
+ _, _, _, _, _, _, _, output_json = lambdaSpeechToScore.get_speech_to_score_tuple(
358
+ text_dict[language],
359
+ str(path), language,
360
+ False
361
+ )
362
+ with self.assertRaises(IndexError):
363
+ try:
364
+ lambdaSpeechToScore.get_selected_word(120, output_json)
365
+ except IndexError as ie:
366
+ msg = str(ie)
367
+ assert msg == 'list index out of range'
368
+ raise ie
369
+
370
+ def test_get_selected_word_empty_transcripts(self):
371
+ raw_json_output = json.dumps({
372
+ "audio_files": [],
373
+ "real_transcripts": "",
374
+ "audio_durations": []
375
+ })
376
+ idx_recorded_word = 0
377
+ with self.assertRaises(IndexError):
378
+ lambdaSpeechToScore.get_selected_word(idx_recorded_word, raw_json_output)
379
+
380
 
381
  if __name__ == "__main__":
382
  unittest.main()