Spaces:

aletrn
/

ai-pronunciation-trainer

Running

App Files Community

alessandro trinca tornidor committed 22 days ago

Commit

bc44985

•

1 Parent(s): d0f407b

test: add missing test cases for get_selected_word(), fix wrong use of regex, remove some unused functions

Browse files

Files changed (4) hide show

.coveragerc +9 -0
aip_trainer/WordMatching.py +0 -28
aip_trainer/utils/middlewares.py +0 -0
tests/lambdas/test_lambdaSpeechToScore.py +90 -4

.coveragerc ADDED Viewed

	@@ -0,0 +1,9 @@

+[run]
+source = samgis
+omit = ./venv/*,__version__.py,*tests*,*apps.py,*manage.py,*__init__.py,*migrations*,*asgi*,*wsgi*,*admin.py,*urls.py,./tests/*,aip_trainer/lambdas/js.py
+[report]
+omit = ./venv/*,*tests*,*apps.py,*manage.py,*__init__.py,*migrations*,*asgi*,*wsgi*,*admin.py,*urls.py,./tests/*,aip_trainer/lambdas/js.py
+exclude_lines =
+     if __name__ == .__main__.:

aip_trainer/WordMatching.py CHANGED Viewed

@@ -157,20 +157,6 @@ def get_best_mapped_words(words_estimated: list, words_real: list) -> tuple[list
     return mapped_words, mapped_words_indices
-# Faster, but not optimal
-def get_best_mapped_words_dtw(words_estimated: list, words_real: list) -> list:
-    from dtwalign import dtw_from_distance_matrix
-    word_distance_matrix = get_word_distance_matrix(
-        words_estimated, words_real)
-    mapped_indices = dtw_from_distance_matrix(
-        word_distance_matrix).path[:-1, 0]
-    mapped_words, mapped_words_indices = get_resulting_string(
-        mapped_indices, words_estimated, words_real)
-    return mapped_words, mapped_words_indices
 def getWhichLettersWereTranscribedCorrectly(real_word, transcribed_word):
     is_leter_correct = [None]*len(real_word)
     for idx, letter in enumerate(real_word):
@@ -179,17 +165,3 @@ def getWhichLettersWereTranscribedCorrectly(real_word, transcribed_word):
         else:
             is_leter_correct[idx] = 0
     return is_leter_correct
-def parseLetterErrorsToHTML(word_real, is_leter_correct):
-    word_colored = ''
-    correct_color_start = '*'
-    correct_color_end = '*'
-    wrong_color_start = '-'
-    wrong_color_end = '-'
-    for idx, letter in enumerate(word_real):
-        if is_leter_correct[idx] == 1:
-            word_colored += correct_color_start + letter+correct_color_end
-        else:
-            word_colored += wrong_color_start + letter+wrong_color_end
-    return word_colored

     return mapped_words, mapped_words_indices
 def getWhichLettersWereTranscribedCorrectly(real_word, transcribed_word):
     is_leter_correct = [None]*len(real_word)
     for idx, letter in enumerate(real_word):
         else:
             is_leter_correct[idx] = 0
     return is_leter_correct

aip_trainer/utils/middlewares.py DELETED Viewed

File without changes

tests/lambdas/test_lambdaSpeechToScore.py CHANGED Viewed

@@ -9,7 +9,11 @@ from aip_trainer.lambdas import lambdaSpeechToScore
 from tests import EVENTS_FOLDER
-text_dict = {"de": "Ich bin Alex, wer bist du?", "en": "Hi there, how are you?"}
 expected_output = {
     "de": {
         "real_transcript": text_dict["de"],
@@ -55,12 +59,12 @@ def assert_raises_get_speech_to_score_dict(self, real_text, file_bytes_or_audiot
 def check_value_by_field(value, match):
     import re
     assert len(value.strip()) > 0
     for word in value.lstrip().rstrip().split(" "):
         word_check = re.findall(match, word.strip())
         assert len(word_check) == 1
         assert word_check[0] == word.strip()
 def check_output_by_field(output, key, match, expected_output):
@@ -80,9 +84,10 @@ def check_output(self, output, expected_output, check_audio_files=False):
         output = check_output_by_field(
             output, "is_letter_correct_all_words", "[01]+", expected_output
         )
-        output = check_output_by_field(output, "end_time", "\d+\.\d+", expected_output)
         output = check_output_by_field(
-            output, "start_time", "\d+\.\d+", expected_output
         )
         pronunciation_accuracy = output["pronunciation_accuracy"]
         assert isinstance(pronunciation_accuracy, float)
@@ -291,6 +296,87 @@ class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
     def test_get_speech_to_score_dict__empty_language(self):
         assert_raises_get_speech_to_score_dict(self, "text fake", "fake_file", "", NotImplementedError, "Not tested/supported with '' language...")
 if __name__ == "__main__":
     unittest.main()

 from tests import EVENTS_FOLDER
+text_dict = {
+    "de": "Ich bin Alex, wer bist du?",
+    "de1": "Hallo, wie geht es dir?",
+    "en": "Hi there, how are you?",
+}
 expected_output = {
     "de": {
         "real_transcript": text_dict["de"],
 def check_value_by_field(value, match):
     import re
     assert len(value.strip()) > 0
     for word in value.lstrip().rstrip().split(" "):
         word_check = re.findall(match, word.strip())
         assert len(word_check) == 1
         assert word_check[0] == word.strip()
+    print("ok")
 def check_output_by_field(output, key, match, expected_output):
         output = check_output_by_field(
             output, "is_letter_correct_all_words", "[01]+", expected_output
         )
+        match_numbers = "\\d+\\.\\d+"
+        output = check_output_by_field(output, "end_time", match_numbers, expected_output)
         output = check_output_by_field(
+            output, "start_time", match_numbers, expected_output
         )
         pronunciation_accuracy = output["pronunciation_accuracy"]
         assert isinstance(pronunciation_accuracy, float)
     def test_get_speech_to_score_dict__empty_language(self):
         assert_raises_get_speech_to_score_dict(self, "text fake", "fake_file", "", NotImplementedError, "Not tested/supported with '' language...")
+    def test_get_selected_word_valid_index_de_ok(self):
+        language = "de"
+        path = EVENTS_FOLDER / f"test_{language}_easy.wav"
+        input_text = text_dict["de1"]
+        _, _, _, _, _, _, _, output_json = lambdaSpeechToScore.get_speech_to_score_tuple(
+            input_text,
+            str(path), language,
+            False
+        )
+        idx_recorded_word = 2
+        output_loaded = json.loads(output_json)
+        audio_file, word, duration = lambdaSpeechToScore.get_selected_word(idx_recorded_word, output_json)
+        audio_file_path = Path(audio_file)
+        assert audio_file_path.exists() and audio_file_path.is_file()
+        assert duration > 0
+        words_list = text_dict["de1"].split()
+        assert word == words_list[idx_recorded_word]
+        for file_to_del in output_loaded["audio_files"]:
+            Path(file_to_del).unlink()
+    def test_get_selected_word_valid_index_en_ok(self):
+        language = "en"
+        path = EVENTS_FOLDER / f"test_{language}_easy.wav"
+        _, _, _, _, _, _, _, output_json = lambdaSpeechToScore.get_speech_to_score_tuple(
+            text_dict[language],
+            str(path), language,
+            False
+        )
+        idx_recorded_word = 2
+        output_loaded = json.loads(output_json)
+        audio_file, word, duration = lambdaSpeechToScore.get_selected_word(idx_recorded_word, output_json)
+        audio_file_path = Path(audio_file)
+        assert audio_file_path.exists() and audio_file_path.is_file()
+        assert duration > 0
+        words_list = text_dict[language].split()
+        assert word == words_list[idx_recorded_word]
+        for file_to_del in output_loaded["audio_files"]:
+            Path(file_to_del).unlink()
+    def test_get_selected_word_invalid_index_de(self):
+        language = "de"
+        path = EVENTS_FOLDER / f"test_{language}_easy.wav"
+        _, _, _, _, _, _, _, output_json = lambdaSpeechToScore.get_speech_to_score_tuple(
+            text_dict["de1"],
+            str(path), language,
+            False
+        )
+        with self.assertRaises(IndexError):
+            try:
+                lambdaSpeechToScore.get_selected_word(120, output_json)
+            except IndexError as ie:
+                msg = str(ie)
+                assert msg == 'list index out of range'
+                raise ie
+    def test_get_selected_word_invalid_index_en(self):
+        language = "en"
+        path = EVENTS_FOLDER / f"test_{language}_easy.wav"
+        _, _, _, _, _, _, _, output_json = lambdaSpeechToScore.get_speech_to_score_tuple(
+            text_dict[language],
+            str(path), language,
+            False
+        )
+        with self.assertRaises(IndexError):
+            try:
+                lambdaSpeechToScore.get_selected_word(120, output_json)
+            except IndexError as ie:
+                msg = str(ie)
+                assert msg == 'list index out of range'
+                raise ie
+    def test_get_selected_word_empty_transcripts(self):
+        raw_json_output = json.dumps({
+            "audio_files": [],
+            "real_transcripts": "",
+            "audio_durations": []
+        })
+        idx_recorded_word = 0
+        with self.assertRaises(IndexError):
+            lambdaSpeechToScore.get_selected_word(idx_recorded_word, raw_json_output)
 if __name__ == "__main__":
     unittest.main()