Spaces:
Running
Running
alessandro trinca tornidor
committed on
Commit
•
bc44985
1
Parent(s):
d0f407b
test: add missing test cases for get_selected_word(), fix wrong use of regex, remove some unused functions
Browse files
- .coveragerc +9 -0
- aip_trainer/WordMatching.py +0 -28
- aip_trainer/utils/middlewares.py +0 -0
- tests/lambdas/test_lambdaSpeechToScore.py +90 -4
.coveragerc
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[run]
|
2 |
+
source = samgis
|
3 |
+
omit = ./venv/*,__version__.py,*tests*,*apps.py,*manage.py,*__init__.py,*migrations*,*asgi*,*wsgi*,*admin.py,*urls.py,./tests/*,aip_trainer/lambdas/js.py
|
4 |
+
|
5 |
+
[report]
|
6 |
+
omit = ./venv/*,*tests*,*apps.py,*manage.py,*__init__.py,*migrations*,*asgi*,*wsgi*,*admin.py,*urls.py,./tests/*,aip_trainer/lambdas/js.py
|
7 |
+
|
8 |
+
exclude_lines =
|
9 |
+
if __name__ == .__main__.:
|
aip_trainer/WordMatching.py
CHANGED
@@ -157,20 +157,6 @@ def get_best_mapped_words(words_estimated: list, words_real: list) -> tuple[list
|
|
157 |
return mapped_words, mapped_words_indices
|
158 |
|
159 |
|
160 |
-
# Faster, but not optimal
|
161 |
-
def get_best_mapped_words_dtw(words_estimated: list, words_real: list) -> list:
|
162 |
-
|
163 |
-
from dtwalign import dtw_from_distance_matrix
|
164 |
-
word_distance_matrix = get_word_distance_matrix(
|
165 |
-
words_estimated, words_real)
|
166 |
-
mapped_indices = dtw_from_distance_matrix(
|
167 |
-
word_distance_matrix).path[:-1, 0]
|
168 |
-
|
169 |
-
mapped_words, mapped_words_indices = get_resulting_string(
|
170 |
-
mapped_indices, words_estimated, words_real)
|
171 |
-
return mapped_words, mapped_words_indices
|
172 |
-
|
173 |
-
|
174 |
def getWhichLettersWereTranscribedCorrectly(real_word, transcribed_word):
|
175 |
is_leter_correct = [None]*len(real_word)
|
176 |
for idx, letter in enumerate(real_word):
|
@@ -179,17 +165,3 @@ def getWhichLettersWereTranscribedCorrectly(real_word, transcribed_word):
|
|
179 |
else:
|
180 |
is_leter_correct[idx] = 0
|
181 |
return is_leter_correct
|
182 |
-
|
183 |
-
|
184 |
-
def parseLetterErrorsToHTML(word_real, is_leter_correct):
|
185 |
-
word_colored = ''
|
186 |
-
correct_color_start = '*'
|
187 |
-
correct_color_end = '*'
|
188 |
-
wrong_color_start = '-'
|
189 |
-
wrong_color_end = '-'
|
190 |
-
for idx, letter in enumerate(word_real):
|
191 |
-
if is_leter_correct[idx] == 1:
|
192 |
-
word_colored += correct_color_start + letter+correct_color_end
|
193 |
-
else:
|
194 |
-
word_colored += wrong_color_start + letter+wrong_color_end
|
195 |
-
return word_colored
|
|
|
157 |
return mapped_words, mapped_words_indices
|
158 |
|
159 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
def getWhichLettersWereTranscribedCorrectly(real_word, transcribed_word):
|
161 |
is_leter_correct = [None]*len(real_word)
|
162 |
for idx, letter in enumerate(real_word):
|
|
|
165 |
else:
|
166 |
is_leter_correct[idx] = 0
|
167 |
return is_leter_correct
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
aip_trainer/utils/middlewares.py
DELETED
File without changes
|
tests/lambdas/test_lambdaSpeechToScore.py
CHANGED
@@ -9,7 +9,11 @@ from aip_trainer.lambdas import lambdaSpeechToScore
|
|
9 |
from tests import EVENTS_FOLDER
|
10 |
|
11 |
|
12 |
-
text_dict = {
|
|
|
|
|
|
|
|
|
13 |
expected_output = {
|
14 |
"de": {
|
15 |
"real_transcript": text_dict["de"],
|
@@ -55,12 +59,12 @@ def assert_raises_get_speech_to_score_dict(self, real_text, file_bytes_or_audiot
|
|
55 |
|
56 |
def check_value_by_field(value, match):
|
57 |
import re
|
58 |
-
|
59 |
assert len(value.strip()) > 0
|
60 |
for word in value.lstrip().rstrip().split(" "):
|
61 |
word_check = re.findall(match, word.strip())
|
62 |
assert len(word_check) == 1
|
63 |
assert word_check[0] == word.strip()
|
|
|
64 |
|
65 |
|
66 |
def check_output_by_field(output, key, match, expected_output):
|
@@ -80,9 +84,10 @@ def check_output(self, output, expected_output, check_audio_files=False):
|
|
80 |
output = check_output_by_field(
|
81 |
output, "is_letter_correct_all_words", "[01]+", expected_output
|
82 |
)
|
83 |
-
|
|
|
84 |
output = check_output_by_field(
|
85 |
-
output, "start_time",
|
86 |
)
|
87 |
pronunciation_accuracy = output["pronunciation_accuracy"]
|
88 |
assert isinstance(pronunciation_accuracy, float)
|
@@ -291,6 +296,87 @@ class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
|
|
291 |
def test_get_speech_to_score_dict__empty_language(self):
|
292 |
assert_raises_get_speech_to_score_dict(self, "text fake", "fake_file", "", NotImplementedError, "Not tested/supported with '' language...")
|
293 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
294 |
|
295 |
if __name__ == "__main__":
|
296 |
unittest.main()
|
|
|
9 |
from tests import EVENTS_FOLDER
|
10 |
|
11 |
|
12 |
+
text_dict = {
|
13 |
+
"de": "Ich bin Alex, wer bist du?",
|
14 |
+
"de1": "Hallo, wie geht es dir?",
|
15 |
+
"en": "Hi there, how are you?",
|
16 |
+
}
|
17 |
expected_output = {
|
18 |
"de": {
|
19 |
"real_transcript": text_dict["de"],
|
|
|
59 |
|
60 |
def check_value_by_field(value, match):
|
61 |
import re
|
|
|
62 |
assert len(value.strip()) > 0
|
63 |
for word in value.lstrip().rstrip().split(" "):
|
64 |
word_check = re.findall(match, word.strip())
|
65 |
assert len(word_check) == 1
|
66 |
assert word_check[0] == word.strip()
|
67 |
+
print("ok")
|
68 |
|
69 |
|
70 |
def check_output_by_field(output, key, match, expected_output):
|
|
|
84 |
output = check_output_by_field(
|
85 |
output, "is_letter_correct_all_words", "[01]+", expected_output
|
86 |
)
|
87 |
+
match_numbers = "\\d+\\.\\d+"
|
88 |
+
output = check_output_by_field(output, "end_time", match_numbers, expected_output)
|
89 |
output = check_output_by_field(
|
90 |
+
output, "start_time", match_numbers, expected_output
|
91 |
)
|
92 |
pronunciation_accuracy = output["pronunciation_accuracy"]
|
93 |
assert isinstance(pronunciation_accuracy, float)
|
|
|
296 |
def test_get_speech_to_score_dict__empty_language(self):
|
297 |
assert_raises_get_speech_to_score_dict(self, "text fake", "fake_file", "", NotImplementedError, "Not tested/supported with '' language...")
|
298 |
|
299 |
+
def test_get_selected_word_valid_index_de_ok(self):
|
300 |
+
language = "de"
|
301 |
+
path = EVENTS_FOLDER / f"test_{language}_easy.wav"
|
302 |
+
input_text = text_dict["de1"]
|
303 |
+
_, _, _, _, _, _, _, output_json = lambdaSpeechToScore.get_speech_to_score_tuple(
|
304 |
+
input_text,
|
305 |
+
str(path), language,
|
306 |
+
False
|
307 |
+
)
|
308 |
+
idx_recorded_word = 2
|
309 |
+
output_loaded = json.loads(output_json)
|
310 |
+
audio_file, word, duration = lambdaSpeechToScore.get_selected_word(idx_recorded_word, output_json)
|
311 |
+
audio_file_path = Path(audio_file)
|
312 |
+
assert audio_file_path.exists() and audio_file_path.is_file()
|
313 |
+
assert duration > 0
|
314 |
+
words_list = text_dict["de1"].split()
|
315 |
+
assert word == words_list[idx_recorded_word]
|
316 |
+
for file_to_del in output_loaded["audio_files"]:
|
317 |
+
Path(file_to_del).unlink()
|
318 |
+
|
319 |
+
def test_get_selected_word_valid_index_en_ok(self):
|
320 |
+
language = "en"
|
321 |
+
path = EVENTS_FOLDER / f"test_{language}_easy.wav"
|
322 |
+
_, _, _, _, _, _, _, output_json = lambdaSpeechToScore.get_speech_to_score_tuple(
|
323 |
+
text_dict[language],
|
324 |
+
str(path), language,
|
325 |
+
False
|
326 |
+
)
|
327 |
+
idx_recorded_word = 2
|
328 |
+
output_loaded = json.loads(output_json)
|
329 |
+
audio_file, word, duration = lambdaSpeechToScore.get_selected_word(idx_recorded_word, output_json)
|
330 |
+
audio_file_path = Path(audio_file)
|
331 |
+
assert audio_file_path.exists() and audio_file_path.is_file()
|
332 |
+
assert duration > 0
|
333 |
+
words_list = text_dict[language].split()
|
334 |
+
assert word == words_list[idx_recorded_word]
|
335 |
+
for file_to_del in output_loaded["audio_files"]:
|
336 |
+
Path(file_to_del).unlink()
|
337 |
+
|
338 |
+
def test_get_selected_word_invalid_index_de(self):
|
339 |
+
language = "de"
|
340 |
+
path = EVENTS_FOLDER / f"test_{language}_easy.wav"
|
341 |
+
_, _, _, _, _, _, _, output_json = lambdaSpeechToScore.get_speech_to_score_tuple(
|
342 |
+
text_dict["de1"],
|
343 |
+
str(path), language,
|
344 |
+
False
|
345 |
+
)
|
346 |
+
with self.assertRaises(IndexError):
|
347 |
+
try:
|
348 |
+
lambdaSpeechToScore.get_selected_word(120, output_json)
|
349 |
+
except IndexError as ie:
|
350 |
+
msg = str(ie)
|
351 |
+
assert msg == 'list index out of range'
|
352 |
+
raise ie
|
353 |
+
|
354 |
+
def test_get_selected_word_invalid_index_en(self):
|
355 |
+
language = "en"
|
356 |
+
path = EVENTS_FOLDER / f"test_{language}_easy.wav"
|
357 |
+
_, _, _, _, _, _, _, output_json = lambdaSpeechToScore.get_speech_to_score_tuple(
|
358 |
+
text_dict[language],
|
359 |
+
str(path), language,
|
360 |
+
False
|
361 |
+
)
|
362 |
+
with self.assertRaises(IndexError):
|
363 |
+
try:
|
364 |
+
lambdaSpeechToScore.get_selected_word(120, output_json)
|
365 |
+
except IndexError as ie:
|
366 |
+
msg = str(ie)
|
367 |
+
assert msg == 'list index out of range'
|
368 |
+
raise ie
|
369 |
+
|
370 |
+
def test_get_selected_word_empty_transcripts(self):
|
371 |
+
raw_json_output = json.dumps({
|
372 |
+
"audio_files": [],
|
373 |
+
"real_transcripts": "",
|
374 |
+
"audio_durations": []
|
375 |
+
})
|
376 |
+
idx_recorded_word = 0
|
377 |
+
with self.assertRaises(IndexError):
|
378 |
+
lambdaSpeechToScore.get_selected_word(idx_recorded_word, raw_json_output)
|
379 |
+
|
380 |
|
381 |
if __name__ == "__main__":
|
382 |
unittest.main()
|