Spaces:
Running
Running
alessandro trinca tornidor
committed on
Commit
·
025384a
1
Parent(s):
43a7191
test: add e2e test TestGetAccuracyFromRecordedAudio
Browse files
tests/events/GetAccuracyFromRecordedAudio.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tests/test_GetAccuracyFromRecordedAudio.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import unittest
|
3 |
+
|
4 |
+
from aip_trainer.lambdas import lambdaSpeechToScore
|
5 |
+
from tests import EVENTS_FOLDER
|
6 |
+
|
7 |
+
|
8 |
+
def check_output_by_field(output, key, match, expected_output):
    """Check the format of one space-separated field, then overwrite it.

    The value stored at ``output[key]`` must be a non-blank string whose
    tokens (separated by single spaces) each match the regular expression
    ``match`` in full. Once the format is validated, the value is replaced
    by ``expected_output[key]`` so that a later whole-dict comparison only
    fails on the remaining fields.

    Args:
        output: dict produced by the code under test; modified in place.
        key: name of the field to validate and overwrite.
        match: regular expression every token must match entirely.
        expected_output: dict holding the reference value for ``key``.

    Returns:
        The same ``output`` dict, with ``output[key]`` overwritten.

    Raises:
        AssertionError: if the field is blank or a token does not match.
    """
    import re

    field_value = output[key].strip()
    assert len(field_value) > 0, f"field '{key}' is empty"

    # fullmatch (rather than findall) keeps the check correct for patterns
    # that contain capture groups or that can match the empty string.
    pattern = re.compile(match)
    # Splitting on a single space is deliberate: consecutive spaces yield an
    # empty token, which is rejected below.
    for word in field_value.split(" "):
        token = word.strip()
        assert pattern.fullmatch(token) is not None, (
            f"field '{key}': token '{token}' does not match pattern '{match}'"
        )

    output[key] = expected_output[key]
    return output
|
18 |
+
|
19 |
+
|
20 |
+
class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
    """End-to-end test for ``lambdaSpeechToScore.lambda_handler``."""

    def test_GetAccuracyFromRecordedAudio(self):
        """Replay the recorded events and compare each result with the stored one.

        The fixture ``GetAccuracyFromRecordedAudio.json`` holds an ``inputs``
        and an ``outputs`` mapping keyed by event name. For every event the
        handler output is decoded from JSON, the transcript fields are
        checked to be non-blank, the word-level fields are checked for
        format only, and all of those fields are then replaced by the stored
        values before the final whole-dict comparison.
        """
        self.maxDiff = None

        # Explicit encoding so the fixture is read the same way on every platform.
        with open(EVENTS_FOLDER / "GetAccuracyFromRecordedAudio.json", "r", encoding="utf-8") as src:
            inputs_outputs = json.load(src)
        inputs = inputs_outputs["inputs"]
        outputs = inputs_outputs["outputs"]

        # Raw strings: "\d" in a normal literal is an invalid escape sequence.
        field_patterns = (
            ("is_letter_correct_all_words", r'[01]+'),
            ("end_time", r'\d+\.\d+'),
            ("start_time", r'\d+\.\d+'),
            ("pronunciation_accuracy", r'\d+'),
        )

        for event_name, event_content in inputs.items():
            # One sub-test per event, so a failure names the event and the
            # remaining events still run.
            with self.subTest(event=event_name):
                expected_output = outputs[event_name]
                output = lambdaSpeechToScore.lambda_handler(event_content, [])
                output = json.loads(output)
                self.assertGreater(len(output["matched_transcripts"].strip()), 0)
                self.assertGreater(len(output["matched_transcripts_ipa"].strip()), 0)
                # Format-only checks; each call also overwrites the field
                # with the stored value.
                for key, pattern in field_patterns:
                    output = check_output_by_field(output, key, pattern, expected_output)
                output["matched_transcripts"] = expected_output["matched_transcripts"]
                output["matched_transcripts_ipa"] = expected_output["matched_transcripts_ipa"]
                self.assertEqual(expected_output, output)
|
42 |
+
|
43 |
+
|
44 |
+
if __name__ == '__main__':
|
45 |
+
unittest.main()
|
tests/test_dataset.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import unittest
|
3 |
+
|
4 |
+
from aip_trainer.lambdas import lambdaGetSample
|
5 |
+
from tests import test_logger
|
6 |
+
|
7 |
+
|
8 |
+
def helper_category(category: int, threshold_min: int, threshold_max: int, n: int = 1000) -> None:
    """Request ``n`` samples of a category and check each sentence length.

    Each iteration calls ``lambdaGetSample.lambda_handler`` with a JSON body
    asking for the given ``category`` in language ``'de'``, then counts the
    whitespace-separated words of the first ``real_transcript`` entry.

    Args:
        category: category number sent to the handler.
        threshold_min: exclusive lower bound for the word count.
        threshold_max: inclusive upper bound for the word count.
        n: number of samples to request.

    Raises:
        AssertionError: when a sample's word count is outside
            ``(threshold_min, threshold_max]``.
    """
    for _ in range(n):
        event = {'body': json.dumps({'category': category, 'language': 'de'})}
        response = lambdaGetSample.lambda_handler(event, [])
        response_dict = json.loads(response)
        number_of_words = len(response_dict['real_transcript'][0].split())
        # Explicit condition instead of ``assert`` so the check still runs
        # under ``python -O``; the error carries the logged message.
        if not threshold_min < number_of_words <= threshold_max:
            message = f"Category: {category} had a sentence with length {number_of_words}."
            test_logger.error(message)
            raise AssertionError(message)
|
20 |
+
|
21 |
+
|
22 |
+
class TestDataset(unittest.TestCase):
    """Sentence-length checks for the samples of each category."""

    def test_random_sentences(self):
        """Category 0: word count in (0, 40]."""
        helper_category(category=0, threshold_min=0, threshold_max=40)

    def test_easy_sentences(self):
        """Category 1: word count in (0, 8]."""
        helper_category(category=1, threshold_min=0, threshold_max=8)

    def test_normal_sentences(self):
        """Category 2: word count in (8, 20]."""
        helper_category(category=2, threshold_min=8, threshold_max=20)

    def test_hard_sentences(self):
        """Category 3: word count in (20, 10000]."""
        helper_category(category=3, threshold_min=20, threshold_max=10000)
|
34 |
+
|
35 |
+
|
36 |
+
if __name__ == '__main__':
|
37 |
+
unittest.main()
|
tests/{unitTests.py → test_phonem_converter_score.py}
RENAMED
@@ -1,52 +1,14 @@
|
|
1 |
-
import json
|
2 |
-
import os
|
3 |
import unittest
|
4 |
|
5 |
import epitran
|
6 |
-
import structlog
|
7 |
|
|
|
8 |
from aip_trainer.models import RuleBasedModels
|
9 |
-
from aip_trainer import pronunciationTrainer, LOG_JSON_FORMAT
|
10 |
-
from aip_trainer.lambdas import lambdaGetSample
|
11 |
-
from aip_trainer.utils import session_logger
|
12 |
-
|
13 |
-
|
14 |
-
log_level = os.getenv("LOG_LEVEL", "INFO")
|
15 |
-
session_logger.setup_logging(json_logs=LOG_JSON_FORMAT, log_level=log_level)
|
16 |
-
test_logger = structlog.stdlib.get_logger(__name__)
|
17 |
-
|
18 |
-
|
19 |
-
def test_category(category: int, threshold_min: int, threshold_max: int, n: int = 1000):
|
20 |
-
for _ in range(n):
|
21 |
-
event = {'body': json.dumps({'category': category, 'language': 'de'})}
|
22 |
-
response = lambdaGetSample.lambda_handler(event, [])
|
23 |
-
response_dict = json.loads(response)
|
24 |
-
number_of_words = len(response_dict['real_transcript'][0].split())
|
25 |
-
try:
|
26 |
-
assert threshold_min < number_of_words <= threshold_max
|
27 |
-
except AssertionError:
|
28 |
-
test_logger.error(
|
29 |
-
f"Category: {category} had a sentence with length {number_of_words}.")
|
30 |
-
raise AssertionError
|
31 |
-
|
32 |
-
|
33 |
-
class TestDataset(unittest.TestCase):
|
34 |
-
def test_random_sentences(self):
|
35 |
-
test_category(0, 0, 40)
|
36 |
-
|
37 |
-
def test_easy_sentences(self):
|
38 |
-
test_category(1, 0, 8)
|
39 |
-
|
40 |
-
def test_normal_sentences(self):
|
41 |
-
test_category(2, 8, 20)
|
42 |
-
|
43 |
-
def test_hard_sentences(self):
|
44 |
-
test_category(3, 20, 10000)
|
45 |
|
46 |
|
47 |
class TestPhonemConverter(unittest.TestCase):
|
48 |
|
49 |
-
def
|
50 |
phonem_converter = RuleBasedModels.EngPhonemConverter()
|
51 |
output = phonem_converter.convertToPhonem('Hello, this is a test')
|
52 |
self.assertEqual(output, 'hɛˈloʊ, ðɪs ɪz ə tɛst')
|
|
|
|
|
|
|
1 |
import unittest
|
2 |
|
3 |
import epitran
|
|
|
4 |
|
5 |
+
from aip_trainer import pronunciationTrainer
|
6 |
from aip_trainer.models import RuleBasedModels
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
|
9 |
class TestPhonemConverter(unittest.TestCase):
|
10 |
|
11 |
+
def test_english_ok(self):
|
12 |
phonem_converter = RuleBasedModels.EngPhonemConverter()
|
13 |
output = phonem_converter.convertToPhonem('Hello, this is a test')
|
14 |
self.assertEqual(output, 'hɛˈloʊ, ðɪs ɪz ə tɛst')
|