Spaces:
Running
Running
alessandro trinca tornidor
committed on
Commit
·
2a62680
1
Parent(s):
5abbb8c
test: start updating tests
Browse files - tests/unitTests.py +33 -39
tests/unitTests.py
CHANGED
@@ -1,67 +1,61 @@
|
|
1 |
import json
|
|
|
2 |
import unittest
|
3 |
|
4 |
import epitran
|
|
|
5 |
|
6 |
-
from aip_trainer.models import
|
7 |
-
from aip_trainer import pronunciationTrainer
|
8 |
from aip_trainer.lambdas import lambdaGetSample
|
|
|
9 |
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
14 |
response = lambdaGetSample.lambda_handler(event, [])
|
15 |
response_dict = json.loads(response)
|
16 |
-
number_of_words = len(
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
return True
|
24 |
|
25 |
|
26 |
class TestDataset(unittest.TestCase):
|
27 |
-
|
28 |
def test_random_sentences(self):
|
29 |
-
|
30 |
-
self.assertFalse(test_category(0, 0, 8))
|
31 |
|
32 |
def test_easy_sentences(self):
|
33 |
-
|
34 |
-
self.assertTrue(test_category(1, 0, 8))
|
35 |
|
36 |
def test_normal_sentences(self):
|
37 |
-
|
38 |
|
39 |
def test_hard_sentences(self):
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
def check_phonem_converter(converter: ModelInterfaces.ITextToPhonemModel, input_phonem: str, expected_output: str):
|
44 |
-
output = converter.convertToPhonem(input_phonem)
|
45 |
-
|
46 |
-
is_correct = output == expected_output
|
47 |
-
if not is_correct:
|
48 |
-
print('Conversion from "', input_phonem, '" should be "',
|
49 |
-
expected_output, '", but was "', output, '"')
|
50 |
-
return is_correct
|
51 |
|
52 |
|
53 |
class TestPhonemConverter(unittest.TestCase):
|
54 |
|
55 |
def test_english(self):
|
56 |
phonem_converter = RuleBasedModels.EngPhonemConverter()
|
57 |
-
|
58 |
-
|
59 |
|
60 |
-
def
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
|
66 |
|
67 |
trainer_SST_lambda = {'de': pronunciationTrainer.getTrainer("de")}
|
@@ -78,7 +72,7 @@ class TestScore(unittest.TestCase):
|
|
78 |
pronunciation_accuracy, _ = trainer_SST_lambda['de'].getPronunciationAccuracy(
|
79 |
real_and_transcribed_words)
|
80 |
|
81 |
-
self.
|
82 |
|
83 |
def test_incorrect_transcription(self):
|
84 |
words_real = 'Ich habe sehr viel glück, am leben und gesund zu sein'
|
@@ -90,7 +84,7 @@ class TestScore(unittest.TestCase):
|
|
90 |
pronunciation_accuracy, _ = trainer_SST_lambda['de'].getPronunciationAccuracy(
|
91 |
real_and_transcribed_words)
|
92 |
|
93 |
-
self.
|
94 |
|
95 |
|
96 |
if __name__ == '__main__':
|
|
|
1 |
import json
|
2 |
+
import os
|
3 |
import unittest
|
4 |
|
5 |
import epitran
|
6 |
+
import structlog
|
7 |
|
8 |
+
from aip_trainer.models import RuleBasedModels
|
9 |
+
from aip_trainer import pronunciationTrainer, LOG_JSON_FORMAT
|
10 |
from aip_trainer.lambdas import lambdaGetSample
|
11 |
+
from aip_trainer.utils import session_logger
|
12 |
|
13 |
|
14 |
+
log_level = os.getenv("LOG_LEVEL", "INFO")
|
15 |
+
session_logger.setup_logging(json_logs=LOG_JSON_FORMAT, log_level=log_level)
|
16 |
+
test_logger = structlog.stdlib.get_logger(__name__)
|
17 |
+
|
18 |
+
|
19 |
+
def test_category(category: int, threshold_min: int, threshold_max: int, n: int = 1000):
|
20 |
+
for _ in range(n):
|
21 |
+
event = {'body': json.dumps({'category': category, 'language': 'de'})}
|
22 |
response = lambdaGetSample.lambda_handler(event, [])
|
23 |
response_dict = json.loads(response)
|
24 |
+
number_of_words = len(response_dict['real_transcript'][0].split())
|
25 |
+
try:
|
26 |
+
assert threshold_min < number_of_words <= threshold_max
|
27 |
+
except AssertionError:
|
28 |
+
test_logger.error(
|
29 |
+
f"Category: {category} had a sentence with length {number_of_words}.")
|
30 |
+
raise AssertionError
|
|
|
31 |
|
32 |
|
33 |
class TestDataset(unittest.TestCase):
|
|
|
34 |
def test_random_sentences(self):
|
35 |
+
test_category(0, 0, 40)
|
|
|
36 |
|
37 |
def test_easy_sentences(self):
|
38 |
+
test_category(1, 0, 8)
|
|
|
39 |
|
40 |
def test_normal_sentences(self):
|
41 |
+
test_category(2, 8, 20)
|
42 |
|
43 |
def test_hard_sentences(self):
|
44 |
+
test_category(3, 20, 10000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
|
47 |
class TestPhonemConverter(unittest.TestCase):
|
48 |
|
49 |
def test_english(self):
|
50 |
phonem_converter = RuleBasedModels.EngPhonemConverter()
|
51 |
+
output = phonem_converter.convertToPhonem('Hello, this is a test')
|
52 |
+
self.assertEqual(output, 'hɛˈloʊ, ðɪs ɪz ə tɛst')
|
53 |
|
54 |
+
def test_german_ok(self):
|
55 |
+
deu_latn = epitran.Epitran('deu-Latn')
|
56 |
+
phonem_converter = RuleBasedModels.EpitranPhonemConverter(deu_latn)
|
57 |
+
output = phonem_converter.convertToPhonem('Hallo, das ist ein Test')
|
58 |
+
self.assertEqual(output, 'haloː, dɑːs ɪst ain tɛst')
|
59 |
|
60 |
|
61 |
trainer_SST_lambda = {'de': pronunciationTrainer.getTrainer("de")}
|
|
|
72 |
pronunciation_accuracy, _ = trainer_SST_lambda['de'].getPronunciationAccuracy(
|
73 |
real_and_transcribed_words)
|
74 |
|
75 |
+
self.assertEqual(int(pronunciation_accuracy), 100)
|
76 |
|
77 |
def test_incorrect_transcription(self):
|
78 |
words_real = 'Ich habe sehr viel glück, am leben und gesund zu sein'
|
|
|
84 |
pronunciation_accuracy, _ = trainer_SST_lambda['de'].getPronunciationAccuracy(
|
85 |
real_and_transcribed_words)
|
86 |
|
87 |
+
self.assertEqual(int(pronunciation_accuracy), 71)
|
88 |
|
89 |
|
90 |
if __name__ == '__main__':
|