alessandro trinca tornidor committed on
Commit
025384a
·
1 Parent(s): 43a7191

test: add e2e test TestGetAccuracyFromRecordedAudio

Browse files
tests/events/GetAccuracyFromRecordedAudio.json ADDED
The diff for this file is too large to render. See raw diff
 
tests/test_GetAccuracyFromRecordedAudio.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import unittest
3
+
4
+ from aip_trainer.lambdas import lambdaSpeechToScore
5
+ from tests import EVENTS_FOLDER
6
+
7
+
8
def check_output_by_field(output, key, match, expected_output):
    """Validate the format of ``output[key]``, then replace it with the expected value.

    ``output[key]`` must be a non-blank string. It is split on single spaces
    and every resulting token (stripped of surrounding whitespace) must match
    the regular expression ``match`` in its entirety.

    Once the format check passes, ``output[key]`` is overwritten in place with
    ``expected_output[key]`` so that a later whole-dict comparison does not
    depend on the concrete value of this field.

    Args:
        output: dict holding the value to validate; mutated in place.
        key: name of the field to validate and overwrite.
        match: regular expression each space-separated token must fully match.
        expected_output: dict providing the replacement value for ``key``.

    Returns:
        The same ``output`` dict, with ``output[key]`` replaced.

    Raises:
        AssertionError: if the field is blank or a token does not match.
        KeyError: if ``key`` is missing from ``output`` or ``expected_output``.
    """
    import re

    value = output[key].strip()
    if not value:
        raise AssertionError(f"field '{key}' is empty")

    # Compile once, outside the loop; fullmatch states the intent directly
    # instead of comparing the single findall() hit against the whole token.
    pattern = re.compile(match)
    # Split on a single space on purpose: an empty token produced by a doubled
    # space is reported as a format error rather than silently skipped.
    for token in value.split(" "):
        token = token.strip()
        if pattern.fullmatch(token) is None:
            raise AssertionError(
                f"field '{key}': token {token!r} does not match pattern {match!r}"
            )

    output[key] = expected_output[key]
    return output
18
+
19
+
20
class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
    """End-to-end test of ``lambdaSpeechToScore.lambda_handler`` on recorded events."""

    def test_GetAccuracyFromRecordedAudio(self):
        """Run the handler on every stored event and compare with the stored output.

        The events file maps event names to handler inputs (``"inputs"``) and to
        the corresponding expected responses (``"outputs"``). Some response
        fields are only validated for their format and then replaced by the
        expected value before the final equality check; the transcripts are
        only required to be non-blank.
        """
        self.maxDiff = None

        # Explicit encoding so the fixture is read the same way on every platform.
        with open(EVENTS_FOLDER / "GetAccuracyFromRecordedAudio.json", "r", encoding="utf-8") as src:
            inputs_outputs = json.load(src)
        inputs = inputs_outputs["inputs"]
        outputs = inputs_outputs["outputs"]

        # Fields checked only for format; raw strings keep the regex escapes valid.
        format_checked_fields = (
            ("is_letter_correct_all_words", r"[01]+"),
            ("end_time", r"\d+\.\d+"),
            ("start_time", r"\d+\.\d+"),
            ("pronunciation_accuracy", r"\d+"),
        )
        # Fields only required to be non-blank.
        transcript_fields = ("matched_transcripts", "matched_transcripts_ipa")

        for event_name, event_content in inputs.items():
            # subTest reports every failing event instead of stopping at the first.
            with self.subTest(event=event_name):
                expected_output = outputs[event_name]
                output = json.loads(lambdaSpeechToScore.lambda_handler(event_content, []))

                for key in transcript_fields:
                    self.assertTrue(output[key].strip(), f"field '{key}' is empty")

                # check_output_by_field also overwrites each field with the expected value.
                for key, pattern in format_checked_fields:
                    output = check_output_by_field(output, key, pattern, expected_output)

                for key in transcript_fields:
                    output[key] = expected_output[key]

                self.assertEqual(expected_output, output)
42
+
43
+
44
+ if __name__ == '__main__':
45
+ unittest.main()
tests/test_dataset.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import unittest
3
+
4
+ from aip_trainer.lambdas import lambdaGetSample
5
+ from tests import test_logger
6
+
7
+
8
def helper_category(category: int, threshold_min: int, threshold_max: int, n: int = 1000):
    """Request ``n`` German samples of ``category`` and check each sentence length.

    For every response of ``lambdaGetSample.lambda_handler`` the number of
    whitespace-separated words in the first ``real_transcript`` entry must
    satisfy ``threshold_min < number_of_words <= threshold_max``.

    Args:
        category: category value sent in the request body.
        threshold_min: exclusive lower bound for the word count.
        threshold_max: inclusive upper bound for the word count.
        n: number of samples to request.

    Raises:
        AssertionError: on the first sample whose word count is out of range;
            the message names the category and the offending length.
    """
    # The request payload does not change between iterations; serialize it once.
    body = json.dumps({'category': category, 'language': 'de'})
    for _ in range(n):
        event = {'body': body}
        response = lambdaGetSample.lambda_handler(event, [])
        response_dict = json.loads(response)
        number_of_words = len(response_dict['real_transcript'][0].split())
        # Explicit check (not ``assert``) so it still runs under ``python -O``.
        if not threshold_min < number_of_words <= threshold_max:
            message = f"Category: {category} had a sentence with length {number_of_words}."
            test_logger.error(message)
            # Carry the message in the exception so the test report shows the cause.
            raise AssertionError(message)
20
+
21
+
22
class TestDataset(unittest.TestCase):
    """Sentence-length checks for each sample category, delegated to ``helper_category``."""

    def test_random_sentences(self):
        """Category 0: word count must be above 0 and at most 40."""
        helper_category(category=0, threshold_min=0, threshold_max=40)

    def test_easy_sentences(self):
        """Category 1: word count must be above 0 and at most 8."""
        helper_category(category=1, threshold_min=0, threshold_max=8)

    def test_normal_sentences(self):
        """Category 2: word count must be above 8 and at most 20."""
        helper_category(category=2, threshold_min=8, threshold_max=20)

    def test_hard_sentences(self):
        """Category 3: word count must be above 20 and at most 10000."""
        helper_category(category=3, threshold_min=20, threshold_max=10000)
34
+
35
+
36
+ if __name__ == '__main__':
37
+ unittest.main()
tests/{unitTests.py → test_phonem_converter_score.py} RENAMED
@@ -1,52 +1,14 @@
1
- import json
2
- import os
3
  import unittest
4
 
5
  import epitran
6
- import structlog
7
 
 
8
  from aip_trainer.models import RuleBasedModels
9
- from aip_trainer import pronunciationTrainer, LOG_JSON_FORMAT
10
- from aip_trainer.lambdas import lambdaGetSample
11
- from aip_trainer.utils import session_logger
12
-
13
-
14
- log_level = os.getenv("LOG_LEVEL", "INFO")
15
- session_logger.setup_logging(json_logs=LOG_JSON_FORMAT, log_level=log_level)
16
- test_logger = structlog.stdlib.get_logger(__name__)
17
-
18
-
19
- def test_category(category: int, threshold_min: int, threshold_max: int, n: int = 1000):
20
- for _ in range(n):
21
- event = {'body': json.dumps({'category': category, 'language': 'de'})}
22
- response = lambdaGetSample.lambda_handler(event, [])
23
- response_dict = json.loads(response)
24
- number_of_words = len(response_dict['real_transcript'][0].split())
25
- try:
26
- assert threshold_min < number_of_words <= threshold_max
27
- except AssertionError:
28
- test_logger.error(
29
- f"Category: {category} had a sentence with length {number_of_words}.")
30
- raise AssertionError
31
-
32
-
33
- class TestDataset(unittest.TestCase):
34
- def test_random_sentences(self):
35
- test_category(0, 0, 40)
36
-
37
- def test_easy_sentences(self):
38
- test_category(1, 0, 8)
39
-
40
- def test_normal_sentences(self):
41
- test_category(2, 8, 20)
42
-
43
- def test_hard_sentences(self):
44
- test_category(3, 20, 10000)
45
 
46
 
47
  class TestPhonemConverter(unittest.TestCase):
48
 
49
- def test_english(self):
50
  phonem_converter = RuleBasedModels.EngPhonemConverter()
51
  output = phonem_converter.convertToPhonem('Hello, this is a test')
52
  self.assertEqual(output, 'hɛˈloʊ, ðɪs ɪz ə tɛst')
 
 
 
1
  import unittest
2
 
3
  import epitran
 
4
 
5
+ from aip_trainer import pronunciationTrainer
6
  from aip_trainer.models import RuleBasedModels
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
 
9
  class TestPhonemConverter(unittest.TestCase):
10
 
11
+ def test_english_ok(self):
12
  phonem_converter = RuleBasedModels.EngPhonemConverter()
13
  output = phonem_converter.convertToPhonem('Hello, this is a test')
14
  self.assertEqual(output, 'hɛˈloʊ, ðɪs ɪz ə tɛst')