alessandro trinca tornidor committed on
Commit
2a62680
·
1 Parent(s): 5abbb8c

test: start updating tests

Browse files
Files changed (1) hide show
  1. tests/unitTests.py +33 -39
tests/unitTests.py CHANGED
@@ -1,67 +1,61 @@
1
  import json
 
2
  import unittest
3
 
4
  import epitran
 
5
 
6
- from aip_trainer.models import ModelInterfaces, RuleBasedModels
7
- from aip_trainer import pronunciationTrainer
8
  from aip_trainer.lambdas import lambdaGetSample
 
9
 
10
 
11
- def test_category(category: int, threshold_min: int, threshold_max: int):
12
- event = {'body': json.dumps({'category': category, 'language': 'de'})}
13
- for _ in range(1000):
 
 
 
 
 
14
  response = lambdaGetSample.lambda_handler(event, [])
15
  response_dict = json.loads(response)
16
- number_of_words = len(
17
- response_dict['real_transcript'][0].split())
18
- length_valid = threshold_min < number_of_words <= threshold_max
19
- if not length_valid:
20
- print('Category ', category,
21
- ' had a sentence with length ', number_of_words)
22
- return False
23
- return True
24
 
25
 
26
  class TestDataset(unittest.TestCase):
27
-
28
  def test_random_sentences(self):
29
-
30
- self.assertFalse(test_category(0, 0, 8))
31
 
32
  def test_easy_sentences(self):
33
-
34
- self.assertTrue(test_category(1, 0, 8))
35
 
36
  def test_normal_sentences(self):
37
- self.assertTrue(test_category(2, 8, 20))
38
 
39
  def test_hard_sentences(self):
40
- self.assertTrue(test_category(3, 20, 10000))
41
-
42
-
43
- def check_phonem_converter(converter: ModelInterfaces.ITextToPhonemModel, input_phonem: str, expected_output: str):
44
- output = converter.convertToPhonem(input_phonem)
45
-
46
- is_correct = output == expected_output
47
- if not is_correct:
48
- print('Conversion from "', input_phonem, '" should be "',
49
- expected_output, '", but was "', output, '"')
50
- return is_correct
51
 
52
 
53
  class TestPhonemConverter(unittest.TestCase):
54
 
55
  def test_english(self):
56
  phonem_converter = RuleBasedModels.EngPhonemConverter()
57
- self.assertTrue(check_phonem_converter(
58
- phonem_converter, 'Hello, this is a test', 'hɛˈloʊ, ðɪs ɪz ə tɛst'))
59
 
60
- def test_german(self):
61
- phonem_converter = RuleBasedModels.EpitranPhonemConverter(
62
- epitran.Epitran('deu-Latn'))
63
- self.assertTrue(check_phonem_converter(
64
- phonem_converter, 'Hallo, das ist ein Test', 'haloː, dɑːs ɪst ain tɛst'))
65
 
66
 
67
  trainer_SST_lambda = {'de': pronunciationTrainer.getTrainer("de")}
@@ -78,7 +72,7 @@ class TestScore(unittest.TestCase):
78
  pronunciation_accuracy, _ = trainer_SST_lambda['de'].getPronunciationAccuracy(
79
  real_and_transcribed_words)
80
 
81
- self.assertTrue(int(pronunciation_accuracy) == 100)
82
 
83
  def test_incorrect_transcription(self):
84
  words_real = 'Ich habe sehr viel glück, am leben und gesund zu sein'
@@ -90,7 +84,7 @@ class TestScore(unittest.TestCase):
90
  pronunciation_accuracy, _ = trainer_SST_lambda['de'].getPronunciationAccuracy(
91
  real_and_transcribed_words)
92
 
93
- self.assertTrue(int(pronunciation_accuracy) == 71)
94
 
95
 
96
  if __name__ == '__main__':
 
1
  import json
2
+ import os
3
  import unittest
4
 
5
  import epitran
6
+ import structlog
7
 
8
+ from aip_trainer.models import RuleBasedModels
9
+ from aip_trainer import pronunciationTrainer, LOG_JSON_FORMAT
10
  from aip_trainer.lambdas import lambdaGetSample
11
+ from aip_trainer.utils import session_logger
12
 
13
 
14
# Logging is configured once, at import time, for the whole test module.
# The level is read from the LOG_LEVEL environment variable and falls back
# to "INFO" when the variable is not set.
log_level = os.environ.get("LOG_LEVEL", "INFO")
session_logger.setup_logging(log_level=log_level, json_logs=LOG_JSON_FORMAT)
# Module-scoped structlog logger used by the helpers below to report failures.
test_logger = structlog.stdlib.get_logger(__name__)
17
+
18
+
19
def test_category(category: int, threshold_min: int, threshold_max: int, n: int = 1000):
    """Request ``n`` German samples of ``category`` and check their word count.

    Every iteration calls ``lambdaGetSample.lambda_handler`` with a JSON body
    holding the category and the language ``'de'``, decodes the JSON response
    and counts the whitespace-separated words of the first entry of
    ``real_transcript``.

    Args:
        category: category value forwarded to the sample lambda.
        threshold_min: exclusive lower bound for the word count.
        threshold_max: inclusive upper bound for the word count.
        n: number of samples to request (defaults to 1000).

    Raises:
        AssertionError: on the first sample whose word count is outside
            ``(threshold_min, threshold_max]``; the message names the
            category and the offending length.
    """
    # The serialized payload is identical for every request: encode it once.
    # A fresh event dict is still built per call, as in the original loop.
    body = json.dumps({'category': category, 'language': 'de'})
    for _ in range(n):
        event = {'body': body}
        response = lambdaGetSample.lambda_handler(event, [])
        response_dict = json.loads(response)
        number_of_words = len(response_dict['real_transcript'][0].split())
        if not threshold_min < number_of_words <= threshold_max:
            msg = f"Category: {category} had a sentence with length {number_of_words}."
            test_logger.error(msg)
            # Raise explicitly instead of using a bare ``assert``: assert
            # statements are removed under ``python -O``, which would make the
            # check silently pass. Attaching the message also makes the
            # unittest failure report self-explanatory.
            raise AssertionError(msg)
 
31
 
32
 
33
  class TestDataset(unittest.TestCase):
 
34
  def test_random_sentences(self):
35
+ test_category(0, 0, 40)
 
36
 
37
  def test_easy_sentences(self):
38
+ test_category(1, 0, 8)
 
39
 
40
  def test_normal_sentences(self):
41
+ test_category(2, 8, 20)
42
 
43
  def test_hard_sentences(self):
44
+ test_category(3, 20, 10000)
 
 
 
 
 
 
 
 
 
 
45
 
46
 
47
  class TestPhonemConverter(unittest.TestCase):
48
 
49
  def test_english(self):
50
  phonem_converter = RuleBasedModels.EngPhonemConverter()
51
+ output = phonem_converter.convertToPhonem('Hello, this is a test')
52
+ self.assertEqual(output, 'hɛˈloʊ, ðɪs ɪz ə tɛst')
53
 
54
+ def test_german_ok(self):
55
+ deu_latn = epitran.Epitran('deu-Latn')
56
+ phonem_converter = RuleBasedModels.EpitranPhonemConverter(deu_latn)
57
+ output = phonem_converter.convertToPhonem('Hallo, das ist ein Test')
58
+ self.assertEqual(output, 'haloː, dɑːs ɪst ain tɛst')
59
 
60
 
61
  trainer_SST_lambda = {'de': pronunciationTrainer.getTrainer("de")}
 
72
  pronunciation_accuracy, _ = trainer_SST_lambda['de'].getPronunciationAccuracy(
73
  real_and_transcribed_words)
74
 
75
+ self.assertEqual(int(pronunciation_accuracy), 100)
76
 
77
  def test_incorrect_transcription(self):
78
  words_real = 'Ich habe sehr viel glück, am leben und gesund zu sein'
 
84
  pronunciation_accuracy, _ = trainer_SST_lambda['de'].getPronunciationAccuracy(
85
  real_and_transcribed_words)
86
 
87
+ self.assertEqual(int(pronunciation_accuracy), 71)
88
 
89
 
90
  if __name__ == '__main__':