alessandro trinca tornidor committed on
Commit
4cafb0a
·
1 Parent(s): 8e595ef

test: refactor the test suite structure and add more test cases for the pronunciationTrainer module

Browse files
tests/lambdas/__init__.py ADDED
File without changes
tests/models/__init__.py ADDED
File without changes
tests/models/test_rulebasedmodels.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+
3
+ import epitran
4
+
5
+ from aip_trainer.models import RuleBasedModels
6
+
7
+
8
# German sample sentences: the reference text and an imperfect transcription of it.
# The test class in this module does not reference them.
words_real = "Ich habe sehr viel glück, am leben und gesund zu sein"
words_transcribed = "Ic hab zeh viel guck am und gesund tu sein"
10
+
11
+
12
class TestPhonemConverter(unittest.TestCase):
    """Check the IPA strings produced by the rule-based phoneme converters."""

    def test_english_ok(self):
        """EngPhonemConverter returns the pinned IPA for a short English sentence."""
        expected_ipa = 'hɛˈloʊ, ðɪs ɪz ə tɛst'
        converter = RuleBasedModels.EngPhonemConverter()
        self.assertEqual(converter.convertToPhonem('Hello, this is a test'), expected_ipa)

    def test_german_ok(self):
        """EpitranPhonemConverter (built on a 'deu-Latn' Epitran) returns the pinned German IPA."""
        expected_ipa = 'haloː, daːs ɪst aɪ̯n tɛst'
        converter = RuleBasedModels.EpitranPhonemConverter(epitran.Epitran('deu-Latn'))
        self.assertEqual(converter.convertToPhonem('Hallo, das ist ein Test'), expected_ipa)
24
+
25
+
26
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
tests/test_pronunciationtrainer.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+
3
+ import numpy as np
4
+ import torch
5
+ from torchaudio.transforms import Resample
6
+
7
+ from aip_trainer import pronunciationTrainer, sample_rate_start
8
+ from aip_trainer.lambdas.lambdaSpeechToScore import soundfile_load
9
+ from aip_trainer.utils import utilities
10
+ from tests import EVENTS_FOLDER
11
+ from tests.lambdas.test_lambdaSpeechToScore import set_seed
12
+
13
+
14
# Sample sentences per language: the reference text ("real") plus three
# transcription variants used by the accuracy tests in this module.
phrases = {
    "de": {
        "real": "Hallo, wie geht es dir?",
        "transcribed": "hallo wie geht es dir",
        "partial": "hallo wie geht ",
        "incorrect": "hail wi git es dir",
    },
    "en": {
        "real": "Hi there, how are you?",
        "transcribed": "i there how are you",
        "partial": "i there how",
        "incorrect": "I here how re youth",
    },
}

# Trainers and audio fixtures are created once, at import time, and shared by every test.
trainer_SST_lambda_de = pronunciationTrainer.getTrainer("de")
trainer_SST_lambda_en = pronunciationTrainer.getTrainer("en")

# NOTE: `samplerate` is rebound by the second load, so only the English file's value survives.
signal_de, samplerate = soundfile_load(str(EVENTS_FOLDER / "test_de_easy.wav"))
signal_en, samplerate = soundfile_load(str(EVENTS_FOLDER / "test_en_easy.wav"))

# NOTE(review): the resampler takes sample_rate_start as the source rate rather than
# the `samplerate` returned by soundfile_load — presumably they agree; confirm.
transform = Resample(orig_freq=sample_rate_start, new_freq=16000)
33
+
34
+
35
class TestScore(unittest.TestCase):
    """Regression tests for the German and English pronunciation trainers.

    The tests use the module-level fixtures (``phrases``, the two trainers, the
    loaded signals and the ``transform`` resampler) and compare against pinned
    expected values.
    """

    # Pinned output of processAudioForGivenText for the German sample recording.
    _EXPECTED_RESULT_DE = {
        'recording_transcript': 'hallo wie geht es dir',
        'real_and_transcribed_words': [
            ('Hallo,', 'hallo'), ('wie', 'wie'), ('geht', 'geht'), ('es', 'es'), ('dir?', 'dir'),
        ],
        'recording_ipa': 'haloː viː ɡeːt ɛːs diːɐ̯',
        'start_time': '0.0 0.3733125 0.60425 0.7966875 0.989125',
        'end_time': '0.4733125 0.70425 0.8966875 1.089125 1.3200625',
        'real_and_transcribed_words_ipa': [
            ('haloː,', 'haloː'), ('viː', 'viː'), ('ɡeːt', 'ɡeːt'), ('ɛːs', 'ɛːs'), ('diːr?', 'diːɐ̯'),
        ],
        'pronunciation_accuracy': 100.0,
        'pronunciation_categories': [0, 0, 0, 0, 0],
    }

    # Pinned output of processAudioForGivenText for the English sample recording.
    _EXPECTED_RESULT_EN = {
        'recording_transcript': 'i there how are you',
        'real_and_transcribed_words': [
            ('Hi', 'i'), ('there,', 'there'), ('how', 'how'), ('are', 'are'), ('you?', 'you'),
        ],
        'recording_ipa': 'aɪ ðɛr haʊ ər ju',
        'start_time': '0.0 0.0625 0.2875 0.475 0.7',
        'end_time': '0.1625 0.3875 0.575 0.8 0.9875',
        'real_and_transcribed_words_ipa': [
            ('haɪ', 'aɪ'), ('ðɛr,', 'ðɛr'), ('haʊ', 'haʊ'), ('ər', 'ər'), ('ju?', 'ju'),
        ],
        'pronunciation_accuracy': 94.0,
        'pronunciation_categories': [2, 0, 0, 0, 0],
    }

    # Expected category for each value returned by _sample_accuracies(), in order.
    _EXPECTED_CATEGORIES = [
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 2, 2,
    ]

    @staticmethod
    def _sample_accuracies():
        """Return a fresh list: -121..119 in steps of 10, then inf, -inf, nan, 1.5 and -1.5."""
        return list(range(-121, 121, 10)) + [np.inf, -np.inf, np.nan, 1.5, -1.5]

    def _assert_accuracy(self, trainer, phrase_real, phrase_recorded, expected_accuracy):
        """Score ``phrase_recorded`` against ``phrase_real`` and check the int-truncated accuracy.

        Calls ``set_seed()`` first, exactly as every accuracy test did individually.
        """
        set_seed()
        real_and_transcribed_words, _, _ = trainer.matchSampleAndRecordedWords(phrase_real, phrase_recorded)
        pronunciation_accuracy, _ = trainer.getPronunciationAccuracy(real_and_transcribed_words)
        self.assertEqual(int(pronunciation_accuracy), expected_accuracy)

    def _process_recording(self, trainer, signal, phrase_real):
        """Call ``set_seed()``, resample ``signal`` and run processAudioForGivenText on it."""
        set_seed()
        signal_transformed = transform(torch.Tensor(signal)).unsqueeze(0)
        return trainer.processAudioForGivenText(signal_transformed, phrase_real)

    def test_getTrainer(self):
        """getTrainer returns PronunciationTrainer instances for both languages."""
        self.assertIsInstance(trainer_SST_lambda_de, pronunciationTrainer.PronunciationTrainer)
        self.assertIsInstance(trainer_SST_lambda_en, pronunciationTrainer.PronunciationTrainer)

    def test_exact_transcription_de(self):
        phrase_real = phrases["de"]["real"]
        self._assert_accuracy(trainer_SST_lambda_de, phrase_real, phrase_real, 100)

    def test_transcription_de(self):
        self._assert_accuracy(
            trainer_SST_lambda_de, phrases["de"]["real"], phrases["de"]["transcribed"], 100)

    def test_partial_transcription_de(self):
        self._assert_accuracy(
            trainer_SST_lambda_de, phrases["de"]["real"], phrases["de"]["partial"], 71)

    def test_incorrect_transcription_with_correct_words_de(self):
        self._assert_accuracy(
            trainer_SST_lambda_de, phrases["de"]["real"], phrases["de"]["incorrect"], 71)

    def test_exact_transcription_en(self):
        phrase_real = phrases["en"]["real"]
        self._assert_accuracy(trainer_SST_lambda_en, phrase_real, phrase_real, 100)

    def test_transcription_en(self):
        self._assert_accuracy(
            trainer_SST_lambda_en, phrases["en"]["real"], phrases["en"]["transcribed"], 94)

    def test_partial_transcription_en(self):
        self._assert_accuracy(
            trainer_SST_lambda_en, phrases["en"]["real"], phrases["en"]["partial"], 56)

    def test_incorrect_transcription_with_correct_words_en(self):
        self._assert_accuracy(
            trainer_SST_lambda_en, phrases["en"]["real"], phrases["en"]["incorrect"], 69)

    def test_processAudioForGivenText_getTranscriptAndWordsLocations_de(self):
        signal_de_shape = signal_de.shape[0]
        result = self._process_recording(trainer_SST_lambda_de, signal_de, phrases["de"]["real"])
        self.assertDictEqual(result, self._EXPECTED_RESULT_DE)
        # NOTE(review): this call receives only the signal length, so it presumably
        # relies on state left by the processAudioForGivenText call above — confirm.
        transcript, word_locations = trainer_SST_lambda_de.getTranscriptAndWordsLocations(signal_de_shape)
        self.assertEqual(transcript, phrases["de"]["transcribed"])
        self.assertEqual(
            word_locations,
            [(0, 7573), (5973, 11268), (9668, 14347), (12747, 17426), (15826, 21121)])

    def test_processAudioForGivenText_de(self):
        result = self._process_recording(trainer_SST_lambda_de, signal_de, phrases["de"]["real"])
        self.assertDictEqual(result, self._EXPECTED_RESULT_DE)

    def test_removePunctuation_de(self):
        """Punctuation is stripped; the embedded newline and carriage return are kept."""
        cleaned_word = trainer_SST_lambda_de.removePunctuation("glück,")
        self.assertEqual(cleaned_word, "glück")
        cleaned_word = trainer_SST_lambda_de.removePunctuation("glück,\n\rhallo...")
        self.assertEqual(cleaned_word, "glück\n\rhallo")

    def test_getWordsPronunciationCategory_de(self):
        categories = trainer_SST_lambda_de.getWordsPronunciationCategory(self._sample_accuracies())
        self.assertEqual(categories, self._EXPECTED_CATEGORIES)

    def test_preprocessAudio_de(self):
        """The raw and the preprocessed German signal both match their pinned hashes."""
        output_hash = utilities.hash_calculate(signal_de, is_file=False)
        self.assertEqual(output_hash, b'D9pMFzYL1BSPPg89ZCQE61xzb7QICXolYtC9EJRpvS0=')
        signal_transformed = transform(torch.Tensor(signal_de)).unsqueeze(0)
        processed_audio = trainer_SST_lambda_de.preprocessAudio(signal_transformed)
        self.assertIsInstance(processed_audio, torch.Tensor)
        self.assertEqual(processed_audio.shape, (1, 23400))
        output_hash = utilities.hash_calculate(processed_audio.numpy(), is_file=False)
        self.assertEqual(output_hash, b'Ri/1rmgYmRSWaAw/Y3PoLEu1woiczhSUdUCbaMf++EM=')

    def test_processAudioForGivenText_getTranscriptAndWordsLocations_en(self):
        signal_en_shape = signal_en.shape[0]
        result = self._process_recording(trainer_SST_lambda_en, signal_en, phrases["en"]["real"])
        self.assertDictEqual(result, self._EXPECTED_RESULT_EN)
        # NOTE(review): this call receives only the signal length, so it presumably
        # relies on state left by the processAudioForGivenText call above — confirm.
        transcript, word_locations = trainer_SST_lambda_en.getTranscriptAndWordsLocations(signal_en_shape)
        self.assertEqual(transcript, phrases["en"]["transcribed"])
        self.assertEqual(
            word_locations,
            [(0, 2600), (1000, 6200), (4600, 9200), (7600, 12800), (11200, 15800)])

    def test_processAudioForGivenText_en(self):
        result = self._process_recording(trainer_SST_lambda_en, signal_en, phrases["en"]["real"])
        self.assertDictEqual(result, self._EXPECTED_RESULT_EN)

    def test_getPronunciationCategoryFromAccuracy_en(self):
        all_categories = [
            trainer_SST_lambda_en.getPronunciationCategoryFromAccuracy(accuracy)
            for accuracy in self._sample_accuracies()
        ]
        self.assertEqual(all_categories, self._EXPECTED_CATEGORIES)
204
+
205
+
206
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
tests/{test_phonem_converter_score.py → test_wordmetrics.py} RENAMED
@@ -1,53 +1,13 @@
1
  import unittest
2
 
3
- import epitran
4
-
5
- from aip_trainer import pronunciationTrainer, WordMetrics
6
- from aip_trainer.models import RuleBasedModels
7
 
8
 
9
  words_real = 'Ich habe sehr viel glück, am leben und gesund zu sein'
10
  words_transcribed = 'Ic hab zeh viel guck am und gesund tu sein'
11
 
12
 
13
- class TestPhonemConverter(unittest.TestCase):
14
-
15
- def test_english_ok(self):
16
- phonem_converter = RuleBasedModels.EngPhonemConverter()
17
- output = phonem_converter.convertToPhonem('Hello, this is a test')
18
- self.assertEqual(output, 'hɛˈloʊ, ðɪs ɪz ə tɛst')
19
-
20
- def test_german_ok(self):
21
- deu_latn = epitran.Epitran('deu-Latn')
22
- phonem_converter = RuleBasedModels.EpitranPhonemConverter(deu_latn)
23
- output = phonem_converter.convertToPhonem('Hallo, das ist ein Test')
24
- self.assertEqual(output, 'haloː, daːs ɪst aɪ̯n tɛst')
25
-
26
-
27
- trainer_SST_lambda = {'de': pronunciationTrainer.getTrainer("de")}
28
-
29
-
30
- class TestScore(unittest.TestCase):
31
-
32
- def test_exact_transcription(self):
33
- real_and_transcribed_words, _, _ = trainer_SST_lambda['de'].matchSampleAndRecordedWords(
34
- words_real, words_real)
35
-
36
- pronunciation_accuracy, _ = trainer_SST_lambda['de'].getPronunciationAccuracy(
37
- real_and_transcribed_words)
38
-
39
- self.assertEqual(int(pronunciation_accuracy), 100)
40
-
41
- def test_incorrect_transcription(self):
42
-
43
- real_and_transcribed_words, _, _ = trainer_SST_lambda['de'].matchSampleAndRecordedWords(
44
- words_real, words_transcribed)
45
-
46
- pronunciation_accuracy, _ = trainer_SST_lambda['de'].getPronunciationAccuracy(
47
- real_and_transcribed_words)
48
-
49
- self.assertEqual(int(pronunciation_accuracy), 71)
50
-
51
  def test_edit_distance_python(self):
52
  output = WordMetrics.edit_distance_python(words_real, words_transcribed)
53
  self.assertEqual(output, int(14))
 
1
  import unittest
2
 
3
+ from aip_trainer import WordMetrics
 
 
 
4
 
5
 
6
  words_real = 'Ich habe sehr viel glück, am leben und gesund zu sein'
7
  words_transcribed = 'Ic hab zeh viel guck am und gesund tu sein'
8
 
9
 
10
class TestWordMetrics(unittest.TestCase):
    """Tests for the edit-distance helper exposed by WordMetrics."""

    def test_edit_distance_python(self):
        """The distance between the reference and the transcribed sentence is pinned at 14."""
        expected_distance = 14
        distance = WordMetrics.edit_distance_python(words_real, words_transcribed)
        self.assertEqual(distance, expected_distance)
tests/utils/__init__.py ADDED
File without changes