alessandro trinca tornidor committed
Commit acfca85 · 1 Parent(s): d1b2b5d

test: update test cases for models modules, add preprocessAudioStandalone() function

.gitignore CHANGED
@@ -280,3 +280,5 @@ fabric.properties
 
 # node_modules
 node_modules
+
+*.jit
aip_trainer/models/models.py CHANGED
@@ -14,9 +14,7 @@ default_speaker_dict = {
 }
 
 
-def silero_tts(
-    language="en", version="latest", output_folder: Path | str = None, **kwargs
-):
+def silero_tts(language="en", version="latest", output_folder: Path | str = None, **kwargs):
     """Silero Text-To-Speech Models
     language (str): language of the model, now available are ['ru', 'en', 'de', 'es', 'fr']
     Returns a model and a set of utils
@@ -117,9 +115,10 @@ def init_jit_model(
     model_dir = (
         Path(output_folder)
         if output_folder is not None
-        else Path(os.path.dirname(__file__)) / "model"
+        else Path(torch.hub.get_dir())
     )
     os.makedirs(model_dir, exist_ok=True)
+    app_logger.info(f"downloading the models to model_dir: '{model_dir}' ...")
     model_path = model_dir / os.path.basename(model_url)
     app_logger.info(
         f"model_path exists? '{os.path.isfile(model_path)}' => '{model_path}' ..."
@@ -159,7 +158,7 @@ def get_models(language, output_folder, version, model_type):
     output_folder = (
         Path(output_folder)
         if output_folder is not None
-        else Path(os.path.dirname(__file__)) / ".." / ".."
+        else Path(os.path.dirname(__file__)).parent.parent
    )
     models_list_file = output_folder / f"latest_silero_model_{language}.yml"
     if not os.path.exists(models_list_file):
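
With this change, init_jit_model called without output_folder caches weights under torch.hub's standard directory instead of a model/ folder next to the module, and get_models resolves its default folder via Path.parent instead of "/.." path segments. A minimal sketch of the resulting call patterns, reusing the en_v5.jit URL from this commit's tests (the /tmp path is hypothetical):

    import torch
    from aip_trainer.models import models as mo

    model_url = "https://models.silero.ai/models/en/en_v5.jit"

    # no output_folder: the weights land in torch.hub.get_dir() (e.g. ~/.cache/torch/hub)
    model, decoder = mo.init_jit_model(model_url)

    # an explicit output_folder still takes precedence over the default
    model2, decoder2 = mo.init_jit_model(
        model_url, device=torch.device("cpu"), output_folder="/tmp/silero_models"
    )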
aip_trainer/pronunciationTrainer.py CHANGED
@@ -33,6 +33,12 @@ def getTrainer(language: str):
     return trainer
 
 
+def preprocessAudioStandalone(audio: torch.tensor) -> torch.tensor:
+    audio = audio-torch.mean(audio)
+    audio = audio/torch.max(torch.abs(audio))
+    return audio
+
+
 class PronunciationTrainer:
     current_transcript: str
     current_ipa: str
@@ -185,6 +191,4 @@ class PronunciationTrainer:
         return np.argmin(abs(self.categories_thresholds-accuracy))
 
     def preprocessAudio(self, audio: torch.tensor) -> torch.tensor:
-        audio = audio-torch.mean(audio)
-        audio = audio/torch.max(torch.abs(audio))
-        return audio
+        return preprocessAudioStandalone(audio=audio)
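
preprocessAudio's body moves to a module-level preprocessAudioStandalone, so the zero-mean, peak-normalization step is usable without instantiating a trainer (the new test modules rely on this). A quick sketch of a direct call, with a hypothetical random input:

    import torch

    from aip_trainer.pronunciationTrainer import preprocessAudioStandalone

    audio = torch.rand(1, 16000)  # hypothetical 1 x N mono signal
    normalized = preprocessAudioStandalone(audio)
    assert abs(float(normalized.mean())) < 1e-4  # centered on zero
    assert float(normalized.abs().max()) == 1.0  # peak scaled to exactly 1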
 
 
aip_trainer/utils/split_cosmic_ray_report.py CHANGED
@@ -1,7 +1,9 @@
 from pathlib import Path
 
 
-def get_cosmic_ray_report_filtered(input_filename, suffix="filtered", separator="============", filter_string="test outcome: TestOutcome.KILLED"):
+def get_cosmic_ray_report_filtered(input_filename, suffix="filtered", separator="============", filter_string_list: list = None):
+    if filter_string_list is None:
+        filter_string_list = ["test outcome: TestOutcome.KILLED"]
     filename, ext = Path(input_filename).stem, Path(input_filename).suffix
     working_dir = input_filename.parent
     # Read the input file
@@ -10,9 +12,11 @@ def get_cosmic_ray_report_filtered(input_filename, suffix="filtered", separator=
 
     # Split the content into sections
     sections = content.split(separator)
+    filtered_sections = [section for section in sections]
 
     # Filter out sections containing "test outcome: TestOutcome.KILLED"
-    filtered_sections = [section for section in sections if filter_string not in section]
+    for filter_string in filter_string_list:
+        filtered_sections = [section for section in filtered_sections if filter_string not in section]
 
     # Join the filtered sections back into a single string
     filtered_content = separator.join(filtered_sections)
@@ -25,5 +29,5 @@ def get_cosmic_ray_report_filtered(input_filename, suffix="filtered", separator=
 
 if __name__ == "__main__":
     from aip_trainer import PROJECT_ROOT_FOLDER
-    _input_filename = "cosmic-ray-pronunciationtrainer1.txt"
+    _input_filename = "cosmic-ray-models2.txt"
     get_cosmic_ray_report_filtered(PROJECT_ROOT_FOLDER / "tmp" / _input_filename)
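
The single filter_string parameter becomes filter_string_list, applied one string at a time, with the old KILLED string kept as the default. A short usage sketch (filenames taken from this commit's tests):

    from aip_trainer import PROJECT_ROOT_FOLDER
    from aip_trainer.utils.split_cosmic_ray_report import get_cosmic_ray_report_filtered

    # default: drop only the sections whose mutants were killed
    get_cosmic_ray_report_filtered(PROJECT_ROOT_FOLDER / "tmp" / "cosmic-ray-models2.txt")

    # custom list: also drop sections that mutate the timing line
    get_cosmic_ray_report_filtered(
        PROJECT_ROOT_FOLDER / "tmp" / "cosmic-ray-pronunciationtrainer.txt",
        filter_string_list=["test outcome: TestOutcome.KILLED", "- duration = time.time()"],
    )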
cosmic_ray_config.toml CHANGED
@@ -1,8 +1,8 @@
 [cosmic-ray]
-module-path = "aip_trainer/pronunciationTrainer.py"
+module-path = "aip_trainer/models/models.py"
 timeout = 30.0
 excluded-modules = []
-test-command = "python -m pytest tests/test_pronunciationtrainer.py"
+test-command = "python -m pytest tests/models/test_models.py"
 
 [cosmic-ray.distributor]
 name = "local"
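
The config retargets mutation testing from pronunciationTrainer.py to the models module and its new test file. Assuming the usual Cosmic Ray workflow (command shapes from recent releases; the session filename is hypothetical), a run against this config would look roughly like:

    cosmic-ray init cosmic_ray_config.toml session.sqlite
    cosmic-ray exec cosmic_ray_config.toml session.sqlite
    cr-report session.sqlite > tmp/cosmic-ray-models2.txt

The resulting text report is the kind of file that split_cosmic_ray_report.py above filters down to the surviving mutants.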
tests/events/cosmic-ray-pronunciationtrainer.txt ADDED
The diff for this file is too large to render. See raw diff
 
tests/models/test_aimodels.py ADDED
@@ -0,0 +1,167 @@
+import json
+import unittest
+import torch
+
+from torchaudio.transforms import Resample
+
+from aip_trainer.lambdas.lambdaSpeechToScore import soundfile_load
+from aip_trainer.models import AIModels, models as mo
+from aip_trainer import sample_rate_start
+from aip_trainer.pronunciationTrainer import preprocessAudioStandalone
+from aip_trainer.utils import utilities
+from tests import EVENTS_FOLDER
+from tests.lambdas.test_lambdaSpeechToScore import set_seed
+
+
+device = torch.device('cpu')
+transform = Resample(orig_freq=sample_rate_start, new_freq=16000)
+
+
+def get_model(language):
+    model, decoder = mo.getASRModel(language)
+    model = model.to(device)
+    model.eval()
+    return AIModels.NeuralASR(model, decoder)
+
+signal_de, samplerate = soundfile_load(str(EVENTS_FOLDER / "test_de_easy.wav"))
+signal_en, samplerate = soundfile_load(str(EVENTS_FOLDER / "test_en_easy.wav"))
+signal_transformed_de = transform(torch.Tensor(signal_de)).unsqueeze(0)
+signal_transformed_de = preprocessAudioStandalone(signal_transformed_de)
+signal_transformed_en = transform(torch.Tensor(signal_en)).unsqueeze(0)
+signal_transformed_en = preprocessAudioStandalone(signal_transformed_en)
+
+
+class TestDeNeuralASR(unittest.TestCase):
+    def test_is_instance_of_NeuralASR_language_de(self):
+        asr_de = get_model("de")
+        self.assertIsInstance(asr_de, AIModels.NeuralASR)
+
+    def test_is_instance_of_NeuralASR_language_en(self):
+        asr_en = get_model("en")
+        self.assertIsInstance(asr_en, AIModels.NeuralASR)
+
+    def test_getTranscript_without_processing_audio_de(self):
+        with self.assertRaises(AssertionError):
+            try:
+                asr_de = get_model("de")
+                asr_de.getTranscript()
+            except AssertionError as ae:
+                assert "Can get audio transcripts without having processed the audio" in str(ae)
+                raise ae
+
+    def test_getTranscript_without_processing_audio_en(self):
+        with self.assertRaises(AssertionError):
+            try:
+                asr_en = get_model("en")
+                asr_en.getTranscript()
+            except AssertionError as ae:
+                assert "Can get audio transcripts without having processed the audio" in str(ae)
+                raise ae
+
+    def test_getWordLocations_without_processing_audio_de(self):
+        with self.assertRaises(AssertionError):
+            try:
+                asr_de = get_model("de")
+                asr_de.getWordLocations()
+            except AssertionError as ae:
+                assert "Can get word locations without having processed the audio" in str(ae)
+                raise ae
+
+    def test_getWordLocations_without_processing_audio_en(self):
+        with self.assertRaises(AssertionError):
+            try:
+                asr_en = get_model("en")
+                asr_en.getWordLocations()
+            except AssertionError as ae:
+                assert "Can get word locations without having processed the audio" in str(ae)
+                raise ae
+
+    def test_process_audio_de(self):
+        set_seed()
+        asr_de = get_model("de")
+        self.assertIsNone(asr_de.audio_transcript)
+        self.assertIsNone(asr_de.word_locations_in_samples)
+
+        asr_de.processAudio(signal_transformed_de)
+
+        self.assertEqual(asr_de.audio_transcript, 'hallo wie geht es dir')
+        self.assertEqual(
+            asr_de.word_locations_in_samples,
+            [
+                {"word": "hallo", "start_ts": 0.0, "end_ts": 6773.68},
+                {"word": "wie", "start_ts": 6773.68, "end_ts": 10468.42},
+                {"word": "geht", "start_ts": 10468.42, "end_ts": 13547.37},
+                {"word": "es", "start_ts": 13547.37, "end_ts": 16626.32},
+                {"word": "dir", "start_ts": 16626.32, "end_ts": 20321.05},
+            ],
+        )
+
+    def test_process_audio_en(self):
+        set_seed()
+        asr_en = get_model("en")
+        self.assertIsNone(asr_en.audio_transcript)
+        self.assertIsNone(asr_en.word_locations_in_samples)
+
+        asr_en.processAudio(signal_transformed_en)
+
+        self.assertEqual(asr_en.audio_transcript, 'i there how are you')
+        self.assertEqual(
+            asr_en.word_locations_in_samples,
+            [
+                {"word": "i", "start_ts": 0.0, "end_ts": 1800.0},
+                {"word": "there", "start_ts": 1800.0, "end_ts": 5400.0},
+                {"word": "how", "start_ts": 5400.0, "end_ts": 8400.0},
+                {"word": "are", "start_ts": 8400.0, "end_ts": 12000.0},
+                {"word": "you", "start_ts": 12000.0, "end_ts": 15000.0},
+            ],
+        )
+
+    def test_getTranscript_after_processing_audio_de(self):
+        set_seed()
+        asr_de = get_model("de")
+        asr_de.processAudio(signal_transformed_de)
+        transcript = asr_de.getTranscript()
+        self.assertEqual(transcript, 'hallo wie geht es dir')
+
+    def test_getTranscript_after_processing_audio_en(self):
+        set_seed()
+        asr_en = get_model("en")
+        asr_en.processAudio(signal_transformed_en)
+        transcript = asr_en.getTranscript()
+        self.assertEqual(transcript, 'i there how are you')
+
+    def test_getWordLocations_after_processing_audio_de(self):
+        set_seed()
+        asr_de = get_model("de")
+        asr_de.processAudio(signal_transformed_de)
+        word_locations = asr_de.getWordLocations()
+        self.assertEqual(
+            word_locations,
+            [
+                {"word": "hallo", "start_ts": 0.0, "end_ts": 6773.68},
+                {"word": "wie", "start_ts": 6773.68, "end_ts": 10468.42},
+                {"word": "geht", "start_ts": 10468.42, "end_ts": 13547.37},
+                {"word": "es", "start_ts": 13547.37, "end_ts": 16626.32},
+                {"word": "dir", "start_ts": 16626.32, "end_ts": 20321.05},
+            ],
+        )
+
+    def test_getWordLocations_after_processing_audio_en(self):
+        set_seed()
+        asr_en = get_model("en")
+        asr_en.processAudio(signal_transformed_en)
+        word_locations = asr_en.getWordLocations()
+        self.assertEqual(
+            word_locations,
+            [
+                {"word": "i", "start_ts": 0.0, "end_ts": 1800.0},
+                {"word": "there", "start_ts": 1800.0, "end_ts": 5400.0},
+                {"word": "how", "start_ts": 5400.0, "end_ts": 8400.0},
+                {"word": "are", "start_ts": 8400.0, "end_ts": 12000.0},
+                {"word": "you", "start_ts": 12000.0, "end_ts": 15000.0},
+            ],
+        )
+
+
+if __name__ == '__main__':
+    unittest.main()
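
Distilled from the tests above, the intended NeuralASR flow (all names and paths as used in the test module) is:

    import torch
    from torchaudio.transforms import Resample

    from aip_trainer import sample_rate_start
    from aip_trainer.lambdas.lambdaSpeechToScore import soundfile_load
    from aip_trainer.models import AIModels, models as mo
    from aip_trainer.pronunciationTrainer import preprocessAudioStandalone
    from tests import EVENTS_FOLDER

    # load the ASR model and wrap it, as get_model() does in the tests
    model, decoder = mo.getASRModel("de")
    asr = AIModels.NeuralASR(model.eval(), decoder)

    # resample to 16 kHz, normalize, then transcribe
    signal, _ = soundfile_load(str(EVENTS_FOLDER / "test_de_easy.wav"))
    audio = Resample(orig_freq=sample_rate_start, new_freq=16000)(torch.Tensor(signal)).unsqueeze(0)
    asr.processAudio(preprocessAudioStandalone(audio))
    print(asr.getTranscript())     # 'hallo wie geht es dir'
    print(asr.getWordLocations())  # per-word start/end positions in samples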
tests/models/test_models.py ADDED
@@ -0,0 +1,83 @@
+import tempfile
+import unittest
+import torch
+from pathlib import Path
+from silero.utils import Decoder
+from silero.silero import silero_tts
+import torch.package
+from aip_trainer import PROJECT_ROOT_FOLDER
+from aip_trainer.models import models as mo
+
+class TestModels(unittest.TestCase):
+
+    def setUp(self):
+        self.language_de = "de"
+        self.language_en = "en"
+        self.tmp_dir = torch.hub.get_dir()
+        self.device = torch.device("cpu")
+
+    def test_getASRModel_de(self):
+        model, decoder = mo.getASRModel(self.language_de)
+        self.assertIsInstance(model, torch.nn.Module)
+        self.assertIsInstance(decoder, Decoder)
+
+    def test_getASRModel_en(self):
+        model, decoder = mo.getASRModel(self.language_en)
+        self.assertIsInstance(model, torch.nn.Module)
+        self.assertIsInstance(decoder, Decoder)
+
+    def test_silero_stt_en(self):
+        model, decoder, utils = mo.silero_stt(language=self.language_en, output_folder=self.tmp_dir)
+        self.assertIsInstance(model, torch.jit.ScriptModule)
+        self.assertIsInstance(decoder, Decoder)
+        self.assertIsInstance(utils, tuple)
+
+    def test_silero_tts_en2(self):
+        model, example, speaker, sample_rate = mo.silero_tts(language=self.language_en, output_folder=self.tmp_dir)
+        assert model is not None
+        self.assertIsInstance(model, object)
+        self.assertIsInstance(example, str)
+        self.assertIsInstance(speaker, str)
+        self.assertIsInstance(sample_rate, int)
+        assert speaker == 'en_0'
+        assert sample_rate == 48000
+        assert example == 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
+
+    def test_init_jit_model_en(self):
+        name = "en_v5.jit"
+        model_url_en = f'https://models.silero.ai/models/en/{name}'
+        model_en1, decoder_en1 = mo.init_jit_model(model_url_en, device=self.device, output_folder=self.tmp_dir)
+        self.assertIsInstance(model_en1, torch.nn.Module)
+        self.assertIsInstance(decoder_en1, Decoder)
+
+        model_en2, decoder_en2 = mo.init_jit_model(model_url_en, device=self.device)
+        self.assertIsInstance(model_en2, torch.nn.Module)
+        self.assertIsInstance(decoder_en2, Decoder)
+        # model_en_filepath.unlink(missing_ok=False)
+
+        model_en3, decoder_en3 = mo.init_jit_model(model_url_en)
+        self.assertIsInstance(model_en3, torch.nn.Module)
+        self.assertIsInstance(decoder_en3, Decoder)
+        # model_en_filepath.unlink(missing_ok=False)
+
+    def test_get_models_de(self):
+        models_de = mo.get_models(self.language_de, self.tmp_dir, "latest", "stt_models")
+        self.assertIn(self.language_de, models_de.stt_models)
+
+    def test_get_models_en(self):
+        models_en = mo.get_models(self.language_en, self.tmp_dir, "latest", "stt_models")
+        self.assertIn(self.language_en, models_en.stt_models)
+
+    def test_get_latest_model_de(self):
+        model_de, decoder_de = mo.get_latest_model(self.language_de, self.tmp_dir, "latest", "stt_models", "jit")
+        self.assertIsInstance(model_de, torch.nn.Module)
+        self.assertIsInstance(decoder_de, Decoder)
+
+    def test_get_latest_model_en(self):
+        model_en, decoder_en = mo.get_latest_model(self.language_en, self.tmp_dir, "latest", "stt_models", "jit")
+        self.assertIsInstance(model_en, torch.nn.Module)
+        self.assertIsInstance(decoder_en, Decoder)
+
+
+if __name__ == '__main__':
+    unittest.main()
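
test_silero_tts_en2 pins the return shape of the refactored silero_tts; distilled, the call (output folder as in the test's setUp) is:

    import torch
    from aip_trainer.models import models as mo

    model, example_text, speaker, sample_rate = mo.silero_tts(
        language="en", output_folder=torch.hub.get_dir()
    )
    # per the test: speaker == 'en_0', sample_rate == 48000, and example_text is
    # the canned-can tongue twister shipped with the model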
tests/test_pronunciationtrainer.py CHANGED
@@ -179,10 +179,20 @@ class TestScore(unittest.TestCase):
         output_hash = utilities.hash_calculate(signal_de, is_file=False)
         assert output_hash == b'D9pMFzYL1BSPPg89ZCQE61xzb7QICXolYtC9EJRpvS0='
         signal_transformed = transform(torch.Tensor(signal_de)).unsqueeze(0)
-        processed_audio = trainer_SST_lambda_de.preprocessAudio(signal_transformed)
-        self.assertIsInstance(processed_audio, torch.Tensor)
-        self.assertEqual(processed_audio.shape, (1, 23400))
-        output_hash = utilities.hash_calculate(processed_audio.numpy(), is_file=False)
+        preprocessed_audio = trainer_SST_lambda_de.preprocessAudio(signal_transformed)
+        self.assertIsInstance(preprocessed_audio, torch.Tensor)
+        self.assertEqual(preprocessed_audio.shape, (1, 23400))
+        output_hash = utilities.hash_calculate(preprocessed_audio.numpy(), is_file=False)
+        assert output_hash == b'Ri/1rmgYmRSWaAw/Y3PoLEu1woiczhSUdUCbaMf++EM='
+
+    def test_preprocessAudioStandalone_de(self):
+        output_hash = utilities.hash_calculate(signal_de, is_file=False)
+        assert output_hash == b'D9pMFzYL1BSPPg89ZCQE61xzb7QICXolYtC9EJRpvS0='
+        signal_transformed = transform(torch.Tensor(signal_de)).unsqueeze(0)
+        preprocessed_audio = pronunciationTrainer.preprocessAudioStandalone(signal_transformed)
+        self.assertIsInstance(preprocessed_audio, torch.Tensor)
+        self.assertEqual(preprocessed_audio.shape, (1, 23400))
+        output_hash = utilities.hash_calculate(preprocessed_audio.numpy(), is_file=False)
         assert output_hash == b'Ri/1rmgYmRSWaAw/Y3PoLEu1woiczhSUdUCbaMf++EM='
 
     def test_processAudioForGivenText_getTranscriptAndWordsLocations_en(self):
@@ -257,10 +267,20 @@ class TestScore(unittest.TestCase):
         output_hash = utilities.hash_calculate(signal_en, is_file=False)
         assert output_hash == b'zBAV/y7mecyPHLGiitHRP9vK7oU9hnYvyuatU0PQfts='
         signal_transformed = transform(torch.Tensor(signal_en)).unsqueeze(0)
-        processed_audio = trainer_SST_lambda_en.preprocessAudio(signal_transformed)
-        self.assertIsInstance(processed_audio, torch.Tensor)
-        self.assertEqual(processed_audio.shape, (1, 16800))
-        output_hash = utilities.hash_calculate(processed_audio.numpy(), is_file=False)
+        preprocessed_audio = trainer_SST_lambda_en.preprocessAudio(signal_transformed)
+        self.assertIsInstance(preprocessed_audio, torch.Tensor)
+        self.assertEqual(preprocessed_audio.shape, (1, 16800))
+        output_hash = utilities.hash_calculate(preprocessed_audio.numpy(), is_file=False)
+        assert output_hash == b'KsyH1MXIc+5e5B6CcijhitsGPUDRJjrJU2qg8bQi600='
+
+    def test_preprocessAudioStandalone_en(self):
+        output_hash = utilities.hash_calculate(signal_en, is_file=False)
+        assert output_hash == b'zBAV/y7mecyPHLGiitHRP9vK7oU9hnYvyuatU0PQfts='
+        signal_transformed = transform(torch.Tensor(signal_en)).unsqueeze(0)
+        preprocessed_audio = pronunciationTrainer.preprocessAudioStandalone(signal_transformed)
+        self.assertIsInstance(preprocessed_audio, torch.Tensor)
+        self.assertEqual(preprocessed_audio.shape, (1, 16800))
+        output_hash = utilities.hash_calculate(preprocessed_audio.numpy(), is_file=False)
         assert output_hash == b'KsyH1MXIc+5e5B6CcijhitsGPUDRJjrJU2qg8bQi600='
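
The paired tests assert the same output hashes for the method and the standalone function, pinning the refactor as behavior-preserving. The equivalence can also be checked directly (a sketch; getTrainer fetches its models on first use, and the input tensor here is hypothetical):

    import torch

    from aip_trainer import pronunciationTrainer

    trainer = pronunciationTrainer.getTrainer("de")
    audio = torch.rand(1, 16000)
    # preprocessAudio now simply delegates to the module-level function
    assert torch.equal(
        trainer.preprocessAudio(audio),
        pronunciationTrainer.preprocessAudioStandalone(audio),
    )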
tests/utils/test_split_cosmic_ray_report.py CHANGED
@@ -6,11 +6,11 @@ from tests import EVENTS_FOLDER
 
 
 class TestSplitCosmicRayReport(unittest.TestCase):
-    def test_get_cosmic_ray_report_filtered(self):
+    def test_get_cosmic_ray_report_filtered_only_string_killed(self):
         input_filename = EVENTS_FOLDER / "cosmic-ray-lambdagetsample.txt"
         output_filename = EVENTS_FOLDER / f"{input_filename.stem}_filtered{input_filename.suffix}"
         self.assertFalse(output_filename.exists())
-        get_cosmic_ray_report_filtered(input_filename, separator="============", filter_string="test outcome: TestOutcome.KILLED")
+        get_cosmic_ray_report_filtered(input_filename, separator="============", filter_string_list=["test outcome: TestOutcome.KILLED", ])
 
         # Check if the filtered file is created
         self.assertTrue(output_filename.exists() and output_filename.is_file())
@@ -22,5 +22,21 @@ class TestSplitCosmicRayReport(unittest.TestCase):
         output_filename.unlink(missing_ok=False)
 
 
+    def test_get_cosmic_ray_report_filtered_list_strings(self):
+        input_filename = EVENTS_FOLDER / "cosmic-ray-pronunciationtrainer.txt"
+        output_filename = EVENTS_FOLDER / f"{input_filename.stem}_filtered{input_filename.suffix}"
+        self.assertFalse(output_filename.exists())
+        get_cosmic_ray_report_filtered(input_filename, separator="============", filter_string_list=["test outcome: TestOutcome.KILLED", "- duration = time.time()"])
+
+        # Check if the filtered file is created
+        self.assertTrue(output_filename.exists() and output_filename.is_file())
+
+        # Verify the filtered content
+        hash_output = utilities.hash_calculate(output_filename, True)
+        self.assertEqual(hash_output, b'Fk0KDWCbc8mPoZllQ7HfgMjuWQVvUdNl+eR56eJeSxg=')
+
+        output_filename.unlink(missing_ok=False)
+
+
 if __name__ == "__main__":
     unittest.main()