"""Tests for sample selection, phoneme conversion and pronunciation scoring."""
import json
import os
import unittest

import epitran
import structlog

from aip_trainer.models import RuleBasedModels
from aip_trainer import pronunciationTrainer, LOG_JSON_FORMAT
from aip_trainer.lambdas import lambdaGetSample
from aip_trainer.utils import session_logger


# Logging is configured once at import time; the level comes from the
# LOG_LEVEL environment variable and falls back to "INFO".
log_level = os.getenv("LOG_LEVEL", "INFO")
session_logger.setup_logging(json_logs=LOG_JSON_FORMAT, log_level=log_level)
test_logger = structlog.stdlib.get_logger(__name__)


def test_category(category: int, threshold_min: int, threshold_max: int, n: int = 1000) -> None:
    """Check that sampled sentences of a category have a word count in range.

    Calls ``lambdaGetSample.lambda_handler`` ``n`` times with the language
    fixed to ``'de'`` and verifies, for every response, that the number of
    whitespace-separated words in the first ``real_transcript`` entry lies in
    the half-open interval ``(threshold_min, threshold_max]``.

    Args:
        category: Category value sent in the request body.
        threshold_min: Exclusive lower bound on the word count.
        threshold_max: Inclusive upper bound on the word count.
        n: Number of samples to request.

    Raises:
        AssertionError: If any sampled sentence falls outside the range. The
            exception carries the same message that is written to the log.
    """
    for _ in range(n):
        event = {'body': json.dumps({'category': category, 'language': 'de'})}
        response = lambdaGetSample.lambda_handler(event, [])
        response_dict = json.loads(response)
        number_of_words = len(response_dict['real_transcript'][0].split())

        # Explicit comparison instead of ``assert`` so the check still runs
        # under ``python -O``, and the raised error keeps the failure detail
        # instead of surfacing as an empty AssertionError.
        if not threshold_min < number_of_words <= threshold_max:
            message = f"Category: {category} had a sentence with length {number_of_words}."
            test_logger.error(message)
            raise AssertionError(message)


# The helper keeps its historical ``test_`` name for existing callers, but it
# takes required arguments and is not a test case itself. Flag it so that
# collectors honouring the ``__test__`` attribute do not try to run it.
test_category.__test__ = False


class TestDataset(unittest.TestCase):
    """Word-count bounds for each sample category."""

    def test_random_sentences(self):
        """Category 0: more than 0 and at most 40 words."""
        test_category(0, 0, 40)

    def test_easy_sentences(self):
        """Category 1: more than 0 and at most 8 words."""
        test_category(1, 0, 8)

    def test_normal_sentences(self):
        """Category 2: more than 8 and at most 20 words."""
        test_category(2, 8, 20)

    def test_hard_sentences(self):
        """Category 3: more than 20 and at most 10000 words."""
        test_category(3, 20, 10000)


class TestPhonemConverter(unittest.TestCase):
    """Expected phoneme strings for fixed English and German inputs."""

    def test_english(self):
        """``EngPhonemConverter`` output for a fixed English sentence."""
        phonem_converter = RuleBasedModels.EngPhonemConverter()
        output = phonem_converter.convertToPhonem('Hello, this is a test')
        self.assertEqual(output, 'hɛˈloʊ, ðɪs ɪz ə tɛst')

    def test_german_ok(self):
        """``EpitranPhonemConverter`` with a 'deu-Latn' Epitran instance."""
        deu_latn = epitran.Epitran('deu-Latn')
        phonem_converter = RuleBasedModels.EpitranPhonemConverter(deu_latn)
        output = phonem_converter.convertToPhonem('Hallo, das ist ein Test')
        self.assertEqual(output, 'haloː, dɑːs ɪst ain tɛst')


# Built once at import time and shared by every TestScore test.
trainer_SST_lambda = {'de': pronunciationTrainer.getTrainer("de")}


class TestScore(unittest.TestCase):
    """Pronunciation accuracy for matching and mismatching transcriptions."""

    def test_exact_transcription(self):
        """Identical real and recorded text yields an accuracy of 100."""
        trainer = trainer_SST_lambda['de']
        words_real = 'Ich habe sehr viel glück, am leben und gesund zu sein'

        real_and_transcribed_words, _, _ = trainer.matchSampleAndRecordedWords(
            words_real, words_real)
        pronunciation_accuracy, _ = trainer.getPronunciationAccuracy(
            real_and_transcribed_words)

        self.assertEqual(int(pronunciation_accuracy), 100)

    def test_incorrect_transcription(self):
        """A fixed faulty transcription yields an accuracy that truncates to 71."""
        trainer = trainer_SST_lambda['de']
        words_real = 'Ich habe sehr viel glück, am leben und gesund zu sein'
        words_transcribed = 'Ic hab zeh viel guck am und gesund tu sein'

        real_and_transcribed_words, _, _ = trainer.matchSampleAndRecordedWords(
            words_real, words_transcribed)
        pronunciation_accuracy, _ = trainer.getPronunciationAccuracy(
            real_and_transcribed_words)

        self.assertEqual(int(pronunciation_accuracy), 71)


if __name__ == '__main__':
    unittest.main()