Spaces:
Running
Running
File size: 3,274 Bytes
74a35d9 28d0c5f 74a35d9 28d0c5f e8a1983 28d0c5f 74a35d9 28d0c5f 74a35d9 28d0c5f e8a1983 28d0c5f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import json
import unittest
import epitran
from aip_trainer.models import ModelInterfaces, RuleBasedModels
from aip_trainer import pronunciationTrainer
from aip_trainer.lambdas import lambdaGetSample
def test_category(category: int, threshold_min: int, threshold_max: int) -> bool:
    """Sample sentences of one category and validate their word counts.

    Calls ``lambdaGetSample.lambda_handler`` 1000 times with a request body
    asking for a German ('de') sample of ``category``. For each JSON
    response, the first entry of ``real_transcript`` is split on whitespace
    and its word count must lie in ``(threshold_min, threshold_max]``.

    Args:
        category: Category value placed in the request body.
        threshold_min: Exclusive lower bound on the word count.
        threshold_max: Inclusive upper bound on the word count.

    Returns:
        True if every sampled sentence was within range. False as soon as
        one sentence falls outside it; the offending length is printed.
    """
    event = {'body': json.dumps({'category': category, 'language': 'de'})}
    for _ in range(1000):
        response = lambdaGetSample.lambda_handler(event, [])
        response_dict = json.loads(response)
        number_of_words = len(response_dict['real_transcript'][0].split())
        if not threshold_min < number_of_words <= threshold_max:
            print('Category ', category,
                  ' had a sentence with length ', number_of_words)
            return False
    return True


# This is a helper with required arguments, not a test case. Its name matches
# pytest's default ``test*`` pattern, so without this marker pytest would
# collect it and fail with "fixture 'category' not found".
test_category.__test__ = False
class TestDataset(unittest.TestCase):
    """Word-count checks on the sentences sampled for each category."""

    def test_random_sentences(self):
        # Category 0 must NOT stay inside the (0, 8] word range: at least
        # one sampled sentence is expected to fall outside it.
        within_range = test_category(
            category=0, threshold_min=0, threshold_max=8)
        self.assertFalse(within_range)

    def test_easy_sentences(self):
        # Category 1: every sentence has more than 0 and at most 8 words.
        within_range = test_category(
            category=1, threshold_min=0, threshold_max=8)
        self.assertTrue(within_range)

    def test_normal_sentences(self):
        # Category 2: every sentence has more than 8 and at most 20 words.
        within_range = test_category(
            category=2, threshold_min=8, threshold_max=20)
        self.assertTrue(within_range)

    def test_hard_sentences(self):
        # Category 3: every sentence has more than 20 words (upper bound
        # 10000 is effectively "no limit").
        within_range = test_category(
            category=3, threshold_min=20, threshold_max=10000)
        self.assertTrue(within_range)
def check_phonem_converter(converter: ModelInterfaces.ITextToPhonemModel, input_phonem: str, expected_output: str):
    """Run *converter* on *input_phonem* and compare with *expected_output*.

    Args:
        converter: Object exposing ``convertToPhonem(text)``.
        input_phonem: Text handed to the converter.
        expected_output: Value the conversion result is compared against.

    Returns:
        The result of ``output == expected_output``. On a mismatch the
        input, expected and actual values are printed first.
    """
    actual = converter.convertToPhonem(input_phonem)
    matches = actual == expected_output
    if matches:
        return matches

    # Mismatch: report what was expected versus what came back.
    print('Conversion from "', input_phonem, '" should be "',
          expected_output, '", but was "', actual, '"')
    return matches
class TestPhonemConverter(unittest.TestCase):
    """Checks text-to-phoneme conversion for one English and one German sentence."""

    def test_english(self):
        # Uses RuleBasedModels.EngPhonemConverter.
        converter = RuleBasedModels.EngPhonemConverter()
        text = 'Hello, this is a test'
        expected = 'hɛˈloʊ, ðɪs ɪz ə tɛst'
        self.assertTrue(check_phonem_converter(converter, text, expected))

    def test_german(self):
        # Uses EpitranPhonemConverter wrapping an Epitran('deu-Latn') instance.
        epitran_model = epitran.Epitran('deu-Latn')
        converter = RuleBasedModels.EpitranPhonemConverter(epitran_model)
        text = 'Hallo, das ist ein Test'
        expected = 'haloː, dɑːs ɪst ain tɛst'
        self.assertTrue(check_phonem_converter(converter, text, expected))
# Trainers keyed by language code. Built once at import time so the score
# tests below share a single German trainer instance.
trainer_SST_lambda = {
    'de': pronunciationTrainer.getTrainer("de"),
}
class TestScore(unittest.TestCase):
    """Checks the pronunciation accuracy the German trainer reports."""

    # Reference sentence both tests score a transcription against.
    REFERENCE_SENTENCE = 'Ich habe sehr viel glück, am leben und gesund zu sein'

    def _accuracy(self, words_transcribed: str) -> int:
        """Score *words_transcribed* against the reference sentence.

        Matches the reference and transcribed words with the German trainer,
        computes the pronunciation accuracy of the matched pairs, and
        returns it converted with ``int()`` (truncating any fraction).
        """
        trainer = trainer_SST_lambda['de']
        real_and_transcribed_words, _, _ = trainer.matchSampleAndRecordedWords(
            self.REFERENCE_SENTENCE, words_transcribed)
        pronunciation_accuracy, _ = trainer.getPronunciationAccuracy(
            real_and_transcribed_words)
        return int(pronunciation_accuracy)

    def test_exact_transcription(self):
        # A transcription identical to the reference must score 100.
        # assertEqual (rather than assertTrue(a == b)) reports the actual
        # score when the check fails.
        self.assertEqual(self._accuracy(self.REFERENCE_SENTENCE), 100)

    def test_incorrect_transcription(self):
        # A transcription with misspelled and missing words must score 71.
        words_transcribed = 'Ic hab zeh viel guck am und gesund tu sein'
        self.assertEqual(self._accuracy(words_transcribed), 71)
# Run every TestCase in this module when executed as a script.
if __name__ == "__main__":
    unittest.main()
|