alessandro trinca tornidor committed on
Commit
8838421
1 Parent(s): d51ffe7

test: refactor and update test cases for dataset and get_random_selection() function

Browse files
tests/test_dataset.py DELETED
@@ -1,49 +0,0 @@
1
- import json
2
- import unittest
3
-
4
- from aip_trainer.lambdas import lambdaGetSample
5
- from tests import test_logger, TEST_ROOT_FOLDER
6
-
7
-
8
def helper_category(category: int, threshold_min: int, threshold_max: int, n: int = 1000):
    """Draw ``n`` German samples for ``category`` and check each sentence length.

    Each sample is requested through ``lambdaGetSample.lambda_handler``; the
    number of whitespace-separated words in its ``real_transcript`` must satisfy
    ``threshold_min < words <= threshold_max``.

    Args:
        category: category value sent in the request body.
        threshold_min: exclusive lower bound on the word count.
        threshold_max: inclusive upper bound on the word count.
        n: number of samples to draw.

    Raises:
        AssertionError: on the first sample whose word count is out of range;
            the message names the category and the offending length.
    """
    request_body = json.dumps({'category': category, 'language': 'de'})
    for _ in range(n):
        # A fresh event dict per call, in case the handler mutates its input.
        response = lambdaGetSample.lambda_handler({'body': request_body}, [])
        response_dict = json.loads(response)
        number_of_words = len(response_dict['real_transcript'].split())
        # Explicit check instead of a bare ``assert`` so it still runs under -O.
        if not threshold_min < number_of_words <= threshold_max:
            message = f"Category: {category} had a sentence with length {number_of_words}."
            test_logger.error(message)
            # Carry the detail in the exception too, not only in the log.
            raise AssertionError(message)
20
-
21
-
22
class TestDataset(unittest.TestCase):
    """Sentence-length checks per category plus a pickle-to-JSON export check."""

    def test_random_sentences(self):
        """Category 0: more than 0 and at most 40 words."""
        helper_category(0, 0, 40)

    def test_easy_sentences(self):
        """Category 1: more than 0 and at most 8 words."""
        helper_category(1, 0, 8)

    def test_normal_sentences(self):
        """Category 2: more than 8 and at most 20 words."""
        helper_category(2, 8, 20)

    def test_hard_sentences(self):
        """Category 3: more than 20 and at most 10000 words."""
        helper_category(3, 20, 10000)

    def test_get_pickle2json_dataframe(self):
        """Compare the JSON produced by ``get_pickle2json_dataframe`` with the expected file.

        The generated file is removed afterwards even when the comparison
        fails, so a failing run does not leave an artifact in the test folder.
        """
        import os

        custom_filename = 'test_data_de_en_2'
        generated_path = TEST_ROOT_FOLDER / f'{custom_filename}.json'
        expected_path = TEST_ROOT_FOLDER / f'{custom_filename}_expected.json'
        lambdaGetSample.get_pickle2json_dataframe(custom_filename, TEST_ROOT_FOLDER)
        try:
            with open(generated_path, 'r') as src1, open(expected_path, 'r') as src2:
                json1 = json.load(src1)
                json2 = json.load(src2)
            # assertEqual reports a readable diff; a bare assert would not.
            self.assertEqual(json1, json2)
        finally:
            # Guarded so a missing output file surfaces as the original error,
            # not as a second failure raised from the cleanup.
            if os.path.exists(generated_path):
                os.remove(generated_path)
46
-
47
-
48
# Run the tests in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tests/test_lambdaGetSample.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import unittest
3
+
4
+ from aip_trainer.lambdas import lambdaGetSample
5
+ from tests import TEST_ROOT_FOLDER
6
+ from numpy.random import seed
7
+
8
# Pinned outputs of get_random_selection(), consumed in order by
# test_get_random_selection after seed(0): for each language ("de", then "en")
# and each category 0..3, first the is_gradio_output=True result (a plain
# string here), then the is_gradio_output=False result (a one-element list).
expected_output__get_random_selection = [
    # "de", category 0
    "Er ist ein begeisterter Theaterliebhaber.",
    ["Du hast schon den ganzen Morgen gute Laune."],
    # "de", category 1
    "Tom hat mir gerade das Leben gerettet.",
    ["Kannst du mich jetzt hören?"],
    # "de", category 2
    "Falls es eine bessere Lösung geben sollte, werde ich Sie umgehend informieren.",
    ["Die Kette, die du mir vor zehn Jahren geschenkt hast, liegt mir bis heute sehr am Herzen."],
    # "de", category 3
    "Es lässt sich nicht in Worte fassen, wie glücklich ich gerade bin!“ – So ist es aber ganz gut in Worte gefasst.“",
    ["Als nur ein Beispiel dafür, wie Automatisierung der Menschheit zugute kommen kann, nehme man eine Roboterhand, mit deren Hilfe man unreine oder gefährliche Arbeiten verrichten kann."],
    # "en", category 0
    "Since I was really tired I went to sleep early.",
    ["She accused me of being a liar."],
    # "en", category 1
    "They lived in the land of Cockaigne.",
    ["This is a complicated question to answer."],
    # "en", category 2
    "We are still hoping that Tom will get better.",
    ["Tom said something, but I couldn't hear what he said."],
    # "en", category 3
    "People who starve themselves and mask themselves so that they can conform to what society deems beautiful are only hurting themselves.",
    ["The queen has the ability to move as much as if she were a rook, that is, over the rows and columns, as if she were a bishop, that is, over the diagonals."],
]
26
+
27
+
28
def helper_category(cls, category: int, language: str, expected_output: dict):
    """Request one sample through the lambda handler and compare it to ``expected_output``.

    numpy's global random generator is reseeded with 0 before the request.

    Args:
        cls: the running ``unittest.TestCase`` instance (callers pass ``self``);
            only its ``assertDictEqual`` method is used.
        category: category value sent in the request body.
        language: language code sent in the request body.
        expected_output: dict the decoded response must equal.

    Raises:
        AssertionError: when the decoded response differs; the inputs and both
            dicts are printed before the error is re-raised.
    """
    seed(0)
    payload = json.dumps({"category": category, "language": language})
    raw_response = lambdaGetSample.lambda_handler({"body": payload}, [])
    actual = json.loads(raw_response)
    try:
        cls.assertDictEqual(actual, expected_output)
    except AssertionError:
        # Print the full context before letting the failure propagate.
        diagnostics = (
            f"category: {category}, language: {language}.",
            f"response_dict: {actual}",
            f"expected_output: {expected_output}",
        )
        for line in diagnostics:
            print(line)
        raise
40
+
41
+
42
class TestLambdaGetSample(unittest.TestCase):
    """Checks ``lambdaGetSample.get_random_selection`` against pinned outputs."""

    def test_get_random_selection(self):
        """Compare every language/category/output-mode combination with its pinned value.

        The generator is seeded once and the calls are made in a fixed order
        (language, then category, then ``is_gradio_output``), so each call is
        matched with the fixture entry at the same position.
        """
        # Same nesting order as the fixture: language -> category -> output mode.
        combinations = [
            (lang, cat, is_gradio_output)
            for lang in ["de", "en"]
            for cat in range(4)
            for is_gradio_output in [True, False]
        ]
        # Catch a fixture that has drifted from the combinations up front,
        # rather than as an IndexError (or silently ignored entries) later.
        self.assertEqual(len(combinations), len(expected_output__get_random_selection))

        seed(0)
        for (lang, cat, is_gradio_output), expected in zip(
            combinations, expected_output__get_random_selection
        ):
            # subTest labels a failure with the combination that produced it.
            with self.subTest(lang=lang, cat=cat, is_gradio_output=is_gradio_output):
                output = lambdaGetSample.get_random_selection(
                    lang, cat, is_gradio_output=is_gradio_output
                )
                self.assertEqual(output, expected)
54
+
55
+
56
class TestDataset(unittest.TestCase):
    """Pinned lambda-handler responses per language/category plus a pickle-to-JSON export check."""

    def test_random_sentences_de(self):
        """German, category 0."""
        expected = {'real_transcript': 'Er ist ein begeisterter Theaterliebhaber.', 'ipa_transcript': 'ɛɐ̯ ɪst aɪ̯n bɛːɡaɪ̯stɛːrtɛːr tɛːaːtɛːrliːbhaːbɛːr.', 'transcript_translation': ''}
        helper_category(self, 0, "de", expected_output=expected)

    def test_easy_sentences_easy_de(self):
        """German, category 1."""
        expected = {'real_transcript': 'Sie will niemanden heiraten.', 'ipa_transcript': 'ziː vɪl niːmandɛːn haɪ̯raːtɛːn.', 'transcript_translation': ''}
        helper_category(self, 1, "de", expected_output=expected)

    def test_normal_sentences_medium_de(self):
        """German, category 2."""
        expected = {'real_transcript': 'Leg das Buch dorthin, wo du es gefunden hast.', 'ipa_transcript': 'lɛːɡ daːs bʊx doːrtiːn, voː duː ɛːs ɡɛːfʊndɛːn hast.', 'transcript_translation': ''}
        helper_category(self, 2, "de", expected_output=expected)

    def test_hard_sentences_hard_de(self):
        """German, category 3."""
        expected = {'real_transcript': 'Eine Frau braucht neun Monate, um ein Kind zur Welt zu bringen, aber das heißt nicht, dass es neun zusammen in einem Monat schaffen könnten.', 'ipa_transcript': 'aɪ̯nɛː fraʊ̯ braʊ̯xt nɔɪ̯n moːnaːtɛː, uːm aɪ̯n kɪnd t͡suːr vɛlt t͡suː brɪŋɛːn, aːbɛːr daːs haɪ̯st nɪçt, das ɛːs nɔɪ̯n t͡suːzamɛːn iːn aɪ̯nɛːm moːnaːt ʃafɛːn kœntɛːn.', 'transcript_translation': ''}
        helper_category(self, 3, "de", expected_output=expected)

    def test_random_sentences_en(self):
        """English, category 0."""
        expected = {'real_transcript': 'He is a passionate theatregoer.', 'ipa_transcript': 'hi ɪz ə ˈpæʃənət theatregoer.', 'transcript_translation': ''}
        helper_category(self, 0, "en", expected_output=expected)

    def test_easy_sentences_easy_en(self):
        """English, category 1."""
        expected = {'real_transcript': 'Lemons are usually sour.', 'ipa_transcript': 'ˈlɛmənz ər ˈjuʒəwəli saʊər.', 'transcript_translation': ''}
        helper_category(self, 1, "en", expected_output=expected)

    def test_normal_sentences_medium_en(self):
        """English, category 2."""
        expected = {'real_transcript': 'Tom read the Bible in its entirety, from the beginning to the end.', 'ipa_transcript': 'tɑm rɛd ðə ˈbaɪbəl ɪn ɪts ɪnˈtaɪərti, frəm ðə bɪˈgɪnɪŋ tɪ ðə ɛnd.', 'transcript_translation': ''}
        helper_category(self, 2, "en", expected_output=expected)

    def test_hard_sentences_hard_en(self):
        """English, category 3."""
        expected = {'real_transcript': 'That was the first time, in the history of chess, that a machine (Deep Blue) defeated a Grand Master (Garry Kasparov).', 'ipa_transcript': 'ðət wɑz ðə fərst taɪm, ɪn ðə ˈhɪstəri əv ʧɛs, ðət ə məˈʃin (dip blu) dɪˈfitɪd ə grænd ˈmæstər (ˈgɛri ˈkæspərɑv).', 'transcript_translation': ''}
        helper_category(self, 3, "en", expected_output=expected)

    def test_get_pickle2json_dataframe(self):
        """Compare the JSON produced by ``get_pickle2json_dataframe`` with the expected file.

        The generated file is removed afterwards even when the comparison
        fails, so a failing run does not leave an artifact in the test folder.
        """
        import os

        custom_filename = 'test_data_de_en_2'
        generated_path = TEST_ROOT_FOLDER / f'{custom_filename}.json'
        expected_path = TEST_ROOT_FOLDER / f'{custom_filename}_expected.json'
        lambdaGetSample.get_pickle2json_dataframe(custom_filename, TEST_ROOT_FOLDER)
        try:
            with open(generated_path, 'r') as src1, open(expected_path, 'r') as src2:
                json1 = json.load(src1)
                json2 = json.load(src2)
            # assertEqual reports a readable diff; a bare assert would not.
            self.assertEqual(json1, json2)
        finally:
            # Guarded so a missing output file surfaces as the original error,
            # not as a second failure raised from the cleanup.
            if os.path.exists(generated_path):
                os.remove(generated_path)
100
+
101
+
102
# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()