Spaces:
Running
Running
alessandro trinca tornidor
committed on
Commit
•
8838421
1
Parent(s):
d51ffe7
test: refactor and update test cases for dataset and get_random_selection() function
Browse files- tests/test_dataset.py +0 -49
- tests/test_lambdaGetSample.py +103 -0
tests/test_dataset.py
DELETED
@@ -1,49 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
import unittest
|
3 |
-
|
4 |
-
from aip_trainer.lambdas import lambdaGetSample
|
5 |
-
from tests import test_logger, TEST_ROOT_FOLDER
|
6 |
-
|
7 |
-
|
8 |
-
def helper_category(category: int, threshold_min: int, threshold_max: int, n: int = 1000):
    """Check that sampled German sentences have a word count within the given bounds.

    Calls ``lambdaGetSample.lambda_handler`` ``n`` times for ``category`` with
    language ``'de'`` and counts the whitespace-separated words of the returned
    ``'real_transcript'``.

    Args:
        category: category value sent in the request body.
        threshold_min: exclusive lower bound on the number of words.
        threshold_max: inclusive upper bound on the number of words.
        n: how many samples to request.

    Raises:
        AssertionError: if a sampled sentence falls outside the bounds.
    """
    for _ in range(n):
        event = {'body': json.dumps({'category': category, 'language': 'de'})}
        response = lambdaGetSample.lambda_handler(event, [])
        response_dict = json.loads(response)
        number_of_words = len(response_dict['real_transcript'].split())
        # Explicit check instead of ``assert`` so it still runs under ``python -O``.
        if not threshold_min < number_of_words <= threshold_max:
            message = f"Category: {category} had a sentence with length {number_of_words}."
            test_logger.error(message)
            # Carry the diagnostic in the exception so the test report shows it too.
            raise AssertionError(message)
|
20 |
-
|
21 |
-
|
22 |
-
class TestDataset(unittest.TestCase):
    """Word-count checks per sentence category plus a pickle-to-JSON export check."""

    def test_random_sentences(self):
        """Category 0: more than 0 and at most 40 words."""
        helper_category(0, 0, 40)

    def test_easy_sentences(self):
        """Category 1: more than 0 and at most 8 words."""
        helper_category(1, 0, 8)

    def test_normal_sentences(self):
        """Category 2: more than 8 and at most 20 words."""
        helper_category(2, 8, 20)

    def test_hard_sentences(self):
        """Category 3: more than 20 and at most 10000 words."""
        helper_category(3, 20, 10000)

    def test_get_pickle2json_dataframe(self):
        """Compare the JSON written by get_pickle2json_dataframe() with the expected file."""
        import os

        custom_filename = 'test_data_de_en_2'
        output_path = TEST_ROOT_FOLDER / f'{custom_filename}.json'
        expected_path = TEST_ROOT_FOLDER / f'{custom_filename}_expected.json'
        try:
            lambdaGetSample.get_pickle2json_dataframe(custom_filename, TEST_ROOT_FOLDER)
            with open(output_path, 'r') as src1:
                with open(expected_path, 'r') as src2:
                    json1 = json.load(src1)
                    json2 = json.load(src2)
            self.assertEqual(json1, json2)
        finally:
            # Always drop the generated file, even when the comparison fails,
            # so a failed run does not leave artifacts in the test folder.
            if os.path.exists(output_path):
                os.remove(output_path)
|
46 |
-
|
47 |
-
|
48 |
-
# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_lambdaGetSample.py
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import unittest
|
3 |
+
|
4 |
+
from aip_trainer.lambdas import lambdaGetSample
|
5 |
+
from tests import TEST_ROOT_FOLDER
|
6 |
+
from numpy.random import seed
|
7 |
+
|
8 |
+
# Expected results for TestLambdaGetSample.test_get_random_selection, listed in
# the order the test consumes them after seed(0): for each language ("de", then
# "en"), for each category 0..3, first the is_gradio_output=True result (a plain
# string) and then the is_gradio_output=False result (a one-element list).
expected_output__get_random_selection = [
    "Er ist ein begeisterter Theaterliebhaber.",
    ["Du hast schon den ganzen Morgen gute Laune."],
    "Tom hat mir gerade das Leben gerettet.",
    ["Kannst du mich jetzt hören?"],
    "Falls es eine bessere Lösung geben sollte, werde ich Sie umgehend informieren.",
    ["Die Kette, die du mir vor zehn Jahren geschenkt hast, liegt mir bis heute sehr am Herzen."],
    "Es lässt sich nicht in Worte fassen, wie glücklich ich gerade bin!“ – So ist es aber ganz gut in Worte gefasst.“",
    ["Als nur ein Beispiel dafür, wie Automatisierung der Menschheit zugute kommen kann, nehme man eine Roboterhand, mit deren Hilfe man unreine oder gefährliche Arbeiten verrichten kann."],
    "Since I was really tired I went to sleep early.",
    ["She accused me of being a liar."],
    "They lived in the land of Cockaigne.",
    ["This is a complicated question to answer."],
    "We are still hoping that Tom will get better.",
    ["Tom said something, but I couldn't hear what he said."],
    "People who starve themselves and mask themselves so that they can conform to what society deems beautiful are only hurting themselves.",
    ["The queen has the ability to move as much as if she were a rook, that is, over the rows and columns, as if she were a bishop, that is, over the diagonals."],
]
|
26 |
+
|
27 |
+
|
28 |
+
def helper_category(cls, category: int, language: str, expected_output: dict):
    """Invoke the sample lambda once and compare its decoded response to ``expected_output``.

    Args:
        cls: object providing ``assertDictEqual`` (the callers in this module
            pass their ``unittest.TestCase`` instance).
        category: category value sent in the request body.
        language: language code sent in the request body.
        expected_output: dictionary the decoded response must equal.

    Raises:
        AssertionError: re-raised after printing the inputs and both
            dictionaries when the comparison fails.
    """
    # Reset numpy's global RNG before the call; presumably this is what makes
    # the sample picked by the handler reproducible -- confirm against the lambda.
    seed(0)
    request_body = json.dumps({"category": category, "language": language})
    raw_response = lambdaGetSample.lambda_handler({"body": request_body}, [])
    actual = json.loads(raw_response)
    try:
        cls.assertDictEqual(actual, expected_output)
    except AssertionError as ae:
        diagnostics = (
            f"category: {category}, language: {language}.",
            f"response_dict: {actual}",
            f"expected_output: {expected_output}",
        )
        for line in diagnostics:
            print(line)
        raise ae
|
40 |
+
|
41 |
+
|
42 |
+
class TestLambdaGetSample(unittest.TestCase):
    """Deterministic checks for ``lambdaGetSample.get_random_selection``."""

    def test_get_random_selection(self):
        """Compare every language/category/output-mode combination with its expected value.

        The RNG is seeded once, so the calls must happen in the same order as
        the entries of ``expected_output__get_random_selection``.
        """
        seed(0)
        combinations = [
            (lang, cat, is_gradio_output)
            for lang in ["de", "en"]
            for cat in range(4)
            for is_gradio_output in [True, False]
        ]
        # Fail with a clear message instead of an IndexError (or silently
        # skipping entries) when the expectations drift from the loops above.
        self.assertEqual(len(combinations), len(expected_output__get_random_selection))
        for (lang, cat, is_gradio_output), expected in zip(
            combinations, expected_output__get_random_selection
        ):
            # subTest labels a failure with the exact combination that produced it.
            with self.subTest(lang=lang, cat=cat, is_gradio_output=is_gradio_output):
                output = lambdaGetSample.get_random_selection(
                    lang, cat, is_gradio_output=is_gradio_output
                )
                self.assertEqual(output, expected)
|
54 |
+
|
55 |
+
|
56 |
+
class TestDataset(unittest.TestCase):
    """Seeded lambda_handler checks per language/category plus a pickle-to-JSON export check."""

    def test_random_sentences_de(self):
        """Category 0, language "de"."""
        expected = {'real_transcript': 'Er ist ein begeisterter Theaterliebhaber.', 'ipa_transcript': 'ɛɐ̯ ɪst aɪ̯n bɛːɡaɪ̯stɛːrtɛːr tɛːaːtɛːrliːbhaːbɛːr.', 'transcript_translation': ''}
        helper_category(self, 0, "de", expected_output=expected)

    def test_easy_sentences_easy_de(self):
        """Category 1, language "de"."""
        expected = {'real_transcript': 'Sie will niemanden heiraten.', 'ipa_transcript': 'ziː vɪl niːmandɛːn haɪ̯raːtɛːn.', 'transcript_translation': ''}
        helper_category(self, 1, "de", expected_output=expected)

    def test_normal_sentences_medium_de(self):
        """Category 2, language "de"."""
        expected = {'real_transcript': 'Leg das Buch dorthin, wo du es gefunden hast.', 'ipa_transcript': 'lɛːɡ daːs bʊx doːrtiːn, voː duː ɛːs ɡɛːfʊndɛːn hast.', 'transcript_translation': ''}
        helper_category(self, 2, "de", expected_output=expected)

    def test_hard_sentences_hard_de(self):
        """Category 3, language "de"."""
        expected = {'real_transcript': 'Eine Frau braucht neun Monate, um ein Kind zur Welt zu bringen, aber das heißt nicht, dass es neun zusammen in einem Monat schaffen könnten.', 'ipa_transcript': 'aɪ̯nɛː fraʊ̯ braʊ̯xt nɔɪ̯n moːnaːtɛː, uːm aɪ̯n kɪnd t͡suːr vɛlt t͡suː brɪŋɛːn, aːbɛːr daːs haɪ̯st nɪçt, das ɛːs nɔɪ̯n t͡suːzamɛːn iːn aɪ̯nɛːm moːnaːt ʃafɛːn kœntɛːn.', 'transcript_translation': ''}
        helper_category(self, 3, "de", expected_output=expected)

    def test_random_sentences_en(self):
        """Category 0, language "en"."""
        expected = {'real_transcript': 'He is a passionate theatregoer.', 'ipa_transcript': 'hi ɪz ə ˈpæʃənət theatregoer.', 'transcript_translation': ''}
        helper_category(self, 0, "en", expected_output=expected)

    def test_easy_sentences_easy_en(self):
        """Category 1, language "en"."""
        expected = {'real_transcript': 'Lemons are usually sour.', 'ipa_transcript': 'ˈlɛmənz ər ˈjuʒəwəli saʊər.', 'transcript_translation': ''}
        helper_category(self, 1, "en", expected_output=expected)

    def test_normal_sentences_medium_en(self):
        """Category 2, language "en"."""
        expected = {'real_transcript': 'Tom read the Bible in its entirety, from the beginning to the end.', 'ipa_transcript': 'tɑm rɛd ðə ˈbaɪbəl ɪn ɪts ɪnˈtaɪərti, frəm ðə bɪˈgɪnɪŋ tɪ ðə ɛnd.', 'transcript_translation': ''}
        helper_category(self, 2, "en", expected_output=expected)

    def test_hard_sentences_hard_en(self):
        """Category 3, language "en"."""
        expected = {'real_transcript': 'That was the first time, in the history of chess, that a machine (Deep Blue) defeated a Grand Master (Garry Kasparov).', 'ipa_transcript': 'ðət wɑz ðə fərst taɪm, ɪn ðə ˈhɪstəri əv ʧɛs, ðət ə məˈʃin (dip blu) dɪˈfitɪd ə grænd ˈmæstər (ˈgɛri ˈkæspərɑv).', 'transcript_translation': ''}
        helper_category(self, 3, "en", expected_output=expected)

    def test_get_pickle2json_dataframe(self):
        """Compare the JSON written by get_pickle2json_dataframe() with the expected file."""
        import os

        custom_filename = 'test_data_de_en_2'
        output_path = TEST_ROOT_FOLDER / f'{custom_filename}.json'
        expected_path = TEST_ROOT_FOLDER / f'{custom_filename}_expected.json'
        try:
            lambdaGetSample.get_pickle2json_dataframe(custom_filename, TEST_ROOT_FOLDER)
            with open(output_path, 'r') as src1:
                with open(expected_path, 'r') as src2:
                    json1 = json.load(src1)
                    json2 = json.load(src2)
            self.assertEqual(json1, json2)
        finally:
            # Always drop the generated file, even when the comparison fails,
            # so a failed run does not leave artifacts in the test folder.
            if os.path.exists(output_path):
                os.remove(output_path)
|
100 |
+
|
101 |
+
|
102 |
+
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|