Update app.py
Browse files
app.py
CHANGED
@@ -24,60 +24,60 @@ def create_speaker_embedding(speaker_model, waveform: np.ndarray) -> np.ndarray:
|
|
24 |
def remove_special_characters_s(text: Text) -> Text:
    """Strip unwanted punctuation, normalise curly apostrophes, and lowercase.

    The characters ``=``, acute accent, en dash, curly double quotes and the
    horizontal ellipsis are deleted; left/right curly single quotes become a
    plain ASCII apostrophe. The result is then lowercased.

    Args:
        text: Input string to clean.

    Returns:
        The cleaned, lowercased string.
    """
    # One translate pass replaces the previous chain of regex substitutions.
    # Code points are written as escapes so the table is unambiguous and no
    # invalid backslash escapes (a SyntaxWarning on Python 3.12+) are needed.
    translation = str.maketrans(
        {
            "=": None,
            # Acute accent: deleted, not mapped to an apostrophe. The old
            # code removed it before its later "acute -> '" substitution
            # could ever match, so deletion is the behaviour callers see.
            "\u00b4": None,
            "\u2013": None,  # en dash
            "\u201c": None,  # left double quotation mark
            "\u201d": None,  # right double quotation mark
            "\u2026": None,  # horizontal ellipsis
            "\u2018": "'",  # left single quotation mark
            "\u2019": "'",  # right single quotation mark
        }
    )
    return text.translate(translation).lower()
|
33 |
|
34 |
|
35 |
def dutch_to_english(text: Text) -> Text:
|
36 |
replacements = [
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
|
82 |
for src, dst in replacements:
|
83 |
text = text.replace(src, dst)
|
|
|
24 |
def remove_special_characters_s(text: Text) -> Text:
    """Strip unwanted punctuation, normalise curly apostrophes, and lowercase.

    The characters ``=``, acute accent, en dash, curly double quotes and the
    horizontal ellipsis are deleted; left/right curly single quotes become a
    plain ASCII apostrophe. The result is then lowercased.

    Args:
        text: Input string to clean.

    Returns:
        The cleaned, lowercased string.
    """
    # One translate pass replaces the previous chain of regex substitutions.
    # Code points are written as escapes so the table is unambiguous and no
    # invalid backslash escapes (a SyntaxWarning on Python 3.12+) are needed.
    translation = str.maketrans(
        {
            "=": None,
            # Acute accent: deleted, not mapped to an apostrophe. The old
            # code removed it before its later "acute -> '" substitution
            # could ever match, so deletion is the behaviour callers see.
            "\u00b4": None,
            "\u2013": None,  # en dash
            "\u201c": None,  # left double quotation mark
            "\u201d": None,  # right double quotation mark
            "\u2026": None,  # horizontal ellipsis
            "\u2018": "'",  # left single quotation mark
            "\u2019": "'",  # right single quotation mark
        }
    )
    return text.translate(translation).lower()
|
33 |
|
34 |
|
35 |
def dutch_to_english(text: Text) -> Text:
|
36 |
replacements = [
|
37 |
+
("à", "a"),
|
38 |
+
("ç", "c"),
|
39 |
+
("è", "e"),
|
40 |
+
("ë", "e"),
|
41 |
+
("í", "i"),
|
42 |
+
("ï", "i"),
|
43 |
+
("ö", "o"),
|
44 |
+
("ü", "u"),
|
45 |
+
('&', "en"),
|
46 |
+
('á','a'),
|
47 |
+
('ä','a'),
|
48 |
+
('î','i'),
|
49 |
+
('ó','o'),
|
50 |
+
('ö','o'),
|
51 |
+
('ú','u'),
|
52 |
+
('û','u'),
|
53 |
+
('ă','a'),
|
54 |
+
('ć','c'),
|
55 |
+
('đ','d'),
|
56 |
+
('š','s'),
|
57 |
+
('ţ','t'),
|
58 |
+
('j', 'y'),
|
59 |
+
('k', 'k'),
|
60 |
+
('ci', 'si'),
|
61 |
+
('ce', 'se'),
|
62 |
+
('ca', 'ka'),
|
63 |
+
('co', 'ko'),
|
64 |
+
('cu', 'ku'),
|
65 |
+
(' sch', ' sg'),
|
66 |
+
('sch ', 's '),
|
67 |
+
('ch', 'g'),
|
68 |
+
('eeuw', 'eaw'),
|
69 |
+
('ee', 'ea'),
|
70 |
+
('aai','ay'),
|
71 |
+
('oei', 'ooy'),
|
72 |
+
('ooi', 'oay'),
|
73 |
+
('ieuw', 'eew'),
|
74 |
+
('ie', 'ee'),
|
75 |
+
('oo', 'oa'),
|
76 |
+
('oe', 'oo'),
|
77 |
+
('ei', '\\i\\'),
|
78 |
+
('ij', 'i'),
|
79 |
+
('\\i\\', 'i')
|
80 |
+
]
|
81 |
|
82 |
for src, dst in replacements:
|
83 |
text = text.replace(src, dst)
|