marinone94
committed on
Commit
•
cd904f4
1
Parent(s):
71e9ea9
add decoding to get correct Swedish characters
Browse files
run_speech_recognition_ctc.py
CHANGED
@@ -521,9 +521,9 @@ def main():
|
|
521 |
|
522 |
def remove_special_characters(batch):
|
523 |
if chars_to_ignore_regex is not None:
|
524 |
-
batch["target_text"] = re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("\\\\Punkt").replace("\\\\Komma").lower() + " "
|
525 |
else:
|
526 |
-
batch["target_text"] = batch[text_column_name].replace("\\\\Punkt").replace("\\\\Komma").lower() + " "
|
527 |
return batch
|
528 |
|
529 |
num_workers = data_args.preprocessing_num_workers
|
|
|
521 |
|
522 |
def remove_special_characters(batch):
|
523 |
if chars_to_ignore_regex is not None:
|
524 |
+
batch["target_text"] = re.sub(chars_to_ignore_regex, "", batch[text_column_name]).replace("\\\\Punkt").replace("\\\\Komma").lower().decode("utf-8") + " "
|
525 |
else:
|
526 |
+
batch["target_text"] = batch[text_column_name].replace("\\\\Punkt").replace("\\\\Komma").lower().decode("utf-8") + " "
|
527 |
return batch
|
528 |
|
529 |
num_workers = data_args.preprocessing_num_workers
|