Spaces:
Runtime error
Runtime error
import re | |
from typing import Dict | |
# Matches a single decimal digit. For str patterns, ``\d`` also matches
# non-ASCII Unicode decimal digits, not only 0-9.
digits_re: re.Pattern = re.compile(r"\d")

# Matches one character whose code point is in 0-31 or 127-159. Note that
# tab, line feed and carriage return fall inside the first range.
# None of these code points is a regex metacharacter, so they can be placed
# inside the character class without escaping.
non_printing_characters_re: re.Pattern = re.compile(
    "[" + "".join(chr(code) for code in (*range(0, 32), *range(127, 160))) + "]"
)
# Maps non-ASCII punctuation to an ASCII replacement string.
#
# Keys are written as ``\uXXXX`` escapes on purpose: several of them are
# full-width forms that look identical to ASCII once a tool "helpfully"
# normalizes the file, which silently turns entries into no-ops (``","`` ->
# ``","``) or, worse, into rewrites of ordinary ASCII text (``"1"`` -> ``'"'``,
# ``"."`` -> ``". "``). Escapes keep the intended code points unambiguous.
#
# NOTE(review): the table appears to mirror the Moses / cc_net punctuation
# normalization list, including its FULLWIDTH DIGIT ONE -> '"' entry, which is
# kept here for parity with that list - confirm against the upstream source.
unicode_punctuation: Dict[str, str] = {
    "\uff0c": ",",  # FULLWIDTH COMMA
    "\u3002": ".",  # IDEOGRAPHIC FULL STOP
    "\u3001": ",",  # IDEOGRAPHIC COMMA
    "\u201e": '"',  # DOUBLE LOW-9 QUOTATION MARK
    "\u201d": '"',  # RIGHT DOUBLE QUOTATION MARK
    "\u201c": '"',  # LEFT DOUBLE QUOTATION MARK
    "\u00ab": '"',  # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    "\u00bb": '"',  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    "\uff11": '"',  # FULLWIDTH DIGIT ONE (see NOTE above)
    "\u300d": '"',  # RIGHT CORNER BRACKET
    "\u300c": '"',  # LEFT CORNER BRACKET
    "\u300a": '"',  # LEFT DOUBLE ANGLE BRACKET
    "\u300b": '"',  # RIGHT DOUBLE ANGLE BRACKET
    "\u00b4": "'",  # ACUTE ACCENT
    "\u2236": ":",  # RATIO
    "\uff1a": ":",  # FULLWIDTH COLON
    "\uff1f": "?",  # FULLWIDTH QUESTION MARK
    "\uff01": "!",  # FULLWIDTH EXCLAMATION MARK
    "\uff08": "(",  # FULLWIDTH LEFT PARENTHESIS
    "\uff09": ")",  # FULLWIDTH RIGHT PARENTHESIS
    "\uff1b": ";",  # FULLWIDTH SEMICOLON
    "\u2013": "-",  # EN DASH
    "\u2014": " - ",  # EM DASH
    "\uff0e": ". ",  # FULLWIDTH FULL STOP
    "\uff5e": "~",  # FULLWIDTH TILDE
    "\u2019": "'",  # RIGHT SINGLE QUOTATION MARK
    "\u2026": "...",  # HORIZONTAL ELLIPSIS
    "\u2501": "-",  # BOX DRAWINGS HEAVY HORIZONTAL
    "\u3008": "<",  # LEFT ANGLE BRACKET
    "\u3009": ">",  # RIGHT ANGLE BRACKET
    "\u3010": "[",  # LEFT BLACK LENTICULAR BRACKET
    "\u3011": "]",  # RIGHT BLACK LENTICULAR BRACKET
    "\uff05": "%",  # FULLWIDTH PERCENT SIGN
    "\u25ba": "-",  # BLACK RIGHT-POINTING POINTER
}
# Bundles the module's normalization resources in one dict; each entry is
# keyed by the name of the module-level object it holds.
normalization = dict(
    non_printing_characters_re=non_printing_characters_re,
    digits_re=digits_re,
    unicode_punctuation=unicode_punctuation,
)