Spaces:
Runtime error
Runtime error
File size: 941 Bytes
d1e3e7b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import re
from typing import Dict
# Matches one character from U+0000-U+001F or U+007F-U+009F (the ASCII control
# range, DEL, and the block that follows it up to U+009F). The character class
# is assembled from the raw characters themselves; none of them is a regex
# metacharacter, so no escaping is required.
non_printing_characters_re: re.Pattern = re.compile(
    "[" + "".join(chr(code) for code in [*range(32), *range(127, 160)]) + "]"
)

# Matches one decimal digit. No re.ASCII flag is set, so on str input this
# covers every Unicode decimal digit, not only 0-9.
digits_re: re.Pattern = re.compile(r"\d")
# Maps one non-ASCII punctuation character to its ASCII replacement string.
#
# Keys are written as \u escapes (with the Unicode character name alongside)
# so the table survives being saved or served in the wrong encoding. The
# previous literal keys had been corrupted into mojibake: distinct keys
# collapsed into duplicates (so later entries overwrote earlier ones) and one
# key contained a raw line break, which made the literal a syntax error.
#
# NOTE(review): the keys were reconstructed from the surviving bytes, the
# replacement values and the entry order, cross-checked against the Moses
# replace-unicode-punctuation table -- confirm against the pre-corruption file.
unicode_punctuation: Dict[str, str] = {
    "\uff0c": ",",  # FULLWIDTH COMMA
    "\u3002": ".",  # IDEOGRAPHIC FULL STOP
    "\u3001": ",",  # IDEOGRAPHIC COMMA
    "\u201e": '"',  # DOUBLE LOW-9 QUOTATION MARK
    "\u201d": '"',  # RIGHT DOUBLE QUOTATION MARK
    "\u201c": '"',  # LEFT DOUBLE QUOTATION MARK
    "\u00ab": '"',  # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    "\u00bb": '"',  # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    # NOTE(review): a digit mapped to a quote looks odd, but it matches the
    # Moses table; the distinguishing byte was lost, so verify this key.
    "\uff11": '"',  # FULLWIDTH DIGIT ONE
    "\u300d": '"',  # RIGHT CORNER BRACKET
    "\u300c": '"',  # LEFT CORNER BRACKET
    "\u300a": '"',  # LEFT DOUBLE ANGLE BRACKET
    "\u300b": '"',  # RIGHT DOUBLE ANGLE BRACKET
    "\u00b4": "'",  # ACUTE ACCENT
    "\u2236": ":",  # RATIO
    "\uff1a": ":",  # FULLWIDTH COLON
    "\uff1f": "?",  # FULLWIDTH QUESTION MARK
    "\uff01": "!",  # FULLWIDTH EXCLAMATION MARK
    "\uff08": "(",  # FULLWIDTH LEFT PARENTHESIS
    "\uff09": ")",  # FULLWIDTH RIGHT PARENTHESIS
    "\uff1b": ";",  # FULLWIDTH SEMICOLON
    "\u2013": "-",  # EN DASH
    "\u2014": " - ",  # EM DASH
    "\uff0e": ". ",  # FULLWIDTH FULL STOP
    "\uff5e": "~",  # FULLWIDTH TILDE
    "\u2019": "'",  # RIGHT SINGLE QUOTATION MARK
    "\u2026": "...",  # HORIZONTAL ELLIPSIS
    "\u2501": "-",  # BOX DRAWINGS HEAVY HORIZONTAL
    "\u3008": "<",  # LEFT ANGLE BRACKET
    "\u3009": ">",  # RIGHT ANGLE BRACKET
    "\u3010": "[",  # LEFT BLACK LENTICULAR BRACKET
    "\u3011": "]",  # RIGHT BLACK LENTICULAR BRACKET
    "\uff05": "%",  # FULLWIDTH PERCENT SIGN
    "\u25ba": "-",  # BLACK RIGHT-POINTING POINTER
}
# Single lookup table bundling the module's normalization resources, each
# stored under a key equal to the name of the object it refers to.
normalization = dict(
    non_printing_characters_re=non_printing_characters_re,
    digits_re=digits_re,
    unicode_punctuation=unicode_punctuation,
)
|