Fix eval script
Browse files
eval.py
CHANGED
@@ -86,14 +86,14 @@ def normalize_text(text: str) -> str:
|
|
86 |
|
87 |
return result
|
88 |
|
89 |
-
|
90 |
|
91 |
text = text.lower()
|
92 |
# normalize non-standard (stylized) unicode characters
|
93 |
text = unicodedata.normalize('NFKC', text)
|
94 |
# remove punctuation
|
95 |
text = re.sub(chars_to_ignore_regex, "", text)
|
96 |
-
|
97 |
|
98 |
# Let's also make sure we split on all kinds of newlines, spaces, etc...
|
99 |
text = " ".join(text.split())
|
|
|
86 |
|
87 |
return result
|
88 |
|
89 |
+
chars_to_ignore_regex = '[\,\?\.\!\-\;\:\/\"\“\„\%\”\�\–\'\`\«\»\—\’\…]'
|
90 |
|
91 |
text = text.lower()
|
92 |
# normalize non-standard (stylized) unicode characters
|
93 |
text = unicodedata.normalize('NFKC', text)
|
94 |
# remove punctuation
|
95 |
text = re.sub(chars_to_ignore_regex, "", text)
|
96 |
+
text = replace_chars(text)
|
97 |
|
98 |
# Let's also make sure we split on all kinds of newlines, spaces, etc...
|
99 |
text = " ".join(text.split())
|