Update README.md
Browse files
README.md
CHANGED
@@ -26,7 +26,7 @@ model-index:
|
|
26 |
metrics:
|
27 |
- name: Test WER
|
28 |
type: wer
|
29 |
-
value: 33.
|
30 |
---
|
31 |
|
32 |
|
@@ -68,7 +68,7 @@ chars_to_ignore = [
|
|
68 |
"β", "%", "β", "οΏ½", "β", "β¦", "_", "β", 'β', 'β'
|
69 |
]
|
70 |
chars_to_mapping = {
|
71 |
-
"
|
72 |
}
|
73 |
|
74 |
def multiple_replace(text, chars_to_mapping):
|
@@ -83,7 +83,7 @@ def normalizer(batch, chars_to_ignore, chars_to_mapping):
|
|
83 |
chars_to_ignore_regex = f"""[{"".join(chars_to_ignore)}]"""
|
84 |
text = batch["sentence"].lower().strip()
|
85 |
|
86 |
-
text = text.replace("
|
87 |
text = multiple_replace(text, chars_to_mapping)
|
88 |
text = remove_special_characters(text, chars_to_ignore_regex)
|
89 |
|
@@ -194,7 +194,7 @@ chars_to_ignore = [
|
|
194 |
"β", "%", "β", "οΏ½", "β", "β¦", "_", "β", 'β', 'β'
|
195 |
]
|
196 |
chars_to_mapping = {
|
197 |
-
"
|
198 |
}
|
199 |
|
200 |
def multiple_replace(text, chars_to_mapping):
|
@@ -209,7 +209,7 @@ def normalizer(batch, chars_to_ignore, chars_to_mapping):
|
|
209 |
chars_to_ignore_regex = f"""[{"".join(chars_to_ignore)}]"""
|
210 |
text = batch["sentence"].lower().strip()
|
211 |
|
212 |
-
text = text.replace("
|
213 |
text = multiple_replace(text, chars_to_mapping)
|
214 |
text = remove_special_characters(text, chars_to_ignore_regex)
|
215 |
|
@@ -261,7 +261,7 @@ print("WER: {:.2f}".format(100 * wer.compute(predictions=result["predicted"], re
|
|
261 |
```
|
262 |
|
263 |
**Test Result**:
|
264 |
-
- WER: 33.
|
265 |
|
266 |
|
267 |
## Training & Report
|
|
|
26 |
metrics:
|
27 |
- name: Test WER
|
28 |
type: wer
|
29 |
+
value: 33.93
|
30 |
---
|
31 |
|
32 |
|
|
|
68 |
"β", "%", "β", "οΏ½", "β", "β¦", "_", "β", 'β', 'β'
|
69 |
]
|
70 |
chars_to_mapping = {
|
71 |
+
"\u200c": " ", "\u200d": " ", "\u200e": " ", "\u200f": " ", "\ufeff": " ",
|
72 |
}
|
73 |
|
74 |
def multiple_replace(text, chars_to_mapping):
|
|
|
83 |
chars_to_ignore_regex = f"""[{"".join(chars_to_ignore)}]"""
|
84 |
text = batch["sentence"].lower().strip()
|
85 |
|
86 |
+
text = text.replace("\u0307", " ").strip()
|
87 |
text = multiple_replace(text, chars_to_mapping)
|
88 |
text = remove_special_characters(text, chars_to_ignore_regex)
|
89 |
|
|
|
194 |
"β", "%", "β", "οΏ½", "β", "β¦", "_", "β", 'β', 'β'
|
195 |
]
|
196 |
chars_to_mapping = {
|
197 |
+
"\u200c": " ", "\u200d": " ", "\u200e": " ", "\u200f": " ", "\ufeff": " ",
|
198 |
}
|
199 |
|
200 |
def multiple_replace(text, chars_to_mapping):
|
|
|
209 |
chars_to_ignore_regex = f"""[{"".join(chars_to_ignore)}]"""
|
210 |
text = batch["sentence"].lower().strip()
|
211 |
|
212 |
+
text = text.replace("\u0307", " ").strip()
|
213 |
text = multiple_replace(text, chars_to_mapping)
|
214 |
text = remove_special_characters(text, chars_to_ignore_regex)
|
215 |
|
|
|
261 |
```
|
262 |
|
263 |
**Test Result**:
|
264 |
+
- WER: 33.93%
|
265 |
|
266 |
|
267 |
## Training & Report
|