Update README.md
Browse files
README.md
CHANGED
@@ -18,7 +18,7 @@ model-index:
|
|
18 |
type: automatic-speech-recognition
|
19 |
dataset:
|
20 |
name: MGB5 from ELDA and https://arabicspeech.org/
|
21 |
-
type: ELDA
|
22 |
args: ary
|
23 |
metrics:
|
24 |
- name: Test WER
|
@@ -151,7 +151,8 @@ processor = Wav2Vec2Processor.from_pretrained("othrif/wav2vec2-large-xlsr-morocc
|
|
151 |
model = Wav2Vec2ForCTC.from_pretrained("othrif/wav2vec2-large-xlsr-moroccan")
|
152 |
model.to("cuda")
|
153 |
|
154 |
-
chars_to_ignore_regex = '[
|
|
|
155 |
|
156 |
def remove_special_characters(batch):
|
157 |
batch["text"] = re.sub(chars_to_ignore_regex, "", batch["sentence"]).lower() + " "
|
|
|
18 |
type: automatic-speech-recognition
|
19 |
dataset:
|
20 |
name: MGB5 from ELDA and https://arabicspeech.org/
|
21 |
+
type: ELDA/mgb5_moroccan
|
22 |
args: ary
|
23 |
metrics:
|
24 |
- name: Test WER
|
|
|
151 |
model = Wav2Vec2ForCTC.from_pretrained("othrif/wav2vec2-large-xlsr-moroccan")
|
152 |
model.to("cuda")
|
153 |
|
154 |
+
chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:\\\"\\\'\\�]'
|
155 |
+
|
156 |
|
157 |
def remove_special_characters(batch):
|
158 |
batch["text"] = re.sub(chars_to_ignore_regex, "", batch["sentence"]).lower() + " "
|