Update README.md
Browse files
README.md
CHANGED
@@ -17,7 +17,7 @@ model-index:
|
|
17 |
name: Speech Recognition
|
18 |
type: automatic-speech-recognition
|
19 |
dataset:
|
20 |
-
name: Common Voice
|
21 |
type: common_voice
|
22 |
args: tr
|
23 |
metrics:
|
@@ -79,14 +79,14 @@ from datasets import load_dataset, load_metric
|
|
79 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
80 |
import re
|
81 |
|
82 |
-
test_dataset = load_dataset("common_voice", "
|
83 |
wer = load_metric("wer")
|
84 |
|
85 |
processor = Wav2Vec2Processor.from_pretrained("cahya-wirawan/wav2vec2-large-xlsr-turkish")
|
86 |
model = Wav2Vec2ForCTC.from_pretrained("cahya-wirawan/wav2vec2-large-xlsr-turkish")
|
87 |
model.to("cuda")
|
88 |
|
89 |
-
chars_to_ignore_regex = '[
|
90 |
|
91 |
# Preprocessing the datasets.
|
92 |
# We need to read the audio files as arrays
|
|
|
17 |
name: Speech Recognition
|
18 |
type: automatic-speech-recognition
|
19 |
dataset:
|
20 |
+
name: Common Voice tr
|
21 |
type: common_voice
|
22 |
args: tr
|
23 |
metrics:
|
|
|
79 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
80 |
import re
|
81 |
|
82 |
+
test_dataset = load_dataset("common_voice", "tr", split="test")
|
83 |
wer = load_metric("wer")
|
84 |
|
85 |
processor = Wav2Vec2Processor.from_pretrained("cahya-wirawan/wav2vec2-large-xlsr-turkish")
|
86 |
model = Wav2Vec2ForCTC.from_pretrained("cahya-wirawan/wav2vec2-large-xlsr-turkish")
|
87 |
model.to("cuda")
|
88 |
|
89 |
+
chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:\\"\\“\\‘\\”\\'\\`…\\’»«]'
|
90 |
|
91 |
# Preprocessing the datasets.
|
92 |
# We need to read the audio files as arrays
|