nithinholla committed 260efc1 (parent d519f76): Updated model

Changed files:
- README.md +4 -4
- config.json +3 -3
- pytorch_model.bin +2 -2
- vocab.json +1 -1
README.md
CHANGED
@@ -23,7 +23,7 @@ model-index:
   metrics:
     - name: Test WER
       type: wer
-      value:
+      value: 21.72
 ---
 
 # Wav2Vec2-Large-XLSR-53-Dutch
@@ -86,13 +86,13 @@ processor = Wav2Vec2Processor.from_pretrained("nithinholla/wav2vec2-large-xlsr-53-dutch")
 model = Wav2Vec2ForCTC.from_pretrained("nithinholla/wav2vec2-large-xlsr-53-dutch")
 model.to("cuda")
 
-chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\'\�\(\)
+chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\'\�\(\)\&\–\—\=\…]'
 resampler = torchaudio.transforms.Resample(48_000, 16_000)
 
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
-    batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
+    batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower().replace("´", "'").replace("’", "'")
     speech_array, sampling_rate = torchaudio.load(batch["path"])
     batch["speech"] = resampler(speech_array).squeeze().numpy()
     return batch
@@ -116,7 +116,7 @@ result = test_dataset.map(evaluate, batched=True, batch_size=8)
 print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
 ```
 
-**Test Result**:
+**Test Result**: 21.72 %
 
 
 ## Training
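For quick reference, here are the changed README lines assembled into one self-contained block. This is a sketch built from the diff context only; the imports and the surrounding evaluation loop are assumed to match the rest of the README, which this diff shows only in part:

```python
import re

import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

processor = Wav2Vec2Processor.from_pretrained("nithinholla/wav2vec2-large-xlsr-53-dutch")
model = Wav2Vec2ForCTC.from_pretrained("nithinholla/wav2vec2-large-xlsr-53-dutch")
model.to("cuda")  # move to GPU, as in the README

# Updated in this commit: the character class now also strips & – — = …
chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\'\�\(\)\&\–\—\=\…]'
resampler = torchaudio.transforms.Resample(48_000, 16_000)

def speech_file_to_array_fn(batch):
    # Updated in this commit: apostrophe variants ´ and ’ are normalised to '
    # so transcripts only contain characters present in the new vocab.json.
    batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower().replace("´", "'").replace("’", "'")
    speech_array, sampling_rate = torchaudio.load(batch["path"])
    batch["speech"] = resampler(speech_array).squeeze().numpy()
    return batch
```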
config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "/workspace/models/nl_third/checkpoint-13000",
   "activation_dropout": 0.1,
   "apply_spec_augment": true,
   "architectures": [
@@ -70,7 +70,7 @@
   "num_conv_pos_embeddings": 128,
   "num_feat_extract_layers": 7,
   "num_hidden_layers": 24,
-  "pad_token_id":
+  "pad_token_id": 41,
   "transformers_version": "4.5.0.dev0",
-  "vocab_size":
+  "vocab_size": 42
 }
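The two updated values line up with the new vocab.json below: "[PAD]" maps to id 41 and the vocabulary holds 42 entries (ids 0–41). A minimal sanity check, assuming local copies of both files from this commit:

```python
import json

# Assumes config.json and vocab.json from this commit are in the working directory.
with open("vocab.json") as f:
    vocab = json.load(f)
with open("config.json") as f:
    config = json.load(f)

assert config["vocab_size"] == len(vocab)        # 42 tokens, ids 0-41
assert config["pad_token_id"] == vocab["[PAD]"]  # pad / CTC blank sits at id 41
```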
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:0d66e36e390922ffe5ab48bcd36d637e94d3058955e987c62e41a04a276d368e
+size 1262106007
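Since only the Git LFS pointer changed here, a downloaded checkpoint can be verified against the recorded digest and size. A small sketch, assuming the weights were fetched to a local `pytorch_model.bin`:

```python
import hashlib
import os

path = "pytorch_model.bin"  # assumed local download location

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert sha.hexdigest() == "0d66e36e390922ffe5ab48bcd36d637e94d3058955e987c62e41a04a276d368e"
assert os.path.getsize(path) == 1262106007  # ~1.26 GB
```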
vocab.json
CHANGED
@@ -1 +1 @@
-{"
+{"x": 0, "n": 1, "u": 2, "b": 3, "p": 5, "g": 6, "ï": 7, "o": 8, "i": 9, "c": 10, "m": 11, "a": 12, "l": 13, "t": 14, "ü": 15, "é": 16, "á": 17, "e": 18, "r": 19, "f": 20, "w": 21, "´": 22, "v": 23, "ö": 24, "z": 25, "y": 26, "d": 27, "ó": 28, "h": 29, "s": 30, "q": 31, "k": 32, "'": 33, "ê": 34, "à": 35, "ë": 36, "è": 37, "j": 38, "ú": 39, "|": 4, "[UNK]": 40, "[PAD]": 41}
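The vocabulary is character-level, with "|" (id 4) as the word delimiter and "[PAD]" (id 41) doubling as the CTC blank. A hedged sketch of how a CTC tokenizer would consume this file; the keyword arguments follow the usual Wav2Vec2 conventions and are not shown in this diff:

```python
from transformers import Wav2Vec2CTCTokenizer

tokenizer = Wav2Vec2CTCTokenizer(
    "vocab.json",              # the file updated in this commit
    unk_token="[UNK]",
    pad_token="[PAD]",
    word_delimiter_token="|",
)

# CTC decoding collapses repeated tokens and drops the pad/blank token;
# "|" is rendered as a space between words.
ids = [29, 12, 13, 41, 13, 8, 4, 21, 18, 19, 18, 13, 27]  # h a l <pad> l o | w e r e l d
print(tokenizer.decode(ids))  # -> "hallo wereld"
```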