correcting CER score
Browse files
README.md
CHANGED
@@ -21,10 +21,14 @@ model-index:
|
|
21 |
metrics:
|
22 |
- name: Test CER
|
23 |
type: cer
|
24 |
-
value:
|
25 |
---
|
26 |
|
27 |
-
|
|
|
|
|
|
|
|
|
28 |
[Colab trial](https://colab.research.google.com/drive/1e_z5jQHYbO2YKEaUgzb1ww1WwiAyydAj?usp=sharing)
|
29 |
|
30 |
```
|
@@ -87,8 +91,15 @@ Predict
|
|
87 |
predict(load_file_to_data('voice file path'))
|
88 |
```
|
89 |
|
90 |
-
## Evaluation
|
|
|
|
|
|
|
91 |
```python
|
|
|
|
|
|
|
|
|
92 |
import torchaudio
|
93 |
from datasets import load_dataset, load_metric
|
94 |
from transformers import (
|
@@ -97,10 +108,11 @@ from transformers import (
|
|
97 |
)
|
98 |
import torch
|
99 |
import re
|
|
|
100 |
|
101 |
-
model_name = "voidful/wav2vec2-large-xlsr-53-tw"
|
102 |
device = "cuda"
|
103 |
-
processor_name = "voidful/wav2vec2-large-xlsr-53-tw"
|
104 |
|
105 |
chars_to_ignore_regex = r"[¥•"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·'℃°•·.﹑︰〈〉─《﹖﹣﹂﹁﹔!?。。"#$%&'()*+,﹐-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏..!\"#$%&()*+,\-.\:;<=>?@\[\]\\\/^_`{|}~]"
|
106 |
|
@@ -133,15 +145,18 @@ def map_to_pred(batch):
|
|
133 |
|
134 |
result = ds.map(map_to_pred, batched=True, batch_size=16, remove_columns=list(ds.features.keys()))
|
135 |
|
136 |
-
|
137 |
-
|
138 |
-
print(wer.compute(predictions=result["predicted"], references=result["target"]))
|
139 |
```
|
140 |
|
141 |
-
`CER:
|
142 |
|
143 |
-
|
144 |
```python
|
|
|
|
|
|
|
|
|
145 |
import torchaudio
|
146 |
from datasets import load_dataset, load_metric
|
147 |
from transformers import (
|
@@ -150,12 +165,12 @@ from transformers import (
|
|
150 |
)
|
151 |
import torch
|
152 |
import re
|
|
|
153 |
from transformers import AutoTokenizer, AutoModelWithLMHead
|
154 |
|
155 |
-
model_name = "voidful/wav2vec2-large-xlsr-53-tw"
|
156 |
device = "cuda"
|
157 |
-
processor_name = "voidful/wav2vec2-large-xlsr-53-tw"
|
158 |
-
|
159 |
chars_to_ignore_regex = r"[¥•"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·'℃°•·.﹑︰〈〉─《﹖﹣﹂﹁﹔!?。。"#$%&'()*+,﹐-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏..!\"#$%&()*+,\-.\:;<=>?@\[\]\\\/^_`{|}~]"
|
160 |
|
161 |
tokenizer = AutoTokenizer.from_pretrained("ckiplab/gpt2-base-chinese")
|
@@ -201,9 +216,8 @@ def map_to_pred(batch):
|
|
201 |
|
202 |
result = ds.map(map_to_pred, batched=True, batch_size=16, remove_columns=list(ds.features.keys()))
|
203 |
|
204 |
-
|
205 |
-
|
206 |
-
print(wer.compute(predictions=result["predicted"], references=result["target"]))
|
207 |
```
|
208 |
|
209 |
-
`CER
|
|
|
21 |
metrics:
|
22 |
- name: Test CER
|
23 |
type: cer
|
24 |
+
value: 16.41
|
25 |
---
|
26 |
|
27 |
+
# Wav2Vec2-Large-XLSR-53-tw-gpt
|
28 |
+
Fine-tuned [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on zh-tw using the [Common Voice](https://huggingface.co/datasets/common_voice).
|
29 |
+
When using this model, make sure that your speech input is sampled at 16kHz.
|
30 |
+
|
31 |
+
## Usage
|
32 |
[Colab trial](https://colab.research.google.com/drive/1e_z5jQHYbO2YKEaUgzb1ww1WwiAyydAj?usp=sharing)
|
33 |
|
34 |
```
|
|
|
91 |
predict(load_file_to_data('voice file path'))
|
92 |
```
|
93 |
|
94 |
+
## Evaluation
|
95 |
+
The model can be evaluated as follows on the zh-tw test data of Common Voice.
|
96 |
+
CER calculation refers to https://huggingface.co/ctl/wav2vec2-large-xlsr-cantonese
|
97 |
+
|
98 |
```python
|
99 |
+
!mkdir cer
|
100 |
+
!wget -O cer/cer.py https://huggingface.co/ctl/wav2vec2-large-xlsr-cantonese/raw/main/cer.py
|
101 |
+
!pip install jiwer
|
102 |
+
|
103 |
import torchaudio
|
104 |
from datasets import load_dataset, load_metric
|
105 |
from transformers import (
|
|
|
108 |
)
|
109 |
import torch
|
110 |
import re
|
111 |
+
import sys
|
112 |
|
113 |
+
model_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
|
114 |
device = "cuda"
|
115 |
+
processor_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
|
116 |
|
117 |
chars_to_ignore_regex = r"[¥•"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·'℃°•·.﹑︰〈〉─《﹖﹣﹂﹁﹔!?。。"#$%&'()*+,﹐-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏..!\"#$%&()*+,\-.\:;<=>?@\[\]\\\/^_`{|}~]"
|
118 |
|
|
|
145 |
|
146 |
result = ds.map(map_to_pred, batched=True, batch_size=16, remove_columns=list(ds.features.keys()))
|
147 |
|
148 |
+
cer = load_metric("./cer")
|
149 |
+
print("CER: {:.2f}".format(100 * cer.compute(predictions=result["predicted"], references=result["target"])))
|
|
|
150 |
```
|
151 |
|
152 |
+
`CER: 28.734822`
|
153 |
|
154 |
+
## Evaluation with GPT:
|
155 |
```python
|
156 |
+
!mkdir cer
|
157 |
+
!wget -O cer/cer.py https://huggingface.co/ctl/wav2vec2-large-xlsr-cantonese/raw/main/cer.py
|
158 |
+
!pip install jiwer
|
159 |
+
|
160 |
import torchaudio
|
161 |
from datasets import load_dataset, load_metric
|
162 |
from transformers import (
|
|
|
165 |
)
|
166 |
import torch
|
167 |
import re
|
168 |
+
import sys
|
169 |
from transformers import AutoTokenizer, AutoModelWithLMHead
|
170 |
|
171 |
+
model_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
|
172 |
device = "cuda"
|
173 |
+
processor_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
|
|
|
174 |
chars_to_ignore_regex = r"[¥•"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·'℃°•·.﹑︰〈〉─《﹖﹣﹂﹁﹔!?。。"#$%&'()*+,﹐-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏..!\"#$%&()*+,\-.\:;<=>?@\[\]\\\/^_`{|}~]"
|
175 |
|
176 |
tokenizer = AutoTokenizer.from_pretrained("ckiplab/gpt2-base-chinese")
|
|
|
216 |
|
217 |
result = ds.map(map_to_pred, batched=True, batch_size=16, remove_columns=list(ds.features.keys()))
|
218 |
|
219 |
+
cer = load_metric("./cer")
|
220 |
+
print("CER: {:.2f}".format(100 * cer.compute(predictions=result["predicted"], references=result["target"])))
|
|
|
221 |
```
|
222 |
|
223 |
+
`CER: 25.69`
|