voidful committed on
Commit
c102c06
1 Parent(s): fd13715

correcting CER score

Browse files
Files changed (1) hide show
  1. README.md +31 -17
README.md CHANGED
@@ -21,10 +21,14 @@ model-index:
21
  metrics:
22
  - name: Test CER
23
  type: cer
24
- value: 78.03
25
  ---
26
 
27
- ## Colab trial with recording or voice file
 
 
 
 
28
  [Colab trial](https://colab.research.google.com/drive/1e_z5jQHYbO2YKEaUgzb1ww1WwiAyydAj?usp=sharing)
29
 
30
  ```
@@ -87,8 +91,15 @@ Predict
87
  predict(load_file_to_data('voice file path'))
88
  ```
89
 
90
- ## Evaluation on Common Voice TW Test
 
 
 
91
  ```python
 
 
 
 
92
  import torchaudio
93
  from datasets import load_dataset, load_metric
94
  from transformers import (
@@ -97,10 +108,11 @@ from transformers import (
97
  )
98
  import torch
99
  import re
 
100
 
101
- model_name = "voidful/wav2vec2-large-xlsr-53-tw"
102
  device = "cuda"
103
- processor_name = "voidful/wav2vec2-large-xlsr-53-tw"
104
 
105
  chars_to_ignore_regex = r"[¥•"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·'℃°•·.﹑︰〈〉─《﹖﹣﹂﹁﹔!?。。"#$%&'()*+,﹐-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏..!\"#$%&()*+,\-.\:;<=>?@\[\]\\\/^_`{|}~]"
106
 
@@ -133,15 +145,18 @@ def map_to_pred(batch):
133
 
134
  result = ds.map(map_to_pred, batched=True, batch_size=16, remove_columns=list(ds.features.keys()))
135
 
136
- wer = load_metric("wer")
137
-
138
- print(wer.compute(predictions=result["predicted"], references=result["target"]))
139
  ```
140
 
141
- `CER: 0.842832469775475`
142
 
143
- Inference with GPT LM:
144
  ```python
 
 
 
 
145
  import torchaudio
146
  from datasets import load_dataset, load_metric
147
  from transformers import (
@@ -150,12 +165,12 @@ from transformers import (
150
  )
151
  import torch
152
  import re
 
153
  from transformers import AutoTokenizer, AutoModelWithLMHead
154
 
155
- model_name = "voidful/wav2vec2-large-xlsr-53-tw"
156
  device = "cuda"
157
- processor_name = "voidful/wav2vec2-large-xlsr-53-tw"
158
-
159
  chars_to_ignore_regex = r"[¥•"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·'℃°•·.﹑︰〈〉─《﹖﹣﹂﹁﹔!?。。"#$%&'()*+,﹐-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏..!\"#$%&()*+,\-.\:;<=>?@\[\]\\\/^_`{|}~]"
160
 
161
  tokenizer = AutoTokenizer.from_pretrained("ckiplab/gpt2-base-chinese")
@@ -201,9 +216,8 @@ def map_to_pred(batch):
201
 
202
  result = ds.map(map_to_pred, batched=True, batch_size=16, remove_columns=list(ds.features.keys()))
203
 
204
- wer = load_metric("wer")
205
-
206
- print(wer.compute(predictions=result["predicted"], references=result["target"]))
207
  ```
208
 
209
- `CER 0.7803108808290156`
 
21
  metrics:
22
  - name: Test CER
23
  type: cer
24
+ value: 16.41
25
  ---
26
 
27
+ # Wav2Vec2-Large-XLSR-53-tw-gpt
28
+ Fine-tuned [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on zh-tw using the [Common Voice](https://huggingface.co/datasets/common_voice) dataset.
29
+ When using this model, make sure that your speech input is sampled at 16kHz.
30
+
31
+ ## Usage
32
  [Colab trial](https://colab.research.google.com/drive/1e_z5jQHYbO2YKEaUgzb1ww1WwiAyydAj?usp=sharing)
33
 
34
  ```
 
91
  predict(load_file_to_data('voice file path'))
92
  ```
93
 
94
+ ## Evaluation
95
+ The model can be evaluated as follows on the zh-tw test data of Common Voice.
96
+ The CER calculation follows the one used in https://huggingface.co/ctl/wav2vec2-large-xlsr-cantonese
97
+
98
  ```python
99
+ !mkdir cer
100
+ !wget -O cer/cer.py https://huggingface.co/ctl/wav2vec2-large-xlsr-cantonese/raw/main/cer.py
101
+ !pip install jiwer
102
+
103
  import torchaudio
104
  from datasets import load_dataset, load_metric
105
  from transformers import (
 
108
  )
109
  import torch
110
  import re
111
+ import sys
112
 
113
+ model_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
114
  device = "cuda"
115
+ processor_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
116
 
117
  chars_to_ignore_regex = r"[¥•"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·'℃°•·.﹑︰〈〉─《﹖﹣﹂﹁﹔!?。。"#$%&'()*+,﹐-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏..!\"#$%&()*+,\-.\:;<=>?@\[\]\\\/^_`{|}~]"
118
 
 
145
 
146
  result = ds.map(map_to_pred, batched=True, batch_size=16, remove_columns=list(ds.features.keys()))
147
 
148
+ cer = load_metric("./cer")
149
+ print("CER: {:2f}".format(100 * cer.compute(predictions=result["predicted"], references=result["target"])))
 
150
  ```
151
 
152
+ `CER: 28.734822`
153
 
154
+ ## Evaluation with GPT:
155
  ```python
156
+ !mkdir cer
157
+ !wget -O cer/cer.py https://huggingface.co/ctl/wav2vec2-large-xlsr-cantonese/raw/main/cer.py
158
+ !pip install jiwer
159
+
160
  import torchaudio
161
  from datasets import load_dataset, load_metric
162
  from transformers import (
 
165
  )
166
  import torch
167
  import re
168
+ import sys
169
  from transformers import AutoTokenizer, AutoModelWithLMHead
170
 
171
+ model_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
172
  device = "cuda"
173
+ processor_name = "voidful/wav2vec2-large-xlsr-53-tw-gpt"
 
174
  chars_to_ignore_regex = r"[¥•"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、 、〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏﹑﹔·'℃°•·.﹑︰〈〉─《﹖﹣﹂﹁﹔!?。。"#$%&'()*+,﹐-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏..!\"#$%&()*+,\-.\:;<=>?@\[\]\\\/^_`{|}~]"
175
 
176
  tokenizer = AutoTokenizer.from_pretrained("ckiplab/gpt2-base-chinese")
 
216
 
217
  result = ds.map(map_to_pred, batched=True, batch_size=16, remove_columns=list(ds.features.keys()))
218
 
219
+ cer = load_metric("./cer")
220
+ print("CER: {:2f}".format(100 * cer.compute(predictions=result["predicted"], references=result["target"])))
 
221
  ```
222
 
223
+ `CER: 25.69`