alvanli
committed on
Commit
•
937d8de
1
Parent(s):
dfb8dc8
Added adapter model
Browse files
README.md
CHANGED
@@ -22,7 +22,7 @@ model-index:
|
|
22 |
metrics:
|
23 |
- name: Normalized CER
|
24 |
type: cer
|
25 |
-
value:
|
26 |
---
|
27 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
28 |
should probably proofread and complete it, then remove this comment. -->
|
@@ -61,15 +61,14 @@ For training, three datasets were used:
|
|
61 |
- Cantonese-ASR: Yu, Tiezheng, Frieske, Rita, Xu, Peng, Cahyawijaya, Samuel, Yiu, Cheuk Tung, Lovenia, Holy, Dai, Wenliang, Barezi, Elham, Chen, Qifeng, Ma, Xiaojuan, Shi, Bertram, Fung, Pascale (2022) "Automatic Speech Recognition Datasets in Cantonese: A Survey and New Dataset", 2022. Link: https://arxiv.org/pdf/2201.02419.pdf
|
62 |
|
63 |
## Training Hyperparameters
|
64 |
-
- learning_rate:
|
65 |
- train_batch_size: 60 (on 1 3090 GPU)
|
66 |
- eval_batch_size: 10
|
67 |
- gradient_accumulation_steps: 1
|
68 |
- total_train_batch_size: 60x1x1=60
|
69 |
-
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
70 |
- lr_scheduler_type: linear
|
71 |
- lr_scheduler_warmup_steps: 500
|
72 |
-
- training_steps:
|
73 |
- augmentation: SpecAugment
|
74 |
|
75 |
## Training Results
|
|
|
22 |
metrics:
|
23 |
- name: Normalized CER
|
24 |
type: cer
|
25 |
+
value: 7.766
|
26 |
---
|
27 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
28 |
should probably proofread and complete it, then remove this comment. -->
|
|
|
61 |
- Cantonese-ASR: Yu, Tiezheng, Frieske, Rita, Xu, Peng, Cahyawijaya, Samuel, Yiu, Cheuk Tung, Lovenia, Holy, Dai, Wenliang, Barezi, Elham, Chen, Qifeng, Ma, Xiaojuan, Shi, Bertram, Fung, Pascale (2022) "Automatic Speech Recognition Datasets in Cantonese: A Survey and New Dataset", 2022. Link: https://arxiv.org/pdf/2201.02419.pdf
|
62 |
|
63 |
## Training Hyperparameters
|
64 |
+
- learning_rate: 1e-3
|
65 |
- train_batch_size: 60 (on 1 3090 GPU)
|
66 |
- eval_batch_size: 10
|
67 |
- gradient_accumulation_steps: 1
|
68 |
- total_train_batch_size: 60x1x1=60
|
|
|
69 |
- lr_scheduler_type: linear
|
70 |
- lr_scheduler_warmup_steps: 500
|
71 |
+
- training_steps: 12000
|
72 |
- augmentation: SpecAugment
|
73 |
|
74 |
## Training Results
|
adapter_config.json
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_model_name_or_path": "openai/whisper-large-v2",
|
3 |
+
"bias": "none",
|
4 |
+
"enable_lora": null,
|
5 |
+
"fan_in_fan_out": false,
|
6 |
+
"inference_mode": true,
|
7 |
+
"lora_alpha": 64,
|
8 |
+
"lora_dropout": 0.05,
|
9 |
+
"merge_weights": false,
|
10 |
+
"modules_to_save": null,
|
11 |
+
"peft_type": "LORA",
|
12 |
+
"r": 32,
|
13 |
+
"target_modules": [
|
14 |
+
"q_proj",
|
15 |
+
"v_proj"
|
16 |
+
],
|
17 |
+
"task_type": null
|
18 |
+
}
|
adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4e7d024b237973a2ab2e0aae5b780aa1cd33fb0622463453bc680049e39ee7b
|
3 |
+
size 63056269
|
runs/Mar11_23-56-21_51ebbc6b4056/1678578981.4780438/events.out.tfevents.1678578981.51ebbc6b4056.17.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10949a1400eaa5230dc06a9cdcfb42b6e07477ac9184eb633494451babb49a36
|
3 |
+
size 6101
|
runs/Mar11_23-56-21_51ebbc6b4056/events.out.tfevents.1678578981.51ebbc6b4056.17.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56a14fc8106316f20360dfa1175e70bf32f0c0d1427a7e70c8a4cee09d3fa327
|
3 |
+
size 26974
|