wav2vec2-large-xls-r-300m-zhhk

Files changed (7) hide show

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
-base_model: ctl/wav2vec2-large-xlsr-cantonese
 tags:
 - generated_from_trainer
 model-index:
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
 # wav2vec2-large-xls-r-300m-zhhk
-This model is a fine-tuned version of [ctl/wav2vec2-large-xlsr-cantonese](https://huggingface.co/ctl/wav2vec2-large-xlsr-cantonese) on an unknown dataset.
 ## Model description
@@ -32,16 +32,17 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 0.0003
-- train_batch_size: 16
 - eval_batch_size: 8
 - seed: 42
-- gradient_accumulation_steps: 2
-- total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 500
-- num_epochs: 3
 ### Training results
@@ -50,6 +51,6 @@ The following hyperparameters were used during training:
 ### Framework versions
 - Transformers 4.35.2
-- Pytorch 2.1.0+cu118
 - Datasets 2.15.0
 - Tokenizers 0.15.0

 ---
 license: apache-2.0
+base_model: w11wo/wav2vec2-xls-r-300m-zh-HK-lm-v2
 tags:
 - generated_from_trainer
 model-index:
 # wav2vec2-large-xls-r-300m-zhhk
+This model is a fine-tuned version of [w11wo/wav2vec2-xls-r-300m-zh-HK-lm-v2](https://huggingface.co/w11wo/wav2vec2-xls-r-300m-zh-HK-lm-v2) on an unknown dataset.
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 0.001
+- train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 500
+- num_epochs: 5
+- mixed_precision_training: Native AMP
 ### Training results
 ### Framework versions
 - Transformers 4.35.2
+- Pytorch 2.1.0+cu121
 - Datasets 2.15.0
 - Tokenizers 0.15.0

config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
-  "_name_or_path": "ctl/wav2vec2-large-xlsr-cantonese",
-  "activation_dropout": 0.0,
   "adapter_attn_dim": null,
   "adapter_kernel_size": 3,
   "adapter_stride": 2,
@@ -12,7 +12,7 @@
   "attention_dropout": 0.0,
   "bos_token_id": 1,
   "classifier_proj_size": 256,
-  "codevector_dim": 256,
   "contrastive_logits_temperature": 0.1,
   "conv_bias": true,
   "conv_dim": [
@@ -60,20 +60,12 @@
   "intermediate_size": 4096,
   "layer_norm_eps": 1e-05,
   "layerdrop": 0.0,
-  "mask_channel_length": 10,
-  "mask_channel_min_space": 1,
-  "mask_channel_other": 0.0,
-  "mask_channel_prob": 0.0,
-  "mask_channel_selection": "static",
-  "mask_feature_length": 10,
   "mask_feature_min_masks": 0,
-  "mask_feature_prob": 0.0,
   "mask_time_length": 10,
   "mask_time_min_masks": 2,
-  "mask_time_min_space": 1,
-  "mask_time_other": 0.0,
-  "mask_time_prob": 0.05,
-  "mask_time_selection": "static",
   "model_type": "wav2vec2",
   "num_adapter_layers": 3,
   "num_attention_heads": 16,
@@ -86,7 +78,7 @@
   "num_negatives": 100,
   "output_hidden_size": 1024,
   "pad_token_id": 3580,
-  "proj_codevector_dim": 256,
   "tdnn_dilation": [
     1,
     2,

 {
+  "_name_or_path": "w11wo/wav2vec2-xls-r-300m-zh-HK-lm-v2",
+  "activation_dropout": 0.1,
   "adapter_attn_dim": null,
   "adapter_kernel_size": 3,
   "adapter_stride": 2,
   "attention_dropout": 0.0,
   "bos_token_id": 1,
   "classifier_proj_size": 256,
+  "codevector_dim": 768,
   "contrastive_logits_temperature": 0.1,
   "conv_bias": true,
   "conv_dim": [
   "intermediate_size": 4096,
   "layer_norm_eps": 1e-05,
   "layerdrop": 0.0,
+  "mask_feature_length": 64,
   "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.25,
   "mask_time_length": 10,
   "mask_time_min_masks": 2,
+  "mask_time_prob": 0.0,
   "model_type": "wav2vec2",
   "num_adapter_layers": 3,
   "num_attention_heads": 16,
   "num_negatives": 100,
   "output_hidden_size": 1024,
   "pad_token_id": 3580,
+  "proj_codevector_dim": 768,
   "tdnn_dilation": [
     1,
     2,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d548d1ae1fd6388d58311df44718c4967e7c1364a719cbff3d63f575d2830880
 size 1276497820

 version https://git-lfs.github.com/spec/v1
+oid sha256:138922249ab18bee86ade0ea53c2d60f6e66a1dda527b02042014a32de9bd208
 size 1276497820

runs/Dec17_19-14-30_70668636b91a/events.out.tfevents.1702840787.70668636b91a.2568.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:0fa4606352634c6779c67337d7568266ddc0f5ae207f21c35821ed7eaa84feca
+size 5968

runs/Dec17_19-47-55_70668636b91a/events.out.tfevents.1702842619.70668636b91a.2568.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc9036c4dbaa4920c99d0504b858fcd5a4f469b6c1476bc92828cd56050d2ced
+size 5968

runs/Dec17_20-08-38_70668636b91a/events.out.tfevents.1702843862.70668636b91a.2568.2 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:fee028a40b91e61412479afcc9dcc435ad9ae642b97fbf18fd00c39a972bc9cd
+size 6164

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02359f91a1840d9316705763ba27b7a1c409d7a1934e4844d345d046ac32ea65
 size 4728

 version https://git-lfs.github.com/spec/v1
+oid sha256:6f437f05240d6cabc870e307679be862f3c6e7e2e5c2f4afe923ef56011fb65f
 size 4728