dima1234321 committed on
Commit
61dfaaa
·
verified ·
1 Parent(s): bb71c63

dima1234321/wav2vec2-xls-r-300m-phoneme_he_small1

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ test_dataset filter=lfs diff=lfs merge=lfs -text
37
+ train_dataset filter=lfs diff=lfs merge=lfs -text
38
+ valid_dataset filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: facebook/wav2vec2-xls-r-300m
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - wer
8
+ model-index:
9
+ - name: shared_audio
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # shared_audio
17
+
18
+ This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on an unspecified dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 19.5815
21
+ - Wer: 1.0
22
+ - Cer: 1.0
23
+
24
+ ## Model description
25
+
26
+ More information needed
27
+
28
+ ## Intended uses & limitations
29
+
30
+ More information needed
31
+
32
+ ## Training and evaluation data
33
+
34
+ More information needed
35
+
36
+ ## Training procedure
37
+
38
+ ### Training hyperparameters
39
+
40
+ The following hyperparameters were used during training:
41
+ - learning_rate: 3e-05
42
+ - train_batch_size: 8
43
+ - eval_batch_size: 8
44
+ - seed: 42
45
+ - gradient_accumulation_steps: 4
46
+ - total_train_batch_size: 32
47
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
48
+ - lr_scheduler_type: linear
49
+ - lr_scheduler_warmup_steps: 20
50
+ - training_steps: 100
51
+
52
+ ### Training results
53
+
54
+ | Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
55
+ |:-------------:|:-----:|:----:|:---------------:|:---:|:------:|
56
+ | 62.6565 | 0.16 | 10 | 63.7309 | 1.0 | 2.9738 |
57
+ | 46.9251 | 0.32 | 20 | 54.7811 | 1.0 | 1.0 |
58
+ | 43.8044 | 0.48 | 30 | 32.9802 | 1.0 | 1.0 |
59
+ | 30.6358 | 0.64 | 40 | 26.5389 | 1.0 | 1.0 |
60
+ | 23.5351 | 0.8 | 50 | 23.3499 | 1.0 | 1.0 |
61
+ | 29.7649 | 0.96 | 60 | 21.5416 | 1.0 | 1.0 |
62
+ | 21.3599 | 1.12 | 70 | 20.5404 | 1.0 | 1.0 |
63
+ | 19.8412 | 1.28 | 80 | 19.8837 | 1.0 | 1.0 |
64
+ | 23.5039 | 1.44 | 90 | 19.5565 | 1.0 | 1.0 |
65
+ | 20.1719 | 1.6 | 100 | 19.4386 | 1.0 | 1.0 |
66
+
67
+
68
+ ### Framework versions
69
+
70
+ - Transformers 4.38.2
71
+ - Pytorch 2.2.1+cu121
72
+ - Datasets 2.18.0
73
+ - Tokenizers 0.15.2
b.ogg ADDED
Binary file (3.37 kB). View file
 
ba.ogg ADDED
Binary file (2.44 kB). View file
 
config.json ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-xls-r-300m",
3
+ "activation_dropout": 0.0,
4
+ "adapter_attn_dim": null,
5
+ "adapter_kernel_size": 3,
6
+ "adapter_stride": 2,
7
+ "add_adapter": false,
8
+ "apply_spec_augment": true,
9
+ "architectures": [
10
+ "Wav2Vec2ForCTC"
11
+ ],
12
+ "attention_dropout": 0.1,
13
+ "bos_token_id": 1,
14
+ "classifier_proj_size": 256,
15
+ "codevector_dim": 768,
16
+ "contrastive_logits_temperature": 0.1,
17
+ "conv_bias": true,
18
+ "conv_dim": [
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512,
25
+ 512
26
+ ],
27
+ "conv_kernel": [
28
+ 10,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 3,
33
+ 2,
34
+ 2
35
+ ],
36
+ "conv_stride": [
37
+ 5,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2,
43
+ 2
44
+ ],
45
+ "ctc_loss_reduction": "mean",
46
+ "ctc_zero_infinity": false,
47
+ "diversity_loss_weight": 0.1,
48
+ "do_stable_layer_norm": true,
49
+ "eos_token_id": 2,
50
+ "feat_extract_activation": "gelu",
51
+ "feat_extract_dropout": 0.0,
52
+ "feat_extract_norm": "layer",
53
+ "feat_proj_dropout": 0.0,
54
+ "feat_quantizer_dropout": 0.0,
55
+ "final_dropout": 0.0,
56
+ "gradient_checkpointing": false,
57
+ "hidden_act": "gelu",
58
+ "hidden_dropout": 0.1,
59
+ "hidden_size": 1024,
60
+ "initializer_range": 0.02,
61
+ "intermediate_size": 4096,
62
+ "layer_norm_eps": 1e-05,
63
+ "layerdrop": 0.0,
64
+ "mask_feature_length": 64,
65
+ "mask_feature_min_masks": 0,
66
+ "mask_feature_prob": 0.25,
67
+ "mask_time_length": 10,
68
+ "mask_time_min_masks": 2,
69
+ "mask_time_prob": 0.75,
70
+ "model_type": "wav2vec2",
71
+ "num_adapter_layers": 3,
72
+ "num_attention_heads": 16,
73
+ "num_codevector_groups": 2,
74
+ "num_codevectors_per_group": 320,
75
+ "num_conv_pos_embedding_groups": 16,
76
+ "num_conv_pos_embeddings": 128,
77
+ "num_feat_extract_layers": 7,
78
+ "num_hidden_layers": 24,
79
+ "num_negatives": 100,
80
+ "output_hidden_size": 1024,
81
+ "pad_token_id": 101,
82
+ "proj_codevector_dim": 768,
83
+ "tdnn_dilation": [
84
+ 1,
85
+ 2,
86
+ 3,
87
+ 1,
88
+ 1
89
+ ],
90
+ "tdnn_dim": [
91
+ 512,
92
+ 512,
93
+ 512,
94
+ 512,
95
+ 1500
96
+ ],
97
+ "tdnn_kernel": [
98
+ 5,
99
+ 3,
100
+ 3,
101
+ 1,
102
+ 1
103
+ ],
104
+ "torch_dtype": "float32",
105
+ "transformers_version": "4.38.2",
106
+ "use_weighted_layer_sum": false,
107
+ "vocab_size": 104,
108
+ "xvector_output_dim": 512
109
+ }
dataset2_withAudio1.csv ADDED
The diff for this file is too large to render. See raw diff
 
kakba1.ogg ADDED
Binary file (4.85 kB). View file
 
kakba2.ogg ADDED
Binary file (3.36 kB). View file
 
kakba3.ogg ADDED
Binary file (5.01 kB). View file
 
maba1.ogg ADDED
Binary file (3.28 kB). View file
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83096bb2075586342fa0703c2cf4e1d8c4124779a58c55deb8bc2e65bd53b13b
3
+ size 1262233880
na'a1.ogg ADDED
Binary file (4.31 kB). View file
 
na'a2.ogg ADDED
Binary file (3.97 kB). View file
 
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
runs/Mar15_09-45-00_9163490853ac/events.out.tfevents.1710496114.9163490853ac.482.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:323678ff51d2c5e5d2a9028112301b5feb190cc9c6fc7ba06fbdea77986e9f40
3
+ size 30701
runs/Mar15_09-45-00_9163490853ac/events.out.tfevents.1710509183.9163490853ac.482.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a7339d1983396c0dc65de5a4fdae35cf6f97f9e54344016c0cad27a28b81b14
3
+ size 446
t.ogg ADDED
Binary file (2.61 kB). View file
 
te.ogg ADDED
Binary file (2.88 kB). View file
 
test_dataset ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fc1ca3d0a8985e3553cccebf80a340f42aa8ce3a1de725c7c63128bbf3e3d5b
3
+ size 52862488
train_dataset ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8195a87c464ca3ed145693a7aec9878f7626c4ca49ec53f6c4912033db87f2d
3
+ size 415167252
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92e96185dc6b382024930bfbbd505f2b7aed9db60a71e6eecf8e372096d5d0f2
3
+ size 4920
valid_dataset ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33f54ac6a4bdaad37735a7424c22e4745448c9b1811825191feff4886ebf2e56
3
+ size 52189620
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"a": 1, "b": 2, "ba": 3, "be": 4, "bi": 5, "bo": 6, "bu": 7, "d": 8, "da": 9, "de": 10, "di": 11, "do": 12, "du": 13, "e": 14, "f": 15, "g": 16, "ga": 17, "ge": 18, "gi": 19, "go": 20, "gu": 21, "h": 22, "ha": 23, "he": 24, "hi": 25, "ho": 26, "hu": 27, "i": 28, "k": 29, "ka": 30, "ke": 31, "ki": 32, "ko": 33, "ku": 34, "l": 35, "la": 36, "le": 37, "li": 38, "lo": 39, "lu": 40, "m": 41, "n": 42, "o": 43, "p": 44, "pa": 45, "pe": 46, "pi": 47, "po": 48, "pu": 49, "r": 50, "ra": 51, "re": 52, "ri": 53, "ro": 54, "ru": 55, "s": 56, "sa": 57, "se": 58, "sh": 59, "sha": 60, "she": 61, "shi": 62, "sho": 63, "shu": 64, "si": 65, "so": 66, "su": 67, "t": 68, "ta": 69, "te": 70, "ti": 71, "to": 72, "tu": 73, "tz": 74, "u": 75, "v": 76, "va": 77, "ve": 78, "vi": 79, "vo": 80, "vu": 81, "x": 82, "xa": 83, "xe": 84, "xi": 85, "xo": 86, "xu": 87, "y": 88, "ya": 89, "ye": 90, "yi": 91, "yo": 92, "yu": 93, "z": 94, "za": 95, "ze": 96, "zi": 97, "zo": 98, "zu": 99, "|": 0, "[UNK]": 100, "[PAD]": 101}