HamdanXI committed on
Commit
1ab3064
1 Parent(s): e14c66b

HamdanXI/w2v2_uclass_clipped_10_seconds_fb_labeled

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- license: apache-2.0
3
- base_model: facebook/wav2vec2-base-960h
4
  tags:
5
  - generated_from_trainer
6
  metrics:
@@ -15,18 +15,10 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # results
17
 
18
- This model is a fine-tuned version of [facebook/wav2vec2-base-960h](https://huggingface.co/facebook/wav2vec2-base-960h) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.5527
21
- - Accuracy: 0.5206
22
- - F1 Score Class 0: 0.0
23
- - F1 Score Class 1: 0.0
24
- - F1 Score Class 2: 0.0
25
- - F1 Score Class 3: 0.0
26
- - F1 Score Class 4: 0.0
27
- - F1 Score Class 5: 0.0
28
- - F1 Score Class 6: 0.6847
29
- - F1 Score Class 7: 0.0
30
 
31
  ## Model description
32
 
@@ -49,24 +41,17 @@ The following hyperparameters were used during training:
49
  - train_batch_size: 16
50
  - eval_batch_size: 8
51
  - seed: 42
 
 
52
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
53
  - lr_scheduler_type: linear
54
- - num_epochs: 10
55
 
56
  ### Training results
57
 
58
- | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 Score Class 0 | F1 Score Class 1 | F1 Score Class 2 | F1 Score Class 3 | F1 Score Class 4 | F1 Score Class 5 | F1 Score Class 6 | F1 Score Class 7 |
59
- |:-------------:|:-----:|:----:|:---------------:|:--------:|:----------------:|:----------------:|:----------------:|:----------------:|:----------------:|:----------------:|:----------------:|:----------------:|
60
- | 1.624 | 1.0 | 533 | 1.5562 | 0.5206 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.6847 | 0.0 |
61
- | 1.6758 | 2.0 | 1066 | 1.5635 | 0.5206 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.6847 | 0.0 |
62
- | 1.5986 | 3.0 | 1599 | 1.5559 | 0.5206 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.6847 | 0.0 |
63
- | 1.722 | 4.0 | 2132 | 1.5539 | 0.5206 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.6847 | 0.0 |
64
- | 1.7561 | 5.0 | 2665 | 1.5558 | 0.5206 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.6847 | 0.0 |
65
- | 1.724 | 6.0 | 3198 | 1.5543 | 0.5206 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.6847 | 0.0 |
66
- | 1.485 | 7.0 | 3731 | 1.5534 | 0.5206 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.6847 | 0.0 |
67
- | 1.6086 | 8.0 | 4264 | 1.5524 | 0.5206 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.6847 | 0.0 |
68
- | 1.4925 | 9.0 | 4797 | 1.5530 | 0.5206 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.6847 | 0.0 |
69
- | 1.611 | 10.0 | 5330 | 1.5527 | 0.5206 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.6847 | 0.0 |
70
 
71
 
72
  ### Framework versions
 
1
  ---
2
+ license: mit
3
+ base_model: facebook/w2v-bert-2.0
4
  tags:
5
  - generated_from_trainer
6
  metrics:
 
15
 
16
  # results
17
 
18
+ This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 1.6035
21
+ - Accuracy: 0.5153
 
 
 
 
 
 
 
 
22
 
23
  ## Model description
24
 
 
41
  - train_batch_size: 16
42
  - eval_batch_size: 8
43
  - seed: 42
44
+ - gradient_accumulation_steps: 2
45
+ - total_train_batch_size: 32
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: linear
48
+ - num_epochs: 3
49
 
50
  ### Training results
51
 
52
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
53
+ |:-------------:|:------:|:----:|:---------------:|:--------:|
54
+ | 1.6178 | 1.8762 | 500 | 1.6035 | 0.5153 |
 
 
 
 
 
 
 
 
 
55
 
56
 
57
  ### Framework versions
config.json CHANGED
@@ -1,20 +1,23 @@
1
  {
2
- "_name_or_path": "facebook/wav2vec2-base-960h",
3
- "activation_dropout": 0.1,
 
4
  "adapter_attn_dim": null,
5
  "adapter_kernel_size": 3,
6
  "adapter_stride": 2,
7
  "add_adapter": false,
8
- "apply_spec_augment": true,
9
  "architectures": [
10
  "Wav2Vec2ForSequenceClassification"
11
  ],
12
- "attention_dropout": 0.1,
13
  "bos_token_id": 1,
14
- "classifier_proj_size": 256,
15
- "codevector_dim": 256,
 
16
  "contrastive_logits_temperature": 0.1,
17
  "conv_bias": false,
 
18
  "conv_dim": [
19
  512,
20
  512,
@@ -42,22 +45,20 @@
42
  2,
43
  2
44
  ],
45
- "ctc_loss_reduction": "sum",
46
  "ctc_zero_infinity": false,
47
  "diversity_loss_weight": 0.1,
48
  "do_stable_layer_norm": false,
49
  "eos_token_id": 2,
50
  "feat_extract_activation": "gelu",
51
- "feat_extract_dropout": 0.0,
52
  "feat_extract_norm": "group",
53
- "feat_proj_dropout": 0.1,
54
  "feat_quantizer_dropout": 0.0,
 
55
  "final_dropout": 0.1,
56
- "gradient_checkpointing": false,
57
- "hidden_act": "gelu",
58
- "hidden_dropout": 0.1,
59
- "hidden_dropout_prob": 0.1,
60
- "hidden_size": 768,
61
  "id2label": {
62
  "0": "LABEL_0",
63
  "1": "LABEL_1",
@@ -70,7 +71,7 @@
70
  "8": "LABEL_8"
71
  },
72
  "initializer_range": 0.02,
73
- "intermediate_size": 3072,
74
  "label2id": {
75
  "LABEL_0": 0,
76
  "LABEL_1": 1,
@@ -83,26 +84,31 @@
83
  "LABEL_8": 8
84
  },
85
  "layer_norm_eps": 1e-05,
86
- "layerdrop": 0.1,
 
87
  "mask_feature_length": 10,
88
  "mask_feature_min_masks": 0,
89
  "mask_feature_prob": 0.0,
90
  "mask_time_length": 10,
91
  "mask_time_min_masks": 2,
92
- "mask_time_prob": 0.05,
 
93
  "model_type": "wav2vec2",
94
- "num_adapter_layers": 3,
95
- "num_attention_heads": 12,
96
  "num_codevector_groups": 2,
97
  "num_codevectors_per_group": 320,
98
  "num_conv_pos_embedding_groups": 16,
99
  "num_conv_pos_embeddings": 128,
100
  "num_feat_extract_layers": 7,
101
- "num_hidden_layers": 12,
102
  "num_negatives": 100,
103
- "output_hidden_size": 768,
104
- "pad_token_id": 0,
105
- "proj_codevector_dim": 256,
 
 
 
106
  "tdnn_dilation": [
107
  1,
108
  2,
@@ -126,7 +132,8 @@
126
  ],
127
  "torch_dtype": "float32",
128
  "transformers_version": "4.40.1",
 
129
  "use_weighted_layer_sum": false,
130
- "vocab_size": 32,
131
  "xvector_output_dim": 512
132
  }
 
1
  {
2
+ "_name_or_path": "facebook/w2v-bert-2.0",
3
+ "activation_dropout": 0.0,
4
+ "adapter_act": "relu",
5
  "adapter_attn_dim": null,
6
  "adapter_kernel_size": 3,
7
  "adapter_stride": 2,
8
  "add_adapter": false,
9
+ "apply_spec_augment": false,
10
  "architectures": [
11
  "Wav2Vec2ForSequenceClassification"
12
  ],
13
+ "attention_dropout": 0.0,
14
  "bos_token_id": 1,
15
+ "classifier_proj_size": 768,
16
+ "codevector_dim": 768,
17
+ "conformer_conv_dropout": 0.1,
18
  "contrastive_logits_temperature": 0.1,
19
  "conv_bias": false,
20
+ "conv_depthwise_kernel_size": 31,
21
  "conv_dim": [
22
  512,
23
  512,
 
45
  2,
46
  2
47
  ],
48
+ "ctc_loss_reduction": "mean",
49
  "ctc_zero_infinity": false,
50
  "diversity_loss_weight": 0.1,
51
  "do_stable_layer_norm": false,
52
  "eos_token_id": 2,
53
  "feat_extract_activation": "gelu",
 
54
  "feat_extract_norm": "group",
55
+ "feat_proj_dropout": 0.0,
56
  "feat_quantizer_dropout": 0.0,
57
+ "feature_projection_input_dim": 160,
58
  "final_dropout": 0.1,
59
+ "hidden_act": "swish",
60
+ "hidden_dropout": 0.0,
61
+ "hidden_size": 1024,
 
 
62
  "id2label": {
63
  "0": "LABEL_0",
64
  "1": "LABEL_1",
 
71
  "8": "LABEL_8"
72
  },
73
  "initializer_range": 0.02,
74
+ "intermediate_size": 4096,
75
  "label2id": {
76
  "LABEL_0": 0,
77
  "LABEL_1": 1,
 
84
  "LABEL_8": 8
85
  },
86
  "layer_norm_eps": 1e-05,
87
+ "layerdrop": 0.0,
88
+ "left_max_position_embeddings": 64,
89
  "mask_feature_length": 10,
90
  "mask_feature_min_masks": 0,
91
  "mask_feature_prob": 0.0,
92
  "mask_time_length": 10,
93
  "mask_time_min_masks": 2,
94
+ "mask_time_prob": 0.0,
95
+ "max_source_positions": 5000,
96
  "model_type": "wav2vec2",
97
+ "num_adapter_layers": 1,
98
+ "num_attention_heads": 16,
99
  "num_codevector_groups": 2,
100
  "num_codevectors_per_group": 320,
101
  "num_conv_pos_embedding_groups": 16,
102
  "num_conv_pos_embeddings": 128,
103
  "num_feat_extract_layers": 7,
104
+ "num_hidden_layers": 24,
105
  "num_negatives": 100,
106
+ "output_hidden_size": 1024,
107
+ "pad_token_id": 35,
108
+ "position_embeddings_type": "relative_key",
109
+ "proj_codevector_dim": 768,
110
+ "right_max_position_embeddings": 8,
111
+ "rotary_embedding_base": 10000,
112
  "tdnn_dilation": [
113
  1,
114
  2,
 
132
  ],
133
  "torch_dtype": "float32",
134
  "transformers_version": "4.40.1",
135
+ "use_intermediate_ffn_before_adapter": false,
136
  "use_weighted_layer_sum": false,
137
+ "vocab_size": 38,
138
  "xvector_output_dim": 512
139
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69223aa1be85c1a0727093742650a369b3fb89d1d372a34bbb17993c0a676b80
3
- size 378309564
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c159da6c2d11f10f26a32899dd2df5a7c3442071bdfe57fd2b164b3a63caaad6
3
+ size 1264938628
runs/May02_19-13-42_66f4a5d44daa/events.out.tfevents.1714677223.66f4a5d44daa.5906.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aac6040cbc11b8c363cb8818b1865f5328134681c9b79fede2e5326d0c6e96c
3
+ size 7670
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80f29a588cfee9ddf0c86aeeddc61a626fa3f2b90b1fd7182a97da1cebc319f0
3
- size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4040a63e96aa775e39a9be00e3638449b75ba13b4591b9397ffbbc6ec482a8bc
3
+ size 4984