lolyafedor committed on
Commit
bcdf77e
1 Parent(s): 3cccb05

End of training

Browse files
adapter_config.json CHANGED
@@ -23,22 +23,22 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "audio_enc_to_dec_proj",
27
- "embed_tokens.1",
28
- "enc_to_dec_proj",
29
- "q_proj",
30
- "lm_heads.1",
31
- "v_proj",
32
  "out_proj",
 
 
 
 
 
33
  "lm_heads.0",
34
- "k_proj",
35
  "lm_heads.3",
36
- "fc2",
37
- "fc1",
 
 
38
  "embed_tokens.3",
39
- "embed_tokens.0",
40
- "embed_tokens.2",
41
- "lm_heads.2"
42
  ],
43
  "task_type": null,
44
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "embed_tokens.2",
 
 
 
 
 
27
  "out_proj",
28
+ "embed_tokens.0",
29
+ "lm_heads.1",
30
+ "q_proj",
31
+ "fc2",
32
+ "lm_heads.2",
33
  "lm_heads.0",
 
34
  "lm_heads.3",
35
+ "enc_to_dec_proj",
36
+ "embed_tokens.1",
37
+ "audio_enc_to_dec_proj",
38
+ "v_proj",
39
  "embed_tokens.3",
40
+ "k_proj",
41
+ "fc1"
 
42
  ],
43
  "task_type": null,
44
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3612eb015af73cbade088639234ac52de0079fef04916f606aaabbb57062e82
3
  size 87103456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8b947692009e5564f54e1a1c8064d476f99c0931c814df934f82b46cbdd7b52
3
  size 87103456
trainer_state.json CHANGED
@@ -3,65 +3,107 @@
3
  "best_model_checkpoint": null,
4
  "epoch": 3.84,
5
  "eval_steps": 500,
6
- "global_step": 12,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
11
  {
12
  "epoch": 0.64,
13
- "grad_norm": 1.2096633911132812,
14
  "learning_rate": 0.0001666666666666667,
15
- "loss": 9.4081,
16
- "step": 2
 
 
 
 
 
 
 
17
  },
18
  {
19
  "epoch": 1.28,
20
- "grad_norm": 2.210158109664917,
21
  "learning_rate": 0.00013333333333333334,
22
- "loss": 8.9596,
23
- "step": 4
 
 
 
 
 
 
 
24
  },
25
  {
26
  "epoch": 1.92,
27
- "grad_norm": 1.7887189388275146,
28
  "learning_rate": 0.0001,
29
- "loss": 8.5658,
30
- "step": 6
 
 
 
 
 
 
 
31
  },
32
  {
33
  "epoch": 2.56,
34
- "grad_norm": 1.780813455581665,
35
  "learning_rate": 6.666666666666667e-05,
36
- "loss": 8.1075,
37
- "step": 8
 
 
 
 
 
 
 
38
  },
39
  {
40
  "epoch": 3.2,
41
- "grad_norm": 1.9974435567855835,
42
  "learning_rate": 3.3333333333333335e-05,
43
- "loss": 7.9692,
44
- "step": 10
 
 
 
 
 
 
 
45
  },
46
  {
47
  "epoch": 3.84,
48
- "grad_norm": 2.1069281101226807,
49
  "learning_rate": 0.0,
50
- "loss": 7.7777,
51
- "step": 12
52
  },
53
  {
54
  "epoch": 3.84,
55
- "step": 12,
56
- "total_flos": 53380916718552.0,
57
- "train_loss": 8.46463712056478,
58
- "train_runtime": 308.5463,
59
- "train_samples_per_second": 0.648,
60
- "train_steps_per_second": 0.039
61
  }
62
  ],
63
  "logging_steps": 2,
64
- "max_steps": 12,
65
  "num_input_tokens_seen": 0,
66
  "num_train_epochs": 4,
67
  "save_steps": 500,
@@ -77,7 +119,7 @@
77
  "attributes": {}
78
  }
79
  },
80
- "total_flos": 53380916718552.0,
81
  "train_batch_size": 2,
82
  "trial_name": null,
83
  "trial_params": null
 
3
  "best_model_checkpoint": null,
4
  "epoch": 3.84,
5
  "eval_steps": 500,
6
+ "global_step": 24,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
+ {
12
+ "epoch": 0.32,
13
+ "grad_norm": 1.0525139570236206,
14
+ "learning_rate": 0.00018333333333333334,
15
+ "loss": 9.5169,
16
+ "step": 2
17
+ },
18
  {
19
  "epoch": 0.64,
20
+ "grad_norm": 1.5870144367218018,
21
  "learning_rate": 0.0001666666666666667,
22
+ "loss": 9.1514,
23
+ "step": 4
24
+ },
25
+ {
26
+ "epoch": 0.96,
27
+ "grad_norm": 1.7580112218856812,
28
+ "learning_rate": 0.00015000000000000001,
29
+ "loss": 8.7557,
30
+ "step": 6
31
  },
32
  {
33
  "epoch": 1.28,
34
+ "grad_norm": 2.0391345024108887,
35
  "learning_rate": 0.00013333333333333334,
36
+ "loss": 8.2163,
37
+ "step": 8
38
+ },
39
+ {
40
+ "epoch": 1.6,
41
+ "grad_norm": 2.67323899269104,
42
+ "learning_rate": 0.00011666666666666668,
43
+ "loss": 7.9266,
44
+ "step": 10
45
  },
46
  {
47
  "epoch": 1.92,
48
+ "grad_norm": 1.5127941370010376,
49
  "learning_rate": 0.0001,
50
+ "loss": 7.6902,
51
+ "step": 12
52
+ },
53
+ {
54
+ "epoch": 2.24,
55
+ "grad_norm": 1.308194875717163,
56
+ "learning_rate": 8.333333333333334e-05,
57
+ "loss": 7.5201,
58
+ "step": 14
59
  },
60
  {
61
  "epoch": 2.56,
62
+ "grad_norm": 0.9491788148880005,
63
  "learning_rate": 6.666666666666667e-05,
64
+ "loss": 7.3685,
65
+ "step": 16
66
+ },
67
+ {
68
+ "epoch": 2.88,
69
+ "grad_norm": 1.15030837059021,
70
+ "learning_rate": 5e-05,
71
+ "loss": 7.3428,
72
+ "step": 18
73
  },
74
  {
75
  "epoch": 3.2,
76
+ "grad_norm": 0.9033100008964539,
77
  "learning_rate": 3.3333333333333335e-05,
78
+ "loss": 7.2229,
79
+ "step": 20
80
+ },
81
+ {
82
+ "epoch": 3.52,
83
+ "grad_norm": 1.4382227659225464,
84
+ "learning_rate": 1.6666666666666667e-05,
85
+ "loss": 7.3239,
86
+ "step": 22
87
  },
88
  {
89
  "epoch": 3.84,
90
+ "grad_norm": 0.7890676259994507,
91
  "learning_rate": 0.0,
92
+ "loss": 7.2211,
93
+ "step": 24
94
  },
95
  {
96
  "epoch": 3.84,
97
+ "step": 24,
98
+ "total_flos": 108535654138032.0,
99
+ "train_loss": 7.9380284150441485,
100
+ "train_runtime": 637.3795,
101
+ "train_samples_per_second": 0.628,
102
+ "train_steps_per_second": 0.038
103
  }
104
  ],
105
  "logging_steps": 2,
106
+ "max_steps": 24,
107
  "num_input_tokens_seen": 0,
108
  "num_train_epochs": 4,
109
  "save_steps": 500,
 
119
  "attributes": {}
120
  }
121
  },
122
+ "total_flos": 108535654138032.0,
123
  "train_batch_size": 2,
124
  "trial_name": null,
125
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c328fcaeeeba701381fe6382d962b94e36b5d2c8c205cebcd08a755b92628acf
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d26e79c9fb97880dd95e934c8113fd0b69eb285f4bdc928c70a8aa73b85d85c
3
  size 5368