marinone94 committed on
Commit
33fa953
β€’
1 Parent(s): bbba84f
{checkpoint-100 → checkpoint-300}/config.json RENAMED
File without changes
{checkpoint-100 → checkpoint-300}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a37fc39f7fbbd9e4e27f32438be5a7defea135497c5dd3778613d2e5336ad19
3
- size 2490337361
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ae9e41884d3305d9a0797fd14fd35fc8e619e12951eedb6e1396e3ead551313
3
+ size 2490337809
{checkpoint-100 → checkpoint-300}/preprocessor_config.json RENAMED
File without changes
{checkpoint-50 → checkpoint-300}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80ebb6ebc7a95b13fda5cdc22fab5fcfb9ffdb99ca0102065a7147116a7f9f3f
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ba896f27a39cdc0a9c70e56aeff43d05c53ba85e3a65d2c02f13edcce3b7ac8
3
  size 1262063089
{checkpoint-50 → checkpoint-300}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fe8f634109af89a7e3706d16f2fb93f152eb68502ebee688d78216511e43324
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dad8817bedcf4b45f9a2d36af4404126640a18d28d8189d505ec2ab56d4b6a1
3
  size 14503
{checkpoint-50 → checkpoint-300}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83a7ca717969781e5bd7f66c4b808307bbfaef76d5ae72d82fe31d60dea27fb7
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0967b9f865f16344c55f5ccc3cf7d6e8e97ca61dda304e931ca6bad130f48dd1
3
  size 559
{checkpoint-100 → checkpoint-300}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48ba5c556f6ae479cce41c7f298c01a0d3452634d5c926dc7e48294e0c69c304
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:303fe9412b2bbace437d42323a895571355b0ae20c611a474d9492be12287f9c
3
  size 623
{checkpoint-100 → checkpoint-300}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.281150159744409,
5
- "global_step": 100,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -54,11 +54,107 @@
54
  "eval_steps_per_second": 1.239,
55
  "eval_wer": 1.0,
56
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  }
58
  ],
59
  "max_steps": 780,
60
  "num_train_epochs": 10,
61
- "total_flos": 1.5267594688539034e+18,
62
  "trial_name": null,
63
  "trial_params": null
64
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.8434504792332267,
5
+ "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
54
  "eval_steps_per_second": 1.239,
55
  "eval_wer": 1.0,
56
  "step": 100
57
+ },
58
+ {
59
+ "epoch": 1.54,
60
+ "learning_rate": 0.0006346153846153846,
61
+ "loss": 3.0159,
62
+ "step": 120
63
+ },
64
+ {
65
+ "epoch": 1.79,
66
+ "learning_rate": 0.0006153846153846154,
67
+ "loss": 2.977,
68
+ "step": 140
69
+ },
70
+ {
71
+ "epoch": 1.92,
72
+ "eval_loss": 2.973355770111084,
73
+ "eval_runtime": 3.1716,
74
+ "eval_samples_per_second": 31.53,
75
+ "eval_steps_per_second": 1.261,
76
+ "eval_wer": 1.0,
77
+ "step": 150
78
+ },
79
+ {
80
+ "epoch": 2.05,
81
+ "learning_rate": 0.0005961538461538461,
82
+ "loss": 3.0056,
83
+ "step": 160
84
+ },
85
+ {
86
+ "epoch": 2.31,
87
+ "learning_rate": 0.000576923076923077,
88
+ "loss": 2.9591,
89
+ "step": 180
90
+ },
91
+ {
92
+ "epoch": 2.56,
93
+ "learning_rate": 0.0005576923076923078,
94
+ "loss": 2.9398,
95
+ "step": 200
96
+ },
97
+ {
98
+ "epoch": 2.56,
99
+ "eval_loss": 2.940675735473633,
100
+ "eval_runtime": 3.1691,
101
+ "eval_samples_per_second": 31.554,
102
+ "eval_steps_per_second": 1.262,
103
+ "eval_wer": 1.0,
104
+ "step": 200
105
+ },
106
+ {
107
+ "epoch": 2.82,
108
+ "learning_rate": 0.0005384615384615384,
109
+ "loss": 2.9295,
110
+ "step": 220
111
+ },
112
+ {
113
+ "epoch": 3.08,
114
+ "learning_rate": 0.0005192307692307692,
115
+ "loss": 2.9465,
116
+ "step": 240
117
+ },
118
+ {
119
+ "epoch": 3.2,
120
+ "eval_loss": 2.835913896560669,
121
+ "eval_runtime": 3.1824,
122
+ "eval_samples_per_second": 31.423,
123
+ "eval_steps_per_second": 1.257,
124
+ "eval_wer": 1.0,
125
+ "step": 250
126
+ },
127
+ {
128
+ "epoch": 3.33,
129
+ "learning_rate": 0.0005,
130
+ "loss": 2.9053,
131
+ "step": 260
132
+ },
133
+ {
134
+ "epoch": 3.59,
135
+ "learning_rate": 0.0004807692307692308,
136
+ "loss": 2.899,
137
+ "step": 280
138
+ },
139
+ {
140
+ "epoch": 3.84,
141
+ "learning_rate": 0.0004615384615384616,
142
+ "loss": 2.8861,
143
+ "step": 300
144
+ },
145
+ {
146
+ "epoch": 3.84,
147
+ "eval_loss": 2.8373122215270996,
148
+ "eval_runtime": 3.1455,
149
+ "eval_samples_per_second": 31.791,
150
+ "eval_steps_per_second": 1.272,
151
+ "eval_wer": 1.0,
152
+ "step": 300
153
  }
154
  ],
155
  "max_steps": 780,
156
  "num_train_epochs": 10,
157
+ "total_flos": 4.5237874175174973e+18,
158
  "trial_name": null,
159
  "trial_params": null
160
  }
{checkpoint-100 → checkpoint-300}/training_args.bin RENAMED
File without changes
{checkpoint-50 → checkpoint-350}/config.json RENAMED
File without changes
{checkpoint-50 → checkpoint-350}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f8c3352ebf31a7c0483167400d3399b22ccc2be15a1cf307f0d154c758e9499
3
- size 2490337361
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73ac1e891bd6525ca6e80b6eb98bd75676f15de9c2c7ed28a04409bd578ec0e5
3
+ size 2490337809
{checkpoint-50 → checkpoint-350}/preprocessor_config.json RENAMED
File without changes
{checkpoint-100 → checkpoint-350}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b71ffcd1b91ea379737ddde0779f635b716bdd28dd2319a1848a317ef50fd710
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9143d1cdbe52a5a78c256f6c9739fca212258b1e2cd7f7a50eac95314a70e3a3
3
  size 1262063089
{checkpoint-100 → checkpoint-350}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0b19e6c7c1493b5479a5fdad58d60d11a146aedd47536529f399dc05f5b158e
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e73f3035f0a34c54cee6bdff78f510ce1bacc7a1d34681e6733f0c9a19ff6280
3
  size 14567
{checkpoint-100 → checkpoint-350}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13a3423b2fe42f204bc8fe2c666ff379f9fd753a0f13613064a5e71e86b519e8
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:416c647a9555a5cdcb63e21cfbb531c48e435b4c488666f2c461c0870bce7d8f
3
  size 559
{checkpoint-50 → checkpoint-350}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb5336e922700acd511fdefd5fbbe9ccf91f59901dbca8c97f53c0892943c4b7
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55a5628618be5d495f517856cf6ffc4824491879e5d2a0c2d74458d24b484021
3
  size 623
checkpoint-350/trainer_state.json ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 4.485623003194888,
5
+ "global_step": 350,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.26,
12
+ "learning_rate": 0.0007307692307692308,
13
+ "loss": 4.2559,
14
+ "step": 20
15
+ },
16
+ {
17
+ "epoch": 0.51,
18
+ "learning_rate": 0.0007115384615384615,
19
+ "loss": 3.068,
20
+ "step": 40
21
+ },
22
+ {
23
+ "epoch": 0.64,
24
+ "eval_loss": 3.058549165725708,
25
+ "eval_runtime": 3.3353,
26
+ "eval_samples_per_second": 29.982,
27
+ "eval_steps_per_second": 1.199,
28
+ "eval_wer": 1.0,
29
+ "step": 50
30
+ },
31
+ {
32
+ "epoch": 0.77,
33
+ "learning_rate": 0.0006923076923076924,
34
+ "loss": 3.0637,
35
+ "step": 60
36
+ },
37
+ {
38
+ "epoch": 1.03,
39
+ "learning_rate": 0.0006730769230769232,
40
+ "loss": 3.0752,
41
+ "step": 80
42
+ },
43
+ {
44
+ "epoch": 1.28,
45
+ "learning_rate": 0.0006538461538461538,
46
+ "loss": 3.0083,
47
+ "step": 100
48
+ },
49
+ {
50
+ "epoch": 1.28,
51
+ "eval_loss": 3.1246652603149414,
52
+ "eval_runtime": 3.2285,
53
+ "eval_samples_per_second": 30.974,
54
+ "eval_steps_per_second": 1.239,
55
+ "eval_wer": 1.0,
56
+ "step": 100
57
+ },
58
+ {
59
+ "epoch": 1.54,
60
+ "learning_rate": 0.0006346153846153846,
61
+ "loss": 3.0159,
62
+ "step": 120
63
+ },
64
+ {
65
+ "epoch": 1.79,
66
+ "learning_rate": 0.0006153846153846154,
67
+ "loss": 2.977,
68
+ "step": 140
69
+ },
70
+ {
71
+ "epoch": 1.92,
72
+ "eval_loss": 2.973355770111084,
73
+ "eval_runtime": 3.1716,
74
+ "eval_samples_per_second": 31.53,
75
+ "eval_steps_per_second": 1.261,
76
+ "eval_wer": 1.0,
77
+ "step": 150
78
+ },
79
+ {
80
+ "epoch": 2.05,
81
+ "learning_rate": 0.0005961538461538461,
82
+ "loss": 3.0056,
83
+ "step": 160
84
+ },
85
+ {
86
+ "epoch": 2.31,
87
+ "learning_rate": 0.000576923076923077,
88
+ "loss": 2.9591,
89
+ "step": 180
90
+ },
91
+ {
92
+ "epoch": 2.56,
93
+ "learning_rate": 0.0005576923076923078,
94
+ "loss": 2.9398,
95
+ "step": 200
96
+ },
97
+ {
98
+ "epoch": 2.56,
99
+ "eval_loss": 2.940675735473633,
100
+ "eval_runtime": 3.1691,
101
+ "eval_samples_per_second": 31.554,
102
+ "eval_steps_per_second": 1.262,
103
+ "eval_wer": 1.0,
104
+ "step": 200
105
+ },
106
+ {
107
+ "epoch": 2.82,
108
+ "learning_rate": 0.0005384615384615384,
109
+ "loss": 2.9295,
110
+ "step": 220
111
+ },
112
+ {
113
+ "epoch": 3.08,
114
+ "learning_rate": 0.0005192307692307692,
115
+ "loss": 2.9465,
116
+ "step": 240
117
+ },
118
+ {
119
+ "epoch": 3.2,
120
+ "eval_loss": 2.835913896560669,
121
+ "eval_runtime": 3.1824,
122
+ "eval_samples_per_second": 31.423,
123
+ "eval_steps_per_second": 1.257,
124
+ "eval_wer": 1.0,
125
+ "step": 250
126
+ },
127
+ {
128
+ "epoch": 3.33,
129
+ "learning_rate": 0.0005,
130
+ "loss": 2.9053,
131
+ "step": 260
132
+ },
133
+ {
134
+ "epoch": 3.59,
135
+ "learning_rate": 0.0004807692307692308,
136
+ "loss": 2.899,
137
+ "step": 280
138
+ },
139
+ {
140
+ "epoch": 3.84,
141
+ "learning_rate": 0.0004615384615384616,
142
+ "loss": 2.8861,
143
+ "step": 300
144
+ },
145
+ {
146
+ "epoch": 3.84,
147
+ "eval_loss": 2.8373122215270996,
148
+ "eval_runtime": 3.1455,
149
+ "eval_samples_per_second": 31.791,
150
+ "eval_steps_per_second": 1.272,
151
+ "eval_wer": 1.0,
152
+ "step": 300
153
+ },
154
+ {
155
+ "epoch": 4.1,
156
+ "learning_rate": 0.0004423076923076923,
157
+ "loss": 2.9255,
158
+ "step": 320
159
+ },
160
+ {
161
+ "epoch": 4.36,
162
+ "learning_rate": 0.0004230769230769231,
163
+ "loss": 2.884,
164
+ "step": 340
165
+ },
166
+ {
167
+ "epoch": 4.49,
168
+ "eval_loss": 2.815220832824707,
169
+ "eval_runtime": 3.2038,
170
+ "eval_samples_per_second": 31.213,
171
+ "eval_steps_per_second": 1.249,
172
+ "eval_wer": 1.0476190476190477,
173
+ "step": 350
174
+ }
175
+ ],
176
+ "max_steps": 780,
177
+ "num_train_epochs": 10,
178
+ "total_flos": 5.238613498111745e+18,
179
+ "trial_name": null,
180
+ "trial_params": null
181
+ }
{checkpoint-50 → checkpoint-350}/training_args.bin RENAMED
File without changes
checkpoint-50/trainer_state.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.6389776357827476,
5
- "global_step": 50,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.26,
12
- "learning_rate": 0.0007307692307692308,
13
- "loss": 4.2559,
14
- "step": 20
15
- },
16
- {
17
- "epoch": 0.51,
18
- "learning_rate": 0.0007115384615384615,
19
- "loss": 3.068,
20
- "step": 40
21
- },
22
- {
23
- "epoch": 0.64,
24
- "eval_loss": 3.058549165725708,
25
- "eval_runtime": 3.3353,
26
- "eval_samples_per_second": 29.982,
27
- "eval_steps_per_second": 1.199,
28
- "eval_wer": 1.0,
29
- "step": 50
30
- }
31
- ],
32
- "max_steps": 780,
33
- "num_train_epochs": 10,
34
- "total_flos": 7.755792033779712e+17,
35
- "trial_name": null,
36
- "trial_params": null
37
- }