OneNorth committed on
Commit
24220f6
1 Parent(s): ad0810d

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.5,
3
- "eval_accuracy": 0.8193548387096774,
4
- "eval_loss": 0.6981304287910461,
5
- "eval_runtime": 28.8048,
6
- "eval_samples_per_second": 5.381,
7
- "eval_steps_per_second": 1.354
8
  }
 
1
  {
2
+ "epoch": 5.17,
3
+ "eval_accuracy": 0.8516129032258064,
4
+ "eval_loss": 0.6185442209243774,
5
+ "eval_runtime": 28.1289,
6
+ "eval_samples_per_second": 5.51,
7
+ "eval_steps_per_second": 1.386
8
  }
runs/May24_04-01-25_d951cd11c68c/events.out.tfevents.1716524277.d951cd11c68c.34.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3a2927ac2490e8519c46d6b6377b8c558043bc7c6f7a565eb8c4618c9e2f190
3
- size 411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9e829316cd6256fb76eb813630c9f1e6458c019496a19a9aa23b4fbe0c27331
3
+ size 734
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.5,
3
- "eval_accuracy": 0.8193548387096774,
4
- "eval_loss": 0.6981304287910461,
5
- "eval_runtime": 28.8048,
6
- "eval_samples_per_second": 5.381,
7
- "eval_steps_per_second": 1.354
8
  }
 
1
  {
2
+ "epoch": 5.17,
3
+ "eval_accuracy": 0.8516129032258064,
4
+ "eval_loss": 0.6185442209243774,
5
+ "eval_runtime": 28.1289,
6
+ "eval_samples_per_second": 5.51,
7
+ "eval_steps_per_second": 1.386
8
  }
trainer_state.json CHANGED
@@ -1,170 +1,416 @@
1
  {
2
- "best_metric": 0.9428571428571428,
3
- "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-150",
4
- "epoch": 1.5,
5
  "eval_steps": 500,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.07,
13
- "grad_norm": 0.07965963333845139,
14
- "learning_rate": 3.3333333333333335e-05,
15
- "loss": 0.005,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.13,
20
- "grad_norm": 0.08289032429456711,
21
- "learning_rate": 4.814814814814815e-05,
22
- "loss": 0.201,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.2,
27
- "grad_norm": 0.2235719859600067,
28
- "learning_rate": 4.4444444444444447e-05,
29
- "loss": 0.5754,
30
  "step": 30
31
  },
32
  {
33
- "epoch": 0.27,
34
- "grad_norm": 0.0644197091460228,
35
- "learning_rate": 4.074074074074074e-05,
36
- "loss": 1.1323,
37
  "step": 40
38
  },
39
  {
40
- "epoch": 0.33,
41
- "grad_norm": 42.82566452026367,
42
- "learning_rate": 3.7037037037037037e-05,
43
- "loss": 0.2046,
44
  "step": 50
45
  },
46
  {
47
- "epoch": 0.4,
48
- "grad_norm": 0.10422177612781525,
49
- "learning_rate": 3.3333333333333335e-05,
50
- "loss": 0.2749,
51
  "step": 60
52
  },
53
  {
54
- "epoch": 0.47,
55
- "grad_norm": 0.051237553358078,
56
- "learning_rate": 2.962962962962963e-05,
57
- "loss": 0.0433,
58
  "step": 70
59
  },
60
  {
61
- "epoch": 0.5,
62
- "eval_accuracy": 0.8857142857142857,
63
- "eval_loss": 0.356876015663147,
64
- "eval_runtime": 12.9523,
65
- "eval_samples_per_second": 5.404,
66
- "eval_steps_per_second": 1.39,
67
  "step": 75
68
  },
69
  {
70
- "epoch": 1.03,
71
- "grad_norm": 0.14869919419288635,
72
- "learning_rate": 2.5925925925925925e-05,
73
- "loss": 0.1337,
74
  "step": 80
75
  },
76
  {
77
- "epoch": 1.1,
78
- "grad_norm": 0.21503913402557373,
79
- "learning_rate": 2.2222222222222223e-05,
80
- "loss": 0.061,
81
  "step": 90
82
  },
83
  {
84
- "epoch": 1.17,
85
- "grad_norm": 0.1664995551109314,
86
- "learning_rate": 1.8518518518518518e-05,
87
- "loss": 0.226,
88
  "step": 100
89
  },
90
  {
91
- "epoch": 1.23,
92
- "grad_norm": 0.13239744305610657,
93
- "learning_rate": 1.4814814814814815e-05,
94
- "loss": 0.1242,
95
  "step": 110
96
  },
97
  {
98
- "epoch": 1.3,
99
- "grad_norm": 0.032610807567834854,
100
- "learning_rate": 1.1111111111111112e-05,
101
- "loss": 0.1647,
102
  "step": 120
103
  },
104
  {
105
- "epoch": 1.37,
106
- "grad_norm": 0.5573199391365051,
107
- "learning_rate": 7.4074074074074075e-06,
108
- "loss": 0.1372,
109
  "step": 130
110
  },
111
  {
112
- "epoch": 1.43,
113
- "grad_norm": 0.3077225685119629,
114
- "learning_rate": 3.7037037037037037e-06,
115
- "loss": 0.0047,
116
  "step": 140
117
  },
118
  {
119
- "epoch": 1.5,
120
- "grad_norm": 0.22845041751861572,
121
- "learning_rate": 0.0,
122
- "loss": 0.0047,
123
  "step": 150
124
  },
125
  {
126
- "epoch": 1.5,
127
- "eval_accuracy": 0.9428571428571428,
128
- "eval_loss": 0.3063296973705292,
129
- "eval_runtime": 12.6147,
130
- "eval_samples_per_second": 5.549,
131
- "eval_steps_per_second": 1.427,
132
  "step": 150
133
  },
134
  {
135
- "epoch": 1.5,
136
- "step": 150,
137
- "total_flos": 7.47692094062592e+17,
138
- "train_loss": 0.2195048264414072,
139
- "train_runtime": 232.8628,
140
- "train_samples_per_second": 2.577,
141
- "train_steps_per_second": 0.644
142
  },
143
  {
144
- "epoch": 1.5,
145
- "eval_accuracy": 0.8205128205128205,
146
- "eval_loss": 0.6944047212600708,
147
- "eval_runtime": 28.6136,
148
- "eval_samples_per_second": 5.452,
149
- "eval_steps_per_second": 1.363,
150
- "step": 150
151
  },
152
  {
153
- "epoch": 1.5,
154
- "eval_accuracy": 0.8193548387096774,
155
- "eval_loss": 0.6981304287910461,
156
- "eval_runtime": 28.8048,
157
- "eval_samples_per_second": 5.381,
158
- "eval_steps_per_second": 1.354,
159
- "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  }
161
  ],
162
  "logging_steps": 10,
163
- "max_steps": 150,
164
  "num_input_tokens_seen": 0,
165
  "num_train_epochs": 9223372036854775807,
166
  "save_steps": 500,
167
- "total_flos": 7.47692094062592e+17,
168
  "train_batch_size": 4,
169
  "trial_name": null,
170
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9714285714285714,
3
+ "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-375",
4
+ "epoch": 5.166666666666667,
5
  "eval_steps": 500,
6
+ "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.02,
13
+ "grad_norm": 9.09181022644043,
14
+ "learning_rate": 1.1111111111111112e-05,
15
+ "loss": 2.3453,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.04,
20
+ "grad_norm": 9.389509201049805,
21
+ "learning_rate": 2.2222222222222223e-05,
22
+ "loss": 2.3958,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 0.07,
27
+ "grad_norm": 9.11400032043457,
28
+ "learning_rate": 3.3333333333333335e-05,
29
+ "loss": 2.2492,
30
  "step": 30
31
  },
32
  {
33
+ "epoch": 0.09,
34
+ "grad_norm": 9.454550743103027,
35
+ "learning_rate": 4.4444444444444447e-05,
36
+ "loss": 2.1811,
37
  "step": 40
38
  },
39
  {
40
+ "epoch": 0.11,
41
+ "grad_norm": 12.871296882629395,
42
+ "learning_rate": 4.938271604938271e-05,
43
+ "loss": 2.1261,
44
  "step": 50
45
  },
46
  {
47
+ "epoch": 0.13,
48
+ "grad_norm": 9.507197380065918,
49
+ "learning_rate": 4.814814814814815e-05,
50
+ "loss": 2.0149,
51
  "step": 60
52
  },
53
  {
54
+ "epoch": 0.16,
55
+ "grad_norm": 12.240191459655762,
56
+ "learning_rate": 4.691358024691358e-05,
57
+ "loss": 1.9248,
58
  "step": 70
59
  },
60
  {
61
+ "epoch": 0.17,
62
+ "eval_accuracy": 0.3142857142857143,
63
+ "eval_loss": 1.6910260915756226,
64
+ "eval_runtime": 13.1007,
65
+ "eval_samples_per_second": 5.343,
66
+ "eval_steps_per_second": 1.374,
67
  "step": 75
68
  },
69
  {
70
+ "epoch": 1.01,
71
+ "grad_norm": 13.400834083557129,
72
+ "learning_rate": 4.567901234567901e-05,
73
+ "loss": 1.4961,
74
  "step": 80
75
  },
76
  {
77
+ "epoch": 1.03,
78
+ "grad_norm": 12.832711219787598,
79
+ "learning_rate": 4.4444444444444447e-05,
80
+ "loss": 1.4544,
81
  "step": 90
82
  },
83
  {
84
+ "epoch": 1.06,
85
+ "grad_norm": 11.878938674926758,
86
+ "learning_rate": 4.3209876543209875e-05,
87
+ "loss": 1.3226,
88
  "step": 100
89
  },
90
  {
91
+ "epoch": 1.08,
92
+ "grad_norm": 8.267742156982422,
93
+ "learning_rate": 4.197530864197531e-05,
94
+ "loss": 0.9212,
95
  "step": 110
96
  },
97
  {
98
+ "epoch": 1.1,
99
+ "grad_norm": 15.088278770446777,
100
+ "learning_rate": 4.074074074074074e-05,
101
+ "loss": 0.8928,
102
  "step": 120
103
  },
104
  {
105
+ "epoch": 1.12,
106
+ "grad_norm": 14.688928604125977,
107
+ "learning_rate": 3.950617283950617e-05,
108
+ "loss": 0.6026,
109
  "step": 130
110
  },
111
  {
112
+ "epoch": 1.14,
113
+ "grad_norm": 10.090428352355957,
114
+ "learning_rate": 3.82716049382716e-05,
115
+ "loss": 0.7323,
116
  "step": 140
117
  },
118
  {
119
+ "epoch": 1.17,
120
+ "grad_norm": 28.017093658447266,
121
+ "learning_rate": 3.7037037037037037e-05,
122
+ "loss": 1.1184,
123
  "step": 150
124
  },
125
  {
126
+ "epoch": 1.17,
127
+ "eval_accuracy": 0.7,
128
+ "eval_loss": 0.8519060611724854,
129
+ "eval_runtime": 12.8721,
130
+ "eval_samples_per_second": 5.438,
131
+ "eval_steps_per_second": 1.398,
132
  "step": 150
133
  },
134
  {
135
+ "epoch": 2.02,
136
+ "grad_norm": 7.826801776885986,
137
+ "learning_rate": 3.580246913580247e-05,
138
+ "loss": 0.6328,
139
+ "step": 160
 
 
140
  },
141
  {
142
+ "epoch": 2.04,
143
+ "grad_norm": 17.076839447021484,
144
+ "learning_rate": 3.45679012345679e-05,
145
+ "loss": 0.5843,
146
+ "step": 170
 
 
147
  },
148
  {
149
+ "epoch": 2.07,
150
+ "grad_norm": 8.304590225219727,
151
+ "learning_rate": 3.3333333333333335e-05,
152
+ "loss": 0.7969,
153
+ "step": 180
154
+ },
155
+ {
156
+ "epoch": 2.09,
157
+ "grad_norm": 7.5533223152160645,
158
+ "learning_rate": 3.209876543209876e-05,
159
+ "loss": 0.5387,
160
+ "step": 190
161
+ },
162
+ {
163
+ "epoch": 2.11,
164
+ "grad_norm": 3.7337615489959717,
165
+ "learning_rate": 3.08641975308642e-05,
166
+ "loss": 0.5673,
167
+ "step": 200
168
+ },
169
+ {
170
+ "epoch": 2.13,
171
+ "grad_norm": 23.18366050720215,
172
+ "learning_rate": 2.962962962962963e-05,
173
+ "loss": 0.5191,
174
+ "step": 210
175
+ },
176
+ {
177
+ "epoch": 2.16,
178
+ "grad_norm": 11.092031478881836,
179
+ "learning_rate": 2.839506172839506e-05,
180
+ "loss": 0.3505,
181
+ "step": 220
182
+ },
183
+ {
184
+ "epoch": 2.17,
185
+ "eval_accuracy": 0.7857142857142857,
186
+ "eval_loss": 0.5482387542724609,
187
+ "eval_runtime": 12.8506,
188
+ "eval_samples_per_second": 5.447,
189
+ "eval_steps_per_second": 1.401,
190
+ "step": 225
191
+ },
192
+ {
193
+ "epoch": 3.01,
194
+ "grad_norm": 1.397813320159912,
195
+ "learning_rate": 2.7160493827160493e-05,
196
+ "loss": 0.2652,
197
+ "step": 230
198
+ },
199
+ {
200
+ "epoch": 3.03,
201
+ "grad_norm": 2.130690574645996,
202
+ "learning_rate": 2.5925925925925925e-05,
203
+ "loss": 0.1691,
204
+ "step": 240
205
+ },
206
+ {
207
+ "epoch": 3.06,
208
+ "grad_norm": 7.323586940765381,
209
+ "learning_rate": 2.4691358024691357e-05,
210
+ "loss": 0.1669,
211
+ "step": 250
212
+ },
213
+ {
214
+ "epoch": 3.08,
215
+ "grad_norm": 5.903147220611572,
216
+ "learning_rate": 2.345679012345679e-05,
217
+ "loss": 0.324,
218
+ "step": 260
219
+ },
220
+ {
221
+ "epoch": 3.1,
222
+ "grad_norm": 24.850133895874023,
223
+ "learning_rate": 2.2222222222222223e-05,
224
+ "loss": 0.4857,
225
+ "step": 270
226
+ },
227
+ {
228
+ "epoch": 3.12,
229
+ "grad_norm": 1.1520411968231201,
230
+ "learning_rate": 2.0987654320987655e-05,
231
+ "loss": 0.231,
232
+ "step": 280
233
+ },
234
+ {
235
+ "epoch": 3.14,
236
+ "grad_norm": 0.6035730838775635,
237
+ "learning_rate": 1.9753086419753087e-05,
238
+ "loss": 0.3852,
239
+ "step": 290
240
+ },
241
+ {
242
+ "epoch": 3.17,
243
+ "grad_norm": 18.224578857421875,
244
+ "learning_rate": 1.8518518518518518e-05,
245
+ "loss": 0.2939,
246
+ "step": 300
247
+ },
248
+ {
249
+ "epoch": 3.17,
250
+ "eval_accuracy": 0.8285714285714286,
251
+ "eval_loss": 0.5074731111526489,
252
+ "eval_runtime": 12.6199,
253
+ "eval_samples_per_second": 5.547,
254
+ "eval_steps_per_second": 1.426,
255
+ "step": 300
256
+ },
257
+ {
258
+ "epoch": 4.02,
259
+ "grad_norm": 4.320995330810547,
260
+ "learning_rate": 1.728395061728395e-05,
261
+ "loss": 0.1894,
262
+ "step": 310
263
+ },
264
+ {
265
+ "epoch": 4.04,
266
+ "grad_norm": 4.220107555389404,
267
+ "learning_rate": 1.604938271604938e-05,
268
+ "loss": 0.2997,
269
+ "step": 320
270
+ },
271
+ {
272
+ "epoch": 4.07,
273
+ "grad_norm": 0.14674872159957886,
274
+ "learning_rate": 1.4814814814814815e-05,
275
+ "loss": 0.2471,
276
+ "step": 330
277
+ },
278
+ {
279
+ "epoch": 4.09,
280
+ "grad_norm": 1.210436224937439,
281
+ "learning_rate": 1.3580246913580247e-05,
282
+ "loss": 0.1045,
283
+ "step": 340
284
+ },
285
+ {
286
+ "epoch": 4.11,
287
+ "grad_norm": 0.28732016682624817,
288
+ "learning_rate": 1.2345679012345678e-05,
289
+ "loss": 0.0638,
290
+ "step": 350
291
+ },
292
+ {
293
+ "epoch": 4.13,
294
+ "grad_norm": 0.1985640674829483,
295
+ "learning_rate": 1.1111111111111112e-05,
296
+ "loss": 0.1419,
297
+ "step": 360
298
+ },
299
+ {
300
+ "epoch": 4.16,
301
+ "grad_norm": 0.1826096773147583,
302
+ "learning_rate": 9.876543209876543e-06,
303
+ "loss": 0.0447,
304
+ "step": 370
305
+ },
306
+ {
307
+ "epoch": 4.17,
308
+ "eval_accuracy": 0.9714285714285714,
309
+ "eval_loss": 0.1740979254245758,
310
+ "eval_runtime": 12.7121,
311
+ "eval_samples_per_second": 5.507,
312
+ "eval_steps_per_second": 1.416,
313
+ "step": 375
314
+ },
315
+ {
316
+ "epoch": 5.01,
317
+ "grad_norm": 0.4490085244178772,
318
+ "learning_rate": 8.641975308641975e-06,
319
+ "loss": 0.0338,
320
+ "step": 380
321
+ },
322
+ {
323
+ "epoch": 5.03,
324
+ "grad_norm": 0.615115761756897,
325
+ "learning_rate": 7.4074074074074075e-06,
326
+ "loss": 0.1366,
327
+ "step": 390
328
+ },
329
+ {
330
+ "epoch": 5.06,
331
+ "grad_norm": 0.19469819962978363,
332
+ "learning_rate": 6.172839506172839e-06,
333
+ "loss": 0.2805,
334
+ "step": 400
335
+ },
336
+ {
337
+ "epoch": 5.08,
338
+ "grad_norm": 0.10871770232915878,
339
+ "learning_rate": 4.938271604938272e-06,
340
+ "loss": 0.1858,
341
+ "step": 410
342
+ },
343
+ {
344
+ "epoch": 5.1,
345
+ "grad_norm": 1.5411165952682495,
346
+ "learning_rate": 3.7037037037037037e-06,
347
+ "loss": 0.0189,
348
+ "step": 420
349
+ },
350
+ {
351
+ "epoch": 5.12,
352
+ "grad_norm": 10.748401641845703,
353
+ "learning_rate": 2.469135802469136e-06,
354
+ "loss": 0.0882,
355
+ "step": 430
356
+ },
357
+ {
358
+ "epoch": 5.14,
359
+ "grad_norm": 0.13508407771587372,
360
+ "learning_rate": 1.234567901234568e-06,
361
+ "loss": 0.1669,
362
+ "step": 440
363
+ },
364
+ {
365
+ "epoch": 5.17,
366
+ "grad_norm": 6.92973518371582,
367
+ "learning_rate": 0.0,
368
+ "loss": 0.0643,
369
+ "step": 450
370
+ },
371
+ {
372
+ "epoch": 5.17,
373
+ "eval_accuracy": 0.9571428571428572,
374
+ "eval_loss": 0.1370178759098053,
375
+ "eval_runtime": 12.713,
376
+ "eval_samples_per_second": 5.506,
377
+ "eval_steps_per_second": 1.416,
378
+ "step": 450
379
+ },
380
+ {
381
+ "epoch": 5.17,
382
+ "step": 450,
383
+ "total_flos": 2.243076282187776e+18,
384
+ "train_loss": 0.7145391458272934,
385
+ "train_runtime": 727.1717,
386
+ "train_samples_per_second": 2.475,
387
+ "train_steps_per_second": 0.619
388
+ },
389
+ {
390
+ "epoch": 5.17,
391
+ "eval_accuracy": 0.8516129032258064,
392
+ "eval_loss": 0.6185441613197327,
393
+ "eval_runtime": 29.0205,
394
+ "eval_samples_per_second": 5.341,
395
+ "eval_steps_per_second": 1.344,
396
+ "step": 450
397
+ },
398
+ {
399
+ "epoch": 5.17,
400
+ "eval_accuracy": 0.8516129032258064,
401
+ "eval_loss": 0.6185442209243774,
402
+ "eval_runtime": 28.1289,
403
+ "eval_samples_per_second": 5.51,
404
+ "eval_steps_per_second": 1.386,
405
+ "step": 450
406
  }
407
  ],
408
  "logging_steps": 10,
409
+ "max_steps": 450,
410
  "num_input_tokens_seen": 0,
411
  "num_train_epochs": 9223372036854775807,
412
  "save_steps": 500,
413
+ "total_flos": 2.243076282187776e+18,
414
  "train_batch_size": 4,
415
  "trial_name": null,
416
  "trial_params": null