kiranshivaraju commited on
Commit
fe2aee6
·
verified ·
1 Parent(s): 55bc39c

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.986342943854325,
3
+ "eval_loss": 0.35858574509620667,
4
+ "eval_recall": 0.9782797567332754,
5
+ "eval_runtime": 60.2263,
6
+ "eval_samples_per_second": 38.87,
7
+ "eval_steps_per_second": 1.229,
8
+ "total_flos": 1.1154611028241981e+19,
9
+ "train_loss": 0.4681245271510225,
10
+ "train_runtime": 1816.2465,
11
+ "train_samples_per_second": 34.801,
12
+ "train_steps_per_second": 0.271
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.986342943854325,
3
+ "eval_loss": 0.35858574509620667,
4
+ "eval_recall": 0.9782797567332754,
5
+ "eval_runtime": 60.2263,
6
+ "eval_samples_per_second": 38.87,
7
+ "eval_steps_per_second": 1.229
8
+ }
runs/Nov07_10-09-48_b095e30be617/events.out.tfevents.1730976191.b095e30be617.192.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e6e256b2fab5c081461ea1f3359098df7f02e8d92abb505017c90be1b53cc46
3
+ size 409
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.986342943854325,
3
+ "total_flos": 1.1154611028241981e+19,
4
+ "train_loss": 0.4681245271510225,
5
+ "train_runtime": 1816.2465,
6
+ "train_samples_per_second": 34.801,
7
+ "train_steps_per_second": 0.271
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,412 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9782797567332754,
3
+ "best_model_checkpoint": "convnext-large-224-finetuned-dog-vs-cat/checkpoint-492",
4
+ "epoch": 2.986342943854325,
5
+ "eval_steps": 500,
6
+ "global_step": 492,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06069802731411229,
13
+ "grad_norm": 1.2104277610778809,
14
+ "learning_rate": 1e-05,
15
+ "loss": 0.6885,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.12139605462822459,
20
+ "grad_norm": 1.1519063711166382,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.6854,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.18209408194233687,
27
+ "grad_norm": 1.203668475151062,
28
+ "learning_rate": 3e-05,
29
+ "loss": 0.6788,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.24279210925644917,
34
+ "grad_norm": 1.3230795860290527,
35
+ "learning_rate": 4e-05,
36
+ "loss": 0.6664,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.30349013657056145,
41
+ "grad_norm": 1.1332818269729614,
42
+ "learning_rate": 5e-05,
43
+ "loss": 0.6511,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.36418816388467373,
48
+ "grad_norm": 1.1424264907836914,
49
+ "learning_rate": 4.8868778280542986e-05,
50
+ "loss": 0.6321,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.424886191198786,
55
+ "grad_norm": 1.1293511390686035,
56
+ "learning_rate": 4.7737556561085976e-05,
57
+ "loss": 0.6115,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.48558421851289835,
62
+ "grad_norm": 1.1212339401245117,
63
+ "learning_rate": 4.660633484162896e-05,
64
+ "loss": 0.5957,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.5462822458270106,
69
+ "grad_norm": 0.9936708807945251,
70
+ "learning_rate": 4.547511312217195e-05,
71
+ "loss": 0.5815,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.6069802731411229,
76
+ "grad_norm": 1.0561779737472534,
77
+ "learning_rate": 4.434389140271493e-05,
78
+ "loss": 0.5583,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.6676783004552352,
83
+ "grad_norm": 0.9779506921768188,
84
+ "learning_rate": 4.321266968325792e-05,
85
+ "loss": 0.5541,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.7283763277693475,
90
+ "grad_norm": 0.8941373825073242,
91
+ "learning_rate": 4.2081447963800907e-05,
92
+ "loss": 0.5408,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.7890743550834598,
97
+ "grad_norm": 0.9414535164833069,
98
+ "learning_rate": 4.095022624434389e-05,
99
+ "loss": 0.5288,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.849772382397572,
104
+ "grad_norm": 0.8809621334075928,
105
+ "learning_rate": 3.981900452488688e-05,
106
+ "loss": 0.5194,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.9104704097116844,
111
+ "grad_norm": 0.8484501838684082,
112
+ "learning_rate": 3.868778280542987e-05,
113
+ "loss": 0.5038,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.9711684370257967,
118
+ "grad_norm": 0.8708747625350952,
119
+ "learning_rate": 3.7556561085972854e-05,
120
+ "loss": 0.496,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.9954476479514416,
125
+ "eval_loss": 0.4791725277900696,
126
+ "eval_recall": 0.9200695047784535,
127
+ "eval_runtime": 60.2024,
128
+ "eval_samples_per_second": 38.885,
129
+ "eval_steps_per_second": 1.229,
130
+ "step": 164
131
+ },
132
+ {
133
+ "epoch": 1.031866464339909,
134
+ "grad_norm": 0.8436847925186157,
135
+ "learning_rate": 3.642533936651584e-05,
136
+ "loss": 0.4861,
137
+ "step": 170
138
+ },
139
+ {
140
+ "epoch": 1.0925644916540211,
141
+ "grad_norm": 0.7819939851760864,
142
+ "learning_rate": 3.529411764705883e-05,
143
+ "loss": 0.4844,
144
+ "step": 180
145
+ },
146
+ {
147
+ "epoch": 1.1532625189681336,
148
+ "grad_norm": 0.847568929195404,
149
+ "learning_rate": 3.416289592760181e-05,
150
+ "loss": 0.4673,
151
+ "step": 190
152
+ },
153
+ {
154
+ "epoch": 1.2139605462822458,
155
+ "grad_norm": 0.7669057846069336,
156
+ "learning_rate": 3.3031674208144794e-05,
157
+ "loss": 0.4583,
158
+ "step": 200
159
+ },
160
+ {
161
+ "epoch": 1.274658573596358,
162
+ "grad_norm": 0.766395092010498,
163
+ "learning_rate": 3.1900452488687784e-05,
164
+ "loss": 0.4473,
165
+ "step": 210
166
+ },
167
+ {
168
+ "epoch": 1.3353566009104705,
169
+ "grad_norm": 0.8044094443321228,
170
+ "learning_rate": 3.0769230769230774e-05,
171
+ "loss": 0.4531,
172
+ "step": 220
173
+ },
174
+ {
175
+ "epoch": 1.3960546282245827,
176
+ "grad_norm": 0.7774869799613953,
177
+ "learning_rate": 2.9638009049773758e-05,
178
+ "loss": 0.439,
179
+ "step": 230
180
+ },
181
+ {
182
+ "epoch": 1.456752655538695,
183
+ "grad_norm": 0.7320767641067505,
184
+ "learning_rate": 2.850678733031674e-05,
185
+ "loss": 0.436,
186
+ "step": 240
187
+ },
188
+ {
189
+ "epoch": 1.5174506828528074,
190
+ "grad_norm": 0.7127722501754761,
191
+ "learning_rate": 2.737556561085973e-05,
192
+ "loss": 0.4314,
193
+ "step": 250
194
+ },
195
+ {
196
+ "epoch": 1.5781487101669196,
197
+ "grad_norm": 0.7172250151634216,
198
+ "learning_rate": 2.6244343891402718e-05,
199
+ "loss": 0.4302,
200
+ "step": 260
201
+ },
202
+ {
203
+ "epoch": 1.6388467374810318,
204
+ "grad_norm": 0.7075967192649841,
205
+ "learning_rate": 2.51131221719457e-05,
206
+ "loss": 0.4174,
207
+ "step": 270
208
+ },
209
+ {
210
+ "epoch": 1.699544764795144,
211
+ "grad_norm": 0.7165055871009827,
212
+ "learning_rate": 2.3981900452488688e-05,
213
+ "loss": 0.4174,
214
+ "step": 280
215
+ },
216
+ {
217
+ "epoch": 1.7602427921092565,
218
+ "grad_norm": 0.6554096341133118,
219
+ "learning_rate": 2.2850678733031675e-05,
220
+ "loss": 0.4193,
221
+ "step": 290
222
+ },
223
+ {
224
+ "epoch": 1.8209408194233687,
225
+ "grad_norm": 0.7225818037986755,
226
+ "learning_rate": 2.1719457013574662e-05,
227
+ "loss": 0.3996,
228
+ "step": 300
229
+ },
230
+ {
231
+ "epoch": 1.8816388467374812,
232
+ "grad_norm": 0.6873008012771606,
233
+ "learning_rate": 2.058823529411765e-05,
234
+ "loss": 0.4011,
235
+ "step": 310
236
+ },
237
+ {
238
+ "epoch": 1.9423368740515934,
239
+ "grad_norm": 0.6827677488327026,
240
+ "learning_rate": 1.9457013574660635e-05,
241
+ "loss": 0.3996,
242
+ "step": 320
243
+ },
244
+ {
245
+ "epoch": 1.9969650986342944,
246
+ "eval_loss": 0.3836117684841156,
247
+ "eval_recall": 0.9765421372719374,
248
+ "eval_runtime": 60.251,
249
+ "eval_samples_per_second": 38.854,
250
+ "eval_steps_per_second": 1.228,
251
+ "step": 329
252
+ },
253
+ {
254
+ "epoch": 2.0030349013657056,
255
+ "grad_norm": 0.6939449906349182,
256
+ "learning_rate": 1.832579185520362e-05,
257
+ "loss": 0.4015,
258
+ "step": 330
259
+ },
260
+ {
261
+ "epoch": 2.063732928679818,
262
+ "grad_norm": 0.6604830026626587,
263
+ "learning_rate": 1.7194570135746606e-05,
264
+ "loss": 0.3934,
265
+ "step": 340
266
+ },
267
+ {
268
+ "epoch": 2.12443095599393,
269
+ "grad_norm": 0.6591904759407043,
270
+ "learning_rate": 1.6063348416289596e-05,
271
+ "loss": 0.3956,
272
+ "step": 350
273
+ },
274
+ {
275
+ "epoch": 2.1851289833080423,
276
+ "grad_norm": 0.6776930689811707,
277
+ "learning_rate": 1.493212669683258e-05,
278
+ "loss": 0.3904,
279
+ "step": 360
280
+ },
281
+ {
282
+ "epoch": 2.245827010622155,
283
+ "grad_norm": 0.6641884446144104,
284
+ "learning_rate": 1.3800904977375568e-05,
285
+ "loss": 0.3836,
286
+ "step": 370
287
+ },
288
+ {
289
+ "epoch": 2.306525037936267,
290
+ "grad_norm": 0.6640185117721558,
291
+ "learning_rate": 1.2669683257918553e-05,
292
+ "loss": 0.3849,
293
+ "step": 380
294
+ },
295
+ {
296
+ "epoch": 2.3672230652503794,
297
+ "grad_norm": 0.6439262628555298,
298
+ "learning_rate": 1.153846153846154e-05,
299
+ "loss": 0.3847,
300
+ "step": 390
301
+ },
302
+ {
303
+ "epoch": 2.4279210925644916,
304
+ "grad_norm": 0.6991675496101379,
305
+ "learning_rate": 1.0407239819004526e-05,
306
+ "loss": 0.3789,
307
+ "step": 400
308
+ },
309
+ {
310
+ "epoch": 2.488619119878604,
311
+ "grad_norm": 0.6298201680183411,
312
+ "learning_rate": 9.276018099547511e-06,
313
+ "loss": 0.3769,
314
+ "step": 410
315
+ },
316
+ {
317
+ "epoch": 2.549317147192716,
318
+ "grad_norm": 0.6205734014511108,
319
+ "learning_rate": 8.144796380090498e-06,
320
+ "loss": 0.3769,
321
+ "step": 420
322
+ },
323
+ {
324
+ "epoch": 2.6100151745068283,
325
+ "grad_norm": 0.6230446696281433,
326
+ "learning_rate": 7.013574660633485e-06,
327
+ "loss": 0.3802,
328
+ "step": 430
329
+ },
330
+ {
331
+ "epoch": 2.670713201820941,
332
+ "grad_norm": 0.6412050127983093,
333
+ "learning_rate": 5.882352941176471e-06,
334
+ "loss": 0.3805,
335
+ "step": 440
336
+ },
337
+ {
338
+ "epoch": 2.731411229135053,
339
+ "grad_norm": 0.6700997948646545,
340
+ "learning_rate": 4.751131221719457e-06,
341
+ "loss": 0.3725,
342
+ "step": 450
343
+ },
344
+ {
345
+ "epoch": 2.7921092564491654,
346
+ "grad_norm": 0.6392331123352051,
347
+ "learning_rate": 3.619909502262444e-06,
348
+ "loss": 0.3729,
349
+ "step": 460
350
+ },
351
+ {
352
+ "epoch": 2.8528072837632776,
353
+ "grad_norm": 0.6410810351371765,
354
+ "learning_rate": 2.48868778280543e-06,
355
+ "loss": 0.3582,
356
+ "step": 470
357
+ },
358
+ {
359
+ "epoch": 2.91350531107739,
360
+ "grad_norm": 0.6067666411399841,
361
+ "learning_rate": 1.3574660633484164e-06,
362
+ "loss": 0.3758,
363
+ "step": 480
364
+ },
365
+ {
366
+ "epoch": 2.9742033383915025,
367
+ "grad_norm": 0.6219173669815063,
368
+ "learning_rate": 2.2624434389140275e-07,
369
+ "loss": 0.373,
370
+ "step": 490
371
+ },
372
+ {
373
+ "epoch": 2.986342943854325,
374
+ "eval_loss": 0.35858574509620667,
375
+ "eval_recall": 0.9782797567332754,
376
+ "eval_runtime": 61.7184,
377
+ "eval_samples_per_second": 37.93,
378
+ "eval_steps_per_second": 1.199,
379
+ "step": 492
380
+ },
381
+ {
382
+ "epoch": 2.986342943854325,
383
+ "step": 492,
384
+ "total_flos": 1.1154611028241981e+19,
385
+ "train_loss": 0.4681245271510225,
386
+ "train_runtime": 1816.2465,
387
+ "train_samples_per_second": 34.801,
388
+ "train_steps_per_second": 0.271
389
+ }
390
+ ],
391
+ "logging_steps": 10,
392
+ "max_steps": 492,
393
+ "num_input_tokens_seen": 0,
394
+ "num_train_epochs": 3,
395
+ "save_steps": 500,
396
+ "stateful_callbacks": {
397
+ "TrainerControl": {
398
+ "args": {
399
+ "should_epoch_stop": false,
400
+ "should_evaluate": false,
401
+ "should_log": false,
402
+ "should_save": true,
403
+ "should_training_stop": true
404
+ },
405
+ "attributes": {}
406
+ }
407
+ },
408
+ "total_flos": 1.1154611028241981e+19,
409
+ "train_batch_size": 32,
410
+ "trial_name": null,
411
+ "trial_params": null
412
+ }