dima806 committed on
Commit
a7efa64
1 Parent(s): 64afac1

Upload folder using huggingface_hub

Browse files
checkpoint-22855/config.json CHANGED
@@ -670,5 +670,5 @@
670
  "problem_type": "single_label_classification",
671
  "qkv_bias": true,
672
  "torch_dtype": "float32",
673
- "transformers_version": "4.44.2"
674
  }
 
670
  "problem_type": "single_label_classification",
671
  "qkv_bias": true,
672
  "torch_dtype": "float32",
673
+ "transformers_version": "4.45.1"
674
  }
checkpoint-22855/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec58008d55914ed465e2a0ed073cca3a4ef11a24b5da8c5b0920b4af9a18a11e
3
  size 344211388
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:734a3722f2f927c2f3be7607f94eb3a054f72007ff6a4a7b77ddac2a53bccf18
3
  size 344211388
checkpoint-22855/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a3a146be5914ad44805d9d83bfdeb36f331768e2c6587bc6ca0c58cbb784eda
3
  size 688543237
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e373d6549f53843cb1805c28c21b91aca667c2cba95d0f6d77c2fed7602a54ec
3
  size 688543237
checkpoint-22855/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 1.12474524974823,
3
  "best_model_checkpoint": "car_brands_image_detection/checkpoint-22855",
4
  "epoch": 7.0,
5
  "eval_steps": 500,
@@ -10,387 +10,387 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.15313935681470137,
13
- "grad_norm": 10.977594375610352,
14
  "learning_rate": 6.861872396404297e-07,
15
- "loss": 1.0769,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.30627871362940273,
20
- "grad_norm": 11.252737998962402,
21
  "learning_rate": 6.708397281297961e-07,
22
- "loss": 1.0703,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.45941807044410415,
27
- "grad_norm": 10.567949295043945,
28
  "learning_rate": 6.554922166191625e-07,
29
- "loss": 1.0579,
30
  "step": 1500
31
  },
32
  {
33
  "epoch": 0.6125574272588055,
34
- "grad_norm": 11.23414421081543,
35
  "learning_rate": 6.401447051085288e-07,
36
- "loss": 1.0463,
37
  "step": 2000
38
  },
39
  {
40
  "epoch": 0.7656967840735069,
41
- "grad_norm": 10.645853996276855,
42
  "learning_rate": 6.247971935978952e-07,
43
- "loss": 1.0569,
44
  "step": 2500
45
  },
46
  {
47
  "epoch": 0.9188361408882083,
48
- "grad_norm": 10.294245719909668,
49
  "learning_rate": 6.094496820872616e-07,
50
- "loss": 1.0543,
51
  "step": 3000
52
  },
53
  {
54
  "epoch": 1.0,
55
- "eval_accuracy": 0.8411953072272721,
56
- "eval_loss": 1.2030651569366455,
57
- "eval_model_preparation_time": 0.0053,
58
- "eval_runtime": 687.2885,
59
- "eval_samples_per_second": 101.324,
60
- "eval_steps_per_second": 12.666,
61
  "step": 3265
62
  },
63
  {
64
  "epoch": 1.0719754977029097,
65
- "grad_norm": 9.816156387329102,
66
  "learning_rate": 5.941021705766278e-07,
67
- "loss": 1.0425,
68
  "step": 3500
69
  },
70
  {
71
  "epoch": 1.225114854517611,
72
- "grad_norm": 7.021167755126953,
73
  "learning_rate": 5.787546590659943e-07,
74
- "loss": 1.0385,
75
  "step": 4000
76
  },
77
  {
78
  "epoch": 1.3782542113323124,
79
- "grad_norm": 14.457579612731934,
80
  "learning_rate": 5.634071475553606e-07,
81
- "loss": 1.0268,
82
  "step": 4500
83
  },
84
  {
85
  "epoch": 1.5313935681470139,
86
- "grad_norm": 11.13620376586914,
87
  "learning_rate": 5.480596360447269e-07,
88
- "loss": 1.0197,
89
  "step": 5000
90
  },
91
  {
92
  "epoch": 1.6845329249617151,
93
- "grad_norm": 10.852294921875,
94
  "learning_rate": 5.327121245340934e-07,
95
- "loss": 1.0335,
96
  "step": 5500
97
  },
98
  {
99
  "epoch": 1.8376722817764164,
100
- "grad_norm": 11.98221492767334,
101
  "learning_rate": 5.173646130234597e-07,
102
- "loss": 1.0241,
103
  "step": 6000
104
  },
105
  {
106
  "epoch": 1.9908116385911179,
107
- "grad_norm": 10.328956604003906,
108
  "learning_rate": 5.020171015128262e-07,
109
- "loss": 1.0143,
110
  "step": 6500
111
  },
112
  {
113
  "epoch": 2.0,
114
- "eval_accuracy": 0.8442108588578239,
115
- "eval_loss": 1.1817330121994019,
116
- "eval_model_preparation_time": 0.0053,
117
- "eval_runtime": 667.4646,
118
- "eval_samples_per_second": 104.334,
119
- "eval_steps_per_second": 13.042,
120
  "step": 6530
121
  },
122
  {
123
  "epoch": 2.1439509954058193,
124
- "grad_norm": 8.650470733642578,
125
  "learning_rate": 4.866695900021925e-07,
126
- "loss": 1.0117,
127
  "step": 7000
128
  },
129
  {
130
  "epoch": 2.2970903522205206,
131
- "grad_norm": 10.077228546142578,
132
  "learning_rate": 4.713220784915589e-07,
133
- "loss": 0.9998,
134
  "step": 7500
135
  },
136
  {
137
  "epoch": 2.450229709035222,
138
- "grad_norm": 12.940803527832031,
139
  "learning_rate": 4.559745669809252e-07,
140
- "loss": 1.0062,
141
  "step": 8000
142
  },
143
  {
144
  "epoch": 2.6033690658499236,
145
- "grad_norm": 13.832907676696777,
146
  "learning_rate": 4.406270554702916e-07,
147
- "loss": 1.0014,
148
  "step": 8500
149
  },
150
  {
151
  "epoch": 2.756508422664625,
152
- "grad_norm": 9.304670333862305,
153
  "learning_rate": 4.25279543959658e-07,
154
- "loss": 0.9983,
155
  "step": 9000
156
  },
157
  {
158
  "epoch": 2.909647779479326,
159
- "grad_norm": 12.306177139282227,
160
  "learning_rate": 4.0993203244902437e-07,
161
- "loss": 0.993,
162
  "step": 9500
163
  },
164
  {
165
  "epoch": 3.0,
166
- "eval_accuracy": 0.8465802208532575,
167
- "eval_loss": 1.1596074104309082,
168
- "eval_model_preparation_time": 0.0053,
169
- "eval_runtime": 677.1878,
170
- "eval_samples_per_second": 102.836,
171
- "eval_steps_per_second": 12.855,
172
  "step": 9795
173
  },
174
  {
175
  "epoch": 3.0627871362940278,
176
- "grad_norm": 9.197342872619629,
177
  "learning_rate": 3.9458452093839065e-07,
178
- "loss": 0.994,
179
  "step": 10000
180
  },
181
  {
182
  "epoch": 3.215926493108729,
183
- "grad_norm": 10.245026588439941,
184
  "learning_rate": 3.7923700942775704e-07,
185
- "loss": 0.9887,
186
  "step": 10500
187
  },
188
  {
189
  "epoch": 3.3690658499234303,
190
- "grad_norm": 8.360330581665039,
191
  "learning_rate": 3.638894979171234e-07,
192
- "loss": 0.9795,
193
  "step": 11000
194
  },
195
  {
196
  "epoch": 3.522205206738132,
197
- "grad_norm": 11.287829399108887,
198
  "learning_rate": 3.4854198640648976e-07,
199
- "loss": 1.0054,
200
  "step": 11500
201
  },
202
  {
203
  "epoch": 3.6753445635528332,
204
- "grad_norm": 9.926981925964355,
205
  "learning_rate": 3.3319447489585615e-07,
206
- "loss": 0.9681,
207
  "step": 12000
208
  },
209
  {
210
  "epoch": 3.8284839203675345,
211
- "grad_norm": 12.259571075439453,
212
  "learning_rate": 3.1784696338522254e-07,
213
- "loss": 0.978,
214
  "step": 12500
215
  },
216
  {
217
  "epoch": 3.9816232771822357,
218
- "grad_norm": 18.110218048095703,
219
  "learning_rate": 3.0249945187458887e-07,
220
- "loss": 0.9582,
221
  "step": 13000
222
  },
223
  {
224
  "epoch": 4.0,
225
- "eval_accuracy": 0.848748546073321,
226
- "eval_loss": 1.1430388689041138,
227
- "eval_model_preparation_time": 0.0053,
228
- "eval_runtime": 678.3623,
229
- "eval_samples_per_second": 102.658,
230
- "eval_steps_per_second": 12.832,
231
  "step": 13060
232
  },
233
  {
234
  "epoch": 4.134762633996937,
235
- "grad_norm": 7.420970439910889,
236
  "learning_rate": 2.8715194036395526e-07,
237
- "loss": 0.9722,
238
  "step": 13500
239
  },
240
  {
241
  "epoch": 4.287901990811639,
242
- "grad_norm": 10.573201179504395,
243
  "learning_rate": 2.7180442885332164e-07,
244
- "loss": 0.9552,
245
  "step": 14000
246
  },
247
  {
248
  "epoch": 4.44104134762634,
249
- "grad_norm": 11.360106468200684,
250
  "learning_rate": 2.56456917342688e-07,
251
- "loss": 0.9677,
252
  "step": 14500
253
  },
254
  {
255
  "epoch": 4.594180704441041,
256
- "grad_norm": 10.936739921569824,
257
  "learning_rate": 2.4110940583205437e-07,
258
- "loss": 0.9619,
259
  "step": 15000
260
  },
261
  {
262
  "epoch": 4.747320061255743,
263
- "grad_norm": 11.35300350189209,
264
  "learning_rate": 2.2576189432142073e-07,
265
- "loss": 0.9664,
266
  "step": 15500
267
  },
268
  {
269
  "epoch": 4.900459418070444,
270
- "grad_norm": 9.238828659057617,
271
  "learning_rate": 2.104143828107871e-07,
272
- "loss": 0.9697,
273
  "step": 16000
274
  },
275
  {
276
  "epoch": 5.0,
277
- "eval_accuracy": 0.8492654977814156,
278
- "eval_loss": 1.1333091259002686,
279
- "eval_model_preparation_time": 0.0053,
280
- "eval_runtime": 668.0112,
281
- "eval_samples_per_second": 104.248,
282
- "eval_steps_per_second": 13.031,
283
  "step": 16325
284
  },
285
  {
286
  "epoch": 5.053598774885145,
287
- "grad_norm": 9.134923934936523,
288
  "learning_rate": 1.9506687130015347e-07,
289
- "loss": 0.9528,
290
  "step": 16500
291
  },
292
  {
293
  "epoch": 5.206738131699847,
294
- "grad_norm": 8.237107276916504,
295
  "learning_rate": 1.7971935978951984e-07,
296
- "loss": 0.9611,
297
  "step": 17000
298
  },
299
  {
300
  "epoch": 5.359877488514548,
301
- "grad_norm": 9.242733001708984,
302
  "learning_rate": 1.643718482788862e-07,
303
- "loss": 0.961,
304
  "step": 17500
305
  },
306
  {
307
  "epoch": 5.51301684532925,
308
- "grad_norm": 10.467522621154785,
309
  "learning_rate": 1.4902433676825258e-07,
310
- "loss": 0.9469,
311
  "step": 18000
312
  },
313
  {
314
  "epoch": 5.666156202143951,
315
- "grad_norm": 13.296605110168457,
316
  "learning_rate": 1.3367682525761894e-07,
317
- "loss": 0.9538,
318
  "step": 18500
319
  },
320
  {
321
  "epoch": 5.819295558958652,
322
- "grad_norm": 8.11686897277832,
323
  "learning_rate": 1.183293137469853e-07,
324
- "loss": 0.9486,
325
  "step": 19000
326
  },
327
  {
328
  "epoch": 5.972434915773354,
329
- "grad_norm": 8.29335880279541,
330
  "learning_rate": 1.0298180223635168e-07,
331
- "loss": 0.9666,
332
  "step": 19500
333
  },
334
  {
335
  "epoch": 6.0,
336
- "eval_accuracy": 0.8497680897198409,
337
- "eval_loss": 1.1270993947982788,
338
- "eval_model_preparation_time": 0.0053,
339
- "eval_runtime": 668.7633,
340
- "eval_samples_per_second": 104.131,
341
- "eval_steps_per_second": 13.017,
342
  "step": 19590
343
  },
344
  {
345
  "epoch": 6.1255742725880555,
346
- "grad_norm": 14.328752517700195,
347
  "learning_rate": 8.763429072571804e-08,
348
- "loss": 0.9437,
349
  "step": 20000
350
  },
351
  {
352
  "epoch": 6.278713629402756,
353
- "grad_norm": 12.567534446716309,
354
  "learning_rate": 7.228677921508441e-08,
355
- "loss": 0.9583,
356
  "step": 20500
357
  },
358
  {
359
  "epoch": 6.431852986217458,
360
- "grad_norm": 10.546801567077637,
361
  "learning_rate": 5.693926770445078e-08,
362
- "loss": 0.9436,
363
  "step": 21000
364
  },
365
  {
366
  "epoch": 6.584992343032159,
367
- "grad_norm": 9.688272476196289,
368
  "learning_rate": 4.159175619381715e-08,
369
- "loss": 0.9496,
370
  "step": 21500
371
  },
372
  {
373
  "epoch": 6.738131699846861,
374
- "grad_norm": 10.878347396850586,
375
  "learning_rate": 2.6244244683183514e-08,
376
- "loss": 0.9516,
377
  "step": 22000
378
  },
379
  {
380
  "epoch": 6.891271056661562,
381
- "grad_norm": 9.418506622314453,
382
  "learning_rate": 1.0896733172549878e-08,
383
- "loss": 0.9518,
384
  "step": 22500
385
  },
386
  {
387
  "epoch": 7.0,
388
- "eval_accuracy": 0.8505722368213214,
389
- "eval_loss": 1.12474524974823,
390
- "eval_model_preparation_time": 0.0053,
391
- "eval_runtime": 668.4785,
392
- "eval_samples_per_second": 104.175,
393
- "eval_steps_per_second": 13.022,
394
  "step": 22855
395
  }
396
  ],
 
1
  {
2
+ "best_metric": 0.6283333897590637,
3
  "best_model_checkpoint": "car_brands_image_detection/checkpoint-22855",
4
  "epoch": 7.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.15313935681470137,
13
+ "grad_norm": 13.366690635681152,
14
  "learning_rate": 6.861872396404297e-07,
15
+ "loss": 0.5511,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.30627871362940273,
20
+ "grad_norm": 11.934271812438965,
21
  "learning_rate": 6.708397281297961e-07,
22
+ "loss": 0.5487,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.45941807044410415,
27
+ "grad_norm": 9.262362480163574,
28
  "learning_rate": 6.554922166191625e-07,
29
+ "loss": 0.5382,
30
  "step": 1500
31
  },
32
  {
33
  "epoch": 0.6125574272588055,
34
+ "grad_norm": 7.17333459854126,
35
  "learning_rate": 6.401447051085288e-07,
36
+ "loss": 0.561,
37
  "step": 2000
38
  },
39
  {
40
  "epoch": 0.7656967840735069,
41
+ "grad_norm": 12.425559043884277,
42
  "learning_rate": 6.247971935978952e-07,
43
+ "loss": 0.5503,
44
  "step": 2500
45
  },
46
  {
47
  "epoch": 0.9188361408882083,
48
+ "grad_norm": 8.107797622680664,
49
  "learning_rate": 6.094496820872616e-07,
50
+ "loss": 0.5443,
51
  "step": 3000
52
  },
53
  {
54
  "epoch": 1.0,
55
+ "eval_accuracy": 0.9070922902396645,
56
+ "eval_loss": 0.6742454767227173,
57
+ "eval_model_preparation_time": 0.006,
58
+ "eval_runtime": 684.1135,
59
+ "eval_samples_per_second": 101.795,
60
+ "eval_steps_per_second": 12.724,
61
  "step": 3265
62
  },
63
  {
64
  "epoch": 1.0719754977029097,
65
+ "grad_norm": 7.829226493835449,
66
  "learning_rate": 5.941021705766278e-07,
67
+ "loss": 0.534,
68
  "step": 3500
69
  },
70
  {
71
  "epoch": 1.225114854517611,
72
+ "grad_norm": 10.400870323181152,
73
  "learning_rate": 5.787546590659943e-07,
74
+ "loss": 0.5356,
75
  "step": 4000
76
  },
77
  {
78
  "epoch": 1.3782542113323124,
79
+ "grad_norm": 8.150808334350586,
80
  "learning_rate": 5.634071475553606e-07,
81
+ "loss": 0.5218,
82
  "step": 4500
83
  },
84
  {
85
  "epoch": 1.5313935681470139,
86
+ "grad_norm": 15.422158241271973,
87
  "learning_rate": 5.480596360447269e-07,
88
+ "loss": 0.5339,
89
  "step": 5000
90
  },
91
  {
92
  "epoch": 1.6845329249617151,
93
+ "grad_norm": 10.069522857666016,
94
  "learning_rate": 5.327121245340934e-07,
95
+ "loss": 0.5384,
96
  "step": 5500
97
  },
98
  {
99
  "epoch": 1.8376722817764164,
100
+ "grad_norm": 8.527743339538574,
101
  "learning_rate": 5.173646130234597e-07,
102
+ "loss": 0.5286,
103
  "step": 6000
104
  },
105
  {
106
  "epoch": 1.9908116385911179,
107
+ "grad_norm": 13.6627836227417,
108
  "learning_rate": 5.020171015128262e-07,
109
+ "loss": 0.5228,
110
  "step": 6500
111
  },
112
  {
113
  "epoch": 2.0,
114
+ "eval_accuracy": 0.90901649937535,
115
+ "eval_loss": 0.6600815057754517,
116
+ "eval_model_preparation_time": 0.006,
117
+ "eval_runtime": 684.6003,
118
+ "eval_samples_per_second": 101.722,
119
+ "eval_steps_per_second": 12.715,
120
  "step": 6530
121
  },
122
  {
123
  "epoch": 2.1439509954058193,
124
+ "grad_norm": 10.269744873046875,
125
  "learning_rate": 4.866695900021925e-07,
126
+ "loss": 0.5175,
127
  "step": 7000
128
  },
129
  {
130
  "epoch": 2.2970903522205206,
131
+ "grad_norm": 10.207755088806152,
132
  "learning_rate": 4.713220784915589e-07,
133
+ "loss": 0.5207,
134
  "step": 7500
135
  },
136
  {
137
  "epoch": 2.450229709035222,
138
+ "grad_norm": 9.40577507019043,
139
  "learning_rate": 4.559745669809252e-07,
140
+ "loss": 0.5108,
141
  "step": 8000
142
  },
143
  {
144
  "epoch": 2.6033690658499236,
145
+ "grad_norm": 14.63872241973877,
146
  "learning_rate": 4.406270554702916e-07,
147
+ "loss": 0.5157,
148
  "step": 8500
149
  },
150
  {
151
  "epoch": 2.756508422664625,
152
+ "grad_norm": 10.473233222961426,
153
  "learning_rate": 4.25279543959658e-07,
154
+ "loss": 0.5082,
155
  "step": 9000
156
  },
157
  {
158
  "epoch": 2.909647779479326,
159
+ "grad_norm": 9.536513328552246,
160
  "learning_rate": 4.0993203244902437e-07,
161
+ "loss": 0.5089,
162
  "step": 9500
163
  },
164
  {
165
  "epoch": 3.0,
166
+ "eval_accuracy": 0.9105817142693031,
167
+ "eval_loss": 0.6478191614151001,
168
+ "eval_model_preparation_time": 0.006,
169
+ "eval_runtime": 690.9545,
170
+ "eval_samples_per_second": 100.787,
171
+ "eval_steps_per_second": 12.599,
172
  "step": 9795
173
  },
174
  {
175
  "epoch": 3.0627871362940278,
176
+ "grad_norm": 8.641783714294434,
177
  "learning_rate": 3.9458452093839065e-07,
178
+ "loss": 0.5092,
179
  "step": 10000
180
  },
181
  {
182
  "epoch": 3.215926493108729,
183
+ "grad_norm": 19.282365798950195,
184
  "learning_rate": 3.7923700942775704e-07,
185
+ "loss": 0.5005,
186
  "step": 10500
187
  },
188
  {
189
  "epoch": 3.3690658499234303,
190
+ "grad_norm": 11.717490196228027,
191
  "learning_rate": 3.638894979171234e-07,
192
+ "loss": 0.502,
193
  "step": 11000
194
  },
195
  {
196
  "epoch": 3.522205206738132,
197
+ "grad_norm": 5.86570405960083,
198
  "learning_rate": 3.4854198640648976e-07,
199
+ "loss": 0.4932,
200
  "step": 11500
201
  },
202
  {
203
  "epoch": 3.6753445635528332,
204
+ "grad_norm": 19.602014541625977,
205
  "learning_rate": 3.3319447489585615e-07,
206
+ "loss": 0.5014,
207
  "step": 12000
208
  },
209
  {
210
  "epoch": 3.8284839203675345,
211
+ "grad_norm": 8.634654998779297,
212
  "learning_rate": 3.1784696338522254e-07,
213
+ "loss": 0.5099,
214
  "step": 12500
215
  },
216
  {
217
  "epoch": 3.9816232771822357,
218
+ "grad_norm": 10.192227363586426,
219
  "learning_rate": 3.0249945187458887e-07,
220
+ "loss": 0.5002,
221
  "step": 13000
222
  },
223
  {
224
  "epoch": 4.0,
225
+ "eval_accuracy": 0.9107540315053346,
226
+ "eval_loss": 0.6401029229164124,
227
+ "eval_model_preparation_time": 0.006,
228
+ "eval_runtime": 702.914,
229
+ "eval_samples_per_second": 99.072,
230
+ "eval_steps_per_second": 12.384,
231
  "step": 13060
232
  },
233
  {
234
  "epoch": 4.134762633996937,
235
+ "grad_norm": 10.418832778930664,
236
  "learning_rate": 2.8715194036395526e-07,
237
+ "loss": 0.4992,
238
  "step": 13500
239
  },
240
  {
241
  "epoch": 4.287901990811639,
242
+ "grad_norm": 12.171610832214355,
243
  "learning_rate": 2.7180442885332164e-07,
244
+ "loss": 0.483,
245
  "step": 14000
246
  },
247
  {
248
  "epoch": 4.44104134762634,
249
+ "grad_norm": 18.0849552154541,
250
  "learning_rate": 2.56456917342688e-07,
251
+ "loss": 0.4865,
252
  "step": 14500
253
  },
254
  {
255
  "epoch": 4.594180704441041,
256
+ "grad_norm": 9.136711120605469,
257
  "learning_rate": 2.4110940583205437e-07,
258
+ "loss": 0.4893,
259
  "step": 15000
260
  },
261
  {
262
  "epoch": 4.747320061255743,
263
+ "grad_norm": 7.19853401184082,
264
  "learning_rate": 2.2576189432142073e-07,
265
+ "loss": 0.4956,
266
  "step": 15500
267
  },
268
  {
269
  "epoch": 4.900459418070444,
270
+ "grad_norm": 9.32518196105957,
271
  "learning_rate": 2.104143828107871e-07,
272
+ "loss": 0.4858,
273
  "step": 16000
274
  },
275
  {
276
  "epoch": 5.0,
277
+ "eval_accuracy": 0.9118022946911931,
278
+ "eval_loss": 0.6333341598510742,
279
+ "eval_model_preparation_time": 0.006,
280
+ "eval_runtime": 694.3482,
281
+ "eval_samples_per_second": 100.294,
282
+ "eval_steps_per_second": 12.537,
283
  "step": 16325
284
  },
285
  {
286
  "epoch": 5.053598774885145,
287
+ "grad_norm": 6.03609561920166,
288
  "learning_rate": 1.9506687130015347e-07,
289
+ "loss": 0.4907,
290
  "step": 16500
291
  },
292
  {
293
  "epoch": 5.206738131699847,
294
+ "grad_norm": 8.596969604492188,
295
  "learning_rate": 1.7971935978951984e-07,
296
+ "loss": 0.4829,
297
  "step": 17000
298
  },
299
  {
300
  "epoch": 5.359877488514548,
301
+ "grad_norm": 17.825912475585938,
302
  "learning_rate": 1.643718482788862e-07,
303
+ "loss": 0.4831,
304
  "step": 17500
305
  },
306
  {
307
  "epoch": 5.51301684532925,
308
+ "grad_norm": 6.781469821929932,
309
  "learning_rate": 1.4902433676825258e-07,
310
+ "loss": 0.4901,
311
  "step": 18000
312
  },
313
  {
314
  "epoch": 5.666156202143951,
315
+ "grad_norm": 7.068932056427002,
316
  "learning_rate": 1.3367682525761894e-07,
317
+ "loss": 0.4923,
318
  "step": 18500
319
  },
320
  {
321
  "epoch": 5.819295558958652,
322
+ "grad_norm": 13.472633361816406,
323
  "learning_rate": 1.183293137469853e-07,
324
+ "loss": 0.4845,
325
  "step": 19000
326
  },
327
  {
328
  "epoch": 5.972434915773354,
329
+ "grad_norm": 9.40524673461914,
330
  "learning_rate": 1.0298180223635168e-07,
331
+ "loss": 0.4672,
332
  "step": 19500
333
  },
334
  {
335
  "epoch": 6.0,
336
+ "eval_accuracy": 0.9119315326182168,
337
+ "eval_loss": 0.6297235488891602,
338
+ "eval_model_preparation_time": 0.006,
339
+ "eval_runtime": 697.8503,
340
+ "eval_samples_per_second": 99.791,
341
+ "eval_steps_per_second": 12.474,
342
  "step": 19590
343
  },
344
  {
345
  "epoch": 6.1255742725880555,
346
+ "grad_norm": 8.320501327514648,
347
  "learning_rate": 8.763429072571804e-08,
348
+ "loss": 0.4753,
349
  "step": 20000
350
  },
351
  {
352
  "epoch": 6.278713629402756,
353
+ "grad_norm": 10.223684310913086,
354
  "learning_rate": 7.228677921508441e-08,
355
+ "loss": 0.4746,
356
  "step": 20500
357
  },
358
  {
359
  "epoch": 6.431852986217458,
360
+ "grad_norm": 12.393083572387695,
361
  "learning_rate": 5.693926770445078e-08,
362
+ "loss": 0.4822,
363
  "step": 21000
364
  },
365
  {
366
  "epoch": 6.584992343032159,
367
+ "grad_norm": 14.066522598266602,
368
  "learning_rate": 4.159175619381715e-08,
369
+ "loss": 0.4865,
370
  "step": 21500
371
  },
372
  {
373
  "epoch": 6.738131699846861,
374
+ "grad_norm": 8.407339096069336,
375
  "learning_rate": 2.6244244683183514e-08,
376
+ "loss": 0.4758,
377
  "step": 22000
378
  },
379
  {
380
  "epoch": 6.891271056661562,
381
+ "grad_norm": 10.466651916503906,
382
  "learning_rate": 1.0896733172549878e-08,
383
+ "loss": 0.4861,
384
  "step": 22500
385
  },
386
  {
387
  "epoch": 7.0,
388
+ "eval_accuracy": 0.9121325693935869,
389
+ "eval_loss": 0.6283333897590637,
390
+ "eval_model_preparation_time": 0.006,
391
+ "eval_runtime": 695.1527,
392
+ "eval_samples_per_second": 100.178,
393
+ "eval_steps_per_second": 12.522,
394
  "step": 22855
395
  }
396
  ],
checkpoint-22855/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c100526e0b4b24bfa7592465d2fb171f2d02c219dd18fbd5f4fdb665859dc45c
3
- size 4667
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:383bdd66dedd2d02f4607e2845640bf60017805b0ddd9c46c51a7a69f93f2ee7
3
+ size 4731
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfe1b038fb0f521f664649f712800fbfe7fa261ad4ca3e25ba1e41967005a47c
3
  size 344211388
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:734a3722f2f927c2f3be7607f94eb3a054f72007ff6a4a7b77ddac2a53bccf18
3
  size 344211388
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c5e5c5924d5e574908eb65d0a1490c65c3fa3d146ca5a04ac236d9a8b5ab276
3
  size 4731
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:383bdd66dedd2d02f4607e2845640bf60017805b0ddd9c46c51a7a69f93f2ee7
3
  size 4731