Text Generation
Transformers
PyTorch
English
llama
text-generation-inference
Inference Endpoints
bleysg commited on
Commit
0c4d0d6
·
1 Parent(s): 15d486c

Delete checkpoint-768

Browse files
checkpoint-768/added_tokens.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "<pad>": 32000,
3
- "<|im_end|>": 32002,
4
- "<|im_start|>": 32001,
5
- "<|system|>": 32003
6
- }
 
 
 
 
 
 
 
checkpoint-768/config.json DELETED
@@ -1,30 +0,0 @@
1
- {
2
- "_name_or_path": "conceptofmind/LLongMA-2-13b-16k",
3
- "architectures": [
4
- "LlamaForCausalLM"
5
- ],
6
- "bos_token_id": 1,
7
- "eos_token_id": 2,
8
- "hidden_act": "silu",
9
- "hidden_size": 5120,
10
- "initializer_range": 0.02,
11
- "intermediate_size": 13824,
12
- "max_position_embeddings": 16384,
13
- "model_type": "llama",
14
- "num_attention_heads": 40,
15
- "num_hidden_layers": 40,
16
- "num_key_value_heads": 40,
17
- "pad_token_id": 0,
18
- "pretraining_tp": 2,
19
- "rms_norm_eps": 1e-05,
20
- "rope_scaling": {
21
- "factor": 4.0,
22
- "type": "linear"
23
- },
24
- "tie_word_embeddings": false,
25
- "torch_dtype": "bfloat16",
26
- "transformers_version": "4.32.0.dev0",
27
- "use_cache": false,
28
- "use_flash_attention": false,
29
- "vocab_size": 32004
30
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-768/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:81074310b07fb7a22d1eed8482ec13500c8261687ab9d33aba13137418ed9e62
3
- size 26031995033
 
 
 
 
checkpoint-768/rng_state_0.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b64bd1bb3ba8b0a3e03ea6b9ea79188f2e9aa0b1127982f65e7910552a683009
3
- size 21687
 
 
 
 
checkpoint-768/rng_state_1.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4599ed04a3ea8005c93981da3ac51adab6f9e48356f416faf5999160f70aa612
3
- size 21687
 
 
 
 
checkpoint-768/rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e480e19d8c9c591d8b3285867ca3c557cad3b54a5a11f8ddba4cc58128ca39e
3
- size 21687
 
 
 
 
checkpoint-768/rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:96b235d1282fb055977ff5416b4461dd8d7116aaa6764bcf768e6e190331a636
3
- size 21687
 
 
 
 
checkpoint-768/rng_state_4.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:17a98e2d1f43b20a0b49ae2d540712c0a057b1c7cae69c4542940db7fc7fa55f
3
- size 21687
 
 
 
 
checkpoint-768/rng_state_5.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee9f8a10bed33741098006f1f70d09d2438a9eecd8b2fd59a0883798ed3f57fc
3
- size 21687
 
 
 
 
checkpoint-768/rng_state_6.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f6b1fae8cf3c4a63e1b8163a1c2366dd0498c12ab762c3d827578047c24211c
3
- size 21687
 
 
 
 
checkpoint-768/rng_state_7.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:20e1a1bd086523383052a57e4cbe614821a157b38c27f20a13779cb885a5affb
3
- size 21687
 
 
 
 
checkpoint-768/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<s>",
3
- "eos_token": "</s>",
4
- "pad_token": "[PAD]",
5
- "unk_token": "<unk>"
6
- }
 
 
 
 
 
 
 
checkpoint-768/tokenizer.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
- size 499723
 
 
 
 
checkpoint-768/tokenizer_config.json DELETED
@@ -1,36 +0,0 @@
1
- {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
- "bos_token": {
5
- "__type": "AddedToken",
6
- "content": "<s>",
7
- "lstrip": false,
8
- "normalized": true,
9
- "rstrip": false,
10
- "single_word": false
11
- },
12
- "clean_up_tokenization_spaces": false,
13
- "eos_token": {
14
- "__type": "AddedToken",
15
- "content": "</s>",
16
- "lstrip": false,
17
- "normalized": true,
18
- "rstrip": false,
19
- "single_word": false
20
- },
21
- "legacy": true,
22
- "model_max_length": 8192,
23
- "pad_token": null,
24
- "sp_model_kwargs": {},
25
- "tokenizer_class": "LlamaTokenizer",
26
- "trust_remote_code": false,
27
- "unk_token": {
28
- "__type": "AddedToken",
29
- "content": "<unk>",
30
- "lstrip": false,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false
34
- },
35
- "use_fast": true
36
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-768/trainer_state.json DELETED
@@ -1,4723 +0,0 @@
1
- {
2
- "best_metric": 0.7497929930686951,
3
- "best_model_checkpoint": "./open-long-orca-13b/checkpoint-768",
4
- "epoch": 0.5408450704225352,
5
- "eval_steps": 64,
6
- "global_step": 768,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.0,
13
- "learning_rate": 1.66015625e-08,
14
- "loss": 1.2144,
15
- "step": 1
16
- },
17
- {
18
- "epoch": 0.0,
19
- "learning_rate": 6.640625e-08,
20
- "loss": 1.0802,
21
- "step": 2
22
- },
23
- {
24
- "epoch": 0.0,
25
- "learning_rate": 1.494140625e-07,
26
- "loss": 1.0077,
27
- "step": 3
28
- },
29
- {
30
- "epoch": 0.0,
31
- "learning_rate": 2.65625e-07,
32
- "loss": 0.9755,
33
- "step": 4
34
- },
35
- {
36
- "epoch": 0.0,
37
- "learning_rate": 4.150390625e-07,
38
- "loss": 0.9372,
39
- "step": 5
40
- },
41
- {
42
- "epoch": 0.0,
43
- "learning_rate": 5.9765625e-07,
44
- "loss": 0.9344,
45
- "step": 6
46
- },
47
- {
48
- "epoch": 0.0,
49
- "learning_rate": 8.134765625e-07,
50
- "loss": 0.9305,
51
- "step": 7
52
- },
53
- {
54
- "epoch": 0.01,
55
- "learning_rate": 1.0625e-06,
56
- "loss": 0.9065,
57
- "step": 8
58
- },
59
- {
60
- "epoch": 0.01,
61
- "learning_rate": 1.3447265625e-06,
62
- "loss": 0.9292,
63
- "step": 9
64
- },
65
- {
66
- "epoch": 0.01,
67
- "learning_rate": 1.66015625e-06,
68
- "loss": 0.8798,
69
- "step": 10
70
- },
71
- {
72
- "epoch": 0.01,
73
- "learning_rate": 2.0087890625e-06,
74
- "loss": 0.8876,
75
- "step": 11
76
- },
77
- {
78
- "epoch": 0.01,
79
- "learning_rate": 2.390625e-06,
80
- "loss": 0.878,
81
- "step": 12
82
- },
83
- {
84
- "epoch": 0.01,
85
- "learning_rate": 2.8056640625e-06,
86
- "loss": 0.8938,
87
- "step": 13
88
- },
89
- {
90
- "epoch": 0.01,
91
- "learning_rate": 3.25390625e-06,
92
- "loss": 0.9152,
93
- "step": 14
94
- },
95
- {
96
- "epoch": 0.01,
97
- "learning_rate": 3.7353515625e-06,
98
- "loss": 0.8738,
99
- "step": 15
100
- },
101
- {
102
- "epoch": 0.01,
103
- "learning_rate": 4.25e-06,
104
- "loss": 0.8774,
105
- "step": 16
106
- },
107
- {
108
- "epoch": 0.01,
109
- "learning_rate": 4.7978515625e-06,
110
- "loss": 0.905,
111
- "step": 17
112
- },
113
- {
114
- "epoch": 0.01,
115
- "learning_rate": 5.37890625e-06,
116
- "loss": 0.8726,
117
- "step": 18
118
- },
119
- {
120
- "epoch": 0.01,
121
- "learning_rate": 5.9931640625e-06,
122
- "loss": 0.8728,
123
- "step": 19
124
- },
125
- {
126
- "epoch": 0.01,
127
- "learning_rate": 6.640625e-06,
128
- "loss": 0.843,
129
- "step": 20
130
- },
131
- {
132
- "epoch": 0.01,
133
- "learning_rate": 7.3212890625e-06,
134
- "loss": 0.8841,
135
- "step": 21
136
- },
137
- {
138
- "epoch": 0.02,
139
- "learning_rate": 8.03515625e-06,
140
- "loss": 0.8722,
141
- "step": 22
142
- },
143
- {
144
- "epoch": 0.02,
145
- "learning_rate": 8.7822265625e-06,
146
- "loss": 0.8486,
147
- "step": 23
148
- },
149
- {
150
- "epoch": 0.02,
151
- "learning_rate": 9.5625e-06,
152
- "loss": 0.8664,
153
- "step": 24
154
- },
155
- {
156
- "epoch": 0.02,
157
- "learning_rate": 1.03759765625e-05,
158
- "loss": 0.8673,
159
- "step": 25
160
- },
161
- {
162
- "epoch": 0.02,
163
- "learning_rate": 1.122265625e-05,
164
- "loss": 0.8698,
165
- "step": 26
166
- },
167
- {
168
- "epoch": 0.02,
169
- "learning_rate": 1.21025390625e-05,
170
- "loss": 0.8451,
171
- "step": 27
172
- },
173
- {
174
- "epoch": 0.02,
175
- "learning_rate": 1.3015625e-05,
176
- "loss": 0.8263,
177
- "step": 28
178
- },
179
- {
180
- "epoch": 0.02,
181
- "learning_rate": 1.39619140625e-05,
182
- "loss": 0.8782,
183
- "step": 29
184
- },
185
- {
186
- "epoch": 0.02,
187
- "learning_rate": 1.494140625e-05,
188
- "loss": 0.8244,
189
- "step": 30
190
- },
191
- {
192
- "epoch": 0.02,
193
- "learning_rate": 1.59541015625e-05,
194
- "loss": 0.8715,
195
- "step": 31
196
- },
197
- {
198
- "epoch": 0.02,
199
- "learning_rate": 1.7e-05,
200
- "loss": 0.8789,
201
- "step": 32
202
- },
203
- {
204
- "epoch": 0.02,
205
- "learning_rate": 1.699999868508013e-05,
206
- "loss": 0.864,
207
- "step": 33
208
- },
209
- {
210
- "epoch": 0.02,
211
- "learning_rate": 1.6999994740320928e-05,
212
- "loss": 0.8453,
213
- "step": 34
214
- },
215
- {
216
- "epoch": 0.02,
217
- "learning_rate": 1.699998816572361e-05,
218
- "loss": 0.8404,
219
- "step": 35
220
- },
221
- {
222
- "epoch": 0.03,
223
- "learning_rate": 1.6999978961290217e-05,
224
- "loss": 0.8443,
225
- "step": 36
226
- },
227
- {
228
- "epoch": 0.03,
229
- "learning_rate": 1.6999967127023588e-05,
230
- "loss": 0.8815,
231
- "step": 37
232
- },
233
- {
234
- "epoch": 0.03,
235
- "learning_rate": 1.6999952662927394e-05,
236
- "loss": 0.8536,
237
- "step": 38
238
- },
239
- {
240
- "epoch": 0.03,
241
- "learning_rate": 1.6999935569006104e-05,
242
- "loss": 0.8616,
243
- "step": 39
244
- },
245
- {
246
- "epoch": 0.03,
247
- "learning_rate": 1.699991584526501e-05,
248
- "loss": 0.8587,
249
- "step": 40
250
- },
251
- {
252
- "epoch": 0.03,
253
- "learning_rate": 1.699989349171021e-05,
254
- "loss": 0.8769,
255
- "step": 41
256
- },
257
- {
258
- "epoch": 0.03,
259
- "learning_rate": 1.6999868508348625e-05,
260
- "loss": 0.8507,
261
- "step": 42
262
- },
263
- {
264
- "epoch": 0.03,
265
- "learning_rate": 1.699984089518798e-05,
266
- "loss": 0.8687,
267
- "step": 43
268
- },
269
- {
270
- "epoch": 0.03,
271
- "learning_rate": 1.6999810652236823e-05,
272
- "loss": 0.8789,
273
- "step": 44
274
- },
275
- {
276
- "epoch": 0.03,
277
- "learning_rate": 1.699977777950451e-05,
278
- "loss": 0.864,
279
- "step": 45
280
- },
281
- {
282
- "epoch": 0.03,
283
- "learning_rate": 1.6999742277001208e-05,
284
- "loss": 0.8344,
285
- "step": 46
286
- },
287
- {
288
- "epoch": 0.03,
289
- "learning_rate": 1.69997041447379e-05,
290
- "loss": 0.8568,
291
- "step": 47
292
- },
293
- {
294
- "epoch": 0.03,
295
- "learning_rate": 1.6999663382726394e-05,
296
- "loss": 0.84,
297
- "step": 48
298
- },
299
- {
300
- "epoch": 0.03,
301
- "learning_rate": 1.699961999097929e-05,
302
- "loss": 0.8359,
303
- "step": 49
304
- },
305
- {
306
- "epoch": 0.04,
307
- "learning_rate": 1.699957396951002e-05,
308
- "loss": 0.831,
309
- "step": 50
310
- },
311
- {
312
- "epoch": 0.04,
313
- "learning_rate": 1.699952531833282e-05,
314
- "loss": 0.8057,
315
- "step": 51
316
- },
317
- {
318
- "epoch": 0.04,
319
- "learning_rate": 1.6999474037462744e-05,
320
- "loss": 0.8357,
321
- "step": 52
322
- },
323
- {
324
- "epoch": 0.04,
325
- "learning_rate": 1.6999420126915656e-05,
326
- "loss": 0.8385,
327
- "step": 53
328
- },
329
- {
330
- "epoch": 0.04,
331
- "learning_rate": 1.6999363586708237e-05,
332
- "loss": 0.8164,
333
- "step": 54
334
- },
335
- {
336
- "epoch": 0.04,
337
- "learning_rate": 1.699930441685798e-05,
338
- "loss": 0.8253,
339
- "step": 55
340
- },
341
- {
342
- "epoch": 0.04,
343
- "learning_rate": 1.699924261738319e-05,
344
- "loss": 0.8385,
345
- "step": 56
346
- },
347
- {
348
- "epoch": 0.04,
349
- "learning_rate": 1.699917818830299e-05,
350
- "loss": 0.8133,
351
- "step": 57
352
- },
353
- {
354
- "epoch": 0.04,
355
- "learning_rate": 1.6999111129637313e-05,
356
- "loss": 0.8257,
357
- "step": 58
358
- },
359
- {
360
- "epoch": 0.04,
361
- "learning_rate": 1.6999041441406904e-05,
362
- "loss": 0.8646,
363
- "step": 59
364
- },
365
- {
366
- "epoch": 0.04,
367
- "learning_rate": 1.699896912363333e-05,
368
- "loss": 0.8636,
369
- "step": 60
370
- },
371
- {
372
- "epoch": 0.04,
373
- "learning_rate": 1.6998894176338956e-05,
374
- "loss": 0.8174,
375
- "step": 61
376
- },
377
- {
378
- "epoch": 0.04,
379
- "learning_rate": 1.699881659954698e-05,
380
- "loss": 0.8697,
381
- "step": 62
382
- },
383
- {
384
- "epoch": 0.04,
385
- "learning_rate": 1.6998736393281398e-05,
386
- "loss": 0.8244,
387
- "step": 63
388
- },
389
- {
390
- "epoch": 0.05,
391
- "learning_rate": 1.699865355756703e-05,
392
- "loss": 0.8393,
393
- "step": 64
394
- },
395
- {
396
- "epoch": 0.05,
397
- "eval_loss": 0.8116241097450256,
398
- "eval_runtime": 30.7962,
399
- "eval_samples_per_second": 265.975,
400
- "eval_steps_per_second": 16.625,
401
- "step": 64
402
- },
403
- {
404
- "epoch": 0.05,
405
- "learning_rate": 1.6998568092429495e-05,
406
- "loss": 0.834,
407
- "step": 65
408
- },
409
- {
410
- "epoch": 0.05,
411
- "learning_rate": 1.6998479997895247e-05,
412
- "loss": 0.8292,
413
- "step": 66
414
- },
415
- {
416
- "epoch": 0.05,
417
- "learning_rate": 1.6998389273991536e-05,
418
- "loss": 0.8447,
419
- "step": 67
420
- },
421
- {
422
- "epoch": 0.05,
423
- "learning_rate": 1.699829592074643e-05,
424
- "loss": 0.8214,
425
- "step": 68
426
- },
427
- {
428
- "epoch": 0.05,
429
- "learning_rate": 1.6998199938188817e-05,
430
- "loss": 0.8416,
431
- "step": 69
432
- },
433
- {
434
- "epoch": 0.05,
435
- "learning_rate": 1.6998101326348386e-05,
436
- "loss": 0.8104,
437
- "step": 70
438
- },
439
- {
440
- "epoch": 0.05,
441
- "learning_rate": 1.6998000085255654e-05,
442
- "loss": 0.8263,
443
- "step": 71
444
- },
445
- {
446
- "epoch": 0.05,
447
- "learning_rate": 1.699789621494194e-05,
448
- "loss": 0.8412,
449
- "step": 72
450
- },
451
- {
452
- "epoch": 0.05,
453
- "learning_rate": 1.6997789715439384e-05,
454
- "loss": 0.8024,
455
- "step": 73
456
- },
457
- {
458
- "epoch": 0.05,
459
- "learning_rate": 1.6997680586780933e-05,
460
- "loss": 0.8421,
461
- "step": 74
462
- },
463
- {
464
- "epoch": 0.05,
465
- "learning_rate": 1.699756882900035e-05,
466
- "loss": 0.879,
467
- "step": 75
468
- },
469
- {
470
- "epoch": 0.05,
471
- "learning_rate": 1.6997454442132217e-05,
472
- "loss": 0.8332,
473
- "step": 76
474
- },
475
- {
476
- "epoch": 0.05,
477
- "learning_rate": 1.699733742621192e-05,
478
- "loss": 0.826,
479
- "step": 77
480
- },
481
- {
482
- "epoch": 0.05,
483
- "learning_rate": 1.6997217781275666e-05,
484
- "loss": 0.8455,
485
- "step": 78
486
- },
487
- {
488
- "epoch": 0.06,
489
- "learning_rate": 1.6997095507360467e-05,
490
- "loss": 0.853,
491
- "step": 79
492
- },
493
- {
494
- "epoch": 0.06,
495
- "learning_rate": 1.699697060450416e-05,
496
- "loss": 0.8383,
497
- "step": 80
498
- },
499
- {
500
- "epoch": 0.06,
501
- "learning_rate": 1.6996843072745386e-05,
502
- "loss": 0.8108,
503
- "step": 81
504
- },
505
- {
506
- "epoch": 0.06,
507
- "learning_rate": 1.6996712912123603e-05,
508
- "loss": 0.8451,
509
- "step": 82
510
- },
511
- {
512
- "epoch": 0.06,
513
- "learning_rate": 1.699658012267908e-05,
514
- "loss": 0.8002,
515
- "step": 83
516
- },
517
- {
518
- "epoch": 0.06,
519
- "learning_rate": 1.6996444704452903e-05,
520
- "loss": 0.8447,
521
- "step": 84
522
- },
523
- {
524
- "epoch": 0.06,
525
- "learning_rate": 1.6996306657486972e-05,
526
- "loss": 0.8096,
527
- "step": 85
528
- },
529
- {
530
- "epoch": 0.06,
531
- "learning_rate": 1.699616598182399e-05,
532
- "loss": 0.8056,
533
- "step": 86
534
- },
535
- {
536
- "epoch": 0.06,
537
- "learning_rate": 1.699602267750749e-05,
538
- "loss": 0.8237,
539
- "step": 87
540
- },
541
- {
542
- "epoch": 0.06,
543
- "learning_rate": 1.6995876744581804e-05,
544
- "loss": 0.8593,
545
- "step": 88
546
- },
547
- {
548
- "epoch": 0.06,
549
- "learning_rate": 1.699572818309208e-05,
550
- "loss": 0.8264,
551
- "step": 89
552
- },
553
- {
554
- "epoch": 0.06,
555
- "learning_rate": 1.6995576993084292e-05,
556
- "loss": 0.8252,
557
- "step": 90
558
- },
559
- {
560
- "epoch": 0.06,
561
- "learning_rate": 1.699542317460521e-05,
562
- "loss": 0.8261,
563
- "step": 91
564
- },
565
- {
566
- "epoch": 0.06,
567
- "learning_rate": 1.699526672770242e-05,
568
- "loss": 0.8302,
569
- "step": 92
570
- },
571
- {
572
- "epoch": 0.07,
573
- "learning_rate": 1.6995107652424334e-05,
574
- "loss": 0.8142,
575
- "step": 93
576
- },
577
- {
578
- "epoch": 0.07,
579
- "learning_rate": 1.6994945948820167e-05,
580
- "loss": 0.8608,
581
- "step": 94
582
- },
583
- {
584
- "epoch": 0.07,
585
- "learning_rate": 1.6994781616939947e-05,
586
- "loss": 0.8916,
587
- "step": 95
588
- },
589
- {
590
- "epoch": 0.07,
591
- "learning_rate": 1.6994614656834514e-05,
592
- "loss": 0.7868,
593
- "step": 96
594
- },
595
- {
596
- "epoch": 0.07,
597
- "learning_rate": 1.699444506855553e-05,
598
- "loss": 0.8149,
599
- "step": 97
600
- },
601
- {
602
- "epoch": 0.07,
603
- "learning_rate": 1.6994272852155465e-05,
604
- "loss": 0.8389,
605
- "step": 98
606
- },
607
- {
608
- "epoch": 0.07,
609
- "learning_rate": 1.6994098007687595e-05,
610
- "loss": 0.8483,
611
- "step": 99
612
- },
613
- {
614
- "epoch": 0.07,
615
- "learning_rate": 1.699392053520602e-05,
616
- "loss": 0.8242,
617
- "step": 100
618
- },
619
- {
620
- "epoch": 0.07,
621
- "learning_rate": 1.699374043476565e-05,
622
- "loss": 0.8569,
623
- "step": 101
624
- },
625
- {
626
- "epoch": 0.07,
627
- "learning_rate": 1.6993557706422203e-05,
628
- "loss": 0.8628,
629
- "step": 102
630
- },
631
- {
632
- "epoch": 0.07,
633
- "learning_rate": 1.6993372350232215e-05,
634
- "loss": 0.8084,
635
- "step": 103
636
- },
637
- {
638
- "epoch": 0.07,
639
- "learning_rate": 1.699318436625304e-05,
640
- "loss": 0.8565,
641
- "step": 104
642
- },
643
- {
644
- "epoch": 0.07,
645
- "learning_rate": 1.6992993754542827e-05,
646
- "loss": 0.8014,
647
- "step": 105
648
- },
649
- {
650
- "epoch": 0.07,
651
- "learning_rate": 1.699280051516056e-05,
652
- "loss": 0.8055,
653
- "step": 106
654
- },
655
- {
656
- "epoch": 0.08,
657
- "learning_rate": 1.699260464816602e-05,
658
- "loss": 0.887,
659
- "step": 107
660
- },
661
- {
662
- "epoch": 0.08,
663
- "learning_rate": 1.6992406153619813e-05,
664
- "loss": 0.8289,
665
- "step": 108
666
- },
667
- {
668
- "epoch": 0.08,
669
- "learning_rate": 1.6992205031583348e-05,
670
- "loss": 0.8397,
671
- "step": 109
672
- },
673
- {
674
- "epoch": 0.08,
675
- "learning_rate": 1.699200128211885e-05,
676
- "loss": 0.8056,
677
- "step": 110
678
- },
679
- {
680
- "epoch": 0.08,
681
- "learning_rate": 1.6991794905289355e-05,
682
- "loss": 0.8382,
683
- "step": 111
684
- },
685
- {
686
- "epoch": 0.08,
687
- "learning_rate": 1.6991585901158723e-05,
688
- "loss": 0.8269,
689
- "step": 112
690
- },
691
- {
692
- "epoch": 0.08,
693
- "learning_rate": 1.6991374269791612e-05,
694
- "loss": 0.8216,
695
- "step": 113
696
- },
697
- {
698
- "epoch": 0.08,
699
- "learning_rate": 1.6991160011253498e-05,
700
- "loss": 0.8076,
701
- "step": 114
702
- },
703
- {
704
- "epoch": 0.08,
705
- "learning_rate": 1.6990943125610674e-05,
706
- "loss": 0.8282,
707
- "step": 115
708
- },
709
- {
710
- "epoch": 0.08,
711
- "learning_rate": 1.6990723612930245e-05,
712
- "loss": 0.832,
713
- "step": 116
714
- },
715
- {
716
- "epoch": 0.08,
717
- "learning_rate": 1.6990501473280122e-05,
718
- "loss": 0.8165,
719
- "step": 117
720
- },
721
- {
722
- "epoch": 0.08,
723
- "learning_rate": 1.6990276706729038e-05,
724
- "loss": 0.8087,
725
- "step": 118
726
- },
727
- {
728
- "epoch": 0.08,
729
- "learning_rate": 1.699004931334653e-05,
730
- "loss": 0.8169,
731
- "step": 119
732
- },
733
- {
734
- "epoch": 0.08,
735
- "learning_rate": 1.6989819293202955e-05,
736
- "loss": 0.8212,
737
- "step": 120
738
- },
739
- {
740
- "epoch": 0.09,
741
- "learning_rate": 1.698958664636948e-05,
742
- "loss": 0.8284,
743
- "step": 121
744
- },
745
- {
746
- "epoch": 0.09,
747
- "learning_rate": 1.6989351372918076e-05,
748
- "loss": 0.8465,
749
- "step": 122
750
- },
751
- {
752
- "epoch": 0.09,
753
- "learning_rate": 1.6989113472921546e-05,
754
- "loss": 0.8151,
755
- "step": 123
756
- },
757
- {
758
- "epoch": 0.09,
759
- "learning_rate": 1.698887294645349e-05,
760
- "loss": 0.7995,
761
- "step": 124
762
- },
763
- {
764
- "epoch": 0.09,
765
- "learning_rate": 1.6988629793588324e-05,
766
- "loss": 0.8824,
767
- "step": 125
768
- },
769
- {
770
- "epoch": 0.09,
771
- "learning_rate": 1.6988384014401282e-05,
772
- "loss": 0.7813,
773
- "step": 126
774
- },
775
- {
776
- "epoch": 0.09,
777
- "learning_rate": 1.69881356089684e-05,
778
- "loss": 0.8257,
779
- "step": 127
780
- },
781
- {
782
- "epoch": 0.09,
783
- "learning_rate": 1.698788457736654e-05,
784
- "loss": 0.7901,
785
- "step": 128
786
- },
787
- {
788
- "epoch": 0.09,
789
- "eval_loss": 0.7931999564170837,
790
- "eval_runtime": 30.7629,
791
- "eval_samples_per_second": 266.263,
792
- "eval_steps_per_second": 16.643,
793
- "step": 128
794
- },
795
- {
796
- "epoch": 0.09,
797
- "learning_rate": 1.6987630919673366e-05,
798
- "loss": 0.7926,
799
- "step": 129
800
- },
801
- {
802
- "epoch": 0.09,
803
- "learning_rate": 1.6987374635967355e-05,
804
- "loss": 0.7834,
805
- "step": 130
806
- },
807
- {
808
- "epoch": 0.09,
809
- "learning_rate": 1.6987115726327804e-05,
810
- "loss": 0.8247,
811
- "step": 131
812
- },
813
- {
814
- "epoch": 0.09,
815
- "learning_rate": 1.698685419083481e-05,
816
- "loss": 0.8198,
817
- "step": 132
818
- },
819
- {
820
- "epoch": 0.09,
821
- "learning_rate": 1.6986590029569304e-05,
822
- "loss": 0.7966,
823
- "step": 133
824
- },
825
- {
826
- "epoch": 0.09,
827
- "learning_rate": 1.6986323242613e-05,
828
- "loss": 0.8046,
829
- "step": 134
830
- },
831
- {
832
- "epoch": 0.1,
833
- "learning_rate": 1.6986053830048454e-05,
834
- "loss": 0.8209,
835
- "step": 135
836
- },
837
- {
838
- "epoch": 0.1,
839
- "learning_rate": 1.698578179195901e-05,
840
- "loss": 0.7962,
841
- "step": 136
842
- },
843
- {
844
- "epoch": 0.1,
845
- "learning_rate": 1.698550712842884e-05,
846
- "loss": 0.8746,
847
- "step": 137
848
- },
849
- {
850
- "epoch": 0.1,
851
- "learning_rate": 1.6985229839542924e-05,
852
- "loss": 0.8058,
853
- "step": 138
854
- },
855
- {
856
- "epoch": 0.1,
857
- "learning_rate": 1.6984949925387047e-05,
858
- "loss": 0.8034,
859
- "step": 139
860
- },
861
- {
862
- "epoch": 0.1,
863
- "learning_rate": 1.698466738604782e-05,
864
- "loss": 0.8141,
865
- "step": 140
866
- },
867
- {
868
- "epoch": 0.1,
869
- "learning_rate": 1.6984382221612652e-05,
870
- "loss": 0.7985,
871
- "step": 141
872
- },
873
- {
874
- "epoch": 0.1,
875
- "learning_rate": 1.698409443216977e-05,
876
- "loss": 0.7984,
877
- "step": 142
878
- },
879
- {
880
- "epoch": 0.1,
881
- "learning_rate": 1.6983804017808227e-05,
882
- "loss": 0.8707,
883
- "step": 143
884
- },
885
- {
886
- "epoch": 0.1,
887
- "learning_rate": 1.698351097861786e-05,
888
- "loss": 0.8473,
889
- "step": 144
890
- },
891
- {
892
- "epoch": 0.1,
893
- "learning_rate": 1.698321531468934e-05,
894
- "loss": 0.8104,
895
- "step": 145
896
- },
897
- {
898
- "epoch": 0.1,
899
- "learning_rate": 1.6982917026114144e-05,
900
- "loss": 0.8232,
901
- "step": 146
902
- },
903
- {
904
- "epoch": 0.1,
905
- "learning_rate": 1.6982616112984555e-05,
906
- "loss": 0.8075,
907
- "step": 147
908
- },
909
- {
910
- "epoch": 0.1,
911
- "learning_rate": 1.698231257539368e-05,
912
- "loss": 0.8264,
913
- "step": 148
914
- },
915
- {
916
- "epoch": 0.1,
917
- "learning_rate": 1.698200641343543e-05,
918
- "loss": 0.8066,
919
- "step": 149
920
- },
921
- {
922
- "epoch": 0.11,
923
- "learning_rate": 1.6981697627204525e-05,
924
- "loss": 0.8199,
925
- "step": 150
926
- },
927
- {
928
- "epoch": 0.11,
929
- "learning_rate": 1.6981386216796505e-05,
930
- "loss": 0.8038,
931
- "step": 151
932
- },
933
- {
934
- "epoch": 0.11,
935
- "learning_rate": 1.698107218230772e-05,
936
- "loss": 0.8123,
937
- "step": 152
938
- },
939
- {
940
- "epoch": 0.11,
941
- "learning_rate": 1.6980755523835324e-05,
942
- "loss": 0.8189,
943
- "step": 153
944
- },
945
- {
946
- "epoch": 0.11,
947
- "learning_rate": 1.6980436241477295e-05,
948
- "loss": 0.8062,
949
- "step": 154
950
- },
951
- {
952
- "epoch": 0.11,
953
- "learning_rate": 1.6980114335332414e-05,
954
- "loss": 0.8191,
955
- "step": 155
956
- },
957
- {
958
- "epoch": 0.11,
959
- "learning_rate": 1.6979789805500272e-05,
960
- "loss": 0.7982,
961
- "step": 156
962
- },
963
- {
964
- "epoch": 0.11,
965
- "learning_rate": 1.6979462652081286e-05,
966
- "loss": 0.7912,
967
- "step": 157
968
- },
969
- {
970
- "epoch": 0.11,
971
- "learning_rate": 1.6979132875176666e-05,
972
- "loss": 0.8342,
973
- "step": 158
974
- },
975
- {
976
- "epoch": 0.11,
977
- "learning_rate": 1.697880047488845e-05,
978
- "loss": 0.7968,
979
- "step": 159
980
- },
981
- {
982
- "epoch": 0.11,
983
- "learning_rate": 1.6978465451319474e-05,
984
- "loss": 0.7917,
985
- "step": 160
986
- },
987
- {
988
- "epoch": 0.11,
989
- "learning_rate": 1.6978127804573394e-05,
990
- "loss": 0.8096,
991
- "step": 161
992
- },
993
- {
994
- "epoch": 0.11,
995
- "learning_rate": 1.6977787534754678e-05,
996
- "loss": 0.8001,
997
- "step": 162
998
- },
999
- {
1000
- "epoch": 0.11,
1001
- "learning_rate": 1.69774446419686e-05,
1002
- "loss": 0.7972,
1003
- "step": 163
1004
- },
1005
- {
1006
- "epoch": 0.12,
1007
- "learning_rate": 1.6977099126321253e-05,
1008
- "loss": 0.8092,
1009
- "step": 164
1010
- },
1011
- {
1012
- "epoch": 0.12,
1013
- "learning_rate": 1.697675098791953e-05,
1014
- "loss": 0.8328,
1015
- "step": 165
1016
- },
1017
- {
1018
- "epoch": 0.12,
1019
- "learning_rate": 1.6976400226871147e-05,
1020
- "loss": 0.8235,
1021
- "step": 166
1022
- },
1023
- {
1024
- "epoch": 0.12,
1025
- "learning_rate": 1.6976046843284627e-05,
1026
- "loss": 0.8616,
1027
- "step": 167
1028
- },
1029
- {
1030
- "epoch": 0.12,
1031
- "learning_rate": 1.6975690837269304e-05,
1032
- "loss": 0.7643,
1033
- "step": 168
1034
- },
1035
- {
1036
- "epoch": 0.12,
1037
- "learning_rate": 1.6975332208935324e-05,
1038
- "loss": 0.7986,
1039
- "step": 169
1040
- },
1041
- {
1042
- "epoch": 0.12,
1043
- "learning_rate": 1.697497095839364e-05,
1044
- "loss": 0.8478,
1045
- "step": 170
1046
- },
1047
- {
1048
- "epoch": 0.12,
1049
- "learning_rate": 1.6974607085756025e-05,
1050
- "loss": 0.8107,
1051
- "step": 171
1052
- },
1053
- {
1054
- "epoch": 0.12,
1055
- "learning_rate": 1.697424059113506e-05,
1056
- "loss": 0.8232,
1057
- "step": 172
1058
- },
1059
- {
1060
- "epoch": 0.12,
1061
- "learning_rate": 1.6973871474644132e-05,
1062
- "loss": 0.8082,
1063
- "step": 173
1064
- },
1065
- {
1066
- "epoch": 0.12,
1067
- "learning_rate": 1.6973499736397444e-05,
1068
- "loss": 0.8148,
1069
- "step": 174
1070
- },
1071
- {
1072
- "epoch": 0.12,
1073
- "learning_rate": 1.697312537651001e-05,
1074
- "loss": 0.8161,
1075
- "step": 175
1076
- },
1077
- {
1078
- "epoch": 0.12,
1079
- "learning_rate": 1.6972748395097657e-05,
1080
- "loss": 0.8142,
1081
- "step": 176
1082
- },
1083
- {
1084
- "epoch": 0.12,
1085
- "learning_rate": 1.6972368792277013e-05,
1086
- "loss": 0.8291,
1087
- "step": 177
1088
- },
1089
- {
1090
- "epoch": 0.13,
1091
- "learning_rate": 1.6971986568165528e-05,
1092
- "loss": 0.8054,
1093
- "step": 178
1094
- },
1095
- {
1096
- "epoch": 0.13,
1097
- "learning_rate": 1.6971601722881463e-05,
1098
- "loss": 0.8364,
1099
- "step": 179
1100
- },
1101
- {
1102
- "epoch": 0.13,
1103
- "learning_rate": 1.697121425654388e-05,
1104
- "loss": 0.7836,
1105
- "step": 180
1106
- },
1107
- {
1108
- "epoch": 0.13,
1109
- "learning_rate": 1.6970824169272666e-05,
1110
- "loss": 0.8156,
1111
- "step": 181
1112
- },
1113
- {
1114
- "epoch": 0.13,
1115
- "learning_rate": 1.6970431461188505e-05,
1116
- "loss": 0.8271,
1117
- "step": 182
1118
- },
1119
- {
1120
- "epoch": 0.13,
1121
- "learning_rate": 1.69700361324129e-05,
1122
- "loss": 0.7945,
1123
- "step": 183
1124
- },
1125
- {
1126
- "epoch": 0.13,
1127
- "learning_rate": 1.6969638183068165e-05,
1128
- "loss": 0.8056,
1129
- "step": 184
1130
- },
1131
- {
1132
- "epoch": 0.13,
1133
- "learning_rate": 1.696923761327742e-05,
1134
- "loss": 0.8292,
1135
- "step": 185
1136
- },
1137
- {
1138
- "epoch": 0.13,
1139
- "learning_rate": 1.6968834423164595e-05,
1140
- "loss": 0.8248,
1141
- "step": 186
1142
- },
1143
- {
1144
- "epoch": 0.13,
1145
- "learning_rate": 1.696842861285444e-05,
1146
- "loss": 0.8358,
1147
- "step": 187
1148
- },
1149
- {
1150
- "epoch": 0.13,
1151
- "learning_rate": 1.6968020182472512e-05,
1152
- "loss": 0.8067,
1153
- "step": 188
1154
- },
1155
- {
1156
- "epoch": 0.13,
1157
- "learning_rate": 1.696760913214517e-05,
1158
- "loss": 0.8161,
1159
- "step": 189
1160
- },
1161
- {
1162
- "epoch": 0.13,
1163
- "learning_rate": 1.6967195461999595e-05,
1164
- "loss": 0.7748,
1165
- "step": 190
1166
- },
1167
- {
1168
- "epoch": 0.13,
1169
- "learning_rate": 1.6966779172163767e-05,
1170
- "loss": 0.8015,
1171
- "step": 191
1172
- },
1173
- {
1174
- "epoch": 0.14,
1175
- "learning_rate": 1.696636026276649e-05,
1176
- "loss": 0.8115,
1177
- "step": 192
1178
- },
1179
- {
1180
- "epoch": 0.14,
1181
- "eval_loss": 0.7831856608390808,
1182
- "eval_runtime": 30.7557,
1183
- "eval_samples_per_second": 266.324,
1184
- "eval_steps_per_second": 16.647,
1185
- "step": 192
1186
- },
1187
- {
1188
- "epoch": 0.14,
1189
- "learning_rate": 1.696593873393737e-05,
1190
- "loss": 0.833,
1191
- "step": 193
1192
- },
1193
- {
1194
- "epoch": 0.14,
1195
- "learning_rate": 1.6965514585806825e-05,
1196
- "loss": 0.8177,
1197
- "step": 194
1198
- },
1199
- {
1200
- "epoch": 0.14,
1201
- "learning_rate": 1.696508781850608e-05,
1202
- "loss": 0.8243,
1203
- "step": 195
1204
- },
1205
- {
1206
- "epoch": 0.14,
1207
- "learning_rate": 1.6964658432167176e-05,
1208
- "loss": 0.8172,
1209
- "step": 196
1210
- },
1211
- {
1212
- "epoch": 0.14,
1213
- "learning_rate": 1.6964226426922963e-05,
1214
- "loss": 0.8428,
1215
- "step": 197
1216
- },
1217
- {
1218
- "epoch": 0.14,
1219
- "learning_rate": 1.69637918029071e-05,
1220
- "loss": 0.8082,
1221
- "step": 198
1222
- },
1223
- {
1224
- "epoch": 0.14,
1225
- "learning_rate": 1.6963354560254054e-05,
1226
- "loss": 0.8236,
1227
- "step": 199
1228
- },
1229
- {
1230
- "epoch": 0.14,
1231
- "learning_rate": 1.6962914699099112e-05,
1232
- "loss": 0.7451,
1233
- "step": 200
1234
- },
1235
- {
1236
- "epoch": 0.14,
1237
- "learning_rate": 1.6962472219578356e-05,
1238
- "loss": 0.771,
1239
- "step": 201
1240
- },
1241
- {
1242
- "epoch": 0.14,
1243
- "learning_rate": 1.696202712182869e-05,
1244
- "loss": 0.8074,
1245
- "step": 202
1246
- },
1247
- {
1248
- "epoch": 0.14,
1249
- "learning_rate": 1.6961579405987824e-05,
1250
- "loss": 0.7791,
1251
- "step": 203
1252
- },
1253
- {
1254
- "epoch": 0.14,
1255
- "learning_rate": 1.6961129072194273e-05,
1256
- "loss": 0.7732,
1257
- "step": 204
1258
- },
1259
- {
1260
- "epoch": 0.14,
1261
- "learning_rate": 1.6960676120587377e-05,
1262
- "loss": 0.7845,
1263
- "step": 205
1264
- },
1265
- {
1266
- "epoch": 0.15,
1267
- "learning_rate": 1.6960220551307266e-05,
1268
- "loss": 0.8016,
1269
- "step": 206
1270
- },
1271
- {
1272
- "epoch": 0.15,
1273
- "learning_rate": 1.6959762364494895e-05,
1274
- "loss": 0.8266,
1275
- "step": 207
1276
- },
1277
- {
1278
- "epoch": 0.15,
1279
- "learning_rate": 1.6959301560292025e-05,
1280
- "loss": 0.7897,
1281
- "step": 208
1282
- },
1283
- {
1284
- "epoch": 0.15,
1285
- "learning_rate": 1.6958838138841224e-05,
1286
- "loss": 0.8058,
1287
- "step": 209
1288
- },
1289
- {
1290
- "epoch": 0.15,
1291
- "learning_rate": 1.695837210028587e-05,
1292
- "loss": 0.8034,
1293
- "step": 210
1294
- },
1295
- {
1296
- "epoch": 0.15,
1297
- "learning_rate": 1.695790344477015e-05,
1298
- "loss": 0.7905,
1299
- "step": 211
1300
- },
1301
- {
1302
- "epoch": 0.15,
1303
- "learning_rate": 1.6957432172439068e-05,
1304
- "loss": 0.8175,
1305
- "step": 212
1306
- },
1307
- {
1308
- "epoch": 0.15,
1309
- "learning_rate": 1.695695828343843e-05,
1310
- "loss": 0.8155,
1311
- "step": 213
1312
- },
1313
- {
1314
- "epoch": 0.15,
1315
- "learning_rate": 1.6956481777914853e-05,
1316
- "loss": 0.7877,
1317
- "step": 214
1318
- },
1319
- {
1320
- "epoch": 0.15,
1321
- "learning_rate": 1.6956002656015766e-05,
1322
- "loss": 0.847,
1323
- "step": 215
1324
- },
1325
- {
1326
- "epoch": 0.15,
1327
- "learning_rate": 1.6955520917889403e-05,
1328
- "loss": 0.8201,
1329
- "step": 216
1330
- },
1331
- {
1332
- "epoch": 0.15,
1333
- "learning_rate": 1.6955036563684815e-05,
1334
- "loss": 0.7787,
1335
- "step": 217
1336
- },
1337
- {
1338
- "epoch": 0.15,
1339
- "learning_rate": 1.695454959355186e-05,
1340
- "loss": 0.8092,
1341
- "step": 218
1342
- },
1343
- {
1344
- "epoch": 0.15,
1345
- "learning_rate": 1.695406000764119e-05,
1346
- "loss": 0.7894,
1347
- "step": 219
1348
- },
1349
- {
1350
- "epoch": 0.15,
1351
- "learning_rate": 1.695356780610429e-05,
1352
- "loss": 0.7949,
1353
- "step": 220
1354
- },
1355
- {
1356
- "epoch": 0.16,
1357
- "learning_rate": 1.6953072989093443e-05,
1358
- "loss": 0.8033,
1359
- "step": 221
1360
- },
1361
- {
1362
- "epoch": 0.16,
1363
- "learning_rate": 1.6952575556761742e-05,
1364
- "loss": 0.8124,
1365
- "step": 222
1366
- },
1367
- {
1368
- "epoch": 0.16,
1369
- "learning_rate": 1.6952075509263084e-05,
1370
- "loss": 0.7793,
1371
- "step": 223
1372
- },
1373
- {
1374
- "epoch": 0.16,
1375
- "learning_rate": 1.6951572846752185e-05,
1376
- "loss": 0.7998,
1377
- "step": 224
1378
- },
1379
- {
1380
- "epoch": 0.16,
1381
- "learning_rate": 1.6951067569384564e-05,
1382
- "loss": 0.7997,
1383
- "step": 225
1384
- },
1385
- {
1386
- "epoch": 0.16,
1387
- "learning_rate": 1.6950559677316548e-05,
1388
- "loss": 0.8083,
1389
- "step": 226
1390
- },
1391
- {
1392
- "epoch": 0.16,
1393
- "learning_rate": 1.695004917070528e-05,
1394
- "loss": 0.8149,
1395
- "step": 227
1396
- },
1397
- {
1398
- "epoch": 0.16,
1399
- "learning_rate": 1.69495360497087e-05,
1400
- "loss": 0.8128,
1401
- "step": 228
1402
- },
1403
- {
1404
- "epoch": 0.16,
1405
- "learning_rate": 1.6949020314485574e-05,
1406
- "loss": 0.8051,
1407
- "step": 229
1408
- },
1409
- {
1410
- "epoch": 0.16,
1411
- "learning_rate": 1.6948501965195458e-05,
1412
- "loss": 0.793,
1413
- "step": 230
1414
- },
1415
- {
1416
- "epoch": 0.16,
1417
- "learning_rate": 1.694798100199873e-05,
1418
- "loss": 0.8074,
1419
- "step": 231
1420
- },
1421
- {
1422
- "epoch": 0.16,
1423
- "learning_rate": 1.6947457425056568e-05,
1424
- "loss": 0.7519,
1425
- "step": 232
1426
- },
1427
- {
1428
- "epoch": 0.16,
1429
- "learning_rate": 1.694693123453097e-05,
1430
- "loss": 0.7624,
1431
- "step": 233
1432
- },
1433
- {
1434
- "epoch": 0.16,
1435
- "learning_rate": 1.694640243058473e-05,
1436
- "loss": 0.7876,
1437
- "step": 234
1438
- },
1439
- {
1440
- "epoch": 0.17,
1441
- "learning_rate": 1.6945871013381458e-05,
1442
- "loss": 0.8178,
1443
- "step": 235
1444
- },
1445
- {
1446
- "epoch": 0.17,
1447
- "learning_rate": 1.694533698308557e-05,
1448
- "loss": 0.7908,
1449
- "step": 236
1450
- },
1451
- {
1452
- "epoch": 0.17,
1453
- "learning_rate": 1.694480033986229e-05,
1454
- "loss": 0.8134,
1455
- "step": 237
1456
- },
1457
- {
1458
- "epoch": 0.17,
1459
- "learning_rate": 1.6944261083877654e-05,
1460
- "loss": 0.8117,
1461
- "step": 238
1462
- },
1463
- {
1464
- "epoch": 0.17,
1465
- "learning_rate": 1.6943719215298506e-05,
1466
- "loss": 0.8114,
1467
- "step": 239
1468
- },
1469
- {
1470
- "epoch": 0.17,
1471
- "learning_rate": 1.6943174734292492e-05,
1472
- "loss": 0.7986,
1473
- "step": 240
1474
- },
1475
- {
1476
- "epoch": 0.17,
1477
- "learning_rate": 1.6942627641028073e-05,
1478
- "loss": 0.7622,
1479
- "step": 241
1480
- },
1481
- {
1482
- "epoch": 0.17,
1483
- "learning_rate": 1.6942077935674514e-05,
1484
- "loss": 0.8067,
1485
- "step": 242
1486
- },
1487
- {
1488
- "epoch": 0.17,
1489
- "learning_rate": 1.6941525618401894e-05,
1490
- "loss": 0.7817,
1491
- "step": 243
1492
- },
1493
- {
1494
- "epoch": 0.17,
1495
- "learning_rate": 1.694097068938109e-05,
1496
- "loss": 0.817,
1497
- "step": 244
1498
- },
1499
- {
1500
- "epoch": 0.17,
1501
- "learning_rate": 1.6940413148783794e-05,
1502
- "loss": 0.7959,
1503
- "step": 245
1504
- },
1505
- {
1506
- "epoch": 0.17,
1507
- "learning_rate": 1.693985299678251e-05,
1508
- "loss": 0.828,
1509
- "step": 246
1510
- },
1511
- {
1512
- "epoch": 0.17,
1513
- "learning_rate": 1.6939290233550543e-05,
1514
- "loss": 0.7632,
1515
- "step": 247
1516
- },
1517
- {
1518
- "epoch": 0.17,
1519
- "learning_rate": 1.6938724859262002e-05,
1520
- "loss": 0.8183,
1521
- "step": 248
1522
- },
1523
- {
1524
- "epoch": 0.18,
1525
- "learning_rate": 1.693815687409182e-05,
1526
- "loss": 0.8055,
1527
- "step": 249
1528
- },
1529
- {
1530
- "epoch": 0.18,
1531
- "learning_rate": 1.6937586278215724e-05,
1532
- "loss": 0.8306,
1533
- "step": 250
1534
- },
1535
- {
1536
- "epoch": 0.18,
1537
- "learning_rate": 1.6937013071810247e-05,
1538
- "loss": 0.7819,
1539
- "step": 251
1540
- },
1541
- {
1542
- "epoch": 0.18,
1543
- "learning_rate": 1.693643725505274e-05,
1544
- "loss": 0.8061,
1545
- "step": 252
1546
- },
1547
- {
1548
- "epoch": 0.18,
1549
- "learning_rate": 1.6935858828121354e-05,
1550
- "loss": 0.7467,
1551
- "step": 253
1552
- },
1553
- {
1554
- "epoch": 0.18,
1555
- "learning_rate": 1.6935277791195055e-05,
1556
- "loss": 0.7992,
1557
- "step": 254
1558
- },
1559
- {
1560
- "epoch": 0.18,
1561
- "learning_rate": 1.6934694144453608e-05,
1562
- "loss": 0.7991,
1563
- "step": 255
1564
- },
1565
- {
1566
- "epoch": 0.18,
1567
- "learning_rate": 1.693410788807759e-05,
1568
- "loss": 0.8025,
1569
- "step": 256
1570
- },
1571
- {
1572
- "epoch": 0.18,
1573
- "eval_loss": 0.7759588956832886,
1574
- "eval_runtime": 30.747,
1575
- "eval_samples_per_second": 266.4,
1576
- "eval_steps_per_second": 16.652,
1577
- "step": 256
1578
- },
1579
- {
1580
- "epoch": 0.18,
1581
- "learning_rate": 1.693351902224838e-05,
1582
- "loss": 0.7956,
1583
- "step": 257
1584
- },
1585
- {
1586
- "epoch": 0.18,
1587
- "learning_rate": 1.693292754714818e-05,
1588
- "loss": 0.8035,
1589
- "step": 258
1590
- },
1591
- {
1592
- "epoch": 0.18,
1593
- "learning_rate": 1.6932333462959973e-05,
1594
- "loss": 0.8116,
1595
- "step": 259
1596
- },
1597
- {
1598
- "epoch": 0.18,
1599
- "learning_rate": 1.693173676986758e-05,
1600
- "loss": 0.8157,
1601
- "step": 260
1602
- },
1603
- {
1604
- "epoch": 0.18,
1605
- "learning_rate": 1.69311374680556e-05,
1606
- "loss": 0.8008,
1607
- "step": 261
1608
- },
1609
- {
1610
- "epoch": 0.18,
1611
- "learning_rate": 1.6930535557709463e-05,
1612
- "loss": 0.7822,
1613
- "step": 262
1614
- },
1615
- {
1616
- "epoch": 0.19,
1617
- "learning_rate": 1.692993103901539e-05,
1618
- "loss": 0.7952,
1619
- "step": 263
1620
- },
1621
- {
1622
- "epoch": 0.19,
1623
- "learning_rate": 1.692932391216042e-05,
1624
- "loss": 0.8231,
1625
- "step": 264
1626
- },
1627
- {
1628
- "epoch": 0.19,
1629
- "learning_rate": 1.6928714177332386e-05,
1630
- "loss": 0.8292,
1631
- "step": 265
1632
- },
1633
- {
1634
- "epoch": 0.19,
1635
- "learning_rate": 1.6928101834719942e-05,
1636
- "loss": 0.8161,
1637
- "step": 266
1638
- },
1639
- {
1640
- "epoch": 0.19,
1641
- "learning_rate": 1.692748688451254e-05,
1642
- "loss": 0.8237,
1643
- "step": 267
1644
- },
1645
- {
1646
- "epoch": 0.19,
1647
- "learning_rate": 1.6926869326900444e-05,
1648
- "loss": 0.8355,
1649
- "step": 268
1650
- },
1651
- {
1652
- "epoch": 0.19,
1653
- "learning_rate": 1.6926249162074718e-05,
1654
- "loss": 0.8237,
1655
- "step": 269
1656
- },
1657
- {
1658
- "epoch": 0.19,
1659
- "learning_rate": 1.6925626390227236e-05,
1660
- "loss": 0.8091,
1661
- "step": 270
1662
- },
1663
- {
1664
- "epoch": 0.19,
1665
- "learning_rate": 1.6925001011550684e-05,
1666
- "loss": 0.7625,
1667
- "step": 271
1668
- },
1669
- {
1670
- "epoch": 0.19,
1671
- "learning_rate": 1.6924373026238546e-05,
1672
- "loss": 0.793,
1673
- "step": 272
1674
- },
1675
- {
1676
- "epoch": 0.19,
1677
- "learning_rate": 1.6923742434485117e-05,
1678
- "loss": 0.798,
1679
- "step": 273
1680
- },
1681
- {
1682
- "epoch": 0.19,
1683
- "learning_rate": 1.69231092364855e-05,
1684
- "loss": 0.7903,
1685
- "step": 274
1686
- },
1687
- {
1688
- "epoch": 0.19,
1689
- "learning_rate": 1.69224734324356e-05,
1690
- "loss": 0.8097,
1691
- "step": 275
1692
- },
1693
- {
1694
- "epoch": 0.19,
1695
- "learning_rate": 1.6921835022532127e-05,
1696
- "loss": 0.7645,
1697
- "step": 276
1698
- },
1699
- {
1700
- "epoch": 0.2,
1701
- "learning_rate": 1.692119400697261e-05,
1702
- "loss": 0.7728,
1703
- "step": 277
1704
- },
1705
- {
1706
- "epoch": 0.2,
1707
- "learning_rate": 1.6920550385955362e-05,
1708
- "loss": 0.7921,
1709
- "step": 278
1710
- },
1711
- {
1712
- "epoch": 0.2,
1713
- "learning_rate": 1.6919904159679522e-05,
1714
- "loss": 0.7963,
1715
- "step": 279
1716
- },
1717
- {
1718
- "epoch": 0.2,
1719
- "learning_rate": 1.6919255328345027e-05,
1720
- "loss": 0.7704,
1721
- "step": 280
1722
- },
1723
- {
1724
- "epoch": 0.2,
1725
- "learning_rate": 1.6918603892152623e-05,
1726
- "loss": 0.8011,
1727
- "step": 281
1728
- },
1729
- {
1730
- "epoch": 0.2,
1731
- "learning_rate": 1.6917949851303857e-05,
1732
- "loss": 0.8388,
1733
- "step": 282
1734
- },
1735
- {
1736
- "epoch": 0.2,
1737
- "learning_rate": 1.691729320600109e-05,
1738
- "loss": 0.7913,
1739
- "step": 283
1740
- },
1741
- {
1742
- "epoch": 0.2,
1743
- "learning_rate": 1.691663395644747e-05,
1744
- "loss": 0.793,
1745
- "step": 284
1746
- },
1747
- {
1748
- "epoch": 0.2,
1749
- "learning_rate": 1.6915972102846976e-05,
1750
- "loss": 0.8045,
1751
- "step": 285
1752
- },
1753
- {
1754
- "epoch": 0.2,
1755
- "learning_rate": 1.6915307645404377e-05,
1756
- "loss": 0.7984,
1757
- "step": 286
1758
- },
1759
- {
1760
- "epoch": 0.2,
1761
- "learning_rate": 1.6914640584325254e-05,
1762
- "loss": 0.775,
1763
- "step": 287
1764
- },
1765
- {
1766
- "epoch": 0.2,
1767
- "learning_rate": 1.6913970919815987e-05,
1768
- "loss": 0.7898,
1769
- "step": 288
1770
- },
1771
- {
1772
- "epoch": 0.2,
1773
- "learning_rate": 1.6913298652083768e-05,
1774
- "loss": 0.7951,
1775
- "step": 289
1776
- },
1777
- {
1778
- "epoch": 0.2,
1779
- "learning_rate": 1.691262378133659e-05,
1780
- "loss": 0.8241,
1781
- "step": 290
1782
- },
1783
- {
1784
- "epoch": 0.2,
1785
- "learning_rate": 1.6911946307783257e-05,
1786
- "loss": 0.8118,
1787
- "step": 291
1788
- },
1789
- {
1790
- "epoch": 0.21,
1791
- "learning_rate": 1.691126623163337e-05,
1792
- "loss": 0.8119,
1793
- "step": 292
1794
- },
1795
- {
1796
- "epoch": 0.21,
1797
- "learning_rate": 1.6910583553097343e-05,
1798
- "loss": 0.8161,
1799
- "step": 293
1800
- },
1801
- {
1802
- "epoch": 0.21,
1803
- "learning_rate": 1.6909898272386392e-05,
1804
- "loss": 0.8225,
1805
- "step": 294
1806
- },
1807
- {
1808
- "epoch": 0.21,
1809
- "learning_rate": 1.6909210389712532e-05,
1810
- "loss": 0.7714,
1811
- "step": 295
1812
- },
1813
- {
1814
- "epoch": 0.21,
1815
- "learning_rate": 1.6908519905288598e-05,
1816
- "loss": 0.814,
1817
- "step": 296
1818
- },
1819
- {
1820
- "epoch": 0.21,
1821
- "learning_rate": 1.6907826819328216e-05,
1822
- "loss": 0.8049,
1823
- "step": 297
1824
- },
1825
- {
1826
- "epoch": 0.21,
1827
- "learning_rate": 1.690713113204582e-05,
1828
- "loss": 0.7944,
1829
- "step": 298
1830
- },
1831
- {
1832
- "epoch": 0.21,
1833
- "learning_rate": 1.6906432843656652e-05,
1834
- "loss": 0.7852,
1835
- "step": 299
1836
- },
1837
- {
1838
- "epoch": 0.21,
1839
- "learning_rate": 1.6905731954376763e-05,
1840
- "loss": 0.8208,
1841
- "step": 300
1842
- },
1843
- {
1844
- "epoch": 0.21,
1845
- "learning_rate": 1.6905028464422996e-05,
1846
- "loss": 0.7889,
1847
- "step": 301
1848
- },
1849
- {
1850
- "epoch": 0.21,
1851
- "learning_rate": 1.690432237401301e-05,
1852
- "loss": 0.83,
1853
- "step": 302
1854
- },
1855
- {
1856
- "epoch": 0.21,
1857
- "learning_rate": 1.6903613683365264e-05,
1858
- "loss": 0.79,
1859
- "step": 303
1860
- },
1861
- {
1862
- "epoch": 0.21,
1863
- "learning_rate": 1.690290239269902e-05,
1864
- "loss": 0.7996,
1865
- "step": 304
1866
- },
1867
- {
1868
- "epoch": 0.21,
1869
- "learning_rate": 1.6902188502234345e-05,
1870
- "loss": 0.8154,
1871
- "step": 305
1872
- },
1873
- {
1874
- "epoch": 0.22,
1875
- "learning_rate": 1.690147201219212e-05,
1876
- "loss": 0.7931,
1877
- "step": 306
1878
- },
1879
- {
1880
- "epoch": 0.22,
1881
- "learning_rate": 1.6900752922794008e-05,
1882
- "loss": 0.8305,
1883
- "step": 307
1884
- },
1885
- {
1886
- "epoch": 0.22,
1887
- "learning_rate": 1.6900031234262502e-05,
1888
- "loss": 0.8065,
1889
- "step": 308
1890
- },
1891
- {
1892
- "epoch": 0.22,
1893
- "learning_rate": 1.6899306946820885e-05,
1894
- "loss": 0.8157,
1895
- "step": 309
1896
- },
1897
- {
1898
- "epoch": 0.22,
1899
- "learning_rate": 1.689858006069324e-05,
1900
- "loss": 0.8263,
1901
- "step": 310
1902
- },
1903
- {
1904
- "epoch": 0.22,
1905
- "learning_rate": 1.6897850576104464e-05,
1906
- "loss": 0.7927,
1907
- "step": 311
1908
- },
1909
- {
1910
- "epoch": 0.22,
1911
- "learning_rate": 1.6897118493280262e-05,
1912
- "loss": 0.8144,
1913
- "step": 312
1914
- },
1915
- {
1916
- "epoch": 0.22,
1917
- "learning_rate": 1.6896383812447125e-05,
1918
- "loss": 0.8039,
1919
- "step": 313
1920
- },
1921
- {
1922
- "epoch": 0.22,
1923
- "learning_rate": 1.689564653383236e-05,
1924
- "loss": 0.7707,
1925
- "step": 314
1926
- },
1927
- {
1928
- "epoch": 0.22,
1929
- "learning_rate": 1.689490665766408e-05,
1930
- "loss": 0.7944,
1931
- "step": 315
1932
- },
1933
- {
1934
- "epoch": 0.22,
1935
- "learning_rate": 1.6894164184171197e-05,
1936
- "loss": 0.8215,
1937
- "step": 316
1938
- },
1939
- {
1940
- "epoch": 0.22,
1941
- "learning_rate": 1.689341911358342e-05,
1942
- "loss": 0.8066,
1943
- "step": 317
1944
- },
1945
- {
1946
- "epoch": 0.22,
1947
- "learning_rate": 1.6892671446131277e-05,
1948
- "loss": 0.7923,
1949
- "step": 318
1950
- },
1951
- {
1952
- "epoch": 0.22,
1953
- "learning_rate": 1.6891921182046087e-05,
1954
- "loss": 0.7741,
1955
- "step": 319
1956
- },
1957
- {
1958
- "epoch": 0.23,
1959
- "learning_rate": 1.6891168321559976e-05,
1960
- "loss": 0.777,
1961
- "step": 320
1962
- },
1963
- {
1964
- "epoch": 0.23,
1965
- "eval_loss": 0.7704695463180542,
1966
- "eval_runtime": 30.755,
1967
- "eval_samples_per_second": 266.33,
1968
- "eval_steps_per_second": 16.648,
1969
- "step": 320
1970
- },
1971
- {
1972
- "epoch": 0.23,
1973
- "learning_rate": 1.6890412864905874e-05,
1974
- "loss": 0.7892,
1975
- "step": 321
1976
- },
1977
- {
1978
- "epoch": 0.23,
1979
- "learning_rate": 1.6889654812317518e-05,
1980
- "loss": 0.7889,
1981
- "step": 322
1982
- },
1983
- {
1984
- "epoch": 0.23,
1985
- "learning_rate": 1.6888894164029435e-05,
1986
- "loss": 0.7891,
1987
- "step": 323
1988
- },
1989
- {
1990
- "epoch": 0.23,
1991
- "learning_rate": 1.6888130920276974e-05,
1992
- "loss": 0.7871,
1993
- "step": 324
1994
- },
1995
- {
1996
- "epoch": 0.23,
1997
- "learning_rate": 1.6887365081296272e-05,
1998
- "loss": 0.805,
1999
- "step": 325
2000
- },
2001
- {
2002
- "epoch": 0.23,
2003
- "learning_rate": 1.6886596647324277e-05,
2004
- "loss": 0.8071,
2005
- "step": 326
2006
- },
2007
- {
2008
- "epoch": 0.23,
2009
- "learning_rate": 1.688582561859873e-05,
2010
- "loss": 0.7928,
2011
- "step": 327
2012
- },
2013
- {
2014
- "epoch": 0.23,
2015
- "learning_rate": 1.6885051995358192e-05,
2016
- "loss": 0.7882,
2017
- "step": 328
2018
- },
2019
- {
2020
- "epoch": 0.23,
2021
- "learning_rate": 1.6884275777842013e-05,
2022
- "loss": 0.8244,
2023
- "step": 329
2024
- },
2025
- {
2026
- "epoch": 0.23,
2027
- "learning_rate": 1.6883496966290342e-05,
2028
- "loss": 0.8312,
2029
- "step": 330
2030
- },
2031
- {
2032
- "epoch": 0.23,
2033
- "learning_rate": 1.688271556094415e-05,
2034
- "loss": 0.7886,
2035
- "step": 331
2036
- },
2037
- {
2038
- "epoch": 0.23,
2039
- "learning_rate": 1.6881931562045185e-05,
2040
- "loss": 0.804,
2041
- "step": 332
2042
- },
2043
- {
2044
- "epoch": 0.23,
2045
- "learning_rate": 1.688114496983602e-05,
2046
- "loss": 0.7902,
2047
- "step": 333
2048
- },
2049
- {
2050
- "epoch": 0.24,
2051
- "learning_rate": 1.688035578456002e-05,
2052
- "loss": 0.8037,
2053
- "step": 334
2054
- },
2055
- {
2056
- "epoch": 0.24,
2057
- "learning_rate": 1.6879564006461352e-05,
2058
- "loss": 0.8034,
2059
- "step": 335
2060
- },
2061
- {
2062
- "epoch": 0.24,
2063
- "learning_rate": 1.6878769635784984e-05,
2064
- "loss": 0.8324,
2065
- "step": 336
2066
- },
2067
- {
2068
- "epoch": 0.24,
2069
- "learning_rate": 1.687797267277669e-05,
2070
- "loss": 0.8136,
2071
- "step": 337
2072
- },
2073
- {
2074
- "epoch": 0.24,
2075
- "learning_rate": 1.6877173117683052e-05,
2076
- "loss": 0.7752,
2077
- "step": 338
2078
- },
2079
- {
2080
- "epoch": 0.24,
2081
- "learning_rate": 1.6876370970751435e-05,
2082
- "loss": 0.7666,
2083
- "step": 339
2084
- },
2085
- {
2086
- "epoch": 0.24,
2087
- "learning_rate": 1.6875566232230026e-05,
2088
- "loss": 0.8252,
2089
- "step": 340
2090
- },
2091
- {
2092
- "epoch": 0.24,
2093
- "learning_rate": 1.6874758902367802e-05,
2094
- "loss": 0.8021,
2095
- "step": 341
2096
- },
2097
- {
2098
- "epoch": 0.24,
2099
- "learning_rate": 1.6873948981414544e-05,
2100
- "loss": 0.8147,
2101
- "step": 342
2102
- },
2103
- {
2104
- "epoch": 0.24,
2105
- "learning_rate": 1.6873136469620837e-05,
2106
- "loss": 0.8018,
2107
- "step": 343
2108
- },
2109
- {
2110
- "epoch": 0.24,
2111
- "learning_rate": 1.6872321367238068e-05,
2112
- "loss": 0.7716,
2113
- "step": 344
2114
- },
2115
- {
2116
- "epoch": 0.24,
2117
- "learning_rate": 1.6871503674518422e-05,
2118
- "loss": 0.8231,
2119
- "step": 345
2120
- },
2121
- {
2122
- "epoch": 0.24,
2123
- "learning_rate": 1.687068339171489e-05,
2124
- "loss": 0.7701,
2125
- "step": 346
2126
- },
2127
- {
2128
- "epoch": 0.24,
2129
- "learning_rate": 1.686986051908126e-05,
2130
- "loss": 0.7897,
2131
- "step": 347
2132
- },
2133
- {
2134
- "epoch": 0.25,
2135
- "learning_rate": 1.686903505687212e-05,
2136
- "loss": 0.7866,
2137
- "step": 348
2138
- },
2139
- {
2140
- "epoch": 0.25,
2141
- "learning_rate": 1.6868207005342865e-05,
2142
- "loss": 0.7816,
2143
- "step": 349
2144
- },
2145
- {
2146
- "epoch": 0.25,
2147
- "learning_rate": 1.6867376364749692e-05,
2148
- "loss": 0.8047,
2149
- "step": 350
2150
- },
2151
- {
2152
- "epoch": 0.25,
2153
- "learning_rate": 1.6866543135349586e-05,
2154
- "loss": 0.7958,
2155
- "step": 351
2156
- },
2157
- {
2158
- "epoch": 0.25,
2159
- "learning_rate": 1.686570731740035e-05,
2160
- "loss": 0.7998,
2161
- "step": 352
2162
- },
2163
- {
2164
- "epoch": 0.25,
2165
- "learning_rate": 1.686486891116058e-05,
2166
- "loss": 0.8225,
2167
- "step": 353
2168
- },
2169
- {
2170
- "epoch": 0.25,
2171
- "learning_rate": 1.6864027916889666e-05,
2172
- "loss": 0.7708,
2173
- "step": 354
2174
- },
2175
- {
2176
- "epoch": 0.25,
2177
- "learning_rate": 1.6863184334847812e-05,
2178
- "loss": 0.7555,
2179
- "step": 355
2180
- },
2181
- {
2182
- "epoch": 0.25,
2183
- "learning_rate": 1.6862338165296013e-05,
2184
- "loss": 0.7811,
2185
- "step": 356
2186
- },
2187
- {
2188
- "epoch": 0.25,
2189
- "learning_rate": 1.6861489408496077e-05,
2190
- "loss": 0.8011,
2191
- "step": 357
2192
- },
2193
- {
2194
- "epoch": 0.25,
2195
- "learning_rate": 1.686063806471059e-05,
2196
- "loss": 0.7982,
2197
- "step": 358
2198
- },
2199
- {
2200
- "epoch": 0.25,
2201
- "learning_rate": 1.685978413420296e-05,
2202
- "loss": 0.7498,
2203
- "step": 359
2204
- },
2205
- {
2206
- "epoch": 0.25,
2207
- "learning_rate": 1.685892761723738e-05,
2208
- "loss": 0.8083,
2209
- "step": 360
2210
- },
2211
- {
2212
- "epoch": 0.25,
2213
- "learning_rate": 1.685806851407886e-05,
2214
- "loss": 0.7749,
2215
- "step": 361
2216
- },
2217
- {
2218
- "epoch": 0.25,
2219
- "learning_rate": 1.6857206824993192e-05,
2220
- "loss": 0.8077,
2221
- "step": 362
2222
- },
2223
- {
2224
- "epoch": 0.26,
2225
- "learning_rate": 1.6856342550246983e-05,
2226
- "loss": 0.7787,
2227
- "step": 363
2228
- },
2229
- {
2230
- "epoch": 0.26,
2231
- "learning_rate": 1.685547569010763e-05,
2232
- "loss": 0.8639,
2233
- "step": 364
2234
- },
2235
- {
2236
- "epoch": 0.26,
2237
- "learning_rate": 1.6854606244843333e-05,
2238
- "loss": 0.7871,
2239
- "step": 365
2240
- },
2241
- {
2242
- "epoch": 0.26,
2243
- "learning_rate": 1.685373421472309e-05,
2244
- "loss": 0.7694,
2245
- "step": 366
2246
- },
2247
- {
2248
- "epoch": 0.26,
2249
- "learning_rate": 1.6852859600016707e-05,
2250
- "loss": 0.7757,
2251
- "step": 367
2252
- },
2253
- {
2254
- "epoch": 0.26,
2255
- "learning_rate": 1.685198240099478e-05,
2256
- "loss": 0.8223,
2257
- "step": 368
2258
- },
2259
- {
2260
- "epoch": 0.26,
2261
- "learning_rate": 1.6851102617928706e-05,
2262
- "loss": 0.7738,
2263
- "step": 369
2264
- },
2265
- {
2266
- "epoch": 0.26,
2267
- "learning_rate": 1.685022025109069e-05,
2268
- "loss": 0.7939,
2269
- "step": 370
2270
- },
2271
- {
2272
- "epoch": 0.26,
2273
- "learning_rate": 1.6849335300753726e-05,
2274
- "loss": 0.8224,
2275
- "step": 371
2276
- },
2277
- {
2278
- "epoch": 0.26,
2279
- "learning_rate": 1.684844776719161e-05,
2280
- "loss": 0.7705,
2281
- "step": 372
2282
- },
2283
- {
2284
- "epoch": 0.26,
2285
- "learning_rate": 1.684755765067894e-05,
2286
- "loss": 0.798,
2287
- "step": 373
2288
- },
2289
- {
2290
- "epoch": 0.26,
2291
- "learning_rate": 1.6846664951491114e-05,
2292
- "loss": 0.8298,
2293
- "step": 374
2294
- },
2295
- {
2296
- "epoch": 0.26,
2297
- "learning_rate": 1.6845769669904323e-05,
2298
- "loss": 0.7963,
2299
- "step": 375
2300
- },
2301
- {
2302
- "epoch": 0.26,
2303
- "learning_rate": 1.6844871806195564e-05,
2304
- "loss": 0.769,
2305
- "step": 376
2306
- },
2307
- {
2308
- "epoch": 0.27,
2309
- "learning_rate": 1.6843971360642628e-05,
2310
- "loss": 0.8088,
2311
- "step": 377
2312
- },
2313
- {
2314
- "epoch": 0.27,
2315
- "learning_rate": 1.6843068333524106e-05,
2316
- "loss": 0.7776,
2317
- "step": 378
2318
- },
2319
- {
2320
- "epoch": 0.27,
2321
- "learning_rate": 1.684216272511939e-05,
2322
- "loss": 0.7909,
2323
- "step": 379
2324
- },
2325
- {
2326
- "epoch": 0.27,
2327
- "learning_rate": 1.6841254535708666e-05,
2328
- "loss": 0.8098,
2329
- "step": 380
2330
- },
2331
- {
2332
- "epoch": 0.27,
2333
- "learning_rate": 1.6840343765572924e-05,
2334
- "loss": 0.7935,
2335
- "step": 381
2336
- },
2337
- {
2338
- "epoch": 0.27,
2339
- "learning_rate": 1.683943041499395e-05,
2340
- "loss": 0.7797,
2341
- "step": 382
2342
- },
2343
- {
2344
- "epoch": 0.27,
2345
- "learning_rate": 1.6838514484254326e-05,
2346
- "loss": 0.7852,
2347
- "step": 383
2348
- },
2349
- {
2350
- "epoch": 0.27,
2351
- "learning_rate": 1.6837595973637433e-05,
2352
- "loss": 0.7819,
2353
- "step": 384
2354
- },
2355
- {
2356
- "epoch": 0.27,
2357
- "eval_loss": 0.7658857107162476,
2358
- "eval_runtime": 30.7762,
2359
- "eval_samples_per_second": 266.147,
2360
- "eval_steps_per_second": 16.636,
2361
- "step": 384
2362
- },
2363
- {
2364
- "epoch": 0.27,
2365
- "learning_rate": 1.683667488342746e-05,
2366
- "loss": 0.7761,
2367
- "step": 385
2368
- },
2369
- {
2370
- "epoch": 0.27,
2371
- "learning_rate": 1.6835751213909377e-05,
2372
- "loss": 0.7965,
2373
- "step": 386
2374
- },
2375
- {
2376
- "epoch": 0.27,
2377
- "learning_rate": 1.6834824965368965e-05,
2378
- "loss": 0.7995,
2379
- "step": 387
2380
- },
2381
- {
2382
- "epoch": 0.27,
2383
- "learning_rate": 1.683389613809279e-05,
2384
- "loss": 0.8347,
2385
- "step": 388
2386
- },
2387
- {
2388
- "epoch": 0.27,
2389
- "learning_rate": 1.683296473236824e-05,
2390
- "loss": 0.8372,
2391
- "step": 389
2392
- },
2393
- {
2394
- "epoch": 0.27,
2395
- "learning_rate": 1.683203074848348e-05,
2396
- "loss": 0.7939,
2397
- "step": 390
2398
- },
2399
- {
2400
- "epoch": 0.28,
2401
- "learning_rate": 1.683109418672747e-05,
2402
- "loss": 0.8014,
2403
- "step": 391
2404
- },
2405
- {
2406
- "epoch": 0.28,
2407
- "learning_rate": 1.6830155047389983e-05,
2408
- "loss": 0.7707,
2409
- "step": 392
2410
- },
2411
- {
2412
- "epoch": 0.28,
2413
- "learning_rate": 1.682921333076158e-05,
2414
- "loss": 0.7995,
2415
- "step": 393
2416
- },
2417
- {
2418
- "epoch": 0.28,
2419
- "learning_rate": 1.682826903713362e-05,
2420
- "loss": 0.7665,
2421
- "step": 394
2422
- },
2423
- {
2424
- "epoch": 0.28,
2425
- "learning_rate": 1.682732216679826e-05,
2426
- "loss": 0.8015,
2427
- "step": 395
2428
- },
2429
- {
2430
- "epoch": 0.28,
2431
- "learning_rate": 1.6826372720048464e-05,
2432
- "loss": 0.8058,
2433
- "step": 396
2434
- },
2435
- {
2436
- "epoch": 0.28,
2437
- "learning_rate": 1.6825420697177973e-05,
2438
- "loss": 0.7726,
2439
- "step": 397
2440
- },
2441
- {
2442
- "epoch": 0.28,
2443
- "learning_rate": 1.682446609848134e-05,
2444
- "loss": 0.7905,
2445
- "step": 398
2446
- },
2447
- {
2448
- "epoch": 0.28,
2449
- "learning_rate": 1.6823508924253916e-05,
2450
- "loss": 0.8288,
2451
- "step": 399
2452
- },
2453
- {
2454
- "epoch": 0.28,
2455
- "learning_rate": 1.6822549174791836e-05,
2456
- "loss": 0.7661,
2457
- "step": 400
2458
- },
2459
- {
2460
- "epoch": 0.28,
2461
- "learning_rate": 1.6821586850392043e-05,
2462
- "loss": 0.8125,
2463
- "step": 401
2464
- },
2465
- {
2466
- "epoch": 0.28,
2467
- "learning_rate": 1.6820621951352272e-05,
2468
- "loss": 0.7937,
2469
- "step": 402
2470
- },
2471
- {
2472
- "epoch": 0.28,
2473
- "learning_rate": 1.681965447797106e-05,
2474
- "loss": 0.7842,
2475
- "step": 403
2476
- },
2477
- {
2478
- "epoch": 0.28,
2479
- "learning_rate": 1.6818684430547733e-05,
2480
- "loss": 0.7951,
2481
- "step": 404
2482
- },
2483
- {
2484
- "epoch": 0.29,
2485
- "learning_rate": 1.681771180938242e-05,
2486
- "loss": 0.8059,
2487
- "step": 405
2488
- },
2489
- {
2490
- "epoch": 0.29,
2491
- "learning_rate": 1.681673661477604e-05,
2492
- "loss": 0.7858,
2493
- "step": 406
2494
- },
2495
- {
2496
- "epoch": 0.29,
2497
- "learning_rate": 1.681575884703031e-05,
2498
- "loss": 0.7819,
2499
- "step": 407
2500
- },
2501
- {
2502
- "epoch": 0.29,
2503
- "learning_rate": 1.681477850644775e-05,
2504
- "loss": 0.7627,
2505
- "step": 408
2506
- },
2507
- {
2508
- "epoch": 0.29,
2509
- "learning_rate": 1.681379559333166e-05,
2510
- "loss": 0.7706,
2511
- "step": 409
2512
- },
2513
- {
2514
- "epoch": 0.29,
2515
- "learning_rate": 1.6812810107986157e-05,
2516
- "loss": 0.7542,
2517
- "step": 410
2518
- },
2519
- {
2520
- "epoch": 0.29,
2521
- "learning_rate": 1.681182205071614e-05,
2522
- "loss": 0.7907,
2523
- "step": 411
2524
- },
2525
- {
2526
- "epoch": 0.29,
2527
- "learning_rate": 1.6810831421827303e-05,
2528
- "loss": 0.7613,
2529
- "step": 412
2530
- },
2531
- {
2532
- "epoch": 0.29,
2533
- "learning_rate": 1.6809838221626147e-05,
2534
- "loss": 0.782,
2535
- "step": 413
2536
- },
2537
- {
2538
- "epoch": 0.29,
2539
- "learning_rate": 1.6808842450419957e-05,
2540
- "loss": 0.7546,
2541
- "step": 414
2542
- },
2543
- {
2544
- "epoch": 0.29,
2545
- "learning_rate": 1.6807844108516815e-05,
2546
- "loss": 0.7801,
2547
- "step": 415
2548
- },
2549
- {
2550
- "epoch": 0.29,
2551
- "learning_rate": 1.6806843196225605e-05,
2552
- "loss": 0.7721,
2553
- "step": 416
2554
- },
2555
- {
2556
- "epoch": 0.29,
2557
- "learning_rate": 1.6805839713855998e-05,
2558
- "loss": 0.8184,
2559
- "step": 417
2560
- },
2561
- {
2562
- "epoch": 0.29,
2563
- "learning_rate": 1.680483366171847e-05,
2564
- "loss": 0.7918,
2565
- "step": 418
2566
- },
2567
- {
2568
- "epoch": 0.3,
2569
- "learning_rate": 1.6803825040124284e-05,
2570
- "loss": 0.7765,
2571
- "step": 419
2572
- },
2573
- {
2574
- "epoch": 0.3,
2575
- "learning_rate": 1.6802813849385497e-05,
2576
- "loss": 0.7909,
2577
- "step": 420
2578
- },
2579
- {
2580
- "epoch": 0.3,
2581
- "learning_rate": 1.6801800089814968e-05,
2582
- "loss": 0.7874,
2583
- "step": 421
2584
- },
2585
- {
2586
- "epoch": 0.3,
2587
- "learning_rate": 1.6800783761726345e-05,
2588
- "loss": 0.8151,
2589
- "step": 422
2590
- },
2591
- {
2592
- "epoch": 0.3,
2593
- "learning_rate": 1.6799764865434077e-05,
2594
- "loss": 0.8054,
2595
- "step": 423
2596
- },
2597
- {
2598
- "epoch": 0.3,
2599
- "learning_rate": 1.6798743401253395e-05,
2600
- "loss": 0.8204,
2601
- "step": 424
2602
- },
2603
- {
2604
- "epoch": 0.3,
2605
- "learning_rate": 1.679771936950034e-05,
2606
- "loss": 0.7757,
2607
- "step": 425
2608
- },
2609
- {
2610
- "epoch": 0.3,
2611
- "learning_rate": 1.679669277049174e-05,
2612
- "loss": 0.7886,
2613
- "step": 426
2614
- },
2615
- {
2616
- "epoch": 0.3,
2617
- "learning_rate": 1.6795663604545214e-05,
2618
- "loss": 0.8062,
2619
- "step": 427
2620
- },
2621
- {
2622
- "epoch": 0.3,
2623
- "learning_rate": 1.6794631871979177e-05,
2624
- "loss": 0.7943,
2625
- "step": 428
2626
- },
2627
- {
2628
- "epoch": 0.3,
2629
- "learning_rate": 1.6793597573112846e-05,
2630
- "loss": 0.8008,
2631
- "step": 429
2632
- },
2633
- {
2634
- "epoch": 0.3,
2635
- "learning_rate": 1.679256070826622e-05,
2636
- "loss": 0.7885,
2637
- "step": 430
2638
- },
2639
- {
2640
- "epoch": 0.3,
2641
- "learning_rate": 1.6791521277760102e-05,
2642
- "loss": 0.7788,
2643
- "step": 431
2644
- },
2645
- {
2646
- "epoch": 0.3,
2647
- "learning_rate": 1.6790479281916082e-05,
2648
- "loss": 0.7901,
2649
- "step": 432
2650
- },
2651
- {
2652
- "epoch": 0.3,
2653
- "learning_rate": 1.6789434721056544e-05,
2654
- "loss": 0.8038,
2655
- "step": 433
2656
- },
2657
- {
2658
- "epoch": 0.31,
2659
- "learning_rate": 1.6788387595504675e-05,
2660
- "loss": 0.8045,
2661
- "step": 434
2662
- },
2663
- {
2664
- "epoch": 0.31,
2665
- "learning_rate": 1.6787337905584444e-05,
2666
- "loss": 0.8027,
2667
- "step": 435
2668
- },
2669
- {
2670
- "epoch": 0.31,
2671
- "learning_rate": 1.678628565162061e-05,
2672
- "loss": 0.8245,
2673
- "step": 436
2674
- },
2675
- {
2676
- "epoch": 0.31,
2677
- "learning_rate": 1.6785230833938748e-05,
2678
- "loss": 0.7691,
2679
- "step": 437
2680
- },
2681
- {
2682
- "epoch": 0.31,
2683
- "learning_rate": 1.67841734528652e-05,
2684
- "loss": 0.7861,
2685
- "step": 438
2686
- },
2687
- {
2688
- "epoch": 0.31,
2689
- "learning_rate": 1.6783113508727115e-05,
2690
- "loss": 0.834,
2691
- "step": 439
2692
- },
2693
- {
2694
- "epoch": 0.31,
2695
- "learning_rate": 1.6782051001852433e-05,
2696
- "loss": 0.8019,
2697
- "step": 440
2698
- },
2699
- {
2700
- "epoch": 0.31,
2701
- "learning_rate": 1.6780985932569888e-05,
2702
- "loss": 0.7899,
2703
- "step": 441
2704
- },
2705
- {
2706
- "epoch": 0.31,
2707
- "learning_rate": 1.6779918301208996e-05,
2708
- "loss": 0.7886,
2709
- "step": 442
2710
- },
2711
- {
2712
- "epoch": 0.31,
2713
- "learning_rate": 1.6778848108100087e-05,
2714
- "loss": 0.7996,
2715
- "step": 443
2716
- },
2717
- {
2718
- "epoch": 0.31,
2719
- "learning_rate": 1.677777535357427e-05,
2720
- "loss": 0.7796,
2721
- "step": 444
2722
- },
2723
- {
2724
- "epoch": 0.31,
2725
- "learning_rate": 1.6776700037963432e-05,
2726
- "loss": 0.7893,
2727
- "step": 445
2728
- },
2729
- {
2730
- "epoch": 0.31,
2731
- "learning_rate": 1.6775622161600286e-05,
2732
- "loss": 0.7857,
2733
- "step": 446
2734
- },
2735
- {
2736
- "epoch": 0.31,
2737
- "learning_rate": 1.677454172481831e-05,
2738
- "loss": 0.8031,
2739
- "step": 447
2740
- },
2741
- {
2742
- "epoch": 0.32,
2743
- "learning_rate": 1.6773458727951787e-05,
2744
- "loss": 0.7693,
2745
- "step": 448
2746
- },
2747
- {
2748
- "epoch": 0.32,
2749
- "eval_loss": 0.7626763582229614,
2750
- "eval_runtime": 30.8155,
2751
- "eval_samples_per_second": 265.808,
2752
- "eval_steps_per_second": 16.615,
2753
- "step": 448
2754
- },
2755
- {
2756
- "epoch": 0.32,
2757
- "learning_rate": 1.6772373171335787e-05,
2758
- "loss": 0.789,
2759
- "step": 449
2760
- },
2761
- {
2762
- "epoch": 0.32,
2763
- "learning_rate": 1.6771285055306175e-05,
2764
- "loss": 0.8024,
2765
- "step": 450
2766
- },
2767
- {
2768
- "epoch": 0.32,
2769
- "learning_rate": 1.6770194380199603e-05,
2770
- "loss": 0.7926,
2771
- "step": 451
2772
- },
2773
- {
2774
- "epoch": 0.32,
2775
- "learning_rate": 1.6769101146353522e-05,
2776
- "loss": 0.7943,
2777
- "step": 452
2778
- },
2779
- {
2780
- "epoch": 0.32,
2781
- "learning_rate": 1.676800535410617e-05,
2782
- "loss": 0.7935,
2783
- "step": 453
2784
- },
2785
- {
2786
- "epoch": 0.32,
2787
- "learning_rate": 1.6766907003796574e-05,
2788
- "loss": 0.7821,
2789
- "step": 454
2790
- },
2791
- {
2792
- "epoch": 0.32,
2793
- "learning_rate": 1.6765806095764557e-05,
2794
- "loss": 0.7625,
2795
- "step": 455
2796
- },
2797
- {
2798
- "epoch": 0.32,
2799
- "learning_rate": 1.676470263035074e-05,
2800
- "loss": 0.7916,
2801
- "step": 456
2802
- },
2803
- {
2804
- "epoch": 0.32,
2805
- "learning_rate": 1.6763596607896517e-05,
2806
- "loss": 0.7806,
2807
- "step": 457
2808
- },
2809
- {
2810
- "epoch": 0.32,
2811
- "learning_rate": 1.6762488028744085e-05,
2812
- "loss": 0.7946,
2813
- "step": 458
2814
- },
2815
- {
2816
- "epoch": 0.32,
2817
- "learning_rate": 1.6761376893236436e-05,
2818
- "loss": 0.7895,
2819
- "step": 459
2820
- },
2821
- {
2822
- "epoch": 0.32,
2823
- "learning_rate": 1.6760263201717344e-05,
2824
- "loss": 0.7761,
2825
- "step": 460
2826
- },
2827
- {
2828
- "epoch": 0.32,
2829
- "learning_rate": 1.675914695453138e-05,
2830
- "loss": 0.7972,
2831
- "step": 461
2832
- },
2833
- {
2834
- "epoch": 0.33,
2835
- "learning_rate": 1.6758028152023896e-05,
2836
- "loss": 0.8048,
2837
- "step": 462
2838
- },
2839
- {
2840
- "epoch": 0.33,
2841
- "learning_rate": 1.6756906794541047e-05,
2842
- "loss": 0.7963,
2843
- "step": 463
2844
- },
2845
- {
2846
- "epoch": 0.33,
2847
- "learning_rate": 1.6755782882429774e-05,
2848
- "loss": 0.8188,
2849
- "step": 464
2850
- },
2851
- {
2852
- "epoch": 0.33,
2853
- "learning_rate": 1.6754656416037803e-05,
2854
- "loss": 0.772,
2855
- "step": 465
2856
- },
2857
- {
2858
- "epoch": 0.33,
2859
- "learning_rate": 1.6753527395713663e-05,
2860
- "loss": 0.8051,
2861
- "step": 466
2862
- },
2863
- {
2864
- "epoch": 0.33,
2865
- "learning_rate": 1.6752395821806655e-05,
2866
- "loss": 0.796,
2867
- "step": 467
2868
- },
2869
- {
2870
- "epoch": 0.33,
2871
- "learning_rate": 1.6751261694666885e-05,
2872
- "loss": 0.7662,
2873
- "step": 468
2874
- },
2875
- {
2876
- "epoch": 0.33,
2877
- "learning_rate": 1.6750125014645243e-05,
2878
- "loss": 0.7821,
2879
- "step": 469
2880
- },
2881
- {
2882
- "epoch": 0.33,
2883
- "learning_rate": 1.674898578209341e-05,
2884
- "loss": 0.7934,
2885
- "step": 470
2886
- },
2887
- {
2888
- "epoch": 0.33,
2889
- "learning_rate": 1.6747843997363857e-05,
2890
- "loss": 0.7688,
2891
- "step": 471
2892
- },
2893
- {
2894
- "epoch": 0.33,
2895
- "learning_rate": 1.674669966080984e-05,
2896
- "loss": 0.7957,
2897
- "step": 472
2898
- },
2899
- {
2900
- "epoch": 0.33,
2901
- "learning_rate": 1.6745552772785417e-05,
2902
- "loss": 0.7565,
2903
- "step": 473
2904
- },
2905
- {
2906
- "epoch": 0.33,
2907
- "learning_rate": 1.674440333364542e-05,
2908
- "loss": 0.7563,
2909
- "step": 474
2910
- },
2911
- {
2912
- "epoch": 0.33,
2913
- "learning_rate": 1.6743251343745478e-05,
2914
- "loss": 0.7857,
2915
- "step": 475
2916
- },
2917
- {
2918
- "epoch": 0.34,
2919
- "learning_rate": 1.6742096803442012e-05,
2920
- "loss": 0.8149,
2921
- "step": 476
2922
- },
2923
- {
2924
- "epoch": 0.34,
2925
- "learning_rate": 1.6740939713092225e-05,
2926
- "loss": 0.776,
2927
- "step": 477
2928
- },
2929
- {
2930
- "epoch": 0.34,
2931
- "learning_rate": 1.6739780073054112e-05,
2932
- "loss": 0.7595,
2933
- "step": 478
2934
- },
2935
- {
2936
- "epoch": 0.34,
2937
- "learning_rate": 1.6738617883686464e-05,
2938
- "loss": 0.7764,
2939
- "step": 479
2940
- },
2941
- {
2942
- "epoch": 0.34,
2943
- "learning_rate": 1.673745314534885e-05,
2944
- "loss": 0.7709,
2945
- "step": 480
2946
- },
2947
- {
2948
- "epoch": 0.34,
2949
- "learning_rate": 1.6736285858401627e-05,
2950
- "loss": 0.7807,
2951
- "step": 481
2952
- },
2953
- {
2954
- "epoch": 0.34,
2955
- "learning_rate": 1.673511602320595e-05,
2956
- "loss": 0.7923,
2957
- "step": 482
2958
- },
2959
- {
2960
- "epoch": 0.34,
2961
- "learning_rate": 1.673394364012376e-05,
2962
- "loss": 0.7862,
2963
- "step": 483
2964
- },
2965
- {
2966
- "epoch": 0.34,
2967
- "learning_rate": 1.6732768709517784e-05,
2968
- "loss": 0.7902,
2969
- "step": 484
2970
- },
2971
- {
2972
- "epoch": 0.34,
2973
- "learning_rate": 1.673159123175153e-05,
2974
- "loss": 0.8103,
2975
- "step": 485
2976
- },
2977
- {
2978
- "epoch": 0.34,
2979
- "learning_rate": 1.6730411207189306e-05,
2980
- "loss": 0.8304,
2981
- "step": 486
2982
- },
2983
- {
2984
- "epoch": 0.34,
2985
- "learning_rate": 1.6729228636196203e-05,
2986
- "loss": 0.7832,
2987
- "step": 487
2988
- },
2989
- {
2990
- "epoch": 0.34,
2991
- "learning_rate": 1.6728043519138102e-05,
2992
- "loss": 0.7679,
2993
- "step": 488
2994
- },
2995
- {
2996
- "epoch": 0.34,
2997
- "learning_rate": 1.6726855856381668e-05,
2998
- "loss": 0.7873,
2999
- "step": 489
3000
- },
3001
- {
3002
- "epoch": 0.35,
3003
- "learning_rate": 1.6725665648294355e-05,
3004
- "loss": 0.7932,
3005
- "step": 490
3006
- },
3007
- {
3008
- "epoch": 0.35,
3009
- "learning_rate": 1.6724472895244405e-05,
3010
- "loss": 0.7543,
3011
- "step": 491
3012
- },
3013
- {
3014
- "epoch": 0.35,
3015
- "learning_rate": 1.6723277597600847e-05,
3016
- "loss": 0.7762,
3017
- "step": 492
3018
- },
3019
- {
3020
- "epoch": 0.35,
3021
- "learning_rate": 1.67220797557335e-05,
3022
- "loss": 0.805,
3023
- "step": 493
3024
- },
3025
- {
3026
- "epoch": 0.35,
3027
- "learning_rate": 1.6720879370012967e-05,
3028
- "loss": 0.7891,
3029
- "step": 494
3030
- },
3031
- {
3032
- "epoch": 0.35,
3033
- "learning_rate": 1.6719676440810636e-05,
3034
- "loss": 0.806,
3035
- "step": 495
3036
- },
3037
- {
3038
- "epoch": 0.35,
3039
- "learning_rate": 1.6718470968498685e-05,
3040
- "loss": 0.7886,
3041
- "step": 496
3042
- },
3043
- {
3044
- "epoch": 0.35,
3045
- "learning_rate": 1.6717262953450083e-05,
3046
- "loss": 0.8091,
3047
- "step": 497
3048
- },
3049
- {
3050
- "epoch": 0.35,
3051
- "learning_rate": 1.671605239603858e-05,
3052
- "loss": 0.8032,
3053
- "step": 498
3054
- },
3055
- {
3056
- "epoch": 0.35,
3057
- "learning_rate": 1.671483929663871e-05,
3058
- "loss": 0.8083,
3059
- "step": 499
3060
- },
3061
- {
3062
- "epoch": 0.35,
3063
- "learning_rate": 1.67136236556258e-05,
3064
- "loss": 0.7518,
3065
- "step": 500
3066
- },
3067
- {
3068
- "epoch": 0.35,
3069
- "learning_rate": 1.671240547337596e-05,
3070
- "loss": 0.7648,
3071
- "step": 501
3072
- },
3073
- {
3074
- "epoch": 0.35,
3075
- "learning_rate": 1.671118475026609e-05,
3076
- "loss": 0.7885,
3077
- "step": 502
3078
- },
3079
- {
3080
- "epoch": 0.35,
3081
- "learning_rate": 1.670996148667387e-05,
3082
- "loss": 0.7936,
3083
- "step": 503
3084
- },
3085
- {
3086
- "epoch": 0.35,
3087
- "learning_rate": 1.6708735682977767e-05,
3088
- "loss": 0.808,
3089
- "step": 504
3090
- },
3091
- {
3092
- "epoch": 0.36,
3093
- "learning_rate": 1.6707507339557045e-05,
3094
- "loss": 0.7597,
3095
- "step": 505
3096
- },
3097
- {
3098
- "epoch": 0.36,
3099
- "learning_rate": 1.670627645679173e-05,
3100
- "loss": 0.7953,
3101
- "step": 506
3102
- },
3103
- {
3104
- "epoch": 0.36,
3105
- "learning_rate": 1.6705043035062662e-05,
3106
- "loss": 0.7972,
3107
- "step": 507
3108
- },
3109
- {
3110
- "epoch": 0.36,
3111
- "learning_rate": 1.6703807074751447e-05,
3112
- "loss": 0.8289,
3113
- "step": 508
3114
- },
3115
- {
3116
- "epoch": 0.36,
3117
- "learning_rate": 1.6702568576240482e-05,
3118
- "loss": 0.8059,
3119
- "step": 509
3120
- },
3121
- {
3122
- "epoch": 0.36,
3123
- "learning_rate": 1.6701327539912952e-05,
3124
- "loss": 0.7948,
3125
- "step": 510
3126
- },
3127
- {
3128
- "epoch": 0.36,
3129
- "learning_rate": 1.670008396615282e-05,
3130
- "loss": 0.7763,
3131
- "step": 511
3132
- },
3133
- {
3134
- "epoch": 0.36,
3135
- "learning_rate": 1.6698837855344846e-05,
3136
- "loss": 0.7933,
3137
- "step": 512
3138
- },
3139
- {
3140
- "epoch": 0.36,
3141
- "eval_loss": 0.7596627473831177,
3142
- "eval_runtime": 30.8283,
3143
- "eval_samples_per_second": 265.697,
3144
- "eval_steps_per_second": 16.608,
3145
- "step": 512
3146
- },
3147
- {
3148
- "epoch": 0.36,
3149
- "learning_rate": 1.6697589207874564e-05,
3150
- "loss": 0.7925,
3151
- "step": 513
3152
- },
3153
- {
3154
- "epoch": 0.36,
3155
- "learning_rate": 1.6696338024128295e-05,
3156
- "loss": 0.7741,
3157
- "step": 514
3158
- },
3159
- {
3160
- "epoch": 0.36,
3161
- "learning_rate": 1.669508430449315e-05,
3162
- "loss": 0.7993,
3163
- "step": 515
3164
- },
3165
- {
3166
- "epoch": 0.36,
3167
- "learning_rate": 1.6693828049357022e-05,
3168
- "loss": 0.8138,
3169
- "step": 516
3170
- },
3171
- {
3172
- "epoch": 0.36,
3173
- "learning_rate": 1.669256925910858e-05,
3174
- "loss": 0.7782,
3175
- "step": 517
3176
- },
3177
- {
3178
- "epoch": 0.36,
3179
- "learning_rate": 1.6691307934137292e-05,
3180
- "loss": 0.7814,
3181
- "step": 518
3182
- },
3183
- {
3184
- "epoch": 0.37,
3185
- "learning_rate": 1.66900440748334e-05,
3186
- "loss": 0.7569,
3187
- "step": 519
3188
- },
3189
- {
3190
- "epoch": 0.37,
3191
- "learning_rate": 1.6688777681587937e-05,
3192
- "loss": 0.7742,
3193
- "step": 520
3194
- },
3195
- {
3196
- "epoch": 0.37,
3197
- "learning_rate": 1.668750875479271e-05,
3198
- "loss": 0.7797,
3199
- "step": 521
3200
- },
3201
- {
3202
- "epoch": 0.37,
3203
- "learning_rate": 1.6686237294840315e-05,
3204
- "loss": 0.7802,
3205
- "step": 522
3206
- },
3207
- {
3208
- "epoch": 0.37,
3209
- "learning_rate": 1.6684963302124142e-05,
3210
- "loss": 0.7829,
3211
- "step": 523
3212
- },
3213
- {
3214
- "epoch": 0.37,
3215
- "learning_rate": 1.668368677703835e-05,
3216
- "loss": 0.8244,
3217
- "step": 524
3218
- },
3219
- {
3220
- "epoch": 0.37,
3221
- "learning_rate": 1.668240771997788e-05,
3222
- "loss": 0.7899,
3223
- "step": 525
3224
- },
3225
- {
3226
- "epoch": 0.37,
3227
- "learning_rate": 1.6681126131338476e-05,
3228
- "loss": 0.7653,
3229
- "step": 526
3230
- },
3231
- {
3232
- "epoch": 0.37,
3233
- "learning_rate": 1.6679842011516643e-05,
3234
- "loss": 0.7826,
3235
- "step": 527
3236
- },
3237
- {
3238
- "epoch": 0.37,
3239
- "learning_rate": 1.6678555360909683e-05,
3240
- "loss": 0.8299,
3241
- "step": 528
3242
- },
3243
- {
3244
- "epoch": 0.37,
3245
- "learning_rate": 1.6677266179915677e-05,
3246
- "loss": 0.7942,
3247
- "step": 529
3248
- },
3249
- {
3250
- "epoch": 0.37,
3251
- "learning_rate": 1.6675974468933486e-05,
3252
- "loss": 0.7867,
3253
- "step": 530
3254
- },
3255
- {
3256
- "epoch": 0.37,
3257
- "learning_rate": 1.6674680228362756e-05,
3258
- "loss": 0.7607,
3259
- "step": 531
3260
- },
3261
- {
3262
- "epoch": 0.37,
3263
- "learning_rate": 1.6673383458603918e-05,
3264
- "loss": 0.7654,
3265
- "step": 532
3266
- },
3267
- {
3268
- "epoch": 0.38,
3269
- "learning_rate": 1.667208416005818e-05,
3270
- "loss": 0.8049,
3271
- "step": 533
3272
- },
3273
- {
3274
- "epoch": 0.38,
3275
- "learning_rate": 1.667078233312754e-05,
3276
- "loss": 0.7771,
3277
- "step": 534
3278
- },
3279
- {
3280
- "epoch": 0.38,
3281
- "learning_rate": 1.666947797821477e-05,
3282
- "loss": 0.7967,
3283
- "step": 535
3284
- },
3285
- {
3286
- "epoch": 0.38,
3287
- "learning_rate": 1.6668171095723432e-05,
3288
- "loss": 0.7697,
3289
- "step": 536
3290
- },
3291
- {
3292
- "epoch": 0.38,
3293
- "learning_rate": 1.6666861686057864e-05,
3294
- "loss": 0.7902,
3295
- "step": 537
3296
- },
3297
- {
3298
- "epoch": 0.38,
3299
- "learning_rate": 1.6665549749623188e-05,
3300
- "loss": 0.7899,
3301
- "step": 538
3302
- },
3303
- {
3304
- "epoch": 0.38,
3305
- "learning_rate": 1.6664235286825306e-05,
3306
- "loss": 0.7745,
3307
- "step": 539
3308
- },
3309
- {
3310
- "epoch": 0.38,
3311
- "learning_rate": 1.6662918298070913e-05,
3312
- "loss": 0.7855,
3313
- "step": 540
3314
- },
3315
- {
3316
- "epoch": 0.38,
3317
- "learning_rate": 1.666159878376746e-05,
3318
- "loss": 0.7599,
3319
- "step": 541
3320
- },
3321
- {
3322
- "epoch": 0.38,
3323
- "learning_rate": 1.666027674432321e-05,
3324
- "loss": 0.7473,
3325
- "step": 542
3326
- },
3327
- {
3328
- "epoch": 0.38,
3329
- "learning_rate": 1.6658952180147184e-05,
3330
- "loss": 0.8001,
3331
- "step": 543
3332
- },
3333
- {
3334
- "epoch": 0.38,
3335
- "learning_rate": 1.6657625091649198e-05,
3336
- "loss": 0.7813,
3337
- "step": 544
3338
- },
3339
- {
3340
- "epoch": 0.38,
3341
- "learning_rate": 1.665629547923984e-05,
3342
- "loss": 0.7844,
3343
- "step": 545
3344
- },
3345
- {
3346
- "epoch": 0.38,
3347
- "learning_rate": 1.6654963343330485e-05,
3348
- "loss": 0.7768,
3349
- "step": 546
3350
- },
3351
- {
3352
- "epoch": 0.39,
3353
- "learning_rate": 1.665362868433328e-05,
3354
- "loss": 0.7846,
3355
- "step": 547
3356
- },
3357
- {
3358
- "epoch": 0.39,
3359
- "learning_rate": 1.6652291502661167e-05,
3360
- "loss": 0.7653,
3361
- "step": 548
3362
- },
3363
- {
3364
- "epoch": 0.39,
3365
- "learning_rate": 1.665095179872786e-05,
3366
- "loss": 0.7992,
3367
- "step": 549
3368
- },
3369
- {
3370
- "epoch": 0.39,
3371
- "learning_rate": 1.6649609572947853e-05,
3372
- "loss": 0.8163,
3373
- "step": 550
3374
- },
3375
- {
3376
- "epoch": 0.39,
3377
- "learning_rate": 1.6648264825736417e-05,
3378
- "loss": 0.7728,
3379
- "step": 551
3380
- },
3381
- {
3382
- "epoch": 0.39,
3383
- "learning_rate": 1.664691755750961e-05,
3384
- "loss": 0.7804,
3385
- "step": 552
3386
- },
3387
- {
3388
- "epoch": 0.39,
3389
- "learning_rate": 1.6645567768684268e-05,
3390
- "loss": 0.7294,
3391
- "step": 553
3392
- },
3393
- {
3394
- "epoch": 0.39,
3395
- "learning_rate": 1.6644215459678008e-05,
3396
- "loss": 0.768,
3397
- "step": 554
3398
- },
3399
- {
3400
- "epoch": 0.39,
3401
- "learning_rate": 1.6642860630909218e-05,
3402
- "loss": 0.8061,
3403
- "step": 555
3404
- },
3405
- {
3406
- "epoch": 0.39,
3407
- "learning_rate": 1.664150328279708e-05,
3408
- "loss": 0.7684,
3409
- "step": 556
3410
- },
3411
- {
3412
- "epoch": 0.39,
3413
- "learning_rate": 1.6640143415761543e-05,
3414
- "loss": 0.7731,
3415
- "step": 557
3416
- },
3417
- {
3418
- "epoch": 0.39,
3419
- "learning_rate": 1.6638781030223344e-05,
3420
- "loss": 0.7596,
3421
- "step": 558
3422
- },
3423
- {
3424
- "epoch": 0.39,
3425
- "learning_rate": 1.663741612660399e-05,
3426
- "loss": 0.7558,
3427
- "step": 559
3428
- },
3429
- {
3430
- "epoch": 0.39,
3431
- "learning_rate": 1.6636048705325778e-05,
3432
- "loss": 0.7913,
3433
- "step": 560
3434
- },
3435
- {
3436
- "epoch": 0.4,
3437
- "learning_rate": 1.6634678766811776e-05,
3438
- "loss": 0.8199,
3439
- "step": 561
3440
- },
3441
- {
3442
- "epoch": 0.4,
3443
- "learning_rate": 1.6633306311485835e-05,
3444
- "loss": 0.8068,
3445
- "step": 562
3446
- },
3447
- {
3448
- "epoch": 0.4,
3449
- "learning_rate": 1.6631931339772576e-05,
3450
- "loss": 0.8131,
3451
- "step": 563
3452
- },
3453
- {
3454
- "epoch": 0.4,
3455
- "learning_rate": 1.6630553852097416e-05,
3456
- "loss": 0.7612,
3457
- "step": 564
3458
- },
3459
- {
3460
- "epoch": 0.4,
3461
- "learning_rate": 1.6629173848886533e-05,
3462
- "loss": 0.763,
3463
- "step": 565
3464
- },
3465
- {
3466
- "epoch": 0.4,
3467
- "learning_rate": 1.6627791330566892e-05,
3468
- "loss": 0.7748,
3469
- "step": 566
3470
- },
3471
- {
3472
- "epoch": 0.4,
3473
- "learning_rate": 1.6626406297566236e-05,
3474
- "loss": 0.7856,
3475
- "step": 567
3476
- },
3477
- {
3478
- "epoch": 0.4,
3479
- "learning_rate": 1.662501875031308e-05,
3480
- "loss": 0.8002,
3481
- "step": 568
3482
- },
3483
- {
3484
- "epoch": 0.4,
3485
- "learning_rate": 1.6623628689236728e-05,
3486
- "loss": 0.7847,
3487
- "step": 569
3488
- },
3489
- {
3490
- "epoch": 0.4,
3491
- "learning_rate": 1.662223611476725e-05,
3492
- "loss": 0.7852,
3493
- "step": 570
3494
- },
3495
- {
3496
- "epoch": 0.4,
3497
- "learning_rate": 1.6620841027335495e-05,
3498
- "loss": 0.7828,
3499
- "step": 571
3500
- },
3501
- {
3502
- "epoch": 0.4,
3503
- "learning_rate": 1.6619443427373103e-05,
3504
- "loss": 0.7516,
3505
- "step": 572
3506
- },
3507
- {
3508
- "epoch": 0.4,
3509
- "learning_rate": 1.6618043315312473e-05,
3510
- "loss": 0.7916,
3511
- "step": 573
3512
- },
3513
- {
3514
- "epoch": 0.4,
3515
- "learning_rate": 1.6616640691586798e-05,
3516
- "loss": 0.7788,
3517
- "step": 574
3518
- },
3519
- {
3520
- "epoch": 0.4,
3521
- "learning_rate": 1.661523555663003e-05,
3522
- "loss": 0.7618,
3523
- "step": 575
3524
- },
3525
- {
3526
- "epoch": 0.41,
3527
- "learning_rate": 1.6613827910876918e-05,
3528
- "loss": 0.7454,
3529
- "step": 576
3530
- },
3531
- {
3532
- "epoch": 0.41,
3533
- "eval_loss": 0.7561846375465393,
3534
- "eval_runtime": 30.83,
3535
- "eval_samples_per_second": 265.683,
3536
- "eval_steps_per_second": 16.607,
3537
- "step": 576
3538
- },
3539
- {
3540
- "epoch": 0.41,
3541
- "learning_rate": 1.661241775476297e-05,
3542
- "loss": 0.8057,
3543
- "step": 577
3544
- },
3545
- {
3546
- "epoch": 0.41,
3547
- "learning_rate": 1.661100508872448e-05,
3548
- "loss": 0.791,
3549
- "step": 578
3550
- },
3551
- {
3552
- "epoch": 0.41,
3553
- "learning_rate": 1.6609589913198517e-05,
3554
- "loss": 0.7781,
3555
- "step": 579
3556
- },
3557
- {
3558
- "epoch": 0.41,
3559
- "learning_rate": 1.6608172228622932e-05,
3560
- "loss": 0.7609,
3561
- "step": 580
3562
- },
3563
- {
3564
- "epoch": 0.41,
3565
- "learning_rate": 1.6606752035436335e-05,
3566
- "loss": 0.7903,
3567
- "step": 581
3568
- },
3569
- {
3570
- "epoch": 0.41,
3571
- "learning_rate": 1.6605329334078134e-05,
3572
- "loss": 0.7905,
3573
- "step": 582
3574
- },
3575
- {
3576
- "epoch": 0.41,
3577
- "learning_rate": 1.6603904124988496e-05,
3578
- "loss": 0.7614,
3579
- "step": 583
3580
- },
3581
- {
3582
- "epoch": 0.41,
3583
- "learning_rate": 1.660247640860838e-05,
3584
- "loss": 0.7791,
3585
- "step": 584
3586
- },
3587
- {
3588
- "epoch": 0.41,
3589
- "learning_rate": 1.6601046185379498e-05,
3590
- "loss": 0.7685,
3591
- "step": 585
3592
- },
3593
- {
3594
- "epoch": 0.41,
3595
- "learning_rate": 1.6599613455744358e-05,
3596
- "loss": 0.764,
3597
- "step": 586
3598
- },
3599
- {
3600
- "epoch": 0.41,
3601
- "learning_rate": 1.6598178220146238e-05,
3602
- "loss": 0.7461,
3603
- "step": 587
3604
- },
3605
- {
3606
- "epoch": 0.41,
3607
- "learning_rate": 1.659674047902919e-05,
3608
- "loss": 0.8095,
3609
- "step": 588
3610
- },
3611
- {
3612
- "epoch": 0.41,
3613
- "learning_rate": 1.6595300232838038e-05,
3614
- "loss": 0.77,
3615
- "step": 589
3616
- },
3617
- {
3618
- "epoch": 0.42,
3619
- "learning_rate": 1.6593857482018383e-05,
3620
- "loss": 0.7693,
3621
- "step": 590
3622
- },
3623
- {
3624
- "epoch": 0.42,
3625
- "learning_rate": 1.6592412227016604e-05,
3626
- "loss": 0.8061,
3627
- "step": 591
3628
- },
3629
- {
3630
- "epoch": 0.42,
3631
- "learning_rate": 1.659096446827985e-05,
3632
- "loss": 0.7873,
3633
- "step": 592
3634
- },
3635
- {
3636
- "epoch": 0.42,
3637
- "learning_rate": 1.6589514206256052e-05,
3638
- "loss": 0.8178,
3639
- "step": 593
3640
- },
3641
- {
3642
- "epoch": 0.42,
3643
- "learning_rate": 1.6588061441393908e-05,
3644
- "loss": 0.7476,
3645
- "step": 594
3646
- },
3647
- {
3648
- "epoch": 0.42,
3649
- "learning_rate": 1.6586606174142894e-05,
3650
- "loss": 0.8013,
3651
- "step": 595
3652
- },
3653
- {
3654
- "epoch": 0.42,
3655
- "learning_rate": 1.6585148404953258e-05,
3656
- "loss": 0.7909,
3657
- "step": 596
3658
- },
3659
- {
3660
- "epoch": 0.42,
3661
- "learning_rate": 1.6583688134276026e-05,
3662
- "loss": 0.7868,
3663
- "step": 597
3664
- },
3665
- {
3666
- "epoch": 0.42,
3667
- "learning_rate": 1.6582225362562994e-05,
3668
- "loss": 0.7774,
3669
- "step": 598
3670
- },
3671
- {
3672
- "epoch": 0.42,
3673
- "learning_rate": 1.658076009026673e-05,
3674
- "loss": 0.7855,
3675
- "step": 599
3676
- },
3677
- {
3678
- "epoch": 0.42,
3679
- "learning_rate": 1.6579292317840583e-05,
3680
- "loss": 0.7835,
3681
- "step": 600
3682
- },
3683
- {
3684
- "epoch": 0.42,
3685
- "learning_rate": 1.657782204573867e-05,
3686
- "loss": 0.766,
3687
- "step": 601
3688
- },
3689
- {
3690
- "epoch": 0.42,
3691
- "learning_rate": 1.6576349274415883e-05,
3692
- "loss": 0.784,
3693
- "step": 602
3694
- },
3695
- {
3696
- "epoch": 0.42,
3697
- "learning_rate": 1.657487400432789e-05,
3698
- "loss": 0.7921,
3699
- "step": 603
3700
- },
3701
- {
3702
- "epoch": 0.43,
3703
- "learning_rate": 1.657339623593112e-05,
3704
- "loss": 0.7818,
3705
- "step": 604
3706
- },
3707
- {
3708
- "epoch": 0.43,
3709
- "learning_rate": 1.6571915969682794e-05,
3710
- "loss": 0.7605,
3711
- "step": 605
3712
- },
3713
- {
3714
- "epoch": 0.43,
3715
- "learning_rate": 1.6570433206040893e-05,
3716
- "loss": 0.7997,
3717
- "step": 606
3718
- },
3719
- {
3720
- "epoch": 0.43,
3721
- "learning_rate": 1.6568947945464168e-05,
3722
- "loss": 0.8047,
3723
- "step": 607
3724
- },
3725
- {
3726
- "epoch": 0.43,
3727
- "learning_rate": 1.6567460188412156e-05,
3728
- "loss": 0.7786,
3729
- "step": 608
3730
- },
3731
- {
3732
- "epoch": 0.43,
3733
- "learning_rate": 1.6565969935345153e-05,
3734
- "loss": 0.8137,
3735
- "step": 609
3736
- },
3737
- {
3738
- "epoch": 0.43,
3739
- "learning_rate": 1.6564477186724236e-05,
3740
- "loss": 0.7634,
3741
- "step": 610
3742
- },
3743
- {
3744
- "epoch": 0.43,
3745
- "learning_rate": 1.6562981943011252e-05,
3746
- "loss": 0.7821,
3747
- "step": 611
3748
- },
3749
- {
3750
- "epoch": 0.43,
3751
- "learning_rate": 1.6561484204668815e-05,
3752
- "loss": 0.7642,
3753
- "step": 612
3754
- },
3755
- {
3756
- "epoch": 0.43,
3757
- "learning_rate": 1.6559983972160318e-05,
3758
- "loss": 0.7534,
3759
- "step": 613
3760
- },
3761
- {
3762
- "epoch": 0.43,
3763
- "learning_rate": 1.6558481245949917e-05,
3764
- "loss": 0.7722,
3765
- "step": 614
3766
- },
3767
- {
3768
- "epoch": 0.43,
3769
- "learning_rate": 1.6556976026502548e-05,
3770
- "loss": 0.8014,
3771
- "step": 615
3772
- },
3773
- {
3774
- "epoch": 0.43,
3775
- "learning_rate": 1.6555468314283918e-05,
3776
- "loss": 0.7874,
3777
- "step": 616
3778
- },
3779
- {
3780
- "epoch": 0.43,
3781
- "learning_rate": 1.6553958109760497e-05,
3782
- "loss": 0.797,
3783
- "step": 617
3784
- },
3785
- {
3786
- "epoch": 0.44,
3787
- "learning_rate": 1.6552445413399537e-05,
3788
- "loss": 0.7962,
3789
- "step": 618
3790
- },
3791
- {
3792
- "epoch": 0.44,
3793
- "learning_rate": 1.6550930225669052e-05,
3794
- "loss": 0.7651,
3795
- "step": 619
3796
- },
3797
- {
3798
- "epoch": 0.44,
3799
- "learning_rate": 1.6549412547037832e-05,
3800
- "loss": 0.7615,
3801
- "step": 620
3802
- },
3803
- {
3804
- "epoch": 0.44,
3805
- "learning_rate": 1.6547892377975435e-05,
3806
- "loss": 0.7682,
3807
- "step": 621
3808
- },
3809
- {
3810
- "epoch": 0.44,
3811
- "learning_rate": 1.654636971895219e-05,
3812
- "loss": 0.7818,
3813
- "step": 622
3814
- },
3815
- {
3816
- "epoch": 0.44,
3817
- "learning_rate": 1.6544844570439202e-05,
3818
- "loss": 0.7884,
3819
- "step": 623
3820
- },
3821
- {
3822
- "epoch": 0.44,
3823
- "learning_rate": 1.654331693290833e-05,
3824
- "loss": 0.7764,
3825
- "step": 624
3826
- },
3827
- {
3828
- "epoch": 0.44,
3829
- "learning_rate": 1.6541786806832228e-05,
3830
- "loss": 0.7752,
3831
- "step": 625
3832
- },
3833
- {
3834
- "epoch": 0.44,
3835
- "learning_rate": 1.6540254192684294e-05,
3836
- "loss": 0.8551,
3837
- "step": 626
3838
- },
3839
- {
3840
- "epoch": 0.44,
3841
- "learning_rate": 1.6538719090938718e-05,
3842
- "loss": 0.7922,
3843
- "step": 627
3844
- },
3845
- {
3846
- "epoch": 0.44,
3847
- "learning_rate": 1.653718150207044e-05,
3848
- "loss": 0.766,
3849
- "step": 628
3850
- },
3851
- {
3852
- "epoch": 0.44,
3853
- "learning_rate": 1.6535641426555188e-05,
3854
- "loss": 0.7865,
3855
- "step": 629
3856
- },
3857
- {
3858
- "epoch": 0.44,
3859
- "learning_rate": 1.6534098864869448e-05,
3860
- "loss": 0.7835,
3861
- "step": 630
3862
- },
3863
- {
3864
- "epoch": 0.44,
3865
- "learning_rate": 1.653255381749047e-05,
3866
- "loss": 0.8001,
3867
- "step": 631
3868
- },
3869
- {
3870
- "epoch": 0.45,
3871
- "learning_rate": 1.653100628489629e-05,
3872
- "loss": 0.7718,
3873
- "step": 632
3874
- },
3875
- {
3876
- "epoch": 0.45,
3877
- "learning_rate": 1.6529456267565702e-05,
3878
- "loss": 0.7914,
3879
- "step": 633
3880
- },
3881
- {
3882
- "epoch": 0.45,
3883
- "learning_rate": 1.6527903765978267e-05,
3884
- "loss": 0.7814,
3885
- "step": 634
3886
- },
3887
- {
3888
- "epoch": 0.45,
3889
- "learning_rate": 1.6526348780614318e-05,
3890
- "loss": 0.7779,
3891
- "step": 635
3892
- },
3893
- {
3894
- "epoch": 0.45,
3895
- "learning_rate": 1.652479131195496e-05,
3896
- "loss": 0.7669,
3897
- "step": 636
3898
- },
3899
- {
3900
- "epoch": 0.45,
3901
- "learning_rate": 1.6523231360482063e-05,
3902
- "loss": 0.783,
3903
- "step": 637
3904
- },
3905
- {
3906
- "epoch": 0.45,
3907
- "learning_rate": 1.652166892667826e-05,
3908
- "loss": 0.7499,
3909
- "step": 638
3910
- },
3911
- {
3912
- "epoch": 0.45,
3913
- "learning_rate": 1.6520104011026962e-05,
3914
- "loss": 0.7878,
3915
- "step": 639
3916
- },
3917
- {
3918
- "epoch": 0.45,
3919
- "learning_rate": 1.651853661401234e-05,
3920
- "loss": 0.8014,
3921
- "step": 640
3922
- },
3923
- {
3924
- "epoch": 0.45,
3925
- "eval_loss": 0.7539774179458618,
3926
- "eval_runtime": 30.8032,
3927
- "eval_samples_per_second": 265.914,
3928
- "eval_steps_per_second": 16.622,
3929
- "step": 640
3930
- },
3931
- {
3932
- "epoch": 0.45,
3933
- "learning_rate": 1.6516966736119334e-05,
3934
- "loss": 0.8004,
3935
- "step": 641
3936
- },
3937
- {
3938
- "epoch": 0.45,
3939
- "learning_rate": 1.6515394377833657e-05,
3940
- "loss": 0.7862,
3941
- "step": 642
3942
- },
3943
- {
3944
- "epoch": 0.45,
3945
- "learning_rate": 1.6513819539641783e-05,
3946
- "loss": 0.79,
3947
- "step": 643
3948
- },
3949
- {
3950
- "epoch": 0.45,
3951
- "learning_rate": 1.651224222203096e-05,
3952
- "loss": 0.7791,
3953
- "step": 644
3954
- },
3955
- {
3956
- "epoch": 0.45,
3957
- "learning_rate": 1.6510662425489193e-05,
3958
- "loss": 0.7774,
3959
- "step": 645
3960
- },
3961
- {
3962
- "epoch": 0.45,
3963
- "learning_rate": 1.6509080150505262e-05,
3964
- "loss": 0.7576,
3965
- "step": 646
3966
- },
3967
- {
3968
- "epoch": 0.46,
3969
- "learning_rate": 1.650749539756871e-05,
3970
- "loss": 0.7654,
3971
- "step": 647
3972
- },
3973
- {
3974
- "epoch": 0.46,
3975
- "learning_rate": 1.6505908167169852e-05,
3976
- "loss": 0.8208,
3977
- "step": 648
3978
- },
3979
- {
3980
- "epoch": 0.46,
3981
- "learning_rate": 1.6504318459799767e-05,
3982
- "loss": 0.7517,
3983
- "step": 649
3984
- },
3985
- {
3986
- "epoch": 0.46,
3987
- "learning_rate": 1.6502726275950295e-05,
3988
- "loss": 0.7944,
3989
- "step": 650
3990
- },
3991
- {
3992
- "epoch": 0.46,
3993
- "learning_rate": 1.6501131616114044e-05,
3994
- "loss": 0.7333,
3995
- "step": 651
3996
- },
3997
- {
3998
- "epoch": 0.46,
3999
- "learning_rate": 1.6499534480784395e-05,
4000
- "loss": 0.7699,
4001
- "step": 652
4002
- },
4003
- {
4004
- "epoch": 0.46,
4005
- "learning_rate": 1.649793487045549e-05,
4006
- "loss": 0.7663,
4007
- "step": 653
4008
- },
4009
- {
4010
- "epoch": 0.46,
4011
- "learning_rate": 1.649633278562224e-05,
4012
- "loss": 0.7792,
4013
- "step": 654
4014
- },
4015
- {
4016
- "epoch": 0.46,
4017
- "learning_rate": 1.6494728226780315e-05,
4018
- "loss": 0.7798,
4019
- "step": 655
4020
- },
4021
- {
4022
- "epoch": 0.46,
4023
- "learning_rate": 1.649312119442615e-05,
4024
- "loss": 0.78,
4025
- "step": 656
4026
- },
4027
- {
4028
- "epoch": 0.46,
4029
- "learning_rate": 1.649151168905696e-05,
4030
- "loss": 0.7839,
4031
- "step": 657
4032
- },
4033
- {
4034
- "epoch": 0.46,
4035
- "learning_rate": 1.64898997111707e-05,
4036
- "loss": 0.8006,
4037
- "step": 658
4038
- },
4039
- {
4040
- "epoch": 0.46,
4041
- "learning_rate": 1.6488285261266118e-05,
4042
- "loss": 0.7979,
4043
- "step": 659
4044
- },
4045
- {
4046
- "epoch": 0.46,
4047
- "learning_rate": 1.648666833984271e-05,
4048
- "loss": 0.7729,
4049
- "step": 660
4050
- },
4051
- {
4052
- "epoch": 0.47,
4053
- "learning_rate": 1.6485048947400734e-05,
4054
- "loss": 0.769,
4055
- "step": 661
4056
- },
4057
- {
4058
- "epoch": 0.47,
4059
- "learning_rate": 1.6483427084441225e-05,
4060
- "loss": 0.7953,
4061
- "step": 662
4062
- },
4063
- {
4064
- "epoch": 0.47,
4065
- "learning_rate": 1.6481802751465975e-05,
4066
- "loss": 0.7912,
4067
- "step": 663
4068
- },
4069
- {
4070
- "epoch": 0.47,
4071
- "learning_rate": 1.648017594897754e-05,
4072
- "loss": 0.8147,
4073
- "step": 664
4074
- },
4075
- {
4076
- "epoch": 0.47,
4077
- "learning_rate": 1.6478546677479236e-05,
4078
- "loss": 0.7766,
4079
- "step": 665
4080
- },
4081
- {
4082
- "epoch": 0.47,
4083
- "learning_rate": 1.6476914937475158e-05,
4084
- "loss": 0.7832,
4085
- "step": 666
4086
- },
4087
- {
4088
- "epoch": 0.47,
4089
- "learning_rate": 1.6475280729470148e-05,
4090
- "loss": 0.7996,
4091
- "step": 667
4092
- },
4093
- {
4094
- "epoch": 0.47,
4095
- "learning_rate": 1.647364405396982e-05,
4096
- "loss": 0.7868,
4097
- "step": 668
4098
- },
4099
- {
4100
- "epoch": 0.47,
4101
- "learning_rate": 1.6472004911480552e-05,
4102
- "loss": 0.7907,
4103
- "step": 669
4104
- },
4105
- {
4106
- "epoch": 0.47,
4107
- "learning_rate": 1.647036330250948e-05,
4108
- "loss": 0.8098,
4109
- "step": 670
4110
- },
4111
- {
4112
- "epoch": 0.47,
4113
- "learning_rate": 1.6468719227564504e-05,
4114
- "loss": 0.7614,
4115
- "step": 671
4116
- },
4117
- {
4118
- "epoch": 0.47,
4119
- "learning_rate": 1.6467072687154296e-05,
4120
- "loss": 0.7865,
4121
- "step": 672
4122
- },
4123
- {
4124
- "epoch": 0.47,
4125
- "learning_rate": 1.646542368178828e-05,
4126
- "loss": 0.8036,
4127
- "step": 673
4128
- },
4129
- {
4130
- "epoch": 0.47,
4131
- "learning_rate": 1.646377221197665e-05,
4132
- "loss": 0.7822,
4133
- "step": 674
4134
- },
4135
- {
4136
- "epoch": 0.48,
4137
- "learning_rate": 1.646211827823035e-05,
4138
- "loss": 0.7648,
4139
- "step": 675
4140
- },
4141
- {
4142
- "epoch": 0.48,
4143
- "learning_rate": 1.6460461881061103e-05,
4144
- "loss": 0.7603,
4145
- "step": 676
4146
- },
4147
- {
4148
- "epoch": 0.48,
4149
- "learning_rate": 1.6458803020981386e-05,
4150
- "loss": 0.7731,
4151
- "step": 677
4152
- },
4153
- {
4154
- "epoch": 0.48,
4155
- "learning_rate": 1.6457141698504437e-05,
4156
- "loss": 0.8007,
4157
- "step": 678
4158
- },
4159
- {
4160
- "epoch": 0.48,
4161
- "learning_rate": 1.6455477914144258e-05,
4162
- "loss": 0.7957,
4163
- "step": 679
4164
- },
4165
- {
4166
- "epoch": 0.48,
4167
- "learning_rate": 1.645381166841561e-05,
4168
- "loss": 0.7826,
4169
- "step": 680
4170
- },
4171
- {
4172
- "epoch": 0.48,
4173
- "learning_rate": 1.6452142961834022e-05,
4174
- "loss": 0.7959,
4175
- "step": 681
4176
- },
4177
- {
4178
- "epoch": 0.48,
4179
- "learning_rate": 1.6450471794915778e-05,
4180
- "loss": 0.7768,
4181
- "step": 682
4182
- },
4183
- {
4184
- "epoch": 0.48,
4185
- "learning_rate": 1.644879816817792e-05,
4186
- "loss": 0.7712,
4187
- "step": 683
4188
- },
4189
- {
4190
- "epoch": 0.48,
4191
- "learning_rate": 1.6447122082138263e-05,
4192
- "loss": 0.7683,
4193
- "step": 684
4194
- },
4195
- {
4196
- "epoch": 0.48,
4197
- "learning_rate": 1.6445443537315378e-05,
4198
- "loss": 0.7876,
4199
- "step": 685
4200
- },
4201
- {
4202
- "epoch": 0.48,
4203
- "learning_rate": 1.6443762534228585e-05,
4204
- "loss": 0.7799,
4205
- "step": 686
4206
- },
4207
- {
4208
- "epoch": 0.48,
4209
- "learning_rate": 1.6442079073397986e-05,
4210
- "loss": 0.7706,
4211
- "step": 687
4212
- },
4213
- {
4214
- "epoch": 0.48,
4215
- "learning_rate": 1.6440393155344423e-05,
4216
- "loss": 0.7309,
4217
- "step": 688
4218
- },
4219
- {
4220
- "epoch": 0.49,
4221
- "learning_rate": 1.643870478058951e-05,
4222
- "loss": 0.7426,
4223
- "step": 689
4224
- },
4225
- {
4226
- "epoch": 0.49,
4227
- "learning_rate": 1.643701394965562e-05,
4228
- "loss": 0.7585,
4229
- "step": 690
4230
- },
4231
- {
4232
- "epoch": 0.49,
4233
- "learning_rate": 1.6435320663065885e-05,
4234
- "loss": 0.7605,
4235
- "step": 691
4236
- },
4237
- {
4238
- "epoch": 0.49,
4239
- "learning_rate": 1.6433624921344188e-05,
4240
- "loss": 0.7748,
4241
- "step": 692
4242
- },
4243
- {
4244
- "epoch": 0.49,
4245
- "learning_rate": 1.6431926725015187e-05,
4246
- "loss": 0.7942,
4247
- "step": 693
4248
- },
4249
- {
4250
- "epoch": 0.49,
4251
- "learning_rate": 1.6430226074604292e-05,
4252
- "loss": 0.7809,
4253
- "step": 694
4254
- },
4255
- {
4256
- "epoch": 0.49,
4257
- "learning_rate": 1.6428522970637667e-05,
4258
- "loss": 0.748,
4259
- "step": 695
4260
- },
4261
- {
4262
- "epoch": 0.49,
4263
- "learning_rate": 1.6426817413642244e-05,
4264
- "loss": 0.7935,
4265
- "step": 696
4266
- },
4267
- {
4268
- "epoch": 0.49,
4269
- "learning_rate": 1.6425109404145712e-05,
4270
- "loss": 0.7717,
4271
- "step": 697
4272
- },
4273
- {
4274
- "epoch": 0.49,
4275
- "learning_rate": 1.6423398942676513e-05,
4276
- "loss": 0.8026,
4277
- "step": 698
4278
- },
4279
- {
4280
- "epoch": 0.49,
4281
- "learning_rate": 1.642168602976385e-05,
4282
- "loss": 0.7879,
4283
- "step": 699
4284
- },
4285
- {
4286
- "epoch": 0.49,
4287
- "learning_rate": 1.6419970665937696e-05,
4288
- "loss": 0.8023,
4289
- "step": 700
4290
- },
4291
- {
4292
- "epoch": 0.49,
4293
- "learning_rate": 1.641825285172876e-05,
4294
- "loss": 0.7342,
4295
- "step": 701
4296
- },
4297
- {
4298
- "epoch": 0.49,
4299
- "learning_rate": 1.641653258766853e-05,
4300
- "loss": 0.8106,
4301
- "step": 702
4302
- },
4303
- {
4304
- "epoch": 0.5,
4305
- "learning_rate": 1.6414809874289243e-05,
4306
- "loss": 0.783,
4307
- "step": 703
4308
- },
4309
- {
4310
- "epoch": 0.5,
4311
- "learning_rate": 1.641308471212389e-05,
4312
- "loss": 0.7829,
4313
- "step": 704
4314
- },
4315
- {
4316
- "epoch": 0.5,
4317
- "eval_loss": 0.7520720958709717,
4318
- "eval_runtime": 30.8053,
4319
- "eval_samples_per_second": 265.896,
4320
- "eval_steps_per_second": 16.621,
4321
- "step": 704
4322
- },
4323
- {
4324
- "epoch": 0.5,
4325
- "learning_rate": 1.6411357101706223e-05,
4326
- "loss": 0.7872,
4327
- "step": 705
4328
- },
4329
- {
4330
- "epoch": 0.5,
4331
- "learning_rate": 1.6409627043570755e-05,
4332
- "loss": 0.7664,
4333
- "step": 706
4334
- },
4335
- {
4336
- "epoch": 0.5,
4337
- "learning_rate": 1.640789453825276e-05,
4338
- "loss": 0.7716,
4339
- "step": 707
4340
- },
4341
- {
4342
- "epoch": 0.5,
4343
- "learning_rate": 1.6406159586288254e-05,
4344
- "loss": 0.8005,
4345
- "step": 708
4346
- },
4347
- {
4348
- "epoch": 0.5,
4349
- "learning_rate": 1.640442218821402e-05,
4350
- "loss": 0.7754,
4351
- "step": 709
4352
- },
4353
- {
4354
- "epoch": 0.5,
4355
- "learning_rate": 1.64026823445676e-05,
4356
- "loss": 0.7746,
4357
- "step": 710
4358
- },
4359
- {
4360
- "epoch": 0.5,
4361
- "learning_rate": 1.6400940055887288e-05,
4362
- "loss": 0.7988,
4363
- "step": 711
4364
- },
4365
- {
4366
- "epoch": 0.5,
4367
- "learning_rate": 1.6399195322712136e-05,
4368
- "loss": 0.8287,
4369
- "step": 712
4370
- },
4371
- {
4372
- "epoch": 0.5,
4373
- "learning_rate": 1.6397448145581953e-05,
4374
- "loss": 0.7612,
4375
- "step": 713
4376
- },
4377
- {
4378
- "epoch": 0.5,
4379
- "learning_rate": 1.63956985250373e-05,
4380
- "loss": 0.744,
4381
- "step": 714
4382
- },
4383
- {
4384
- "epoch": 0.5,
4385
- "learning_rate": 1.63939464616195e-05,
4386
- "loss": 0.7739,
4387
- "step": 715
4388
- },
4389
- {
4390
- "epoch": 0.5,
4391
- "learning_rate": 1.6392191955870627e-05,
4392
- "loss": 0.7683,
4393
- "step": 716
4394
- },
4395
- {
4396
- "epoch": 0.5,
4397
- "learning_rate": 1.6390435008333514e-05,
4398
- "loss": 0.7755,
4399
- "step": 717
4400
- },
4401
- {
4402
- "epoch": 0.51,
4403
- "learning_rate": 1.638867561955175e-05,
4404
- "loss": 0.7985,
4405
- "step": 718
4406
- },
4407
- {
4408
- "epoch": 0.51,
4409
- "learning_rate": 1.638691379006967e-05,
4410
- "loss": 0.7754,
4411
- "step": 719
4412
- },
4413
- {
4414
- "epoch": 0.51,
4415
- "learning_rate": 1.638514952043238e-05,
4416
- "loss": 0.7779,
4417
- "step": 720
4418
- },
4419
- {
4420
- "epoch": 0.51,
4421
- "learning_rate": 1.638338281118573e-05,
4422
- "loss": 0.7662,
4423
- "step": 721
4424
- },
4425
- {
4426
- "epoch": 0.51,
4427
- "learning_rate": 1.6381613662876327e-05,
4428
- "loss": 0.7563,
4429
- "step": 722
4430
- },
4431
- {
4432
- "epoch": 0.51,
4433
- "learning_rate": 1.637984207605153e-05,
4434
- "loss": 0.7936,
4435
- "step": 723
4436
- },
4437
- {
4438
- "epoch": 0.51,
4439
- "learning_rate": 1.637806805125946e-05,
4440
- "loss": 0.7846,
4441
- "step": 724
4442
- },
4443
- {
4444
- "epoch": 0.51,
4445
- "learning_rate": 1.6376291589048985e-05,
4446
- "loss": 0.76,
4447
- "step": 725
4448
- },
4449
- {
4450
- "epoch": 0.51,
4451
- "learning_rate": 1.6374512689969732e-05,
4452
- "loss": 0.7548,
4453
- "step": 726
4454
- },
4455
- {
4456
- "epoch": 0.51,
4457
- "learning_rate": 1.6372731354572076e-05,
4458
- "loss": 0.7714,
4459
- "step": 727
4460
- },
4461
- {
4462
- "epoch": 0.51,
4463
- "learning_rate": 1.6370947583407154e-05,
4464
- "loss": 0.769,
4465
- "step": 728
4466
- },
4467
- {
4468
- "epoch": 0.51,
4469
- "learning_rate": 1.6369161377026846e-05,
4470
- "loss": 0.7867,
4471
- "step": 729
4472
- },
4473
- {
4474
- "epoch": 0.51,
4475
- "learning_rate": 1.63673727359838e-05,
4476
- "loss": 0.7539,
4477
- "step": 730
4478
- },
4479
- {
4480
- "epoch": 0.51,
4481
- "learning_rate": 1.6365581660831405e-05,
4482
- "loss": 0.8022,
4483
- "step": 731
4484
- },
4485
- {
4486
- "epoch": 0.52,
4487
- "learning_rate": 1.63637881521238e-05,
4488
- "loss": 0.7871,
4489
- "step": 732
4490
- },
4491
- {
4492
- "epoch": 0.52,
4493
- "learning_rate": 1.6361992210415895e-05,
4494
- "loss": 0.7829,
4495
- "step": 733
4496
- },
4497
- {
4498
- "epoch": 0.52,
4499
- "learning_rate": 1.6360193836263337e-05,
4500
- "loss": 0.7537,
4501
- "step": 734
4502
- },
4503
- {
4504
- "epoch": 0.52,
4505
- "learning_rate": 1.635839303022253e-05,
4506
- "loss": 0.772,
4507
- "step": 735
4508
- },
4509
- {
4510
- "epoch": 0.52,
4511
- "learning_rate": 1.6356589792850627e-05,
4512
- "loss": 0.7337,
4513
- "step": 736
4514
- },
4515
- {
4516
- "epoch": 0.52,
4517
- "learning_rate": 1.6354784124705545e-05,
4518
- "loss": 0.8004,
4519
- "step": 737
4520
- },
4521
- {
4522
- "epoch": 0.52,
4523
- "learning_rate": 1.635297602634594e-05,
4524
- "loss": 0.7338,
4525
- "step": 738
4526
- },
4527
- {
4528
- "epoch": 0.52,
4529
- "learning_rate": 1.6351165498331222e-05,
4530
- "loss": 0.7807,
4531
- "step": 739
4532
- },
4533
- {
4534
- "epoch": 0.52,
4535
- "learning_rate": 1.634935254122156e-05,
4536
- "loss": 0.773,
4537
- "step": 740
4538
- },
4539
- {
4540
- "epoch": 0.52,
4541
- "learning_rate": 1.6347537155577872e-05,
4542
- "loss": 0.7582,
4543
- "step": 741
4544
- },
4545
- {
4546
- "epoch": 0.52,
4547
- "learning_rate": 1.6345719341961818e-05,
4548
- "loss": 0.7892,
4549
- "step": 742
4550
- },
4551
- {
4552
- "epoch": 0.52,
4553
- "learning_rate": 1.6343899100935822e-05,
4554
- "loss": 0.8013,
4555
- "step": 743
4556
- },
4557
- {
4558
- "epoch": 0.52,
4559
- "learning_rate": 1.6342076433063055e-05,
4560
- "loss": 0.752,
4561
- "step": 744
4562
- },
4563
- {
4564
- "epoch": 0.52,
4565
- "learning_rate": 1.6340251338907433e-05,
4566
- "loss": 0.7788,
4567
- "step": 745
4568
- },
4569
- {
4570
- "epoch": 0.53,
4571
- "learning_rate": 1.633842381903363e-05,
4572
- "loss": 0.7484,
4573
- "step": 746
4574
- },
4575
- {
4576
- "epoch": 0.53,
4577
- "learning_rate": 1.633659387400707e-05,
4578
- "loss": 0.7901,
4579
- "step": 747
4580
- },
4581
- {
4582
- "epoch": 0.53,
4583
- "learning_rate": 1.6334761504393915e-05,
4584
- "loss": 0.8093,
4585
- "step": 748
4586
- },
4587
- {
4588
- "epoch": 0.53,
4589
- "learning_rate": 1.63329267107611e-05,
4590
- "loss": 0.8039,
4591
- "step": 749
4592
- },
4593
- {
4594
- "epoch": 0.53,
4595
- "learning_rate": 1.633108949367629e-05,
4596
- "loss": 0.7992,
4597
- "step": 750
4598
- },
4599
- {
4600
- "epoch": 0.53,
4601
- "learning_rate": 1.632924985370791e-05,
4602
- "loss": 0.766,
4603
- "step": 751
4604
- },
4605
- {
4606
- "epoch": 0.53,
4607
- "learning_rate": 1.6327407791425127e-05,
4608
- "loss": 0.7682,
4609
- "step": 752
4610
- },
4611
- {
4612
- "epoch": 0.53,
4613
- "learning_rate": 1.632556330739787e-05,
4614
- "loss": 0.7746,
4615
- "step": 753
4616
- },
4617
- {
4618
- "epoch": 0.53,
4619
- "learning_rate": 1.6323716402196798e-05,
4620
- "loss": 0.7823,
4621
- "step": 754
4622
- },
4623
- {
4624
- "epoch": 0.53,
4625
- "learning_rate": 1.6321867076393336e-05,
4626
- "loss": 0.7765,
4627
- "step": 755
4628
- },
4629
- {
4630
- "epoch": 0.53,
4631
- "learning_rate": 1.632001533055966e-05,
4632
- "loss": 0.799,
4633
- "step": 756
4634
- },
4635
- {
4636
- "epoch": 0.53,
4637
- "learning_rate": 1.6318161165268673e-05,
4638
- "loss": 0.7931,
4639
- "step": 757
4640
- },
4641
- {
4642
- "epoch": 0.53,
4643
- "learning_rate": 1.631630458109405e-05,
4644
- "loss": 0.7724,
4645
- "step": 758
4646
- },
4647
- {
4648
- "epoch": 0.53,
4649
- "learning_rate": 1.6314445578610206e-05,
4650
- "loss": 0.7844,
4651
- "step": 759
4652
- },
4653
- {
4654
- "epoch": 0.54,
4655
- "learning_rate": 1.6312584158392295e-05,
4656
- "loss": 0.7971,
4657
- "step": 760
4658
- },
4659
- {
4660
- "epoch": 0.54,
4661
- "learning_rate": 1.631072032101623e-05,
4662
- "loss": 0.8121,
4663
- "step": 761
4664
- },
4665
- {
4666
- "epoch": 0.54,
4667
- "learning_rate": 1.6308854067058677e-05,
4668
- "loss": 0.7872,
4669
- "step": 762
4670
- },
4671
- {
4672
- "epoch": 0.54,
4673
- "learning_rate": 1.6306985397097032e-05,
4674
- "loss": 0.7464,
4675
- "step": 763
4676
- },
4677
- {
4678
- "epoch": 0.54,
4679
- "learning_rate": 1.6305114311709452e-05,
4680
- "loss": 0.7602,
4681
- "step": 764
4682
- },
4683
- {
4684
- "epoch": 0.54,
4685
- "learning_rate": 1.630324081147484e-05,
4686
- "loss": 0.7837,
4687
- "step": 765
4688
- },
4689
- {
4690
- "epoch": 0.54,
4691
- "learning_rate": 1.630136489697284e-05,
4692
- "loss": 0.7601,
4693
- "step": 766
4694
- },
4695
- {
4696
- "epoch": 0.54,
4697
- "learning_rate": 1.6299486568783848e-05,
4698
- "loss": 0.7607,
4699
- "step": 767
4700
- },
4701
- {
4702
- "epoch": 0.54,
4703
- "learning_rate": 1.6297605827489002e-05,
4704
- "loss": 0.7463,
4705
- "step": 768
4706
- },
4707
- {
4708
- "epoch": 0.54,
4709
- "eval_loss": 0.7497929930686951,
4710
- "eval_runtime": 30.8232,
4711
- "eval_samples_per_second": 265.742,
4712
- "eval_steps_per_second": 16.611,
4713
- "step": 768
4714
- }
4715
- ],
4716
- "logging_steps": 1,
4717
- "max_steps": 5680,
4718
- "num_train_epochs": 4,
4719
- "save_steps": 256,
4720
- "total_flos": 1.938244283400192e+18,
4721
- "trial_name": null,
4722
- "trial_params": null
4723
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-768/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef50d54f26590293253950618343fc1c8aeeac3fe94fda4d2342d71f81540862
3
- size 4347