joelniklaus commited on
Commit
423b93c
1 Parent(s): 4e97999

Model save

Browse files
last-checkpoint/config.json DELETED
@@ -1,27 +0,0 @@
1
- {
2
- "_name_or_path": "data/plms/legal-latvian-roberta-base",
3
- "architectures": [
4
- "RobertaForMaskedLM"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 1,
8
- "classifier_dropout": null,
9
- "eos_token_id": 2,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
12
- "hidden_size": 768,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 3072,
15
- "layer_norm_eps": 1e-05,
16
- "max_position_embeddings": 514,
17
- "model_type": "roberta",
18
- "num_attention_heads": 12,
19
- "num_hidden_layers": 12,
20
- "pad_token_id": 0,
21
- "position_embedding_type": "absolute",
22
- "torch_dtype": "float32",
23
- "transformers_version": "4.20.1",
24
- "type_vocab_size": 1,
25
- "use_cache": true,
26
- "vocab_size": 32000
27
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0edc0a810fe54d700eab8daae7236020506578b91224ab7084d2e17ba4a5d001
3
- size 885325017
 
 
 
 
last-checkpoint/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdce6d900edc30ff77f0488f840efd6e646629c085c9e0181b9fd98a2cd0cc1d
3
- size 442675755
 
 
 
 
last-checkpoint/rng_state_0.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:613e655cc6f450c292615fd4ce417b1f9a9fb50cdf8e944faa946e28dfe90767
3
- size 13611
 
 
 
 
last-checkpoint/rng_state_1.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:613e655cc6f450c292615fd4ce417b1f9a9fb50cdf8e944faa946e28dfe90767
3
- size 13611
 
 
 
 
last-checkpoint/rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:613e655cc6f450c292615fd4ce417b1f9a9fb50cdf8e944faa946e28dfe90767
3
- size 13611
 
 
 
 
last-checkpoint/rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:613e655cc6f450c292615fd4ce417b1f9a9fb50cdf8e944faa946e28dfe90767
3
- size 13611
 
 
 
 
last-checkpoint/rng_state_4.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:613e655cc6f450c292615fd4ce417b1f9a9fb50cdf8e944faa946e28dfe90767
3
- size 13611
 
 
 
 
last-checkpoint/rng_state_5.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:613e655cc6f450c292615fd4ce417b1f9a9fb50cdf8e944faa946e28dfe90767
3
- size 13611
 
 
 
 
last-checkpoint/rng_state_6.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:613e655cc6f450c292615fd4ce417b1f9a9fb50cdf8e944faa946e28dfe90767
3
- size 13611
 
 
 
 
last-checkpoint/rng_state_7.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:613e655cc6f450c292615fd4ce417b1f9a9fb50cdf8e944faa946e28dfe90767
3
- size 13611
 
 
 
 
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:34c7791bbb10178054552ff1c1aa9bf08a101a8199906b7bf72dd42f5c977109
3
- size 623
 
 
 
 
last-checkpoint/special_tokens_map.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
5
- "mask_token": "<mask>",
6
- "pad_token": "<pad>",
7
- "sep_token": "</s>",
8
- "unk_token": "<unk>"
9
- }
 
 
 
 
 
 
 
 
 
 
last-checkpoint/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json DELETED
@@ -1,13 +0,0 @@
1
- {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
5
- "mask_token": "<mask>",
6
- "model_max_length": 512,
7
- "name_or_path": "data/plms/legal-latvian-roberta-base",
8
- "pad_token": "<pad>",
9
- "sep_token": "</s>",
10
- "special_tokens_map_file": "data/plms/legal-latvian-roberta-base_32k/special_tokens_map.json",
11
- "tokenizer_class": "PreTrainedTokenizerFast",
12
- "unk_token": "<unk>"
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/trainer_state.json DELETED
@@ -1,1248 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 101.006665,
5
- "global_step": 200000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.01,
12
- "learning_rate": 1e-05,
13
- "loss": 6.3683,
14
- "step": 1000
15
- },
16
- {
17
- "epoch": 1.0,
18
- "learning_rate": 2e-05,
19
- "loss": 4.5771,
20
- "step": 2000
21
- },
22
- {
23
- "epoch": 1.01,
24
- "learning_rate": 3e-05,
25
- "loss": 3.9413,
26
- "step": 3000
27
- },
28
- {
29
- "epoch": 2.0,
30
- "learning_rate": 4e-05,
31
- "loss": 3.6934,
32
- "step": 4000
33
- },
34
- {
35
- "epoch": 2.01,
36
- "learning_rate": 5e-05,
37
- "loss": 3.1564,
38
- "step": 5000
39
- },
40
- {
41
- "epoch": 3.0,
42
- "learning_rate": 6e-05,
43
- "loss": 2.3088,
44
- "step": 6000
45
- },
46
- {
47
- "epoch": 3.01,
48
- "learning_rate": 7e-05,
49
- "loss": 1.7659,
50
- "step": 7000
51
- },
52
- {
53
- "epoch": 4.0,
54
- "learning_rate": 8e-05,
55
- "loss": 1.6643,
56
- "step": 8000
57
- },
58
- {
59
- "epoch": 4.01,
60
- "learning_rate": 9e-05,
61
- "loss": 1.4542,
62
- "step": 9000
63
- },
64
- {
65
- "epoch": 5.0,
66
- "learning_rate": 0.0001,
67
- "loss": 1.4685,
68
- "step": 10000
69
- },
70
- {
71
- "epoch": 5.01,
72
- "learning_rate": 9.999316524962345e-05,
73
- "loss": 1.3161,
74
- "step": 11000
75
- },
76
- {
77
- "epoch": 6.0,
78
- "learning_rate": 9.997266286704631e-05,
79
- "loss": 1.3615,
80
- "step": 12000
81
- },
82
- {
83
- "epoch": 6.01,
84
- "learning_rate": 9.993849845741524e-05,
85
- "loss": 1.228,
86
- "step": 13000
87
- },
88
- {
89
- "epoch": 7.0,
90
- "learning_rate": 9.989068136093873e-05,
91
- "loss": 1.2929,
92
- "step": 14000
93
- },
94
- {
95
- "epoch": 7.01,
96
- "learning_rate": 9.98292246503335e-05,
97
- "loss": 1.1735,
98
- "step": 15000
99
- },
100
- {
101
- "epoch": 8.0,
102
- "learning_rate": 9.975414512725057e-05,
103
- "loss": 1.2422,
104
- "step": 16000
105
- },
106
- {
107
- "epoch": 8.01,
108
- "learning_rate": 9.966546331768191e-05,
109
- "loss": 1.1314,
110
- "step": 17000
111
- },
112
- {
113
- "epoch": 9.0,
114
- "learning_rate": 9.956320346634876e-05,
115
- "loss": 1.2033,
116
- "step": 18000
117
- },
118
- {
119
- "epoch": 9.01,
120
- "learning_rate": 9.944739353007344e-05,
121
- "loss": 1.1022,
122
- "step": 19000
123
- },
124
- {
125
- "epoch": 10.0,
126
- "learning_rate": 9.931806517013612e-05,
127
- "loss": 1.1727,
128
- "step": 20000
129
- },
130
- {
131
- "epoch": 10.01,
132
- "learning_rate": 9.917525374361912e-05,
133
- "loss": 1.0737,
134
- "step": 21000
135
- },
136
- {
137
- "epoch": 11.0,
138
- "learning_rate": 9.901899829374047e-05,
139
- "loss": 1.1482,
140
- "step": 22000
141
- },
142
- {
143
- "epoch": 11.01,
144
- "learning_rate": 9.884934153917997e-05,
145
- "loss": 1.052,
146
- "step": 23000
147
- },
148
- {
149
- "epoch": 12.0,
150
- "learning_rate": 9.86663298624003e-05,
151
- "loss": 1.1244,
152
- "step": 24000
153
- },
154
- {
155
- "epoch": 12.01,
156
- "learning_rate": 9.847001329696653e-05,
157
- "loss": 1.0347,
158
- "step": 25000
159
- },
160
- {
161
- "epoch": 13.0,
162
- "learning_rate": 9.826044551386744e-05,
163
- "loss": 1.1042,
164
- "step": 26000
165
- },
166
- {
167
- "epoch": 13.01,
168
- "learning_rate": 9.803768380684242e-05,
169
- "loss": 1.0166,
170
- "step": 27000
171
- },
172
- {
173
- "epoch": 14.0,
174
- "learning_rate": 9.780178907671789e-05,
175
- "loss": 1.0874,
176
- "step": 28000
177
- },
178
- {
179
- "epoch": 14.01,
180
- "learning_rate": 9.755282581475769e-05,
181
- "loss": 1.0078,
182
- "step": 29000
183
- },
184
- {
185
- "epoch": 15.0,
186
- "learning_rate": 9.729086208503174e-05,
187
- "loss": 1.0626,
188
- "step": 30000
189
- },
190
- {
191
- "epoch": 15.01,
192
- "learning_rate": 9.701596950580806e-05,
193
- "loss": 1.0042,
194
- "step": 31000
195
- },
196
- {
197
- "epoch": 16.0,
198
- "learning_rate": 9.672822322997305e-05,
199
- "loss": 1.0333,
200
- "step": 32000
201
- },
202
- {
203
- "epoch": 16.01,
204
- "learning_rate": 9.642770192448536e-05,
205
- "loss": 1.0095,
206
- "step": 33000
207
- },
208
- {
209
- "epoch": 17.0,
210
- "learning_rate": 9.611448774886924e-05,
211
- "loss": 1.0183,
212
- "step": 34000
213
- },
214
- {
215
- "epoch": 17.01,
216
- "learning_rate": 9.578866633275288e-05,
217
- "loss": 0.999,
218
- "step": 35000
219
- },
220
- {
221
- "epoch": 18.0,
222
- "learning_rate": 9.545032675245813e-05,
223
- "loss": 1.0009,
224
- "step": 36000
225
- },
226
- {
227
- "epoch": 18.01,
228
- "learning_rate": 9.509956150664796e-05,
229
- "loss": 0.998,
230
- "step": 37000
231
- },
232
- {
233
- "epoch": 19.0,
234
- "learning_rate": 9.473646649103818e-05,
235
- "loss": 0.988,
236
- "step": 38000
237
- },
238
- {
239
- "epoch": 19.01,
240
- "learning_rate": 9.43611409721806e-05,
241
- "loss": 0.9912,
242
- "step": 39000
243
- },
244
- {
245
- "epoch": 20.0,
246
- "learning_rate": 9.397368756032445e-05,
247
- "loss": 0.9631,
248
- "step": 40000
249
- },
250
- {
251
- "epoch": 20.01,
252
- "learning_rate": 9.357421218136386e-05,
253
- "loss": 0.9994,
254
- "step": 41000
255
- },
256
- {
257
- "epoch": 21.0,
258
- "learning_rate": 9.316282404787871e-05,
259
- "loss": 0.9448,
260
- "step": 42000
261
- },
262
- {
263
- "epoch": 21.01,
264
- "learning_rate": 9.273963562927695e-05,
265
- "loss": 1.0004,
266
- "step": 43000
267
- },
268
- {
269
- "epoch": 22.0,
270
- "learning_rate": 9.230476262104677e-05,
271
- "loss": 0.9371,
272
- "step": 44000
273
- },
274
- {
275
- "epoch": 22.01,
276
- "learning_rate": 9.185832391312644e-05,
277
- "loss": 0.9908,
278
- "step": 45000
279
- },
280
- {
281
- "epoch": 23.0,
282
- "learning_rate": 9.140044155740101e-05,
283
- "loss": 0.9292,
284
- "step": 46000
285
- },
286
- {
287
- "epoch": 23.01,
288
- "learning_rate": 9.093124073433463e-05,
289
- "loss": 0.9863,
290
- "step": 47000
291
- },
292
- {
293
- "epoch": 24.0,
294
- "learning_rate": 9.045084971874738e-05,
295
- "loss": 0.9198,
296
- "step": 48000
297
- },
298
- {
299
- "epoch": 24.01,
300
- "learning_rate": 8.995939984474624e-05,
301
- "loss": 0.984,
302
- "step": 49000
303
- },
304
- {
305
- "epoch": 25.0,
306
- "learning_rate": 8.945702546981969e-05,
307
- "loss": 0.9107,
308
- "step": 50000
309
- },
310
- {
311
- "epoch": 25.0,
312
- "eval_loss": 0.6652148962020874,
313
- "eval_runtime": 17.7039,
314
- "eval_samples_per_second": 282.424,
315
- "eval_steps_per_second": 2.259,
316
- "step": 50000
317
- },
318
- {
319
- "epoch": 25.01,
320
- "learning_rate": 8.894386393810563e-05,
321
- "loss": 0.9792,
322
- "step": 51000
323
- },
324
- {
325
- "epoch": 26.0,
326
- "learning_rate": 8.842005554284296e-05,
327
- "loss": 0.9031,
328
- "step": 52000
329
- },
330
- {
331
- "epoch": 26.01,
332
- "learning_rate": 8.788574348801675e-05,
333
- "loss": 0.9746,
334
- "step": 53000
335
- },
336
- {
337
- "epoch": 27.0,
338
- "learning_rate": 8.73410738492077e-05,
339
- "loss": 0.8959,
340
- "step": 54000
341
- },
342
- {
343
- "epoch": 27.01,
344
- "learning_rate": 8.678619553365659e-05,
345
- "loss": 0.9671,
346
- "step": 55000
347
- },
348
- {
349
- "epoch": 28.0,
350
- "learning_rate": 8.622126023955446e-05,
351
- "loss": 0.8891,
352
- "step": 56000
353
- },
354
- {
355
- "epoch": 28.01,
356
- "learning_rate": 8.564642241456986e-05,
357
- "loss": 0.9596,
358
- "step": 57000
359
- },
360
- {
361
- "epoch": 29.0,
362
- "learning_rate": 8.506183921362443e-05,
363
- "loss": 0.8833,
364
- "step": 58000
365
- },
366
- {
367
- "epoch": 29.01,
368
- "learning_rate": 8.44676704559283e-05,
369
- "loss": 0.9574,
370
- "step": 59000
371
- },
372
- {
373
- "epoch": 30.0,
374
- "learning_rate": 8.386407858128706e-05,
375
- "loss": 0.875,
376
- "step": 60000
377
- },
378
- {
379
- "epoch": 31.0,
380
- "learning_rate": 8.32512286056924e-05,
381
- "loss": 0.9522,
382
- "step": 61000
383
- },
384
- {
385
- "epoch": 31.01,
386
- "learning_rate": 8.262928807620843e-05,
387
- "loss": 0.8677,
388
- "step": 62000
389
- },
390
- {
391
- "epoch": 32.0,
392
- "learning_rate": 8.199842702516583e-05,
393
- "loss": 0.9489,
394
- "step": 63000
395
- },
396
- {
397
- "epoch": 32.01,
398
- "learning_rate": 8.135881792367686e-05,
399
- "loss": 0.8648,
400
- "step": 64000
401
- },
402
- {
403
- "epoch": 33.0,
404
- "learning_rate": 8.07106356344834e-05,
405
- "loss": 0.9418,
406
- "step": 65000
407
- },
408
- {
409
- "epoch": 33.01,
410
- "learning_rate": 8.005405736415126e-05,
411
- "loss": 0.8641,
412
- "step": 66000
413
- },
414
- {
415
- "epoch": 34.0,
416
- "learning_rate": 7.938926261462366e-05,
417
- "loss": 0.9371,
418
- "step": 67000
419
- },
420
- {
421
- "epoch": 34.01,
422
- "learning_rate": 7.871643313414718e-05,
423
- "loss": 0.8603,
424
- "step": 68000
425
- },
426
- {
427
- "epoch": 35.0,
428
- "learning_rate": 7.803575286758364e-05,
429
- "loss": 0.9334,
430
- "step": 69000
431
- },
432
- {
433
- "epoch": 35.01,
434
- "learning_rate": 7.734740790612136e-05,
435
- "loss": 0.8569,
436
- "step": 70000
437
- },
438
- {
439
- "epoch": 36.0,
440
- "learning_rate": 7.66515864363997e-05,
441
- "loss": 0.93,
442
- "step": 71000
443
- },
444
- {
445
- "epoch": 36.01,
446
- "learning_rate": 7.594847868906076e-05,
447
- "loss": 0.8504,
448
- "step": 72000
449
- },
450
- {
451
- "epoch": 37.0,
452
- "learning_rate": 7.52382768867422e-05,
453
- "loss": 0.9274,
454
- "step": 73000
455
- },
456
- {
457
- "epoch": 37.01,
458
- "learning_rate": 7.452117519152542e-05,
459
- "loss": 0.8482,
460
- "step": 74000
461
- },
462
- {
463
- "epoch": 38.0,
464
- "learning_rate": 7.379736965185368e-05,
465
- "loss": 0.9227,
466
- "step": 75000
467
- },
468
- {
469
- "epoch": 38.01,
470
- "learning_rate": 7.30670581489344e-05,
471
- "loss": 0.8441,
472
- "step": 76000
473
- },
474
- {
475
- "epoch": 39.0,
476
- "learning_rate": 7.233044034264034e-05,
477
- "loss": 0.9189,
478
- "step": 77000
479
- },
480
- {
481
- "epoch": 39.01,
482
- "learning_rate": 7.158771761692464e-05,
483
- "loss": 0.8422,
484
- "step": 78000
485
- },
486
- {
487
- "epoch": 40.0,
488
- "learning_rate": 7.083909302476453e-05,
489
- "loss": 0.9137,
490
- "step": 79000
491
- },
492
- {
493
- "epoch": 40.01,
494
- "learning_rate": 7.008477123264848e-05,
495
- "loss": 0.8382,
496
- "step": 80000
497
- },
498
- {
499
- "epoch": 41.0,
500
- "learning_rate": 6.932495846462261e-05,
501
- "loss": 0.9091,
502
- "step": 81000
503
- },
504
- {
505
- "epoch": 41.01,
506
- "learning_rate": 6.855986244591104e-05,
507
- "loss": 0.8347,
508
- "step": 82000
509
- },
510
- {
511
- "epoch": 42.0,
512
- "learning_rate": 6.778969234612584e-05,
513
- "loss": 0.9054,
514
- "step": 83000
515
- },
516
- {
517
- "epoch": 42.01,
518
- "learning_rate": 6.701465872208216e-05,
519
- "loss": 0.833,
520
- "step": 84000
521
- },
522
- {
523
- "epoch": 43.0,
524
- "learning_rate": 6.623497346023418e-05,
525
- "loss": 0.901,
526
- "step": 85000
527
- },
528
- {
529
- "epoch": 43.01,
530
- "learning_rate": 6.545084971874738e-05,
531
- "loss": 0.8321,
532
- "step": 86000
533
- },
534
- {
535
- "epoch": 44.0,
536
- "learning_rate": 6.466250186922325e-05,
537
- "loss": 0.8944,
538
- "step": 87000
539
- },
540
- {
541
- "epoch": 44.01,
542
- "learning_rate": 6.387014543809223e-05,
543
- "loss": 0.8303,
544
- "step": 88000
545
- },
546
- {
547
- "epoch": 45.0,
548
- "learning_rate": 6.307399704769099e-05,
549
- "loss": 0.8847,
550
- "step": 89000
551
- },
552
- {
553
- "epoch": 45.01,
554
- "learning_rate": 6.227427435703997e-05,
555
- "loss": 0.8359,
556
- "step": 90000
557
- },
558
- {
559
- "epoch": 46.0,
560
- "learning_rate": 6.147119600233758e-05,
561
- "loss": 0.8698,
562
- "step": 91000
563
- },
564
- {
565
- "epoch": 46.01,
566
- "learning_rate": 6.066498153718735e-05,
567
- "loss": 0.8455,
568
- "step": 92000
569
- },
570
- {
571
- "epoch": 47.0,
572
- "learning_rate": 5.985585137257401e-05,
573
- "loss": 0.8598,
574
- "step": 93000
575
- },
576
- {
577
- "epoch": 47.01,
578
- "learning_rate": 5.90440267166055e-05,
579
- "loss": 0.849,
580
- "step": 94000
581
- },
582
- {
583
- "epoch": 48.0,
584
- "learning_rate": 5.8229729514036705e-05,
585
- "loss": 0.8499,
586
- "step": 95000
587
- },
588
- {
589
- "epoch": 48.01,
590
- "learning_rate": 5.74131823855921e-05,
591
- "loss": 0.8506,
592
- "step": 96000
593
- },
594
- {
595
- "epoch": 49.0,
596
- "learning_rate": 5.6594608567103456e-05,
597
- "loss": 0.8467,
598
- "step": 97000
599
- },
600
- {
601
- "epoch": 49.01,
602
- "learning_rate": 5.577423184847932e-05,
603
- "loss": 0.85,
604
- "step": 98000
605
- },
606
- {
607
- "epoch": 50.0,
608
- "learning_rate": 5.495227651252315e-05,
609
- "loss": 0.8309,
610
- "step": 99000
611
- },
612
- {
613
- "epoch": 50.01,
614
- "learning_rate": 5.4128967273616625e-05,
615
- "loss": 0.8619,
616
- "step": 100000
617
- },
618
- {
619
- "epoch": 50.01,
620
- "eval_loss": 0.5780017375946045,
621
- "eval_runtime": 12.1685,
622
- "eval_samples_per_second": 410.898,
623
- "eval_steps_per_second": 3.287,
624
- "step": 100000
625
- },
626
- {
627
- "epoch": 51.0,
628
- "learning_rate": 5.330452921628497e-05,
629
- "loss": 0.8193,
630
- "step": 101000
631
- },
632
- {
633
- "epoch": 51.01,
634
- "learning_rate": 5.247918773366112e-05,
635
- "loss": 0.868,
636
- "step": 102000
637
- },
638
- {
639
- "epoch": 52.0,
640
- "learning_rate": 5.165316846586541e-05,
641
- "loss": 0.8127,
642
- "step": 103000
643
- },
644
- {
645
- "epoch": 52.01,
646
- "learning_rate": 5.0826697238317935e-05,
647
- "loss": 0.8676,
648
- "step": 104000
649
- },
650
- {
651
- "epoch": 53.0,
652
- "learning_rate": 5e-05,
653
- "loss": 0.8095,
654
- "step": 105000
655
- },
656
- {
657
- "epoch": 53.01,
658
- "learning_rate": 4.917330276168208e-05,
659
- "loss": 0.8663,
660
- "step": 106000
661
- },
662
- {
663
- "epoch": 54.0,
664
- "learning_rate": 4.834683153413459e-05,
665
- "loss": 0.8037,
666
- "step": 107000
667
- },
668
- {
669
- "epoch": 54.01,
670
- "learning_rate": 4.7520812266338885e-05,
671
- "loss": 0.8682,
672
- "step": 108000
673
- },
674
- {
675
- "epoch": 55.0,
676
- "learning_rate": 4.669547078371504e-05,
677
- "loss": 0.7985,
678
- "step": 109000
679
- },
680
- {
681
- "epoch": 55.01,
682
- "learning_rate": 4.5871032726383386e-05,
683
- "loss": 0.8687,
684
- "step": 110000
685
- },
686
- {
687
- "epoch": 56.0,
688
- "learning_rate": 4.504772348747687e-05,
689
- "loss": 0.7946,
690
- "step": 111000
691
- },
692
- {
693
- "epoch": 56.01,
694
- "learning_rate": 4.4225768151520694e-05,
695
- "loss": 0.8661,
696
- "step": 112000
697
- },
698
- {
699
- "epoch": 57.0,
700
- "learning_rate": 4.3405391432896555e-05,
701
- "loss": 0.7904,
702
- "step": 113000
703
- },
704
- {
705
- "epoch": 57.01,
706
- "learning_rate": 4.2586817614407895e-05,
707
- "loss": 0.8648,
708
- "step": 114000
709
- },
710
- {
711
- "epoch": 58.0,
712
- "learning_rate": 4.17702704859633e-05,
713
- "loss": 0.7875,
714
- "step": 115000
715
- },
716
- {
717
- "epoch": 58.01,
718
- "learning_rate": 4.095597328339452e-05,
719
- "loss": 0.8599,
720
- "step": 116000
721
- },
722
- {
723
- "epoch": 59.0,
724
- "learning_rate": 4.0144148627425993e-05,
725
- "loss": 0.786,
726
- "step": 117000
727
- },
728
- {
729
- "epoch": 59.01,
730
- "learning_rate": 3.933501846281267e-05,
731
- "loss": 0.8596,
732
- "step": 118000
733
- },
734
- {
735
- "epoch": 60.0,
736
- "learning_rate": 3.852880399766243e-05,
737
- "loss": 0.7793,
738
- "step": 119000
739
- },
740
- {
741
- "epoch": 61.0,
742
- "learning_rate": 3.772572564296005e-05,
743
- "loss": 0.8583,
744
- "step": 120000
745
- },
746
- {
747
- "epoch": 61.01,
748
- "learning_rate": 3.6926002952309016e-05,
749
- "loss": 0.7764,
750
- "step": 121000
751
- },
752
- {
753
- "epoch": 62.0,
754
- "learning_rate": 3.612985456190778e-05,
755
- "loss": 0.8561,
756
- "step": 122000
757
- },
758
- {
759
- "epoch": 62.01,
760
- "learning_rate": 3.533749813077677e-05,
761
- "loss": 0.7742,
762
- "step": 123000
763
- },
764
- {
765
- "epoch": 63.0,
766
- "learning_rate": 3.4549150281252636e-05,
767
- "loss": 0.8541,
768
- "step": 124000
769
- },
770
- {
771
- "epoch": 63.01,
772
- "learning_rate": 3.3765026539765834e-05,
773
- "loss": 0.776,
774
- "step": 125000
775
- },
776
- {
777
- "epoch": 64.0,
778
- "learning_rate": 3.298534127791785e-05,
779
- "loss": 0.8498,
780
- "step": 126000
781
- },
782
- {
783
- "epoch": 64.01,
784
- "learning_rate": 3.221030765387417e-05,
785
- "loss": 0.7761,
786
- "step": 127000
787
- },
788
- {
789
- "epoch": 65.0,
790
- "learning_rate": 3.144013755408895e-05,
791
- "loss": 0.8474,
792
- "step": 128000
793
- },
794
- {
795
- "epoch": 65.01,
796
- "learning_rate": 3.0675041535377405e-05,
797
- "loss": 0.7745,
798
- "step": 129000
799
- },
800
- {
801
- "epoch": 66.0,
802
- "learning_rate": 2.991522876735154e-05,
803
- "loss": 0.847,
804
- "step": 130000
805
- },
806
- {
807
- "epoch": 66.01,
808
- "learning_rate": 2.916090697523549e-05,
809
- "loss": 0.7721,
810
- "step": 131000
811
- },
812
- {
813
- "epoch": 67.0,
814
- "learning_rate": 2.8412282383075363e-05,
815
- "loss": 0.8469,
816
- "step": 132000
817
- },
818
- {
819
- "epoch": 67.01,
820
- "learning_rate": 2.766955965735968e-05,
821
- "loss": 0.7703,
822
- "step": 133000
823
- },
824
- {
825
- "epoch": 68.0,
826
- "learning_rate": 2.693294185106562e-05,
827
- "loss": 0.8453,
828
- "step": 134000
829
- },
830
- {
831
- "epoch": 68.01,
832
- "learning_rate": 2.6202630348146324e-05,
833
- "loss": 0.7685,
834
- "step": 135000
835
- },
836
- {
837
- "epoch": 69.0,
838
- "learning_rate": 2.547882480847461e-05,
839
- "loss": 0.8428,
840
- "step": 136000
841
- },
842
- {
843
- "epoch": 69.01,
844
- "learning_rate": 2.476172311325783e-05,
845
- "loss": 0.768,
846
- "step": 137000
847
- },
848
- {
849
- "epoch": 70.0,
850
- "learning_rate": 2.405152131093926e-05,
851
- "loss": 0.8397,
852
- "step": 138000
853
- },
854
- {
855
- "epoch": 70.01,
856
- "learning_rate": 2.3348413563600325e-05,
857
- "loss": 0.7683,
858
- "step": 139000
859
- },
860
- {
861
- "epoch": 71.0,
862
- "learning_rate": 2.2652592093878666e-05,
863
- "loss": 0.8377,
864
- "step": 140000
865
- },
866
- {
867
- "epoch": 71.01,
868
- "learning_rate": 2.196424713241637e-05,
869
- "loss": 0.7637,
870
- "step": 141000
871
- },
872
- {
873
- "epoch": 72.0,
874
- "learning_rate": 2.128356686585282e-05,
875
- "loss": 0.8356,
876
- "step": 142000
877
- },
878
- {
879
- "epoch": 72.01,
880
- "learning_rate": 2.061073738537635e-05,
881
- "loss": 0.7634,
882
- "step": 143000
883
- },
884
- {
885
- "epoch": 73.0,
886
- "learning_rate": 1.9945942635848748e-05,
887
- "loss": 0.8338,
888
- "step": 144000
889
- },
890
- {
891
- "epoch": 73.01,
892
- "learning_rate": 1.928936436551661e-05,
893
- "loss": 0.7656,
894
- "step": 145000
895
- },
896
- {
897
- "epoch": 74.0,
898
- "learning_rate": 1.8641182076323148e-05,
899
- "loss": 0.8302,
900
- "step": 146000
901
- },
902
- {
903
- "epoch": 74.01,
904
- "learning_rate": 1.800157297483417e-05,
905
- "loss": 0.7649,
906
- "step": 147000
907
- },
908
- {
909
- "epoch": 75.0,
910
- "learning_rate": 1.7370711923791567e-05,
911
- "loss": 0.8221,
912
- "step": 148000
913
- },
914
- {
915
- "epoch": 75.01,
916
- "learning_rate": 1.6748771394307585e-05,
917
- "loss": 0.7727,
918
- "step": 149000
919
- },
920
- {
921
- "epoch": 76.0,
922
- "learning_rate": 1.6135921418712956e-05,
923
- "loss": 0.8128,
924
- "step": 150000
925
- },
926
- {
927
- "epoch": 76.0,
928
- "eval_loss": 0.540192186832428,
929
- "eval_runtime": 11.2644,
930
- "eval_samples_per_second": 443.876,
931
- "eval_steps_per_second": 3.551,
932
- "step": 150000
933
- },
934
- {
935
- "epoch": 76.01,
936
- "learning_rate": 1.553232954407171e-05,
937
- "loss": 0.78,
938
- "step": 151000
939
- },
940
- {
941
- "epoch": 77.0,
942
- "learning_rate": 1.4938160786375572e-05,
943
- "loss": 0.8023,
944
- "step": 152000
945
- },
946
- {
947
- "epoch": 77.01,
948
- "learning_rate": 1.435357758543015e-05,
949
- "loss": 0.7871,
950
- "step": 153000
951
- },
952
- {
953
- "epoch": 78.0,
954
- "learning_rate": 1.3778739760445552e-05,
955
- "loss": 0.7959,
956
- "step": 154000
957
- },
958
- {
959
- "epoch": 78.01,
960
- "learning_rate": 1.3213804466343421e-05,
961
- "loss": 0.7908,
962
- "step": 155000
963
- },
964
- {
965
- "epoch": 79.0,
966
- "learning_rate": 1.2658926150792322e-05,
967
- "loss": 0.7888,
968
- "step": 156000
969
- },
970
- {
971
- "epoch": 79.01,
972
- "learning_rate": 1.2114256511983274e-05,
973
- "loss": 0.7955,
974
- "step": 157000
975
- },
976
- {
977
- "epoch": 80.0,
978
- "learning_rate": 1.157994445715706e-05,
979
- "loss": 0.781,
980
- "step": 158000
981
- },
982
- {
983
- "epoch": 80.01,
984
- "learning_rate": 1.1056136061894384e-05,
985
- "loss": 0.8035,
986
- "step": 159000
987
- },
988
- {
989
- "epoch": 81.0,
990
- "learning_rate": 1.0542974530180327e-05,
991
- "loss": 0.7703,
992
- "step": 160000
993
- },
994
- {
995
- "epoch": 81.01,
996
- "learning_rate": 1.0040600155253765e-05,
997
- "loss": 0.8123,
998
- "step": 161000
999
- },
1000
- {
1001
- "epoch": 82.0,
1002
- "learning_rate": 9.549150281252633e-06,
1003
- "loss": 0.7619,
1004
- "step": 162000
1005
- },
1006
- {
1007
- "epoch": 82.01,
1008
- "learning_rate": 9.068759265665384e-06,
1009
- "loss": 0.8191,
1010
- "step": 163000
1011
- },
1012
- {
1013
- "epoch": 83.0,
1014
- "learning_rate": 8.599558442598998e-06,
1015
- "loss": 0.7602,
1016
- "step": 164000
1017
- },
1018
- {
1019
- "epoch": 83.01,
1020
- "learning_rate": 8.141676086873572e-06,
1021
- "loss": 0.8173,
1022
- "step": 165000
1023
- },
1024
- {
1025
- "epoch": 84.0,
1026
- "learning_rate": 7.695237378953223e-06,
1027
- "loss": 0.7567,
1028
- "step": 166000
1029
- },
1030
- {
1031
- "epoch": 84.01,
1032
- "learning_rate": 7.260364370723044e-06,
1033
- "loss": 0.8207,
1034
- "step": 167000
1035
- },
1036
- {
1037
- "epoch": 85.0,
1038
- "learning_rate": 6.837175952121306e-06,
1039
- "loss": 0.7542,
1040
- "step": 168000
1041
- },
1042
- {
1043
- "epoch": 85.01,
1044
- "learning_rate": 6.425787818636131e-06,
1045
- "loss": 0.8239,
1046
- "step": 169000
1047
- },
1048
- {
1049
- "epoch": 86.0,
1050
- "learning_rate": 6.026312439675552e-06,
1051
- "loss": 0.7508,
1052
- "step": 170000
1053
- },
1054
- {
1055
- "epoch": 86.01,
1056
- "learning_rate": 5.6388590278194096e-06,
1057
- "loss": 0.8238,
1058
- "step": 171000
1059
- },
1060
- {
1061
- "epoch": 87.0,
1062
- "learning_rate": 5.263533508961827e-06,
1063
- "loss": 0.7479,
1064
- "step": 172000
1065
- },
1066
- {
1067
- "epoch": 87.01,
1068
- "learning_rate": 4.900438493352055e-06,
1069
- "loss": 0.8226,
1070
- "step": 173000
1071
- },
1072
- {
1073
- "epoch": 88.0,
1074
- "learning_rate": 4.549673247541875e-06,
1075
- "loss": 0.7481,
1076
- "step": 174000
1077
- },
1078
- {
1079
- "epoch": 88.01,
1080
- "learning_rate": 4.2113336672471245e-06,
1081
- "loss": 0.8216,
1082
- "step": 175000
1083
- },
1084
- {
1085
- "epoch": 89.0,
1086
- "learning_rate": 3.885512251130763e-06,
1087
- "loss": 0.7455,
1088
- "step": 176000
1089
- },
1090
- {
1091
- "epoch": 89.01,
1092
- "learning_rate": 3.5722980755146517e-06,
1093
- "loss": 0.8209,
1094
- "step": 177000
1095
- },
1096
- {
1097
- "epoch": 90.0,
1098
- "learning_rate": 3.271776770026963e-06,
1099
- "loss": 0.7435,
1100
- "step": 178000
1101
- },
1102
- {
1103
- "epoch": 91.0,
1104
- "learning_rate": 2.9840304941919415e-06,
1105
- "loss": 0.8222,
1106
- "step": 179000
1107
- },
1108
- {
1109
- "epoch": 91.01,
1110
- "learning_rate": 2.7091379149682685e-06,
1111
- "loss": 0.7414,
1112
- "step": 180000
1113
- },
1114
- {
1115
- "epoch": 92.0,
1116
- "learning_rate": 2.4471741852423237e-06,
1117
- "loss": 0.8199,
1118
- "step": 181000
1119
- },
1120
- {
1121
- "epoch": 92.01,
1122
- "learning_rate": 2.1982109232821178e-06,
1123
- "loss": 0.7402,
1124
- "step": 182000
1125
- },
1126
- {
1127
- "epoch": 93.0,
1128
- "learning_rate": 1.962316193157593e-06,
1129
- "loss": 0.8222,
1130
- "step": 183000
1131
- },
1132
- {
1133
- "epoch": 93.01,
1134
- "learning_rate": 1.7395544861325718e-06,
1135
- "loss": 0.7416,
1136
- "step": 184000
1137
- },
1138
- {
1139
- "epoch": 94.0,
1140
- "learning_rate": 1.5299867030334814e-06,
1141
- "loss": 0.8186,
1142
- "step": 185000
1143
- },
1144
- {
1145
- "epoch": 94.01,
1146
- "learning_rate": 1.333670137599713e-06,
1147
- "loss": 0.7457,
1148
- "step": 186000
1149
- },
1150
- {
1151
- "epoch": 95.0,
1152
- "learning_rate": 1.1506584608200367e-06,
1153
- "loss": 0.8179,
1154
- "step": 187000
1155
- },
1156
- {
1157
- "epoch": 95.01,
1158
- "learning_rate": 9.810017062595322e-07,
1159
- "loss": 0.7464,
1160
- "step": 188000
1161
- },
1162
- {
1163
- "epoch": 96.0,
1164
- "learning_rate": 8.247462563808817e-07,
1165
- "loss": 0.8181,
1166
- "step": 189000
1167
- },
1168
- {
1169
- "epoch": 96.01,
1170
- "learning_rate": 6.819348298638839e-07,
1171
- "loss": 0.745,
1172
- "step": 190000
1173
- },
1174
- {
1175
- "epoch": 97.0,
1176
- "learning_rate": 5.526064699265753e-07,
1177
- "loss": 0.8197,
1178
- "step": 191000
1179
- },
1180
- {
1181
- "epoch": 97.01,
1182
- "learning_rate": 4.367965336512403e-07,
1183
- "loss": 0.7437,
1184
- "step": 192000
1185
- },
1186
- {
1187
- "epoch": 98.0,
1188
- "learning_rate": 3.3453668231809286e-07,
1189
- "loss": 0.8203,
1190
- "step": 193000
1191
- },
1192
- {
1193
- "epoch": 98.01,
1194
- "learning_rate": 2.458548727494292e-07,
1195
- "loss": 0.7446,
1196
- "step": 194000
1197
- },
1198
- {
1199
- "epoch": 99.0,
1200
- "learning_rate": 1.7077534966650766e-07,
1201
- "loss": 0.8202,
1202
- "step": 195000
1203
- },
1204
- {
1205
- "epoch": 99.01,
1206
- "learning_rate": 1.0931863906127327e-07,
1207
- "loss": 0.7448,
1208
- "step": 196000
1209
- },
1210
- {
1211
- "epoch": 100.0,
1212
- "learning_rate": 6.150154258476315e-08,
1213
- "loss": 0.819,
1214
- "step": 197000
1215
- },
1216
- {
1217
- "epoch": 100.01,
1218
- "learning_rate": 2.7337132953697554e-08,
1219
- "loss": 0.7472,
1220
- "step": 198000
1221
- },
1222
- {
1223
- "epoch": 101.0,
1224
- "learning_rate": 6.834750376549792e-09,
1225
- "loss": 0.8181,
1226
- "step": 199000
1227
- },
1228
- {
1229
- "epoch": 101.01,
1230
- "learning_rate": 0.0,
1231
- "loss": 0.7458,
1232
- "step": 200000
1233
- },
1234
- {
1235
- "epoch": 101.01,
1236
- "eval_loss": 0.5318106412887573,
1237
- "eval_runtime": 11.6246,
1238
- "eval_samples_per_second": 430.123,
1239
- "eval_steps_per_second": 3.441,
1240
- "step": 200000
1241
- }
1242
- ],
1243
- "max_steps": 200000,
1244
- "num_train_epochs": 9223372036854775807,
1245
- "total_flos": 3.370355760109191e+18,
1246
- "trial_name": null,
1247
- "trial_params": null
1248
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:86726c67413e261308d81904cccb8d25c89f08109e07bb77beb2dc05715d3f64
3
- size 3439
 
 
 
 
runs/Feb09_18-25-12_t1v-n-d0b02cf6-w-0/events.out.tfevents.1675967131.t1v-n-d0b02cf6-w-0.744846.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:127eaa82b91c466f2b330c90a5364adbc6da1253a3a87bddf8c34ca58f876170
3
- size 36836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:686c6b2d8e82a5c4dcafce89bbadaec1b95a53fe58ea7b8159766a9ec2ce8b33
3
+ size 37196