seanmor5 committed on
Commit
147dd81
1 Parent(s): 3abaaec

Next checkpoint

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75fd9d486b46d64b848cc86c11f7c4b13bef4df29a52e38ef2ddcf662cae5afa
3
  size 609389712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ad439aaf5874d60533bb5051bf6acffc2930733aa6dd485866dc6c59a9bdaed
3
  size 609389712
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f9d3909cf1844576d913e65e12ea6c9803b84d60d503e1c10e8644665ef28b3
3
  size 43127132
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff5e52256242d1b94c0b8d00e3ca518af4c033014b02970af183aa3537805bc
3
  size 43127132
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d2c86a82ac0544796339d1ffad39305810835aadba072c67e2a4057f9b2590a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:429b3c2a10fda7e387ef37f4109ca176f0c7db526d8d6c6307e136f1be354272
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76d56d87344e9a2362ceb0c3f55bb4d805fbfc4b13b2b61dd202aac4f9d1849f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92e19f6ca67131719fb6829118206971ea3850a7bbb4adfc8a62cee3bb0ae50d
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7356805044666316,
5
  "eval_steps": 500,
6
- "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -427,6 +427,114 @@
427
  "learning_rate": 6.613005050505051e-06,
428
  "loss": 3.4486,
429
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  }
431
  ],
432
  "logging_steps": 50,
@@ -434,7 +542,7 @@
434
  "num_input_tokens_seen": 0,
435
  "num_train_epochs": 1,
436
  "save_steps": 100,
437
- "total_flos": 8.593871062478193e+18,
438
  "train_batch_size": 8,
439
  "trial_name": null,
440
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9248554913294798,
5
  "eval_steps": 500,
6
+ "global_step": 4400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
427
  "learning_rate": 6.613005050505051e-06,
428
  "loss": 3.4486,
429
  "step": 3500
430
+ },
431
+ {
432
+ "epoch": 0.75,
433
+ "learning_rate": 6.349957912457913e-06,
434
+ "loss": 3.4718,
435
+ "step": 3550
436
+ },
437
+ {
438
+ "epoch": 0.76,
439
+ "learning_rate": 6.086910774410775e-06,
440
+ "loss": 3.4443,
441
+ "step": 3600
442
+ },
443
+ {
444
+ "epoch": 0.77,
445
+ "learning_rate": 5.823863636363636e-06,
446
+ "loss": 3.4849,
447
+ "step": 3650
448
+ },
449
+ {
450
+ "epoch": 0.78,
451
+ "learning_rate": 5.560816498316499e-06,
452
+ "loss": 3.4729,
453
+ "step": 3700
454
+ },
455
+ {
456
+ "epoch": 0.79,
457
+ "learning_rate": 5.297769360269361e-06,
458
+ "loss": 3.4664,
459
+ "step": 3750
460
+ },
461
+ {
462
+ "epoch": 0.8,
463
+ "learning_rate": 5.034722222222222e-06,
464
+ "loss": 3.4643,
465
+ "step": 3800
466
+ },
467
+ {
468
+ "epoch": 0.81,
469
+ "learning_rate": 4.7716750841750845e-06,
470
+ "loss": 3.4514,
471
+ "step": 3850
472
+ },
473
+ {
474
+ "epoch": 0.82,
475
+ "learning_rate": 4.508627946127946e-06,
476
+ "loss": 3.4661,
477
+ "step": 3900
478
+ },
479
+ {
480
+ "epoch": 0.83,
481
+ "learning_rate": 4.245580808080808e-06,
482
+ "loss": 3.4637,
483
+ "step": 3950
484
+ },
485
+ {
486
+ "epoch": 0.84,
487
+ "learning_rate": 3.98253367003367e-06,
488
+ "loss": 3.453,
489
+ "step": 4000
490
+ },
491
+ {
492
+ "epoch": 0.85,
493
+ "learning_rate": 3.7194865319865326e-06,
494
+ "loss": 3.4671,
495
+ "step": 4050
496
+ },
497
+ {
498
+ "epoch": 0.86,
499
+ "learning_rate": 3.456439393939394e-06,
500
+ "loss": 3.4439,
501
+ "step": 4100
502
+ },
503
+ {
504
+ "epoch": 0.87,
505
+ "learning_rate": 3.1933922558922558e-06,
506
+ "loss": 3.4504,
507
+ "step": 4150
508
+ },
509
+ {
510
+ "epoch": 0.88,
511
+ "learning_rate": 2.930345117845118e-06,
512
+ "loss": 3.4759,
513
+ "step": 4200
514
+ },
515
+ {
516
+ "epoch": 0.89,
517
+ "learning_rate": 2.66729797979798e-06,
518
+ "loss": 3.4356,
519
+ "step": 4250
520
+ },
521
+ {
522
+ "epoch": 0.9,
523
+ "learning_rate": 2.4042508417508416e-06,
524
+ "loss": 3.4618,
525
+ "step": 4300
526
+ },
527
+ {
528
+ "epoch": 0.91,
529
+ "learning_rate": 2.141203703703704e-06,
530
+ "loss": 3.463,
531
+ "step": 4350
532
+ },
533
+ {
534
+ "epoch": 0.92,
535
+ "learning_rate": 1.8781565656565657e-06,
536
+ "loss": 3.4524,
537
+ "step": 4400
538
  }
539
  ],
540
  "logging_steps": 50,
 
542
  "num_input_tokens_seen": 0,
543
  "num_train_epochs": 1,
544
  "save_steps": 100,
545
+ "total_flos": 1.0803018319304393e+19,
546
  "train_batch_size": 8,
547
  "trial_name": null,
548
  "trial_params": null