ncbateman commited on
Commit
89b259b
·
verified ·
1 Parent(s): 796dd9a

Training in progress, step 490, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47a6b59d01849a163702b8e50607c498745112dc6ced09ce72986bc17ec447c5
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ca3af260a573b461cd342f7bf7afa505c4101cdfcb62508cb4679571ecbc02c
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c943a007ca820031ad467134581d9a1d6d566dbef8d77e1cbb5e854f9dbe6f0
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42b428390e9c7047edbe5f8c842a6e6a63099594ace45e1079150202b30097dd
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2c2f577326b2b04f054023f26d112b121ac8ff4bf7c93ae38255948cc3a2c22
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d4663104f7f8c9aa215e9202234c5afb320a2473b8dc4114eff264e4c02057b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04f4b94f1a44c59895f464174a55ad4567f9ad6cb9a5184c25fed320934f9e0d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c388993843f0e58c8f91775a4c14074f5b54c4aade244b7bc036cc4bac2f6b60
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6276285991588483,
5
  "eval_steps": 386,
6
- "global_step": 485,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3418,6 +3418,41 @@
3418
  "learning_rate": 9.681794913852746e-05,
3419
  "loss": 0.989,
3420
  "step": 485
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3421
  }
3422
  ],
3423
  "logging_steps": 1,
@@ -3437,7 +3472,7 @@
3437
  "attributes": {}
3438
  }
3439
  },
3440
- "total_flos": 5.421995282713805e+17,
3441
  "train_batch_size": 4,
3442
  "trial_name": null,
3443
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.634098997088321,
5
  "eval_steps": 386,
6
+ "global_step": 490,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3418
  "learning_rate": 9.681794913852746e-05,
3419
  "loss": 0.989,
3420
  "step": 485
3421
+ },
3422
+ {
3423
+ "epoch": 0.6289226787447428,
3424
+ "grad_norm": 0.7921927571296692,
3425
+ "learning_rate": 9.68034602964518e-05,
3426
+ "loss": 0.8748,
3427
+ "step": 486
3428
+ },
3429
+ {
3430
+ "epoch": 0.6302167583306373,
3431
+ "grad_norm": 0.8911926746368408,
3432
+ "learning_rate": 9.678893963235704e-05,
3433
+ "loss": 1.0641,
3434
+ "step": 487
3435
+ },
3436
+ {
3437
+ "epoch": 0.6315108379165318,
3438
+ "grad_norm": 0.8004783391952515,
3439
+ "learning_rate": 9.677438715611586e-05,
3440
+ "loss": 0.9681,
3441
+ "step": 488
3442
+ },
3443
+ {
3444
+ "epoch": 0.6328049175024264,
3445
+ "grad_norm": 0.9230672717094421,
3446
+ "learning_rate": 9.675980287762263e-05,
3447
+ "loss": 0.9226,
3448
+ "step": 489
3449
+ },
3450
+ {
3451
+ "epoch": 0.634098997088321,
3452
+ "grad_norm": 0.7298123240470886,
3453
+ "learning_rate": 9.67451868067933e-05,
3454
+ "loss": 0.7876,
3455
+ "step": 490
3456
  }
3457
  ],
3458
  "logging_steps": 1,
 
3472
  "attributes": {}
3473
  }
3474
  },
3475
+ "total_flos": 5.477892141298483e+17,
3476
  "train_batch_size": 4,
3477
  "trial_name": null,
3478
  "trial_params": null