AndreaUnibo commited on
Commit
6c546da
·
verified ·
1 Parent(s): 5f2e561

Training in progress, step 119000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57312df35d1a759d1125b8cde191f19096f08bf7f3585c486f0ac69eddbc5b19
3
  size 6961088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37e1df7a1d5af563d39dac4475c64a25a3d966d1f87494a9c847fe983f166bd8
3
  size 6961088
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0edb8b443d3e74439d4e8af25da0dc55293b9cfa4bbf93848f5a4a068ae59ca
3
  size 3583418
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46b087cbcac1794815a736ae0610cb66f89b41ceefdbf2a98a7d64e4969a8e48
3
  size 3583418
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:088ef85c232e7d464d32fef0c355b7dfa446fac27a9e5b10b5535860b7e4c06b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a15c5c1eeb4142880599a7cf455fa16fb33ff35eaae83b03134bb5e6035bca7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45a9438c318fc6e63fadb9b0a23e4a1df10fa7a56afdc22fe2c5542c681f9eb0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e81870cc69b1580d1dc882ebfa95e380308fdb3a91bad7c4f448de107a49b57a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9752066115702479,
5
  "eval_steps": 1000,
6
- "global_step": 118000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2603,6 +2603,28 @@
2603
  "eval_samples_per_second": 6.882,
2604
  "eval_steps_per_second": 6.882,
2605
  "step": 118000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2606
  }
2607
  ],
2608
  "logging_steps": 500,
@@ -2610,7 +2632,7 @@
2610
  "num_input_tokens_seen": 0,
2611
  "num_train_epochs": 1,
2612
  "save_steps": 1000,
2613
- "total_flos": 3.000947490654474e+18,
2614
  "train_batch_size": 1,
2615
  "trial_name": null,
2616
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9834710743801653,
5
  "eval_steps": 1000,
6
+ "global_step": 119000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2603
  "eval_samples_per_second": 6.882,
2604
  "eval_steps_per_second": 6.882,
2605
  "step": 118000
2606
+ },
2607
+ {
2608
+ "epoch": 0.98,
2609
+ "grad_norm": 73.10523986816406,
2610
+ "learning_rate": 4.132231404958678e-07,
2611
+ "loss": 2.6807,
2612
+ "step": 118500
2613
+ },
2614
+ {
2615
+ "epoch": 0.98,
2616
+ "grad_norm": 17.360837936401367,
2617
+ "learning_rate": 3.3057851239669426e-07,
2618
+ "loss": 2.7351,
2619
+ "step": 119000
2620
+ },
2621
+ {
2622
+ "epoch": 0.98,
2623
+ "eval_loss": 2.436006546020508,
2624
+ "eval_runtime": 139.6881,
2625
+ "eval_samples_per_second": 6.887,
2626
+ "eval_steps_per_second": 6.887,
2627
+ "step": 119000
2628
  }
2629
  ],
2630
  "logging_steps": 500,
 
2632
  "num_input_tokens_seen": 0,
2633
  "num_train_epochs": 1,
2634
  "save_steps": 1000,
2635
+ "total_flos": 3.026405427716183e+18,
2636
  "train_batch_size": 1,
2637
  "trial_name": null,
2638
  "trial_params": null