ncbateman commited on
Commit
c2246d1
·
verified ·
1 Parent(s): b078178

Training in progress, step 575, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4584dc7b0cb76f460c8d711cb91e7a00115c4b8b6e4c5f64c026bab2b1ffec56
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dc3759cb3eadde8f65f5a3d7697045cdf5905ba99f1c6eaf2ca3a7844ad2aca
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e34737a84c54468cc2ddb658609e3abb120334092064eaaf03c2dd0f9ccf8bfa
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8685b80d11ca02ce5247ab56c8fdc4d82bb07cfe0c06ad76dd35d09f90963d57
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e31720c6202686d6b213f1d4f529251465f67b801085538ab4bd286d72ff5e4d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:530917c5e79645a47e31080d2f68465b75947e6f663e644f4e8388bf2a65638b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cac34b28e1ee591b304cbd862bb43f437e9105f534e9b2ad7a541d74d74af47f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95e7e7df729679e04b7adacc504f1466aebac28d86d657be5afa757859f96a7b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7376253639598835,
5
  "eval_steps": 386,
6
- "global_step": 570,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4013,6 +4013,41 @@
4013
  "learning_rate": 9.54738116487959e-05,
4014
  "loss": 0.8114,
4015
  "step": 570
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4016
  }
4017
  ],
4018
  "logging_steps": 1,
@@ -4032,7 +4067,7 @@
4032
  "attributes": {}
4033
  }
4034
  },
4035
- "total_flos": 6.372241878653338e+17,
4036
  "train_batch_size": 4,
4037
  "trial_name": null,
4038
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7440957618893562,
5
  "eval_steps": 386,
6
+ "global_step": 575,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4013
  "learning_rate": 9.54738116487959e-05,
4014
  "loss": 0.8114,
4015
  "step": 570
4016
+ },
4017
+ {
4018
+ "epoch": 0.7389194435457781,
4019
+ "grad_norm": 0.9224253296852112,
4020
+ "learning_rate": 9.545665530288612e-05,
4021
+ "loss": 0.8829,
4022
+ "step": 571
4023
+ },
4024
+ {
4025
+ "epoch": 0.7402135231316725,
4026
+ "grad_norm": 0.8720259666442871,
4027
+ "learning_rate": 9.543946805065992e-05,
4028
+ "loss": 0.9658,
4029
+ "step": 572
4030
+ },
4031
+ {
4032
+ "epoch": 0.7415076027175671,
4033
+ "grad_norm": 0.5992270708084106,
4034
+ "learning_rate": 9.542224990380304e-05,
4035
+ "loss": 0.8167,
4036
+ "step": 573
4037
+ },
4038
+ {
4039
+ "epoch": 0.7428016823034617,
4040
+ "grad_norm": 0.7595481276512146,
4041
+ "learning_rate": 9.540500087402222e-05,
4042
+ "loss": 0.9002,
4043
+ "step": 574
4044
+ },
4045
+ {
4046
+ "epoch": 0.7440957618893562,
4047
+ "grad_norm": 0.8269613981246948,
4048
+ "learning_rate": 9.538772097304521e-05,
4049
+ "loss": 0.8442,
4050
+ "step": 575
4051
  }
4052
  ],
4053
  "logging_steps": 1,
 
4067
  "attributes": {}
4068
  }
4069
  },
4070
+ "total_flos": 6.428138737238016e+17,
4071
  "train_batch_size": 4,
4072
  "trial_name": null,
4073
  "trial_params": null