ncbateman committed on
Commit
1f020cd
·
verified ·
1 Parent(s): fdc4839

Training in progress, step 725, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a4972d945aaf79f38a6385891c2c6bf16c9ab13d1387a9a314a9ae88e2af9d7
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59d4b20c39e023774f653c79696e7bf1a0895c40499204e0da55c8972f138d66
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:263b3898d405f513a3d751718a8e4459efbe49762bd2d59a834f0b0bd5f9c305
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e425df4cff4f5a833435a908df13e0e68fb91e068e65be0350970182c98532fe
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92e2bed38537091b1b6847ec894ec446f212f658d47a8eafa5ceedcd4769dcc4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cf257bb73c12dfb0ceb40ef8d7dab03fb8c678e0b5befc5cb019048e159abe6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa5e57bbef4d99689575267a9a61782e3d42e915f722a8a205895c6faf79e5c7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93cb95f675bf4560789b437cb4b6409914c75b59d4784dc1ea4e7bea55c15059
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9317373018440634,
5
  "eval_steps": 386,
6
- "global_step": 720,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5063,6 +5063,41 @@
5063
  "learning_rate": 9.256184469043851e-05,
5064
  "loss": 0.7484,
5065
  "step": 720
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5066
  }
5067
  ],
5068
  "logging_steps": 1,
@@ -5082,7 +5117,7 @@
5082
  "attributes": {}
5083
  }
5084
  },
5085
- "total_flos": 8.04914763619369e+17,
5086
  "train_batch_size": 4,
5087
  "trial_name": null,
5088
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.938207699773536,
5
  "eval_steps": 386,
6
+ "global_step": 725,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5063
  "learning_rate": 9.256184469043851e-05,
5064
  "loss": 0.7484,
5065
  "step": 720
5066
+ },
5067
+ {
5068
+ "epoch": 0.9330313814299579,
5069
+ "grad_norm": 0.7698187232017517,
5070
+ "learning_rate": 9.254019439590835e-05,
5071
+ "loss": 0.7563,
5072
+ "step": 721
5073
+ },
5074
+ {
5075
+ "epoch": 0.9343254610158525,
5076
+ "grad_norm": 0.8331950306892395,
5077
+ "learning_rate": 9.251851517798514e-05,
5078
+ "loss": 0.8826,
5079
+ "step": 722
5080
+ },
5081
+ {
5082
+ "epoch": 0.935619540601747,
5083
+ "grad_norm": 0.7643804550170898,
5084
+ "learning_rate": 9.24968070514087e-05,
5085
+ "loss": 1.0688,
5086
+ "step": 723
5087
+ },
5088
+ {
5089
+ "epoch": 0.9369136201876416,
5090
+ "grad_norm": 0.802943229675293,
5091
+ "learning_rate": 9.247507003093858e-05,
5092
+ "loss": 0.8872,
5093
+ "step": 724
5094
+ },
5095
+ {
5096
+ "epoch": 0.938207699773536,
5097
+ "grad_norm": 0.8977981209754944,
5098
+ "learning_rate": 9.245330413135395e-05,
5099
+ "loss": 0.8946,
5100
+ "step": 725
5101
  }
5102
  ],
5103
  "logging_steps": 1,
 
5117
  "attributes": {}
5118
  }
5119
  },
5120
+ "total_flos": 8.105044494778368e+17,
5121
  "train_batch_size": 4,
5122
  "trial_name": null,
5123
  "trial_params": null