bhuvanmdev commited on
Commit
6883826
·
verified ·
1 Parent(s): ebf43ee

Training in progress, step 1560, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b3ff122300a18d57c853e25e590e16786c7ad419f48524c7cbf11ccb071c190
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:534add263f0ab1fce24fa011018eefdd2dcddfad7e6b3e3167b8b88e43d1d335
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:636171340db7557397e62ad8d25bdf32a718d2d9d58de677060e6a51a635f297
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aae8e9e48314c3831c9e9b379cf12dda16febba948f23260c667cfcefdb811cd
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7156da34db6a913e161eace9419c4bd917f16d306983c0686b60a669b6fb753
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2490d49251dc1bfa591e0e3acdc7c7d22ca5de91839e3e61e572022e72d3d8d2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea3157897b0fd7796a4bb8ecade679914447a9babe2f9fec4f85adecca7e3f9d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a759d59fcc4923d52863d19dcf59b3835205ae6c3c6035bc1e96d3a9ecd6ff0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5509838998211091,
5
  "eval_steps": 500,
6
- "global_step": 1540,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1239,14 +1239,30 @@
1239
  "loss": 0.4164,
1240
  "num_input_tokens_seen": 1035252,
1241
  "step": 1540
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1242
  }
1243
  ],
1244
  "logging_steps": 10,
1245
  "max_steps": 2795,
1246
- "num_input_tokens_seen": 1035252,
1247
  "num_train_epochs": 1,
1248
  "save_steps": 20,
1249
- "total_flos": 2.3279161599369216e+16,
1250
  "train_batch_size": 1,
1251
  "trial_name": null,
1252
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5581395348837209,
5
  "eval_steps": 500,
6
+ "global_step": 1560,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1239
  "loss": 0.4164,
1240
  "num_input_tokens_seen": 1035252,
1241
  "step": 1540
1242
+ },
1243
+ {
1244
+ "epoch": 0.554561717352415,
1245
+ "grad_norm": 0.27858543395996094,
1246
+ "learning_rate": 8.9087656529517e-05,
1247
+ "loss": 0.3976,
1248
+ "num_input_tokens_seen": 1041187,
1249
+ "step": 1550
1250
+ },
1251
+ {
1252
+ "epoch": 0.5581395348837209,
1253
+ "grad_norm": 0.34940874576568604,
1254
+ "learning_rate": 8.837209302325582e-05,
1255
+ "loss": 0.3889,
1256
+ "num_input_tokens_seen": 1047255,
1257
+ "step": 1560
1258
  }
1259
  ],
1260
  "logging_steps": 10,
1261
  "max_steps": 2795,
1262
+ "num_input_tokens_seen": 1047255,
1263
  "num_train_epochs": 1,
1264
  "save_steps": 20,
1265
+ "total_flos": 2.354906668207104e+16,
1266
  "train_batch_size": 1,
1267
  "trial_name": null,
1268
  "trial_params": null