ncbateman commited on
Commit
b67a03f
·
verified ·
1 Parent(s): 97e5238

Training in progress, step 425, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bda3973802d4dbe7e1febacfe2285c1ad74a69704c82dcd78446562f9a1926d
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ebdb29d9486a130aa0643f46ee0a0c62804d066e5e565482737346091c91a4f
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:946ca5a242b732a08489e3c9f5009fccf9316aa3c64052f3c123a7cbf199b27c
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2e999ae9fca5af619ba5b1c0aa8c6c5128c6255b22b10d7fa7e41d1f7ed2f89
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:594ae8f292448e23ba6c3e26a64c1c1ada41d262255da9a2d6d6797b9bd3a554
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1f4bee02a2423f6bf461638ccfb451ce039046573115c43bd8f40b32464c3ba
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccc44ae533ab5cb598c5cab350c6c0e00c24c2118f4dfe0f5a4518a727f56b8d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af5756a3003f956101e6e090da18ea2a94d3b04f61ed1d4e64424da2405b4032
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5435134260757036,
5
  "eval_steps": 386,
6
- "global_step": 420,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2963,6 +2963,41 @@
2963
  "learning_rate": 9.769101037869187e-05,
2964
  "loss": 0.9612,
2965
  "step": 420
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2966
  }
2967
  ],
2968
  "logging_steps": 1,
@@ -2982,7 +3017,7 @@
2982
  "attributes": {}
2983
  }
2984
  },
2985
- "total_flos": 4.6953361211129856e+17,
2986
  "train_batch_size": 4,
2987
  "trial_name": null,
2988
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5499838240051763,
5
  "eval_steps": 386,
6
+ "global_step": 425,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2963
  "learning_rate": 9.769101037869187e-05,
2964
  "loss": 0.9612,
2965
  "step": 420
2966
+ },
2967
+ {
2968
+ "epoch": 0.5448075056615982,
2969
+ "grad_norm": 0.7557950615882874,
2970
+ "learning_rate": 9.767861010334962e-05,
2971
+ "loss": 0.9965,
2972
+ "step": 421
2973
+ },
2974
+ {
2975
+ "epoch": 0.5461015852474927,
2976
+ "grad_norm": 0.812099814414978,
2977
+ "learning_rate": 9.766617741096746e-05,
2978
+ "loss": 0.8824,
2979
+ "step": 422
2980
+ },
2981
+ {
2982
+ "epoch": 0.5473956648333873,
2983
+ "grad_norm": 0.8438544273376465,
2984
+ "learning_rate": 9.765371230999843e-05,
2985
+ "loss": 0.8852,
2986
+ "step": 423
2987
+ },
2988
+ {
2989
+ "epoch": 0.5486897444192818,
2990
+ "grad_norm": 0.7006101608276367,
2991
+ "learning_rate": 9.764121480891765e-05,
2992
+ "loss": 0.808,
2993
+ "step": 424
2994
+ },
2995
+ {
2996
+ "epoch": 0.5499838240051763,
2997
+ "grad_norm": 0.7312132716178894,
2998
+ "learning_rate": 9.76286849162223e-05,
2999
+ "loss": 0.7964,
3000
+ "step": 425
3001
  }
3002
  ],
3003
  "logging_steps": 1,
 
3017
  "attributes": {}
3018
  }
3019
  },
3020
+ "total_flos": 4.751232979697664e+17,
3021
  "train_batch_size": 4,
3022
  "trial_name": null,
3023
  "trial_params": null