bhuvanmdev committed on
Commit
36fe73a
·
verified ·
1 Parent(s): af166ef

Training in progress, step 1120, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4d69499c5af363db9c69800142daa202a40720097dcebdb876bc14cde7f5d13
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93756c662435c89f0b33aea427588c404b58d5be37cf76c3f9e4bc0ff1bd939a
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47615466b0123bdcca71dd3dc26a2fef0b8c8f7445ddb63eaee76e7d029bd403
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a88dc8585ef97f13c62f73031c2660812fe3125beba7cb9ca7b650ca2dd9be0e
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:133aae226bbaac66725e376492ee3f542023abc66dd7536ee6fe5913c3f55ef4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10612cfe26142bef77beb908186e256524851aa5a76ff5646d1e0e1c8e9ff68c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:050a05f863436bace20199741dad316727f622e89a1193b9d3f3a77dd7cb7646
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:827f3733c0f4c8564e6bc69cab6741789e52e80ec29e9f5a0f4d2f148dfac06d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3935599284436494,
5
  "eval_steps": 500,
6
- "global_step": 1100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -887,14 +887,30 @@
887
  "loss": 0.4208,
888
  "num_input_tokens_seen": 739112,
889
  "step": 1100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
890
  }
891
  ],
892
  "logging_steps": 10,
893
  "max_steps": 2795,
894
- "num_input_tokens_seen": 739112,
895
  "num_train_epochs": 1,
896
  "save_steps": 20,
897
- "total_flos": 1.6620018785796096e+16,
898
  "train_batch_size": 1,
899
  "trial_name": null,
900
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4007155635062612,
5
  "eval_steps": 500,
6
+ "global_step": 1120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
887
  "loss": 0.4208,
888
  "num_input_tokens_seen": 739112,
889
  "step": 1100
890
+ },
891
+ {
892
+ "epoch": 0.39713774597495527,
893
+ "grad_norm": 0.3873242437839508,
894
+ "learning_rate": 0.00012057245080500895,
895
+ "loss": 0.3798,
896
+ "num_input_tokens_seen": 744478,
897
+ "step": 1110
898
+ },
899
+ {
900
+ "epoch": 0.4007155635062612,
901
+ "grad_norm": 0.31334424018859863,
902
+ "learning_rate": 0.00011985688729874778,
903
+ "loss": 0.4179,
904
+ "num_input_tokens_seen": 753315,
905
+ "step": 1120
906
  }
907
  ],
908
  "logging_steps": 10,
909
  "max_steps": 2795,
910
+ "num_input_tokens_seen": 753315,
911
  "num_train_epochs": 1,
912
  "save_steps": 20,
913
+ "total_flos": 1.693939409943552e+16,
914
  "train_batch_size": 1,
915
  "trial_name": null,
916
  "trial_params": null