bhuvanmdev commited on
Commit
724c64c
1 Parent(s): a8cdaf7

Training in progress, step 1220, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a978d2ea4c2ef178cab56e079e834d41ad51771ce490d4892a323b4ec39be6be
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e8e5d6c8e35a31822eb871dc12fd823552b7a9eb9ab33cebd109faea9d529ed
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:469403812ebd6b0168f1cfd10dc9051127cd1573ac3bdf23aca7e0e5a8a0418b
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fd6895bed1fe4adfa252f7f5652144d6386071fff8967514a353129d7f88c54
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c4a6bdcee541f4bc227f467016d9bc346f4f8483027f6990cb0f3a9b7a1f71e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9866c1c14b91e373c14a64f4ded163a467e354a089a0218d545dc01da1c17aaa
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d836f79887d280f068e10ec79168f2ea91f57c6fbb35eab33b10757b0316f93b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57764528c21ae418357b945c42fcac06191f00649d4e1a2cbc30bb9cceaa547a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4293381037567084,
5
  "eval_steps": 500,
6
- "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -967,14 +967,30 @@
967
  "loss": 0.4032,
968
  "num_input_tokens_seen": 807607,
969
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
970
  }
971
  ],
972
  "logging_steps": 10,
973
  "max_steps": 2795,
974
- "num_input_tokens_seen": 807607,
975
  "num_train_epochs": 1,
976
  "save_steps": 20,
977
- "total_flos": 1.8160229453101056e+16,
978
  "train_batch_size": 1,
979
  "trial_name": null,
980
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4364937388193202,
5
  "eval_steps": 500,
6
+ "global_step": 1220,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
967
  "loss": 0.4032,
968
  "num_input_tokens_seen": 807607,
969
  "step": 1200
970
+ },
971
+ {
972
+ "epoch": 0.4329159212880143,
973
+ "grad_norm": 0.25886887311935425,
974
+ "learning_rate": 0.00011341681574239715,
975
+ "loss": 0.416,
976
+ "num_input_tokens_seen": 813956,
977
+ "step": 1210
978
+ },
979
+ {
980
+ "epoch": 0.4364937388193202,
981
+ "grad_norm": 0.336000919342041,
982
+ "learning_rate": 0.00011270125223613597,
983
+ "loss": 0.3817,
984
+ "num_input_tokens_seen": 821166,
985
+ "step": 1220
986
  }
987
  ],
988
  "logging_steps": 10,
989
  "max_steps": 2795,
990
+ "num_input_tokens_seen": 821166,
991
  "num_train_epochs": 1,
992
  "save_steps": 20,
993
+ "total_flos": 1.846512348095693e+16,
994
  "train_batch_size": 1,
995
  "trial_name": null,
996
  "trial_params": null