bhuvanmdev commited on
Commit
61c5dc8
1 Parent(s): e042934

Training in progress, step 1160, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ccf5782593e66412849cf0331f694759fa6cadfe866582a92dc1f5db7cfb19c
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b0dd6d9eecd111fc6fbe2ea756eb3599bab074e1522852ef4565a8d0ad6a8c8
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33ace10175162fbb0251f3a59624654383e563ccfa7daaa8f8a330925f01cfae
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:311a37a787f8e244031ec691411ba1c79ba76c28a8310c7bb68b2848e9a6c30e
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db963a2a49fb5002a715398d59757b5b39bdbc3ac8e778ee4bf9108f72ec61a1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:347b707ba8c2977c0e73867f80277a74052c00e5d1c0dbac8e14d68e38fd054c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76c7c7c62106d50e468f60a02081ed9ee14be8c90f56cb0a1ed1c7569018541d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e645a96d4cb253e4a9c4e19eba1a73b1611a46e51a63b85e8070050f71348c7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.407871198568873,
5
  "eval_steps": 500,
6
- "global_step": 1140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -919,14 +919,30 @@
919
  "loss": 0.4247,
920
  "num_input_tokens_seen": 766054,
921
  "step": 1140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
922
  }
923
  ],
924
  "logging_steps": 10,
925
  "max_steps": 2795,
926
- "num_input_tokens_seen": 766054,
927
  "num_train_epochs": 1,
928
  "save_steps": 20,
929
- "total_flos": 1.7225849223032832e+16,
930
  "train_batch_size": 1,
931
  "trial_name": null,
932
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4150268336314848,
5
  "eval_steps": 500,
6
+ "global_step": 1160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
919
  "loss": 0.4247,
920
  "num_input_tokens_seen": 766054,
921
  "step": 1140
922
+ },
923
+ {
924
+ "epoch": 0.41144901610017887,
925
+ "grad_norm": 0.322694331407547,
926
+ "learning_rate": 0.00011771019677996423,
927
+ "loss": 0.416,
928
+ "num_input_tokens_seen": 772192,
929
+ "step": 1150
930
+ },
931
+ {
932
+ "epoch": 0.4150268336314848,
933
+ "grad_norm": 0.4780764877796173,
934
+ "learning_rate": 0.00011699463327370303,
935
+ "loss": 0.3937,
936
+ "num_input_tokens_seen": 778619,
937
+ "step": 1160
938
  }
939
  ],
940
  "logging_steps": 10,
941
  "max_steps": 2795,
942
+ "num_input_tokens_seen": 778619,
943
  "num_train_epochs": 1,
944
  "save_steps": 20,
945
+ "total_flos": 1.7508391701092352e+16,
946
  "train_batch_size": 1,
947
  "trial_name": null,
948
  "trial_params": null