ncbateman commited on
Commit
fef1a8f
1 Parent(s): a3b1f05

Training in progress, step 420, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3c853d3c99ad98cd0f1a3885e822275c0b7c1209b0bb534194841e45f77ccde
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bda3973802d4dbe7e1febacfe2285c1ad74a69704c82dcd78446562f9a1926d
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:674fe507fc4380e8cfbc66f2ade552e239f7263730b76fe4741c197838f048dd
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:946ca5a242b732a08489e3c9f5009fccf9316aa3c64052f3c123a7cbf199b27c
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:693ea3a43eaa5cd09102e82652754c22734bed378c16600849bbc7fa038d670b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:594ae8f292448e23ba6c3e26a64c1c1ada41d262255da9a2d6d6797b9bd3a554
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b210180cd4578e203d13e7831d41ab50e4f3ba110e1252d9dac7fb7d3a0c022
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccc44ae533ab5cb598c5cab350c6c0e00c24c2118f4dfe0f5a4518a727f56b8d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.537043028146231,
5
  "eval_steps": 386,
6
- "global_step": 415,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2928,6 +2928,41 @@
2928
  "learning_rate": 9.77525252054893e-05,
2929
  "loss": 0.6972,
2930
  "step": 415
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2931
  }
2932
  ],
2933
  "logging_steps": 1,
@@ -2947,7 +2982,7 @@
2947
  "attributes": {}
2948
  }
2949
  },
2950
- "total_flos": 4.639439262528307e+17,
2951
  "train_batch_size": 4,
2952
  "trial_name": null,
2953
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5435134260757036,
5
  "eval_steps": 386,
6
+ "global_step": 420,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2928
  "learning_rate": 9.77525252054893e-05,
2929
  "loss": 0.6972,
2930
  "step": 415
2931
+ },
2932
+ {
2933
+ "epoch": 0.5383371077321255,
2934
+ "grad_norm": 1.0071479082107544,
2935
+ "learning_rate": 9.774028714137133e-05,
2936
+ "loss": 0.9233,
2937
+ "step": 416
2938
+ },
2939
+ {
2940
+ "epoch": 0.5396311873180201,
2941
+ "grad_norm": 0.916771411895752,
2942
+ "learning_rate": 9.772801661827874e-05,
2943
+ "loss": 1.0072,
2944
+ "step": 417
2945
+ },
2946
+ {
2947
+ "epoch": 0.5409252669039146,
2948
+ "grad_norm": 0.8663278818130493,
2949
+ "learning_rate": 9.771571364455439e-05,
2950
+ "loss": 1.1011,
2951
+ "step": 418
2952
+ },
2953
+ {
2954
+ "epoch": 0.5422193464898091,
2955
+ "grad_norm": 0.7842355370521545,
2956
+ "learning_rate": 9.77033782285631e-05,
2957
+ "loss": 1.0382,
2958
+ "step": 419
2959
+ },
2960
+ {
2961
+ "epoch": 0.5435134260757036,
2962
+ "grad_norm": 0.8407487273216248,
2963
+ "learning_rate": 9.769101037869187e-05,
2964
+ "loss": 0.9612,
2965
+ "step": 420
2966
  }
2967
  ],
2968
  "logging_steps": 1,
 
2982
  "attributes": {}
2983
  }
2984
  },
2985
+ "total_flos": 4.6953361211129856e+17,
2986
  "train_batch_size": 4,
2987
  "trial_name": null,
2988
  "trial_params": null