ncbateman commited on
Commit
3bf7807
·
verified ·
1 Parent(s): a5b40a3

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:303d3808899ee31f18d353a81aee545e536a2dcd4cbf6b967519fa2c85e216cf
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a71f452e39be42f226077fc02ef1d9cd35fb105385d0893e5923d2ba4da0414
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf82ca0d1c2a8b78b728f10513331085b2fa3972fe291e89f4a5f25e18d40d82
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:603fc44dc0dcc7e55622ab39fe350be7bf496843a61f644202a9539169eb455c
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85797b6736a862291678baca072e183198938fa2e531b073954ae497386d1e69
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c8920f2a4268bc4fce4895714a93f0d42f4b23ede61ecccf586bc2a1a8d9321
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afbdbed6f103600bb101032f12d2fcf21f68d620e7d74db4f36c50456df69b7e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ad7fafc1af8d86c13379f526e0cfbac632a863b9b0610d9ebb41405c99da8a8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3817534778388871,
5
  "eval_steps": 386,
6
- "global_step": 295,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2080,6 +2080,41 @@
2080
  "learning_rate": 9.898317912544536e-05,
2081
  "loss": 0.8985,
2082
  "step": 295
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2083
  }
2084
  ],
2085
  "logging_steps": 1,
@@ -2099,7 +2134,7 @@
2099
  "attributes": {}
2100
  }
2101
  },
2102
- "total_flos": 3.2979146564960256e+17,
2103
  "train_batch_size": 4,
2104
  "trial_name": null,
2105
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.38822387576835976,
5
  "eval_steps": 386,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2080
  "learning_rate": 9.898317912544536e-05,
2081
  "loss": 0.8985,
2082
  "step": 295
2083
+ },
2084
+ {
2085
+ "epoch": 0.38304755742478164,
2086
+ "grad_norm": 0.8654418587684631,
2087
+ "learning_rate": 9.897489014490553e-05,
2088
+ "loss": 0.8752,
2089
+ "step": 296
2090
+ },
2091
+ {
2092
+ "epoch": 0.38434163701067614,
2093
+ "grad_norm": 0.7863161563873291,
2094
+ "learning_rate": 9.896656786597535e-05,
2095
+ "loss": 1.0138,
2096
+ "step": 297
2097
+ },
2098
+ {
2099
+ "epoch": 0.3856357165965707,
2100
+ "grad_norm": 0.8346667885780334,
2101
+ "learning_rate": 9.895821229431323e-05,
2102
+ "loss": 0.94,
2103
+ "step": 298
2104
+ },
2105
+ {
2106
+ "epoch": 0.3869297961824652,
2107
+ "grad_norm": 0.9577547907829285,
2108
+ "learning_rate": 9.894982343560016e-05,
2109
+ "loss": 0.9967,
2110
+ "step": 299
2111
+ },
2112
+ {
2113
+ "epoch": 0.38822387576835976,
2114
+ "grad_norm": 0.7633039951324463,
2115
+ "learning_rate": 9.894140129553981e-05,
2116
+ "loss": 0.8469,
2117
+ "step": 300
2118
  }
2119
  ],
2120
  "logging_steps": 1,
 
2134
  "attributes": {}
2135
  }
2136
  },
2137
+ "total_flos": 3.353811515080704e+17,
2138
  "train_batch_size": 4,
2139
  "trial_name": null,
2140
  "trial_params": null