ncbateman commited on
Commit
2f84df3
·
verified ·
1 Parent(s): 36e2118

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbef157b9600fdc30cd09681e635c3596d9d9f3a832f0d58792888176093d6e7
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79333448ea194b8955d28edd0dfdc2624a75062a6e2d6fa9c8d67763978b60c9
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16278e58faf54743e51c2cb86c8c7a4cef307e814c0ff1c955065184c7b3ffbf
3
  size 49846260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97aabbe19b13f77d29eac72afd9b2fec41c341d45c159475d1bbfd48fe26b6fb
3
  size 49846260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d0fa37673dbc0bf8164a593d75abf6e8c224c2ea3193e0f86621fb2aa8c6ed6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc5766dc22014545cb596a69098c894e4f30cee7366965d5aa7f46e78007cbc6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd1931e4016bf28b07346e79413c71e240f12f43909f7431de607b5c05407707
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:940d423240b39d966113615a1fba0e170b7aa70deeb57e606975b0bf165e01d8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.058233581365253966,
5
  "eval_steps": 386,
6
- "global_step": 45,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -330,6 +330,41 @@
330
  "learning_rate": 9e-05,
331
  "loss": 1.0146,
332
  "step": 45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  }
334
  ],
335
  "logging_steps": 1,
@@ -349,7 +384,7 @@
349
  "attributes": {}
350
  }
351
  },
352
- "total_flos": 5.030717272621056e+16,
353
  "train_batch_size": 4,
354
  "trial_name": null,
355
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.06470397929472663,
5
  "eval_steps": 386,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
330
  "learning_rate": 9e-05,
331
  "loss": 1.0146,
332
  "step": 45
333
+ },
334
+ {
335
+ "epoch": 0.059527660951148496,
336
+ "grad_norm": 1.169231653213501,
337
+ "learning_rate": 9.200000000000001e-05,
338
+ "loss": 1.1266,
339
+ "step": 46
340
+ },
341
+ {
342
+ "epoch": 0.06082174053704303,
343
+ "grad_norm": 0.9771779179573059,
344
+ "learning_rate": 9.4e-05,
345
+ "loss": 0.8351,
346
+ "step": 47
347
+ },
348
+ {
349
+ "epoch": 0.06211582012293756,
350
+ "grad_norm": 1.2849314212799072,
351
+ "learning_rate": 9.6e-05,
352
+ "loss": 1.1822,
353
+ "step": 48
354
+ },
355
+ {
356
+ "epoch": 0.0634098997088321,
357
+ "grad_norm": 1.023181676864624,
358
+ "learning_rate": 9.8e-05,
359
+ "loss": 0.9082,
360
+ "step": 49
361
+ },
362
+ {
363
+ "epoch": 0.06470397929472663,
364
+ "grad_norm": 1.135751724243164,
365
+ "learning_rate": 0.0001,
366
+ "loss": 0.9407,
367
+ "step": 50
368
  }
369
  ],
370
  "logging_steps": 1,
 
384
  "attributes": {}
385
  }
386
  },
387
+ "total_flos": 5.58968585846784e+16,
388
  "train_batch_size": 4,
389
  "trial_name": null,
390
  "trial_params": null