TachyHealthResearch committed on
Commit
29032c4
1 Parent(s): e2188a8

Training in progress, step 85, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e539c7d29f28e7bf3189f97535848aefceb6fab6557519f106d8367fed09c89d
3
  size 75012288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d93f3d709b62a0801581eb275f1cf2b02019d04bcb1a08b2885550214f5215c5
3
  size 75012288
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e9be4e2e8066cfdf9173a341feef6dfa163131c2366c0ba34259b7eee259d0d
3
  size 38034724
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84381402825991379f5ff82e4a69d8a8b4f9fd4188f826e7d8e70bc4266421be
3
  size 38034724
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bf616fc8f0f1afbaf078532cbb81e411d5e1a4c86d97ff1aad2f218d94ba7f6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a03205bd2befa025d120b19664f9407743b66ea492f2d94e9cbbdfbf2f7a9336
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f42057680a5702f6f5a4011b02db74a01965e38f5a26c223b3885c3184ec6f0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a7277563b68f0a25594bd8f04a2297ddade7a93c1062f094bae84be2463e566
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.026,
5
  "eval_steps": 1000,
6
- "global_step": 65,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -397,13 +397,133 @@
397
  "learning_rate": 0.00011564625850340137,
398
  "loss": 1.7418,
399
  "step": 65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  }
401
  ],
402
  "logging_steps": 1,
403
  "max_steps": 150,
404
  "num_train_epochs": 1,
405
  "save_steps": 5,
406
- "total_flos": 3125581151207424.0,
407
  "trial_name": null,
408
  "trial_params": null
409
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.034,
5
  "eval_steps": 1000,
6
+ "global_step": 85,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
397
  "learning_rate": 0.00011564625850340137,
398
  "loss": 1.7418,
399
  "step": 65
400
+ },
401
+ {
402
+ "epoch": 0.03,
403
+ "learning_rate": 0.00011428571428571428,
404
+ "loss": 2.2,
405
+ "step": 66
406
+ },
407
+ {
408
+ "epoch": 0.03,
409
+ "learning_rate": 0.00011292517006802721,
410
+ "loss": 1.8804,
411
+ "step": 67
412
+ },
413
+ {
414
+ "epoch": 0.03,
415
+ "learning_rate": 0.00011156462585034013,
416
+ "loss": 1.8546,
417
+ "step": 68
418
+ },
419
+ {
420
+ "epoch": 0.03,
421
+ "learning_rate": 0.00011020408163265306,
422
+ "loss": 1.9946,
423
+ "step": 69
424
+ },
425
+ {
426
+ "epoch": 0.03,
427
+ "learning_rate": 0.000108843537414966,
428
+ "loss": 1.9766,
429
+ "step": 70
430
+ },
431
+ {
432
+ "epoch": 0.03,
433
+ "learning_rate": 0.00010748299319727892,
434
+ "loss": 2.5233,
435
+ "step": 71
436
+ },
437
+ {
438
+ "epoch": 0.03,
439
+ "learning_rate": 0.00010612244897959185,
440
+ "loss": 2.1251,
441
+ "step": 72
442
+ },
443
+ {
444
+ "epoch": 0.03,
445
+ "learning_rate": 0.00010476190476190477,
446
+ "loss": 1.6734,
447
+ "step": 73
448
+ },
449
+ {
450
+ "epoch": 0.03,
451
+ "learning_rate": 0.0001034013605442177,
452
+ "loss": 1.7247,
453
+ "step": 74
454
+ },
455
+ {
456
+ "epoch": 0.03,
457
+ "learning_rate": 0.00010204081632653062,
458
+ "loss": 1.7936,
459
+ "step": 75
460
+ },
461
+ {
462
+ "epoch": 0.03,
463
+ "learning_rate": 0.00010068027210884355,
464
+ "loss": 2.1881,
465
+ "step": 76
466
+ },
467
+ {
468
+ "epoch": 0.03,
469
+ "learning_rate": 9.931972789115646e-05,
470
+ "loss": 2.1806,
471
+ "step": 77
472
+ },
473
+ {
474
+ "epoch": 0.03,
475
+ "learning_rate": 9.79591836734694e-05,
476
+ "loss": 1.7475,
477
+ "step": 78
478
+ },
479
+ {
480
+ "epoch": 0.03,
481
+ "learning_rate": 9.659863945578231e-05,
482
+ "loss": 1.8856,
483
+ "step": 79
484
+ },
485
+ {
486
+ "epoch": 0.03,
487
+ "learning_rate": 9.523809523809524e-05,
488
+ "loss": 1.9571,
489
+ "step": 80
490
+ },
491
+ {
492
+ "epoch": 0.03,
493
+ "learning_rate": 9.387755102040817e-05,
494
+ "loss": 1.773,
495
+ "step": 81
496
+ },
497
+ {
498
+ "epoch": 0.03,
499
+ "learning_rate": 9.25170068027211e-05,
500
+ "loss": 2.0793,
501
+ "step": 82
502
+ },
503
+ {
504
+ "epoch": 0.03,
505
+ "learning_rate": 9.115646258503402e-05,
506
+ "loss": 1.6465,
507
+ "step": 83
508
+ },
509
+ {
510
+ "epoch": 0.03,
511
+ "learning_rate": 8.979591836734695e-05,
512
+ "loss": 2.3302,
513
+ "step": 84
514
+ },
515
+ {
516
+ "epoch": 0.03,
517
+ "learning_rate": 8.843537414965987e-05,
518
+ "loss": 1.9541,
519
+ "step": 85
520
  }
521
  ],
522
  "logging_steps": 1,
523
  "max_steps": 150,
524
  "num_train_epochs": 1,
525
  "save_steps": 5,
526
+ "total_flos": 4131013396463616.0,
527
  "trial_name": null,
528
  "trial_params": null
529
  }