ncbateman committed
Commit
2690518
1 Parent(s): 5ae408e

Training in progress, step 360, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f630f599375b6f9e8b3d899cdec9f7ee694632d31c165a103037286f91bdb40d
+oid sha256:3aa9893a84cb0e84de34ca04383d58aed137598edfa5d6b72d961fad0f27735d
 size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f881a0164766ec0cdc916551a149a6e90b7c7bff3e56a1b2314770aebd4e485f
+oid sha256:1b961e1395983b9869fa73b1df3863725b49dac36d70d0a4af83933dde9e72f8
 size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e01ddaf507ffae0b11987877fc582c16ae0f24690bb23d34f7cf537c451f099
+oid sha256:ea983627119c27e833d8278aa2759c5160e60ebbfa2503f097a57924edc56a60
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3758bcabbf6a982afc57e0ef5b43cd5da58e8541454f29024c32acf06fb153c5
+oid sha256:3808f18a91c29579db27addbed742cebf6b7639d12834c99b15b5738ffd35626
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.45292785506308636,
+  "epoch": 0.4658686509220317,
   "eval_steps": 386,
-  "global_step": 350,
+  "global_step": 360,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2465,6 +2465,76 @@
       "learning_rate": 9.847799323991234e-05,
       "loss": 1.077,
       "step": 350
+    },
+    {
+      "epoch": 0.4542219346489809,
+      "grad_norm": 0.9179444909095764,
+      "learning_rate": 9.8467881843081e-05,
+      "loss": 0.8353,
+      "step": 351
+    },
+    {
+      "epoch": 0.4555160142348754,
+      "grad_norm": 0.855802595615387,
+      "learning_rate": 9.845773749257804e-05,
+      "loss": 0.9764,
+      "step": 352
+    },
+    {
+      "epoch": 0.45681009382077,
+      "grad_norm": 0.9086332321166992,
+      "learning_rate": 9.844756019530066e-05,
+      "loss": 1.0526,
+      "step": 353
+    },
+    {
+      "epoch": 0.4581041734066645,
+      "grad_norm": 0.890271782875061,
+      "learning_rate": 9.843734995816848e-05,
+      "loss": 0.8905,
+      "step": 354
+    },
+    {
+      "epoch": 0.45939825299255904,
+      "grad_norm": 0.7878096699714661,
+      "learning_rate": 9.842710678812351e-05,
+      "loss": 0.8706,
+      "step": 355
+    },
+    {
+      "epoch": 0.4606923325784536,
+      "grad_norm": 0.9886014461517334,
+      "learning_rate": 9.841683069213017e-05,
+      "loss": 0.9579,
+      "step": 356
+    },
+    {
+      "epoch": 0.4619864121643481,
+      "grad_norm": 0.8265432119369507,
+      "learning_rate": 9.840652167717526e-05,
+      "loss": 0.8528,
+      "step": 357
+    },
+    {
+      "epoch": 0.46328049175024266,
+      "grad_norm": 0.7354372143745422,
+      "learning_rate": 9.839617975026793e-05,
+      "loss": 0.775,
+      "step": 358
+    },
+    {
+      "epoch": 0.46457457133613717,
+      "grad_norm": 0.8311409950256348,
+      "learning_rate": 9.838580491843976e-05,
+      "loss": 1.0374,
+      "step": 359
+    },
+    {
+      "epoch": 0.4658686509220317,
+      "grad_norm": 0.8180521130561829,
+      "learning_rate": 9.837539718874464e-05,
+      "loss": 0.818,
+      "step": 360
     }
   ],
   "logging_steps": 1,
@@ -2484,7 +2554,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.912780100927488e+17,
+  "total_flos": 4.024573818096845e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null