Rakhman16 committed · verified
Commit 37e31a5 · 1 Parent(s): 6b2182e

Training in progress, step 11500, checkpoint

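Commits titled "Training in progress, step N, checkpoint" are the kind produced automatically when a `transformers` `Trainer` pushes its periodic checkpoints to the Hub during a run. A minimal sketch of a configuration consistent with the values visible in `trainer_state.json` below (eval every 100 steps, logging every 50, batch size 8, output dir `./fine-tuned`); the save interval and Hub-push settings are assumptions, not taken from this repository:

```python
# Sketch of a TrainingArguments setup that would emit commits like this one.
# eval_steps, logging_steps, batch size, and output_dir match trainer_state.json;
# save_steps=500, push_to_hub, and load_best_model_at_end are assumptions.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./fine-tuned",         # matches the "best_model_checkpoint" prefix
    per_device_train_batch_size=8,     # matches "train_batch_size": 8
    eval_strategy="steps",             # "evaluation_strategy" in older transformers
    eval_steps=100,                    # matches "eval_steps": 100
    logging_steps=50,                  # matches "logging_steps": 50
    save_strategy="steps",
    save_steps=500,                    # assumption: checkpoints appear at 500-step intervals
    load_best_model_at_end=True,       # assumption: why "best_model_checkpoint" is tracked
    push_to_hub=True,                  # pushes each saved checkpoint as a commit like this one
)
# These arguments are then passed to a Trainer along with the model and datasets,
# and trainer.train() produces the "last-checkpoint" files updated in this commit.
```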
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d1700752c2db1a8ed2781ef8eb83ccbdefb94d318514548a2b365e11848312b
+oid sha256:d5bdcafd7e54ccfb54edd3811725f1984a354498d7153f053c3cdf7217ec9db4
 size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e790c26c26a08130bd738010082748750c57d39aaed35ef4dac16e9b6a4bc9b5
+oid sha256:78e8caa6e190a79f2c47539fc7bdacd9073500128a21f89148735b0653b16ff7
 size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0520387b87f2fd0680f5469a4542550352967458dc2b683f1fb39755e8c611b
+oid sha256:b3d22d79498e9a42f44dadc83e0bfe26c6297fe6f1a1339b834940e632f50f9f
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0684752f700925138067f1ce27fd9b76f34d8e92bf412925f6cf2367b06e7145
+oid sha256:9c19fddbbe59fe77d9c9931e2dfec577f342f095ed5843c735b486fb4141326d
 size 1064
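The four files above are tracked through Git LFS, so the repository only versions small pointer files: each commit swaps the recorded `oid sha256:` and `size` for those of the new checkpoint blobs. A minimal sketch (assuming the large files have already been pulled locally) of checking that a downloaded file matches its pointer; the values are taken from the model.safetensors pointer above, but the script itself is illustrative and not part of this repository:

```python
# Verify a Git LFS object against the oid/size recorded in its pointer file.
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file so multi-GB checkpoints don't need to fit in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

path = Path("last-checkpoint/model.safetensors")
expected_oid = "d5bdcafd7e54ccfb54edd3811725f1984a354498d7153f053c3cdf7217ec9db4"
expected_size = 891558696

assert path.stat().st_size == expected_size, "size does not match the LFS pointer"
assert sha256_of(path) == expected_oid, "sha256 does not match the LFS pointer"
print("model.safetensors matches its LFS pointer")
```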
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.1035689190030098,
   "best_model_checkpoint": "./fine-tuned/checkpoint-11000",
-  "epoch": 1.932027750944059,
+  "epoch": 2.019847194168789,
   "eval_steps": 100,
-  "global_step": 11000,
+  "global_step": 11500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2427,6 +2427,116 @@
       "eval_samples_per_second": 25.458,
       "eval_steps_per_second": 3.185,
       "step": 11000
+    },
+    {
+      "epoch": 1.9408096952665321,
+      "grad_norm": 15381.65625,
+      "learning_rate": 1.5442648867029686e-05,
+      "loss": 0.104,
+      "step": 11050
+    },
+    {
+      "epoch": 1.9495916395890052,
+      "grad_norm": 11851.7646484375,
+      "learning_rate": 1.5376778499912176e-05,
+      "loss": 0.1021,
+      "step": 11100
+    },
+    {
+      "epoch": 1.9495916395890052,
+      "eval_loss": 0.1033787652850151,
+      "eval_runtime": 175.4375,
+      "eval_samples_per_second": 25.422,
+      "eval_steps_per_second": 3.181,
+      "step": 11100
+    },
+    {
+      "epoch": 1.958373583911478,
+      "grad_norm": 10821.587890625,
+      "learning_rate": 1.531090813279466e-05,
+      "loss": 0.105,
+      "step": 11150
+    },
+    {
+      "epoch": 1.967155528233951,
+      "grad_norm": 10268.7001953125,
+      "learning_rate": 1.5245037765677149e-05,
+      "loss": 0.0995,
+      "step": 11200
+    },
+    {
+      "epoch": 1.967155528233951,
+      "eval_loss": 0.1031695231795311,
+      "eval_runtime": 175.3915,
+      "eval_samples_per_second": 25.429,
+      "eval_steps_per_second": 3.181,
+      "step": 11200
+    },
+    {
+      "epoch": 1.975937472556424,
+      "grad_norm": 15241.69140625,
+      "learning_rate": 1.5179167398559635e-05,
+      "loss": 0.1001,
+      "step": 11250
+    },
+    {
+      "epoch": 1.9847194168788969,
+      "grad_norm": 9984.0908203125,
+      "learning_rate": 1.5113297031442124e-05,
+      "loss": 0.1051,
+      "step": 11300
+    },
+    {
+      "epoch": 1.9847194168788969,
+      "eval_loss": 0.10308495908975601,
+      "eval_runtime": 175.2586,
+      "eval_samples_per_second": 25.448,
+      "eval_steps_per_second": 3.184,
+      "step": 11300
+    },
+    {
+      "epoch": 1.99350136120137,
+      "grad_norm": 10116.9619140625,
+      "learning_rate": 1.5047426664324609e-05,
+      "loss": 0.102,
+      "step": 11350
+    },
+    {
+      "epoch": 2.002283305523843,
+      "grad_norm": 13065.00390625,
+      "learning_rate": 1.4981556297207097e-05,
+      "loss": 0.1029,
+      "step": 11400
+    },
+    {
+      "epoch": 2.002283305523843,
+      "eval_loss": 0.10323852300643921,
+      "eval_runtime": 175.2291,
+      "eval_samples_per_second": 25.452,
+      "eval_steps_per_second": 3.184,
+      "step": 11400
+    },
+    {
+      "epoch": 2.011065249846316,
+      "grad_norm": 10087.5390625,
+      "learning_rate": 1.4915685930089584e-05,
+      "loss": 0.1009,
+      "step": 11450
+    },
+    {
+      "epoch": 2.019847194168789,
+      "grad_norm": 9332.802734375,
+      "learning_rate": 1.4849815562972072e-05,
+      "loss": 0.093,
+      "step": 11500
+    },
+    {
+      "epoch": 2.019847194168789,
+      "eval_loss": 0.10356967151165009,
+      "eval_runtime": 175.1976,
+      "eval_samples_per_second": 25.457,
+      "eval_steps_per_second": 3.185,
+      "step": 11500
     }
   ],
   "logging_steps": 50,
@@ -2446,7 +2556,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.358616301666304e+16,
+  "total_flos": 5.601986322628608e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null