Rakhman16 commited on
Commit
f807d48
·
verified ·
1 Parent(s): 4de09d2

Training in progress, step 7000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df1244c910bb4060fc5cd3cd04e33a0310bf7b71cf0fe800440d55080893d1a2
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28072047700e1de585a07077ebc92b4714eed04f2345d75eb99ced2196fe9fa7
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f77b13041110339e8140256c79f4ac6e2ee378775e323dfcd41c6660939958fd
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2c3a16a887489600f444738e291b8186e27ffbeb4f43a529a4469099c5dc85f
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03be960e2fc539e2d69d5db998af57e835579dabd06a4178f51fdda3448e0efb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48d505bc797ae06b42ca84d6f450fca5d6d2f612a42f19f5b2b9faeb52b37e39
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9dca2388d7b13e399101f77bac9094a9dd1673ec11d576f1f56065692ea6a74
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5042add55b1f9bbff03a2f79c52bf9eb682e629b3af75c9a351c18dbe12735bf
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.10702774673700333,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-6500",
4
- "epoch": 1.1416527619214893,
5
  "eval_steps": 100,
6
- "global_step": 6500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1437,6 +1437,116 @@
1437
  "eval_samples_per_second": 25.245,
1438
  "eval_steps_per_second": 3.158,
1439
  "step": 6500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1440
  }
1441
  ],
1442
  "logging_steps": 50,
@@ -1456,7 +1566,7 @@
1456
  "attributes": {}
1457
  }
1458
  },
1459
- "total_flos": 3.166367895650304e+16,
1460
  "train_batch_size": 8,
1461
  "trial_name": null,
1462
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.10637149214744568,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-7000",
4
+ "epoch": 1.2294722051462195,
5
  "eval_steps": 100,
6
+ "global_step": 7000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1437
  "eval_samples_per_second": 25.245,
1438
  "eval_steps_per_second": 3.158,
1439
  "step": 6500
1440
+ },
1441
+ {
1442
+ "epoch": 1.1504347062439624,
1443
+ "grad_norm": 9161.2529296875,
1444
+ "learning_rate": 2.1370981907605832e-05,
1445
+ "loss": 0.1037,
1446
+ "step": 6550
1447
+ },
1448
+ {
1449
+ "epoch": 1.1592166505664354,
1450
+ "grad_norm": 9676.08203125,
1451
+ "learning_rate": 2.130511154048832e-05,
1452
+ "loss": 0.1113,
1453
+ "step": 6600
1454
+ },
1455
+ {
1456
+ "epoch": 1.1592166505664354,
1457
+ "eval_loss": 0.10694678127765656,
1458
+ "eval_runtime": 176.4583,
1459
+ "eval_samples_per_second": 25.275,
1460
+ "eval_steps_per_second": 3.162,
1461
+ "step": 6600
1462
+ },
1463
+ {
1464
+ "epoch": 1.1679985948889084,
1465
+ "grad_norm": 10895.8876953125,
1466
+ "learning_rate": 2.1239241173370806e-05,
1467
+ "loss": 0.1029,
1468
+ "step": 6650
1469
+ },
1470
+ {
1471
+ "epoch": 1.1767805392113815,
1472
+ "grad_norm": 10269.076171875,
1473
+ "learning_rate": 2.1173370806253292e-05,
1474
+ "loss": 0.1086,
1475
+ "step": 6700
1476
+ },
1477
+ {
1478
+ "epoch": 1.1767805392113815,
1479
+ "eval_loss": 0.10676951706409454,
1480
+ "eval_runtime": 176.5328,
1481
+ "eval_samples_per_second": 25.264,
1482
+ "eval_steps_per_second": 3.161,
1483
+ "step": 6700
1484
+ },
1485
+ {
1486
+ "epoch": 1.1855624835338543,
1487
+ "grad_norm": 9631.9169921875,
1488
+ "learning_rate": 2.1107500439135782e-05,
1489
+ "loss": 0.111,
1490
+ "step": 6750
1491
+ },
1492
+ {
1493
+ "epoch": 1.1943444278563273,
1494
+ "grad_norm": 8764.7451171875,
1495
+ "learning_rate": 2.104163007201827e-05,
1496
+ "loss": 0.1066,
1497
+ "step": 6800
1498
+ },
1499
+ {
1500
+ "epoch": 1.1943444278563273,
1501
+ "eval_loss": 0.10679937154054642,
1502
+ "eval_runtime": 176.7169,
1503
+ "eval_samples_per_second": 25.238,
1504
+ "eval_steps_per_second": 3.158,
1505
+ "step": 6800
1506
+ },
1507
+ {
1508
+ "epoch": 1.2031263721788004,
1509
+ "grad_norm": 12220.5439453125,
1510
+ "learning_rate": 2.0975759704900756e-05,
1511
+ "loss": 0.1106,
1512
+ "step": 6850
1513
+ },
1514
+ {
1515
+ "epoch": 1.2119083165012734,
1516
+ "grad_norm": 13508.0205078125,
1517
+ "learning_rate": 2.0909889337783242e-05,
1518
+ "loss": 0.1115,
1519
+ "step": 6900
1520
+ },
1521
+ {
1522
+ "epoch": 1.2119083165012734,
1523
+ "eval_loss": 0.10662820190191269,
1524
+ "eval_runtime": 176.4182,
1525
+ "eval_samples_per_second": 25.281,
1526
+ "eval_steps_per_second": 3.163,
1527
+ "step": 6900
1528
+ },
1529
+ {
1530
+ "epoch": 1.2206902608237464,
1531
+ "grad_norm": 11431.49609375,
1532
+ "learning_rate": 2.0844018970665732e-05,
1533
+ "loss": 0.1042,
1534
+ "step": 6950
1535
+ },
1536
+ {
1537
+ "epoch": 1.2294722051462195,
1538
+ "grad_norm": 8914.7119140625,
1539
+ "learning_rate": 2.077814860354822e-05,
1540
+ "loss": 0.1037,
1541
+ "step": 7000
1542
+ },
1543
+ {
1544
+ "epoch": 1.2294722051462195,
1545
+ "eval_loss": 0.10637149214744568,
1546
+ "eval_runtime": 176.33,
1547
+ "eval_samples_per_second": 25.293,
1548
+ "eval_steps_per_second": 3.165,
1549
+ "step": 7000
1550
  }
1551
  ],
1552
  "logging_steps": 50,
 
1566
  "attributes": {}
1567
  }
1568
  },
1569
+ "total_flos": 3.409951051874304e+16,
1570
  "train_batch_size": 8,
1571
  "trial_name": null,
1572
  "trial_params": null