Rakhman16 committed verified commit 401a5d9 (1 parent: af3ef3e)

Training in progress, step 7500, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:28072047700e1de585a07077ebc92b4714eed04f2345d75eb99ced2196fe9fa7
+ oid sha256:d44daa9f54039c6a7c112e61fc03e79ec195dfbc876d5e9ced3f4491bafe05b5
  size 891558696
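The updated model.safetensors (891 MB) is the step-7500 weight snapshot. A minimal sketch for inspecting it locally, assuming the repository has been cloned, the LFS object fetched (e.g. with git lfs pull), and that the file holds PyTorch tensors:

# Sketch only: load the checkpoint weights and summarize them.
from safetensors.torch import load_file

state_dict = load_file("last-checkpoint/model.safetensors")
total_params = sum(t.numel() for t in state_dict.values())
print(f"{len(state_dict)} tensors, {total_params:,} parameters")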
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f2c3a16a887489600f444738e291b8186e27ffbeb4f43a529a4469099c5dc85f
+ oid sha256:a5c2bc227bc3f6d19424b7e3fb6e1f890007183e4569ce0175d186b2f545cd1d
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:48d505bc797ae06b42ca84d6f450fca5d6d2f612a42f19f5b2b9faeb52b37e39
+ oid sha256:0bd1a1233fd369cd2fe83300436739906b8c0f2d7a7cea881535c02b97b11121
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5042add55b1f9bbff03a2f79c52bf9eb682e629b3af75c9a351c18dbe12735bf
+ oid sha256:feab8bd3d670dd033dd040441a4a555712fd3da457861c798e89356180e76612
  size 1064
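All four files above are stored as Git LFS pointers: the repository itself only tracks a spec version, a sha256 object id, and a byte size, and this commit swaps the object ids to the step-7500 artifacts. A short sketch, assuming the objects have already been fetched locally, for checking a file against the oid and size recorded here:

# Sketch only: verify fetched LFS objects against the pointer values in this commit.
import hashlib
import os

def verify(path, expected_oid, expected_size):
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    ok = digest.hexdigest() == expected_oid and os.path.getsize(path) == expected_size
    print(f"{path}: {'OK' if ok else 'MISMATCH'}")

verify("last-checkpoint/model.safetensors",
       "d44daa9f54039c6a7c112e61fc03e79ec195dfbc876d5e9ced3f4491bafe05b5",
       891558696)
verify("last-checkpoint/optimizer.pt",
       "a5c2bc227bc3f6d19424b7e3fb6e1f890007183e4569ce0175d186b2f545cd1d",
       1783272762)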
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.10637149214744568,
- "best_model_checkpoint": "./fine-tuned/checkpoint-7000",
- "epoch": 1.2294722051462195,
+ "best_metric": 0.1062735840678215,
+ "best_model_checkpoint": "./fine-tuned/checkpoint-7500",
+ "epoch": 1.3172916483709494,
  "eval_steps": 100,
- "global_step": 7000,
+ "global_step": 7500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1547,6 +1547,116 @@
  "eval_samples_per_second": 25.293,
  "eval_steps_per_second": 3.165,
  "step": 7000
+ },
+ {
+ "epoch": 1.2382541494686923,
+ "grad_norm": 32394.365234375,
+ "learning_rate": 2.0712278236430705e-05,
+ "loss": 0.1082,
+ "step": 7050
+ },
+ {
+ "epoch": 1.2470360937911653,
+ "grad_norm": 13041.302734375,
+ "learning_rate": 2.0646407869313192e-05,
+ "loss": 0.1118,
+ "step": 7100
+ },
+ {
+ "epoch": 1.2470360937911653,
+ "eval_loss": 0.10657413303852081,
+ "eval_runtime": 176.3827,
+ "eval_samples_per_second": 25.286,
+ "eval_steps_per_second": 3.164,
+ "step": 7100
+ },
+ {
+ "epoch": 1.2558180381136383,
+ "grad_norm": 9132.3310546875,
+ "learning_rate": 2.0580537502195682e-05,
+ "loss": 0.103,
+ "step": 7150
+ },
+ {
+ "epoch": 1.2645999824361114,
+ "grad_norm": 18166.306640625,
+ "learning_rate": 2.0514667135078165e-05,
+ "loss": 0.1108,
+ "step": 7200
+ },
+ {
+ "epoch": 1.2645999824361114,
+ "eval_loss": 0.10623560100793839,
+ "eval_runtime": 176.2458,
+ "eval_samples_per_second": 25.306,
+ "eval_steps_per_second": 3.166,
+ "step": 7200
+ },
+ {
+ "epoch": 1.2733819267585844,
+ "grad_norm": 12996.111328125,
+ "learning_rate": 2.0448796767960652e-05,
+ "loss": 0.1019,
+ "step": 7250
+ },
+ {
+ "epoch": 1.2821638710810572,
+ "grad_norm": 9010.8212890625,
+ "learning_rate": 2.0382926400843142e-05,
+ "loss": 0.1074,
+ "step": 7300
+ },
+ {
+ "epoch": 1.2821638710810572,
+ "eval_loss": 0.10622620582580566,
+ "eval_runtime": 176.3743,
+ "eval_samples_per_second": 25.287,
+ "eval_steps_per_second": 3.164,
+ "step": 7300
+ },
+ {
+ "epoch": 1.2909458154035303,
+ "grad_norm": 14462.72265625,
+ "learning_rate": 2.031705603372563e-05,
+ "loss": 0.1104,
+ "step": 7350
+ },
+ {
+ "epoch": 1.2997277597260033,
+ "grad_norm": 9121.498046875,
+ "learning_rate": 2.0251185666608115e-05,
+ "loss": 0.1141,
+ "step": 7400
+ },
+ {
+ "epoch": 1.2997277597260033,
+ "eval_loss": 0.10618162155151367,
+ "eval_runtime": 176.2451,
+ "eval_samples_per_second": 25.306,
+ "eval_steps_per_second": 3.166,
+ "step": 7400
+ },
+ {
+ "epoch": 1.3085097040484763,
+ "grad_norm": 13231.1484375,
+ "learning_rate": 2.0185315299490602e-05,
+ "loss": 0.1106,
+ "step": 7450
+ },
+ {
+ "epoch": 1.3172916483709494,
+ "grad_norm": 8816.541015625,
+ "learning_rate": 2.0119444932373092e-05,
+ "loss": 0.1065,
+ "step": 7500
+ },
+ {
+ "epoch": 1.3172916483709494,
+ "eval_loss": 0.1062735840678215,
+ "eval_runtime": 176.3513,
+ "eval_samples_per_second": 25.29,
+ "eval_steps_per_second": 3.164,
+ "step": 7500
  }
  ],
  "logging_steps": 50,
@@ -1566,7 +1676,7 @@
  "attributes": {}
  }
  },
- "total_flos": 3.409951051874304e+16,
+ "total_flos": 3.653534208098304e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null