Rakhman16 committed on
Commit e72fa10 · verified · 1 Parent(s): f090676

Training in progress, step 8000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d44daa9f54039c6a7c112e61fc03e79ec195dfbc876d5e9ced3f4491bafe05b5
+oid sha256:960ccc4c98033660ea4872dea982bb6f605830e2504ff79a8db1bf16ebe24d44
 size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5c2bc227bc3f6d19424b7e3fb6e1f890007183e4569ce0175d186b2f545cd1d
+oid sha256:ef06fbfc0a013966537fe78757ac6ba4b087e8004e2b33a3accdd74d1b793632
 size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0bd1a1233fd369cd2fe83300436739906b8c0f2d7a7cea881535c02b97b11121
+oid sha256:7798b130c38fd371f598d9d4ee07d9b5d45bacb7dac3afcdc6fdc2d93c9e53c0
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:feab8bd3d670dd033dd040441a4a555712fd3da457861c798e89356180e76612
+oid sha256:8838af9443e4cc6d8bbc7ede7a551a909b84e3a919f05d6b0080ae477122847b
 size 1064
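
All four checkpoint files above are tracked with Git LFS, so each diff only touches the three-line pointer file (version, oid sha256, size); the binary payloads themselves live in LFS storage. As a minimal illustrative sketch (not part of this commit), one could verify a downloaded blob against the new pointer recorded here, using the oid and size from last-checkpoint/model.safetensors:

```python
import hashlib

# Expected values copied from the updated LFS pointer for
# last-checkpoint/model.safetensors in this commit.
EXPECTED_SHA256 = "960ccc4c98033660ea4872dea982bb6f605830e2504ff79a8db1bf16ebe24d44"
EXPECTED_SIZE = 891558696

def verify_lfs_object(path, expected_sha256, expected_size):
    """Hash the local file in 1 MiB chunks and compare against the pointer's oid and size."""
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_sha256 and size == expected_size

print(verify_lfs_object("last-checkpoint/model.safetensors", EXPECTED_SHA256, EXPECTED_SIZE))
```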
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.1062735840678215,
- "best_model_checkpoint": "./fine-tuned/checkpoint-7500",
- "epoch": 1.3172916483709494,
+ "best_metric": 0.10532288253307343,
+ "best_model_checkpoint": "./fine-tuned/checkpoint-8000",
+ "epoch": 1.4051110915956793,
  "eval_steps": 100,
- "global_step": 7500,
+ "global_step": 8000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1657,6 +1657,116 @@
  "eval_samples_per_second": 25.29,
  "eval_steps_per_second": 3.164,
  "step": 7500
+ },
+ {
+ "epoch": 1.3260735926934224,
+ "grad_norm": 9924.986328125,
+ "learning_rate": 2.005357456525558e-05,
+ "loss": 0.1018,
+ "step": 7550
+ },
+ {
+ "epoch": 1.3348555370158954,
+ "grad_norm": 14466.806640625,
+ "learning_rate": 1.9987704198138065e-05,
+ "loss": 0.1065,
+ "step": 7600
+ },
+ {
+ "epoch": 1.3348555370158954,
+ "eval_loss": 0.105972521007061,
+ "eval_runtime": 176.3022,
+ "eval_samples_per_second": 25.297,
+ "eval_steps_per_second": 3.165,
+ "step": 7600
+ },
+ {
+ "epoch": 1.3436374813383682,
+ "grad_norm": 13860.5234375,
+ "learning_rate": 1.992183383102055e-05,
+ "loss": 0.1096,
+ "step": 7650
+ },
+ {
+ "epoch": 1.3524194256608413,
+ "grad_norm": 9354.7333984375,
+ "learning_rate": 1.985596346390304e-05,
+ "loss": 0.1014,
+ "step": 7700
+ },
+ {
+ "epoch": 1.3524194256608413,
+ "eval_loss": 0.10594488680362701,
+ "eval_runtime": 176.4418,
+ "eval_samples_per_second": 25.277,
+ "eval_steps_per_second": 3.163,
+ "step": 7700
+ },
+ {
+ "epoch": 1.3612013699833143,
+ "grad_norm": 9179.8173828125,
+ "learning_rate": 1.9790093096785525e-05,
+ "loss": 0.0998,
+ "step": 7750
+ },
+ {
+ "epoch": 1.3699833143057873,
+ "grad_norm": 6730.38134765625,
+ "learning_rate": 1.972422272966801e-05,
+ "loss": 0.1029,
+ "step": 7800
+ },
+ {
+ "epoch": 1.3699833143057873,
+ "eval_loss": 0.10599970072507858,
+ "eval_runtime": 176.3495,
+ "eval_samples_per_second": 25.291,
+ "eval_steps_per_second": 3.164,
+ "step": 7800
+ },
+ {
+ "epoch": 1.3787652586282602,
+ "grad_norm": 9362.3427734375,
+ "learning_rate": 1.96583523625505e-05,
+ "loss": 0.1092,
+ "step": 7850
+ },
+ {
+ "epoch": 1.3875472029507332,
+ "grad_norm": 10184.53125,
+ "learning_rate": 1.9592481995432988e-05,
+ "loss": 0.1058,
+ "step": 7900
+ },
+ {
+ "epoch": 1.3875472029507332,
+ "eval_loss": 0.1059907078742981,
+ "eval_runtime": 176.2866,
+ "eval_samples_per_second": 25.3,
+ "eval_steps_per_second": 3.165,
+ "step": 7900
+ },
+ {
+ "epoch": 1.3963291472732062,
+ "grad_norm": 11201.806640625,
+ "learning_rate": 1.9526611628315475e-05,
+ "loss": 0.1143,
+ "step": 7950
+ },
+ {
+ "epoch": 1.4051110915956793,
+ "grad_norm": 6871.8662109375,
+ "learning_rate": 1.946074126119796e-05,
+ "loss": 0.1009,
+ "step": 8000
+ },
+ {
+ "epoch": 1.4051110915956793,
+ "eval_loss": 0.10532288253307343,
+ "eval_runtime": 176.6701,
+ "eval_samples_per_second": 25.245,
+ "eval_steps_per_second": 3.158,
+ "step": 8000
  }
  ],
  "logging_steps": 50,
@@ -1676,7 +1786,7 @@
  "attributes": {}
  }
  },
- "total_flos": 3.653534208098304e+16,
+ "total_flos": 3.897117364322304e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null