Training in progress, step 12500, checkpoint
Browse files- last-checkpoint/global_step12500/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step12500/zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step12500/zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step12500/zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step12500/zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step12500/zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step12500/zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/rng_state_4.pth +2 -2
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +73 -3
last-checkpoint/global_step12500/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d471f4d4e89a53746b0e4a77b195db83faf739af77bf5baec751b0fb175acc6
|
3 |
+
size 197282509
|
last-checkpoint/global_step12500/zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7eaf9773d725fdb93f06baf3dc7e73f199a813f44e0b1d39af31e10f200d549
|
3 |
+
size 180416968
|
last-checkpoint/global_step12500/zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12a3531002d674e9e9bc13b3d11ccd44c3f04eff8843f7d15670890e4bdb391d
|
3 |
+
size 180416776
|
last-checkpoint/global_step12500/zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4650ebb550dd159fc85dc4057ed8f347f71bb91103d7547fdf9c57742299984
|
3 |
+
size 180416776
|
last-checkpoint/global_step12500/zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84e2a6205ea3f09a2469f8ed6b8b95fabc4c0313b35371e3dfb4becc615f33c9
|
3 |
+
size 180416904
|
last-checkpoint/global_step12500/zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fcc6ed5077225dfbc3ba935b9353af036bf0b9a9cb642b0257712b9167d6c2c
|
3 |
+
size 180416712
|
last-checkpoint/global_step12500/zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e96d7a6ffec4f1d7591687afcfbdb6ec6d72fd25a7696dc1c62d712439a5af62
|
3 |
+
size 180417096
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step12500
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 188836816
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24b88f70912df99acfaa93c847d111588238f2f3c89f9126b68e97bd70f7fbb5
|
3 |
size 188836816
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:271c89ce6ee409be2ea9aa3bf701ff66e7a7cb9264b73eefc11e757372a91e5e
|
3 |
+
size 15472
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41eea4946eeaa383e314ebd734ddda2972689f784d350627374fb05cf2b160ab
|
3 |
size 15536
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eccd0e35b1a3fb6b4961836705b55374915efc0fe4aa8bbcd2fcc5652edb5192
|
3 |
+
size 15472
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1155b24d828f1b4cc9581115f945c46af6cafa8b0dcd3abce9639d3f64bf6fa7
|
3 |
+
size 15472
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d24951e84bf84c9244c0763d9d797270b2b1f7662cfade2d3d2824db197031c1
|
3 |
+
size 15472
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:baa66995193ecfcc4672c56a227acadd5b4de025d048272586e219600d9fb650
|
3 |
+
size 15472
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f500d7068793f6d489446a9d3939e8a5fbc68fd372f8eeb2605b20dd2e2ab5a
|
3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 1500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1750,6 +1750,76 @@
|
|
1750 |
"eval_steps_per_second": 0.288,
|
1751 |
"eval_wer": 0.19910846953937592,
|
1752 |
"step": 12000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1753 |
}
|
1754 |
],
|
1755 |
"logging_steps": 50,
|
@@ -1769,7 +1839,7 @@
|
|
1769 |
"attributes": {}
|
1770 |
}
|
1771 |
},
|
1772 |
-
"total_flos": 3.
|
1773 |
"train_batch_size": 64,
|
1774 |
"trial_name": null,
|
1775 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 337.8378378378378,
|
5 |
"eval_steps": 1500,
|
6 |
+
"global_step": 12500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1750 |
"eval_steps_per_second": 0.288,
|
1751 |
"eval_wer": 0.19910846953937592,
|
1752 |
"step": 12000
|
1753 |
+
},
|
1754 |
+
{
|
1755 |
+
"epoch": 325.6756756756757,
|
1756 |
+
"grad_norm": 0.28629496693611145,
|
1757 |
+
"learning_rate": 0.0001,
|
1758 |
+
"loss": 0.0159,
|
1759 |
+
"step": 12050
|
1760 |
+
},
|
1761 |
+
{
|
1762 |
+
"epoch": 327.02702702702703,
|
1763 |
+
"grad_norm": 0.24716606736183167,
|
1764 |
+
"learning_rate": 0.0001,
|
1765 |
+
"loss": 0.0152,
|
1766 |
+
"step": 12100
|
1767 |
+
},
|
1768 |
+
{
|
1769 |
+
"epoch": 328.3783783783784,
|
1770 |
+
"grad_norm": 0.2562699019908905,
|
1771 |
+
"learning_rate": 0.0001,
|
1772 |
+
"loss": 0.0153,
|
1773 |
+
"step": 12150
|
1774 |
+
},
|
1775 |
+
{
|
1776 |
+
"epoch": 329.72972972972974,
|
1777 |
+
"grad_norm": 0.27679792046546936,
|
1778 |
+
"learning_rate": 0.0001,
|
1779 |
+
"loss": 0.0147,
|
1780 |
+
"step": 12200
|
1781 |
+
},
|
1782 |
+
{
|
1783 |
+
"epoch": 331.0810810810811,
|
1784 |
+
"grad_norm": 0.2266552597284317,
|
1785 |
+
"learning_rate": 0.0001,
|
1786 |
+
"loss": 0.0149,
|
1787 |
+
"step": 12250
|
1788 |
+
},
|
1789 |
+
{
|
1790 |
+
"epoch": 332.43243243243245,
|
1791 |
+
"grad_norm": 0.2453828752040863,
|
1792 |
+
"learning_rate": 0.0001,
|
1793 |
+
"loss": 0.0151,
|
1794 |
+
"step": 12300
|
1795 |
+
},
|
1796 |
+
{
|
1797 |
+
"epoch": 333.7837837837838,
|
1798 |
+
"grad_norm": 0.3448384702205658,
|
1799 |
+
"learning_rate": 0.0001,
|
1800 |
+
"loss": 0.014,
|
1801 |
+
"step": 12350
|
1802 |
+
},
|
1803 |
+
{
|
1804 |
+
"epoch": 335.13513513513516,
|
1805 |
+
"grad_norm": 0.20089378952980042,
|
1806 |
+
"learning_rate": 0.0001,
|
1807 |
+
"loss": 0.0148,
|
1808 |
+
"step": 12400
|
1809 |
+
},
|
1810 |
+
{
|
1811 |
+
"epoch": 336.4864864864865,
|
1812 |
+
"grad_norm": 0.2895062267780304,
|
1813 |
+
"learning_rate": 0.0001,
|
1814 |
+
"loss": 0.016,
|
1815 |
+
"step": 12450
|
1816 |
+
},
|
1817 |
+
{
|
1818 |
+
"epoch": 337.8378378378378,
|
1819 |
+
"grad_norm": 0.3491511642932892,
|
1820 |
+
"learning_rate": 0.0001,
|
1821 |
+
"loss": 0.0153,
|
1822 |
+
"step": 12500
|
1823 |
}
|
1824 |
],
|
1825 |
"logging_steps": 50,
|
|
|
1839 |
"attributes": {}
|
1840 |
}
|
1841 |
},
|
1842 |
+
"total_flos": 3.5089183189071246e+20,
|
1843 |
"train_batch_size": 64,
|
1844 |
"trial_name": null,
|
1845 |
"trial_params": null
|