diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..1577f2553815fb492b0bdc9cce7d570a9cb2c146 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,24 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-250 filter=lfs diff=lfs merge=lfs -text +checkpoint-750 filter=lfs diff=lfs merge=lfs -text +generation_config.json filter=lfs diff=lfs merge=lfs -text +latest filter=lfs diff=lfs merge=lfs -text +pytorch_model-00002-of-00002.bin filter=lfs diff=lfs merge=lfs -text +rng_state_0.pth filter=lfs diff=lfs merge=lfs -text +zero_to_fp32.py filter=lfs diff=lfs merge=lfs -text +checkpoint-500 filter=lfs diff=lfs merge=lfs -text +special_tokens_map.json filter=lfs diff=lfs merge=lfs -text +tokenizer_config.json filter=lfs diff=lfs merge=lfs -text +added_tokens.json filter=lfs diff=lfs merge=lfs -text +config.json filter=lfs diff=lfs merge=lfs -text +global_step500 filter=lfs diff=lfs merge=lfs -text +rng_state_1.pth filter=lfs diff=lfs merge=lfs -text +rng_state_2.pth filter=lfs diff=lfs merge=lfs -text +tokenizer.model filter=lfs diff=lfs merge=lfs -text +pytorch_model-00001-of-00002.bin filter=lfs diff=lfs merge=lfs -text +pytorch_model.bin.index.json filter=lfs diff=lfs merge=lfs -text +rng_state_3.pth filter=lfs diff=lfs merge=lfs -text +trainer_state.json filter=lfs diff=lfs merge=lfs -text +training_args.bin filter=lfs diff=lfs merge=lfs -text diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..2541c7c187ec8fb99956b3604962fb777ad832e4 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1137d58ae316b03a36d7162631e491319c75a223ee366beddc447e77ccb42832 +size 90 diff --git a/checkpoint-250/config.json b/checkpoint-250/config.json new file mode 100644 index 0000000000000000000000000000000000000000..329529ab9dbd7e63db5c00a5f66ae00df3869be1 --- /dev/null +++ b/checkpoint-250/config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38040ecb6dd98fcbe9a29765b4c1db8e79c6677a63acc41919af2d7337454654 +size 620 diff --git a/checkpoint-250/generation_config.json b/checkpoint-250/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d48fc0407d4ffb6f8dc6da1c77bbbd0206fac12 --- /dev/null +++ b/checkpoint-250/generation_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afcf2a1da583dbeb66bb367a759713901e23ce19ed83ed8f5a21b0e877addd5b +size 116 diff --git a/checkpoint-250/global_step250/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-250/global_step250/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..acfda3392b24d5c7f93ad518cefc96d4112b6a7f --- /dev/null +++ b/checkpoint-250/global_step250/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73a26c1e6ac191c92f45e6dbd0b292942d37c1bd7bff9ca2ed3841d131452760 +size 21725254007 diff --git a/checkpoint-250/global_step250/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/checkpoint-250/global_step250/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..df57ba66f8322b530f43be6c4dd2a2c1935db39e --- /dev/null +++ b/checkpoint-250/global_step250/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65bfe89cbcb1ea530a018536901c8ba9f9e6b347bf24fc63eba685e0588f47de +size 21725254583 diff --git a/checkpoint-250/global_step250/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/checkpoint-250/global_step250/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..22d36baf22f00ebf286d075a15529bb317e12718 --- /dev/null +++ b/checkpoint-250/global_step250/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76dda73ab480f23c26df27d9ed2c5cb3489b81a49ddec4bfc73e14d3acc2afd7 +size 21725254647 diff --git a/checkpoint-250/global_step250/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/checkpoint-250/global_step250/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d206f04094deb7f0c2b90db88405f5b7150b09a9 --- /dev/null +++ b/checkpoint-250/global_step250/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f54ef909fe921bd1e2cbba6ef7694f322308450f6c9209e33348adad518fb5 +size 21725254007 diff --git a/checkpoint-250/global_step250/mp_rank_00_model_states.pt b/checkpoint-250/global_step250/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a040f4c99d0811c9c3812cca155c534c5151ae4 --- /dev/null +++ b/checkpoint-250/global_step250/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bef3c4a04e356bf2842db3aa0d1133a3546c9ff3b3eb05d64852e0b19f03ab6 +size 14483584899 diff --git a/checkpoint-250/latest b/checkpoint-250/latest new file mode 100644 index 0000000000000000000000000000000000000000..9213e6cb763d9e20e87a593bd3ae6ac32e970f63 --- /dev/null +++ b/checkpoint-250/latest @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:074d211701b75761ea28e8611c3d9a4db5b233e788456ff08f2111426855cd5e +size 14 diff --git a/checkpoint-250/pytorch_model-00001-of-00002.bin b/checkpoint-250/pytorch_model-00001-of-00002.bin new file mode 100644 index 0000000000000000000000000000000000000000..16b8f70b392b7888a7c8cf69ae98f18c602e16e5 --- /dev/null +++ b/checkpoint-250/pytorch_model-00001-of-00002.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a13b9de24d37a71843b49556c3202c9d8e1a1a368187a88c1bb980c0cee50c6 +size 9943044428 diff --git a/checkpoint-250/pytorch_model-00002-of-00002.bin b/checkpoint-250/pytorch_model-00002-of-00002.bin new file mode 100644 index 0000000000000000000000000000000000000000..69a7542fd35269d72742f25963db9c8382d456fa --- /dev/null +++ b/checkpoint-250/pytorch_model-00002-of-00002.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ffcff369d4c1b2b91d1286dfea5bbd27607098bbf21d851b6ba8e45dcf2152 +size 4540552031 diff --git a/checkpoint-250/pytorch_model.bin.index.json b/checkpoint-250/pytorch_model.bin.index.json new file mode 100644 index 0000000000000000000000000000000000000000..4fb1e141e8ad246863a9b6c67238236806c33381 --- /dev/null +++ b/checkpoint-250/pytorch_model.bin.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72e0f44b64f8e8d5241ee3c393f6339e05f2e844134c75c524d87dc89a7257e9 +size 23950 diff --git a/checkpoint-250/rng_state_0.pth b/checkpoint-250/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..24aba7bcc2a9fb783bd13a6a67b96e5ad055d89d --- /dev/null +++ b/checkpoint-250/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eafe3d5e0585dde8c5033613de99a5d4f23df4284a488f4007b3944580c0b97 +size 17655 diff --git a/checkpoint-250/rng_state_1.pth b/checkpoint-250/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..6b2ef88173fde17f2b3e738a28446f89a0528a96 --- /dev/null +++ b/checkpoint-250/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e34eb456d2d003a2839f2daa9425e99bdd79ed7e24a1de9fc7d5738476bfb4b +size 17655 diff --git a/checkpoint-250/rng_state_2.pth b/checkpoint-250/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..7b118d52a3006aea6c44f23f94c5568d1fb0a2f3 --- /dev/null +++ b/checkpoint-250/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b374af4a2765d8771cee7a72921d3c2e438b9bee34f0b2d098ce6071afeb65e4 +size 17655 diff --git a/checkpoint-250/rng_state_3.pth b/checkpoint-250/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..3f6fd9aa58eb1d5815ca991134531a3280601900 --- /dev/null +++ b/checkpoint-250/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5df75d8477fcc69c7abb03025313915ebfe3ac18c54a7c57aaa455c0099e13e5 +size 17655 diff --git a/checkpoint-250/trainer_state.json b/checkpoint-250/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..be241b96e7c9d573c2167ff54e9960c327d2172f --- /dev/null +++ b/checkpoint-250/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef4b6d519251155a8dc355dd61a9cf5ee8a3c9020795f2603f2f146b47ebb0d5 +size 31155 diff --git a/checkpoint-250/training_args.bin b/checkpoint-250/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..298692c0b541c2e962bbd888bcd24d27d623ab2d --- /dev/null +++ b/checkpoint-250/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dbeb8071fbe7ebd575391d62f8bcd4599760ab21bbc612b111647e2a2bf508e +size 6011 diff --git a/checkpoint-250/zero_to_fp32.py b/checkpoint-250/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..2843405d3ae2f5da24b77351a3ff9bd5a677ab5f --- /dev/null +++ b/checkpoint-250/zero_to_fp32.py @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68f965505cd086b7582e9821f8a6bc9b5b80c43450671c1139f734d1c7678a2c +size 24177 diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..329529ab9dbd7e63db5c00a5f66ae00df3869be1 --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38040ecb6dd98fcbe9a29765b4c1db8e79c6677a63acc41919af2d7337454654 +size 620 diff --git a/checkpoint-500/generation_config.json b/checkpoint-500/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d48fc0407d4ffb6f8dc6da1c77bbbd0206fac12 --- /dev/null +++ b/checkpoint-500/generation_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afcf2a1da583dbeb66bb367a759713901e23ce19ed83ed8f5a21b0e877addd5b +size 116 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..539eeaee64e42a00d4a9f82b403b12483809f29e --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcd98e2885c8add0e9ce02232ebb7d57637bcc73be93433f13d0b3d7297b5f2c +size 21725254007 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..df0c3de3e783b38d8c89eaa692a07609cd4021ec --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a1c1b1d036ffe97cb0873f0c3382e1077aedec0d27c8c6303a7396a8bd1eb7b +size 7946108928 diff --git a/checkpoint-500/global_step500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/checkpoint-500/global_step500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b1bb0f1dac03a75df6f514f420a80afa9688da5 --- /dev/null +++ b/checkpoint-500/global_step500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f25bd7cace2d38e9d43f48d2bed436add1ed610ca8ca893200ff18c8d14fa96 +size 21725254007 diff --git a/checkpoint-500/global_step500/mp_rank_00_model_states.pt b/checkpoint-500/global_step500/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..317b804dc09f27ba43dbc03a623aeab24193567a --- /dev/null +++ b/checkpoint-500/global_step500/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7114b83cd3d93111db5d75975628ae8730ccbf2ea2aa15ae1a6ff626bd57c3ec +size 14483584899 diff --git a/checkpoint-500/pytorch_model-00001-of-00002.bin b/checkpoint-500/pytorch_model-00001-of-00002.bin new file mode 100644 index 0000000000000000000000000000000000000000..179502457ef27524ba6752a60278b83d0ec47dfb --- /dev/null +++ b/checkpoint-500/pytorch_model-00001-of-00002.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea284a24a4d42047bc5b0a99840420497496eff7d9af6c76b8020c9aac3b3ac4 +size 9943044428 diff --git a/checkpoint-500/pytorch_model-00002-of-00002.bin b/checkpoint-500/pytorch_model-00002-of-00002.bin new file mode 100644 index 0000000000000000000000000000000000000000..2dcd25a470d7387eba8b9b5e0b0d556179115862 --- /dev/null +++ b/checkpoint-500/pytorch_model-00002-of-00002.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff85db3904cf639607677c3b225fc6a687ae4b4c72cae68063ae86c28dac7115 +size 4540552031 diff --git a/checkpoint-500/pytorch_model.bin.index.json b/checkpoint-500/pytorch_model.bin.index.json new file mode 100644 index 0000000000000000000000000000000000000000..4fb1e141e8ad246863a9b6c67238236806c33381 --- /dev/null +++ b/checkpoint-500/pytorch_model.bin.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72e0f44b64f8e8d5241ee3c393f6339e05f2e844134c75c524d87dc89a7257e9 +size 23950 diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..298692c0b541c2e962bbd888bcd24d27d623ab2d --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dbeb8071fbe7ebd575391d62f8bcd4599760ab21bbc612b111647e2a2bf508e +size 6011 diff --git a/checkpoint-750/config.json b/checkpoint-750/config.json new file mode 100644 index 0000000000000000000000000000000000000000..329529ab9dbd7e63db5c00a5f66ae00df3869be1 --- /dev/null +++ b/checkpoint-750/config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38040ecb6dd98fcbe9a29765b4c1db8e79c6677a63acc41919af2d7337454654 +size 620 diff --git a/checkpoint-750/generation_config.json b/checkpoint-750/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d48fc0407d4ffb6f8dc6da1c77bbbd0206fac12 --- /dev/null +++ b/checkpoint-750/generation_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afcf2a1da583dbeb66bb367a759713901e23ce19ed83ed8f5a21b0e877addd5b +size 116 diff --git a/checkpoint-750/global_step750/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-750/global_step750/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..31d98ae730c0bf2b827c3ec8afca74e8d14909ff --- /dev/null +++ b/checkpoint-750/global_step750/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4db949e8907e6f83bd6f3dbb0dc0682d0d15d7a444fe6555d5112976151b3463 +size 21725254007 diff --git a/checkpoint-750/global_step750/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/checkpoint-750/global_step750/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6932086cab7fe6be0e26f5efacc19dcc86d70b3e --- /dev/null +++ b/checkpoint-750/global_step750/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c1fb6c7b67b75d1eb10f43f19195600f62e671558efbb8907e44e310994896e +size 21725254583 diff --git a/checkpoint-750/global_step750/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/checkpoint-750/global_step750/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..92bfb05724ed31ed4a2162ffbadb06c060abfaaa --- /dev/null +++ b/checkpoint-750/global_step750/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27808edd997a0d40385887613c8bbf3f6835869eab1eac81bab04b685ad0d9de +size 21725254647 diff --git a/checkpoint-750/global_step750/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/checkpoint-750/global_step750/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c143a352241368e7dcde402b7207efd01cc093b --- /dev/null +++ b/checkpoint-750/global_step750/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99be19c541989a9156b434863500a24848be3fce5b60ba44fa82c281f87a7454 +size 21725254007 diff --git a/checkpoint-750/global_step750/mp_rank_00_model_states.pt b/checkpoint-750/global_step750/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c40d9b63f3014fd37bbc871bf112e2f468debe2f --- /dev/null +++ b/checkpoint-750/global_step750/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d2b2ec5cf5d538d34f84f5f633ff7ae85c8db0d1eb7599958a97684d19baf70 +size 14483584899 diff --git a/checkpoint-750/latest b/checkpoint-750/latest new file mode 100644 index 0000000000000000000000000000000000000000..47c71d7a75fae75cbd5c369a5fb5c6134a05ef6f --- /dev/null +++ b/checkpoint-750/latest @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bd3153661a24859b6a2748f64ccf12ea6ba4772d09ef03886b14d37541114d8 +size 14 diff --git a/checkpoint-750/pytorch_model-00001-of-00002.bin b/checkpoint-750/pytorch_model-00001-of-00002.bin new file mode 100644 index 0000000000000000000000000000000000000000..1c055a14f7b7251179eb994370488a363e6ed734 --- /dev/null +++ b/checkpoint-750/pytorch_model-00001-of-00002.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a0c772c0633cfedd7d31fce0f8d8348d9ba982ab49fb18dd38a353319fadb4 +size 9943044428 diff --git a/checkpoint-750/pytorch_model-00002-of-00002.bin b/checkpoint-750/pytorch_model-00002-of-00002.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b2ffda5fdd8bfb65a1de26db7ab0b5c77ac22e7 --- /dev/null +++ b/checkpoint-750/pytorch_model-00002-of-00002.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a590331b0e3ce8b4ce22b509ab95f5a1c637b82da880aa4c9bcaa18ead8904b +size 4540552031 diff --git a/checkpoint-750/pytorch_model.bin.index.json b/checkpoint-750/pytorch_model.bin.index.json new file mode 100644 index 0000000000000000000000000000000000000000..4fb1e141e8ad246863a9b6c67238236806c33381 --- /dev/null +++ b/checkpoint-750/pytorch_model.bin.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72e0f44b64f8e8d5241ee3c393f6339e05f2e844134c75c524d87dc89a7257e9 +size 23950 diff --git a/checkpoint-750/rng_state_0.pth b/checkpoint-750/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..ff6323f273e070f1a9364d4bd9623f138aeecf3e --- /dev/null +++ b/checkpoint-750/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5775c11ec2fe6a5d58d1857fa436bb0da77027386858b2a4e7ea3eff9c2ab66 +size 17655 diff --git a/checkpoint-750/rng_state_1.pth b/checkpoint-750/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..f39e782f2c104735be454371496353436ab14d35 --- /dev/null +++ b/checkpoint-750/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca1a0e79990b2325280b0f88b8b2e25a4c5d61927bfa5e5c178ffe1321125ca9 +size 17655 diff --git a/checkpoint-750/rng_state_2.pth b/checkpoint-750/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..2b1a20a8abe7a5b8ce88a989433e595e68e23f67 --- /dev/null +++ b/checkpoint-750/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aebaa297ea8d8fd38ae9b49e17f26565783e27c56a220dc278f70f6593bccda +size 17655 diff --git a/checkpoint-750/rng_state_3.pth b/checkpoint-750/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..8a69cdc1ee3f8fb7e5648d30ae646601bd696c21 --- /dev/null +++ b/checkpoint-750/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f537c78a9ca12294baa835f33188b772f2cbf238559c0f36ab3505e19304755 +size 17655 diff --git a/checkpoint-750/trainer_state.json b/checkpoint-750/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3a178a03e55aaec432af16b5021e634e1f768425 --- /dev/null +++ b/checkpoint-750/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56866d2dfe74e8ad06e88fd187ab8dc61429dc2abdf7b1ff2826fe63ed7e5750 +size 93002 diff --git a/checkpoint-750/training_args.bin b/checkpoint-750/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..298692c0b541c2e962bbd888bcd24d27d623ab2d --- /dev/null +++ b/checkpoint-750/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dbeb8071fbe7ebd575391d62f8bcd4599760ab21bbc612b111647e2a2bf508e +size 6011 diff --git a/checkpoint-750/zero_to_fp32.py b/checkpoint-750/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..2843405d3ae2f5da24b77351a3ff9bd5a677ab5f --- /dev/null +++ b/checkpoint-750/zero_to_fp32.py @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68f965505cd086b7582e9821f8a6bc9b5b80c43450671c1139f734d1c7678a2c +size 24177 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..329529ab9dbd7e63db5c00a5f66ae00df3869be1 --- /dev/null +++ b/config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38040ecb6dd98fcbe9a29765b4c1db8e79c6677a63acc41919af2d7337454654 +size 620 diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d48fc0407d4ffb6f8dc6da1c77bbbd0206fac12 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afcf2a1da583dbeb66bb367a759713901e23ce19ed83ed8f5a21b0e877addd5b +size 116 diff --git a/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..539eeaee64e42a00d4a9f82b403b12483809f29e --- /dev/null +++ b/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcd98e2885c8add0e9ce02232ebb7d57637bcc73be93433f13d0b3d7297b5f2c +size 21725254007 diff --git a/global_step500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/global_step500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..00cef2ca1bc67bad0f930fd6506570c8ce2e9d05 --- /dev/null +++ b/global_step500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3bf01104767c5a1712e1b056b5ee9ed92cca6f1cf4f5cd33d02ef80fb6c660c +size 21725254583 diff --git a/global_step500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/global_step500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c53da69fd0bf328e83016e124514d1d329c7172 --- /dev/null +++ b/global_step500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:495c9c5d26168f8d4663bd925031fe7113ab9f36f6407598c1d232f7ced2fc63 +size 21725254647 diff --git a/global_step500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/global_step500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b1bb0f1dac03a75df6f514f420a80afa9688da5 --- /dev/null +++ b/global_step500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f25bd7cace2d38e9d43f48d2bed436add1ed610ca8ca893200ff18c8d14fa96 +size 21725254007 diff --git a/global_step500/mp_rank_00_model_states.pt b/global_step500/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..317b804dc09f27ba43dbc03a623aeab24193567a --- /dev/null +++ b/global_step500/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7114b83cd3d93111db5d75975628ae8730ccbf2ea2aa15ae1a6ff626bd57c3ec +size 14483584899 diff --git a/latest b/latest new file mode 100644 index 0000000000000000000000000000000000000000..4a36dadeb520360c99392d3aefc1d8643133b221 --- /dev/null +++ b/latest @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87259daf3b26bb9bbc1c08fe6c7b6ff903d983c3ecde1e2e586fd3d3481fb409 +size 14 diff --git a/pytorch_model-00001-of-00002.bin b/pytorch_model-00001-of-00002.bin new file mode 100644 index 0000000000000000000000000000000000000000..179502457ef27524ba6752a60278b83d0ec47dfb --- /dev/null +++ b/pytorch_model-00001-of-00002.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea284a24a4d42047bc5b0a99840420497496eff7d9af6c76b8020c9aac3b3ac4 +size 9943044428 diff --git a/pytorch_model-00002-of-00002.bin b/pytorch_model-00002-of-00002.bin new file mode 100644 index 0000000000000000000000000000000000000000..2dcd25a470d7387eba8b9b5e0b0d556179115862 --- /dev/null +++ b/pytorch_model-00002-of-00002.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff85db3904cf639607677c3b225fc6a687ae4b4c72cae68063ae86c28dac7115 +size 4540552031 diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json new file mode 100644 index 0000000000000000000000000000000000000000..4fb1e141e8ad246863a9b6c67238236806c33381 --- /dev/null +++ b/pytorch_model.bin.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72e0f44b64f8e8d5241ee3c393f6339e05f2e844134c75c524d87dc89a7257e9 +size 23950 diff --git a/rng_state_0.pth b/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..ae4b8d17e5912771b04a754b09c049eccbbfcd63 --- /dev/null +++ b/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bd8bdb59e9192d49197cb061374fd4c50d6d8537f5a113b12bf4b45127d57bc +size 17655 diff --git a/rng_state_1.pth b/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..1c8f1332da4ea9e5f508a4d4dee7742bfc638c41 --- /dev/null +++ b/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ee77112b679d1d95da2ae8da1e21a46c05310a444fab01332534849afbf5fe +size 17655 diff --git a/rng_state_2.pth b/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..d6f437dd76fdc3ecbe514d28ad84cb007d0fff33 --- /dev/null +++ b/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:298ba88fe1fab9a0075aa69e702e2cd646f3213bb45d4c7361f2177c6b6f34d1 +size 17655 diff --git a/rng_state_3.pth b/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..c8e85af9cd5e5bf312449f5a28b454e12080b585 --- /dev/null +++ b/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:480b2689eb2712c09ad603537b3b042e208e7574e1e7b0d6bd721ac2ba83f095 +size 17655 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..2eaadf75fd776719dfcbe91915e24f43d5d0b6a2 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b454cf92b8686901e71f611ed4e6b93e70a2692dd3a5bb50b1c408efb9a47a03 +size 101 diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fe5f9b925716864415e118ef66db24d751c8ef7d --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f28772d58ac39f7727bb49d7f5ae8e21fcfacdb1677c86dc6c5891d8c75bba9e +size 1515 diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..653c8fffb3c32d03e266ca9be072fd31e60d1921 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02d694008575df14324f9d6f5253e1fb72814f29e8aef60cd5eaa533c791d855 +size 62089 diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..298692c0b541c2e962bbd888bcd24d27d623ab2d --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dbeb8071fbe7ebd575391d62f8bcd4599760ab21bbc612b111647e2a2bf508e +size 6011 diff --git a/zero_to_fp32.py b/zero_to_fp32.py new file mode 100755 index 0000000000000000000000000000000000000000..2843405d3ae2f5da24b77351a3ff9bd5a677ab5f --- /dev/null +++ b/zero_to_fp32.py @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68f965505cd086b7582e9821f8a6bc9b5b80c43450671c1139f734d1c7678a2c +size 24177