diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..7ebc8c3d9b7f34a0d4c5344c5cbfcfcb260950e6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,94 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-2000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-3000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text +checkpoint-4863/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/checkpoint-1000/optimizer_0/.metadata b/checkpoint-1000/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..96ca21338bb145d85aeba56cefccf1dcfdb42903 --- /dev/null +++ b/checkpoint-1000/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c823bc431bf7d807ed32a5f978a1748f129e77a58aba3594daf3a2045d091648 +size 2626018 diff --git a/checkpoint-1000/optimizer_0/__0_0.distcp b/checkpoint-1000/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..42db290f9007a690ce4df4c26546f4da7e2aec3b --- /dev/null +++ b/checkpoint-1000/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3c2153bf02b381c6ad2ffe0824dc5ba46c39423c40fb753a13b2b81d22c2d6d +size 55406592 diff --git a/checkpoint-1000/optimizer_0/__1_0.distcp b/checkpoint-1000/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..82c936ba1f9c8b1360c22895ad5dd6205c153f44 --- /dev/null +++ b/checkpoint-1000/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:976f38d9d7d31749574bb1d3dc238341bd89db0ad56b4a89aca61daaedbd1dc8 +size 55526656 diff --git a/checkpoint-1000/optimizer_0/__2_0.distcp b/checkpoint-1000/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..52265e4d63894379553a8bb287ba17cfb60510fe --- /dev/null +++ b/checkpoint-1000/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab8f4a691793cccd18d28711b97db5c3fffd047450c73cd1bc8c15d242a455e0 +size 55480896 diff --git a/checkpoint-1000/optimizer_0/__3_0.distcp b/checkpoint-1000/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2988a72824a7ba5f8f1d6c08a2e99a9702c60d2d --- /dev/null +++ b/checkpoint-1000/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7794d7737fe9ab804c47265172c228f322c3ce0d8ae1f4b0da1c0961299c454b +size 55480896 diff --git a/checkpoint-1000/optimizer_0/__4_0.distcp b/checkpoint-1000/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..316c1579cb0e90ac4b7e0cfd7c772e06ecab46ed --- /dev/null +++ b/checkpoint-1000/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1be0f25d5d1ef6a5da7241da8e0bfd32f5ea5acae0402cb65c0b237cdb85ba52 +size 55480032 diff --git a/checkpoint-1000/optimizer_0/__5_0.distcp b/checkpoint-1000/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..146ce131c7dd70d2c4fcc86a362151140b75baf4 --- /dev/null +++ b/checkpoint-1000/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d508dfa9519b3b5007e08eb3cfcadb5e1af8f66b5cbb7c46433de1a0220b3fd0 +size 55480032 diff --git a/checkpoint-1000/optimizer_0/__6_0.distcp b/checkpoint-1000/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2b8bee53d759c8fe9d9cdbe37bc501623ab77d87 --- /dev/null +++ b/checkpoint-1000/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef388af3d62f8049bdd7aa4bd92686f81eba235bd699a8770d3d998bd7fc7201 +size 55480032 diff --git a/checkpoint-1000/optimizer_0/__7_0.distcp b/checkpoint-1000/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7f887ad4554d48bc907c2ea2874700fd16bd1fe3 --- /dev/null +++ b/checkpoint-1000/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:862e1e89ad8d6d45b9612642902fb8e7e24f3242c701559334e4a98718bd9930 +size 55480032 diff --git a/checkpoint-1000/pytorch_model_fsdp_0/.metadata b/checkpoint-1000/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..6b5278cb1a423ced0a91fbe8ae204777fcbc0434 --- /dev/null +++ b/checkpoint-1000/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19502f0f22e6a789d9430be30fd1319f8dd68afbb00a9bc001926d880d5042d1 +size 1064888 diff --git a/checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp b/checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4b4ec6bae146494171dbf41d4e4f20cc9a97ff93 --- /dev/null +++ b/checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87a8ae8d0d9b4c6d4906e101e591b6ff24ca8256fb349638bc2c9bafe125a6fb +size 27702864 diff --git a/checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp b/checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7db1a80519e32981397deaa9c5cb29c98ad6ebe3 --- /dev/null +++ b/checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fe1fb95d56970cff4dcbc83a83ca7f8ecac9bc641868cf8fde4b69f5b7b57f5 +size 27702864 diff --git a/checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp b/checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a6607d63d24d39e1c0f13ec649331203b089082c --- /dev/null +++ b/checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be0526cb870508e64adac4dba0b1cd510e90492d4a0c715b6f3824cd9a832ce6 +size 27702864 diff --git a/checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp b/checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..80cbf6ac662c7fc1a472d9080b900a868a2270ca --- /dev/null +++ b/checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:548171e890776a3386a8114d5e24eb128c0035a60402484b03960f2e01651715 +size 27702864 diff --git a/checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp b/checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..95aca3fab72e6ab2f1d8bd727992f6d54741b14e --- /dev/null +++ b/checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40528c0670a9dbde32993d03f21ac1cb232c25c596ab49daa179f3ebcf19bf8d +size 27702864 diff --git a/checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp b/checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..e041ca7d2fa6a342303106a8ef4528c460296fe8 --- /dev/null +++ b/checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a30cd894ac0476288fe9a1b47f5257a750b3f8b247ccaa46692096d393f54c7a +size 27702864 diff --git a/checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp b/checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..f3324b963e0b683991679e0dfaa77dc5d40cf4c1 --- /dev/null +++ b/checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0e628ecd537a0d31f91287a8a7320005dcef7e164a9f8b11c199eeda8058c52 +size 27702864 diff --git a/checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp b/checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..41db0b1b9d7ce62a519cc750a38d627a0e420beb --- /dev/null +++ b/checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd911f0a16f12715e3105bf4ac7d5735f4c9e2bb7b1e1fd50687fa706884fbd0 +size 27702864 diff --git a/checkpoint-1000/rng_state_0.pth b/checkpoint-1000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..6919efdd3b1eb0e3b2c6a74dff6a66519bdbf6ab --- /dev/null +++ b/checkpoint-1000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:852a2229a3726ffedc43daf9b32d882ac09be192242bada110e4e27a158a4ad8 +size 15984 diff --git a/checkpoint-1000/rng_state_1.pth b/checkpoint-1000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..82ee371d9fc623a41cd46b98cac7adace729d0cb --- /dev/null +++ b/checkpoint-1000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:607d5b9fe1b6ffab2c6ab8c0cca7a2dc074f35f0f539611c21febcb657cb9230 +size 15984 diff --git a/checkpoint-1000/rng_state_2.pth b/checkpoint-1000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..65a7ee4a0457b1f7d0b41cf5911d068414c0a501 --- /dev/null +++ b/checkpoint-1000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21bfb8fc3702bc02f20eecb02befd28c4d4a0cf2d6e241c7b306fedf67d74101 +size 15984 diff --git a/checkpoint-1000/rng_state_3.pth b/checkpoint-1000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..cffc72cb88834bad5933f555f1819a97f680ef3c --- /dev/null +++ b/checkpoint-1000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f680126569c803d74359f9aa642e22ede3c8af20715e7c162ef1abe0100375d8 +size 15984 diff --git a/checkpoint-1000/rng_state_4.pth b/checkpoint-1000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..7f4c3257d9882af2029e31ab9f19c62720226618 --- /dev/null +++ b/checkpoint-1000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0095abae33bd227b2ec0fdc9f9fe68be4e271a993b2a40b77676a7d46e7c0877 +size 15984 diff --git a/checkpoint-1000/rng_state_5.pth b/checkpoint-1000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..ad45d104bffa3e2cea2c9b190860fad8f2ec62a4 --- /dev/null +++ b/checkpoint-1000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb75e7ee126757b5c516ccdb6844f17b23593a1586dc161a8668186a81a17481 +size 15984 diff --git a/checkpoint-1000/rng_state_6.pth b/checkpoint-1000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..852f625684b60c9d5382c4f9f7661083b6b3def1 --- /dev/null +++ b/checkpoint-1000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2d0e74d163711a1f334aacbfe6cc80b35858c2f0be112236077c3cb5c8047a2 +size 15984 diff --git a/checkpoint-1000/rng_state_7.pth b/checkpoint-1000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..3235441a56804ec804f53de66407895663e2adbe --- /dev/null +++ b/checkpoint-1000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba53e2865adec6eb39312aa9f9ea49a46316b7a8c07f97959fc55d8a4a61f463 +size 15984 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea99baee166d32743a919b73621ead832972525e --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1252495abb6207a87314f27eb7a52b71b9170963ff2e044a7fac5b9a90ef861 +size 1064 diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2d32d05a12c4653047ca3a06cf4e820f80f4e407 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,173 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6169031462060457, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.030845157310302282, + "grad_norm": 2.8206074237823486, + "learning_rate": 1.0277492291880782e-05, + "loss": 1.8082, + "step": 50 + }, + { + "epoch": 0.061690314620604564, + "grad_norm": 3.4183013439178467, + "learning_rate": 2.0554984583761563e-05, + "loss": 0.6538, + "step": 100 + }, + { + "epoch": 0.09253547193090685, + "grad_norm": 2.170591354370117, + "learning_rate": 3.083247687564235e-05, + "loss": 0.4563, + "step": 150 + }, + { + "epoch": 0.12338062924120913, + "grad_norm": 1.4687080383300781, + "learning_rate": 4.110996916752313e-05, + "loss": 0.4263, + "step": 200 + }, + { + "epoch": 0.15422578655151142, + "grad_norm": 1.836676836013794, + "learning_rate": 5.1387461459403907e-05, + "loss": 0.3994, + "step": 250 + }, + { + "epoch": 0.1850709438618137, + "grad_norm": 1.2718663215637207, + "learning_rate": 6.16649537512847e-05, + "loss": 0.3665, + "step": 300 + }, + { + "epoch": 0.215916101172116, + "grad_norm": 1.6945191621780396, + "learning_rate": 7.194244604316547e-05, + "loss": 0.3577, + "step": 350 + }, + { + "epoch": 0.24676125848241826, + "grad_norm": 1.2829898595809937, + "learning_rate": 8.221993833504625e-05, + "loss": 0.347, + "step": 400 + }, + { + "epoch": 0.27760641579272055, + "grad_norm": 1.01521635055542, + "learning_rate": 9.249743062692704e-05, + "loss": 0.3288, + "step": 450 + }, + { + "epoch": 0.30845157310302285, + "grad_norm": 1.522111415863037, + "learning_rate": 0.00010277492291880781, + "loss": 0.3267, + "step": 500 + }, + { + "epoch": 0.3392967304133251, + "grad_norm": 0.9678927659988403, + "learning_rate": 0.00011305241521068859, + "loss": 0.3198, + "step": 550 + }, + { + "epoch": 0.3701418877236274, + "grad_norm": 1.2144405841827393, + "learning_rate": 0.0001233299075025694, + "loss": 0.3099, + "step": 600 + }, + { + "epoch": 0.4009870450339297, + "grad_norm": 1.3122639656066895, + "learning_rate": 0.00013360739979445017, + "loss": 0.2929, + "step": 650 + }, + { + "epoch": 0.431832202344232, + "grad_norm": 1.0934101343154907, + "learning_rate": 0.00014388489208633093, + "loss": 0.3003, + "step": 700 + }, + { + "epoch": 0.4626773596545342, + "grad_norm": 0.7938969731330872, + "learning_rate": 0.00015416238437821172, + "loss": 0.2956, + "step": 750 + }, + { + "epoch": 0.4935225169648365, + "grad_norm": 0.6571168303489685, + "learning_rate": 0.0001644398766700925, + "loss": 0.2736, + "step": 800 + }, + { + "epoch": 0.5243676742751388, + "grad_norm": 1.0073938369750977, + "learning_rate": 0.0001747173689619733, + "loss": 0.2892, + "step": 850 + }, + { + "epoch": 0.5552128315854411, + "grad_norm": 0.9874083399772644, + "learning_rate": 0.00018499486125385408, + "loss": 0.2723, + "step": 900 + }, + { + "epoch": 0.5860579888957433, + "grad_norm": 1.1770968437194824, + "learning_rate": 0.00019527235354573487, + "loss": 0.2855, + "step": 950 + }, + { + "epoch": 0.6169031462060457, + "grad_norm": 1.00326669216156, + "learning_rate": 0.00019997622717095418, + "loss": 0.2587, + "step": 1000 + } + ], + "logging_steps": 50, + "max_steps": 4863, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7124677004623872.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2000/optimizer_0/.metadata b/checkpoint-2000/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..c7ba389db41f4f10a3ee053c87b276a1630775f6 --- /dev/null +++ b/checkpoint-2000/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5626ee28c1f24f09a7b7f3a240119b9d2db5082be6528657524ec153ab4bab40 +size 2626018 diff --git a/checkpoint-2000/optimizer_0/__0_0.distcp b/checkpoint-2000/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..41ac7e5a5c0b40454b40c62e40e5980cd08b1545 --- /dev/null +++ b/checkpoint-2000/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e1ddac24aff4bfe008d75f42dabb23ed177efe5cefe9f7b5f29f71a0ebdb1c1 +size 55406592 diff --git a/checkpoint-2000/optimizer_0/__1_0.distcp b/checkpoint-2000/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..614eb6f8ffe0acb122cc2b0dd219410cd5589875 --- /dev/null +++ b/checkpoint-2000/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:644a8b57f5bf12acebd9a45311e38f079556731bd0cce4d9e4479c52fe290daf +size 55526656 diff --git a/checkpoint-2000/optimizer_0/__2_0.distcp b/checkpoint-2000/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..15843ca84ab468ca853696c56a9ddfd6fece2318 --- /dev/null +++ b/checkpoint-2000/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e3ed8ae7c7f161c908da0ff18b57aeca951ebbd215de56c152243a70b4d230 +size 55480896 diff --git a/checkpoint-2000/optimizer_0/__3_0.distcp b/checkpoint-2000/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..b97b960c1c27bd8c06c9677667441d97ca34db72 --- /dev/null +++ b/checkpoint-2000/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e94e18382a97151194018cb781cf0fe22334ee35ac961ca0af3001abd6bc932 +size 55480896 diff --git a/checkpoint-2000/optimizer_0/__4_0.distcp b/checkpoint-2000/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..3f7bcdb1aadf2532e304a9ebcaa5279515362c3c --- /dev/null +++ b/checkpoint-2000/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63e6da3410ebcd43453c81d05a10eafb02c674cac7d1b8798b0933af17d3a218 +size 55480032 diff --git a/checkpoint-2000/optimizer_0/__5_0.distcp b/checkpoint-2000/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a9a8e33aa0a846f247b934cf2ba23c99369d4f5e --- /dev/null +++ b/checkpoint-2000/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed8c41c0e613f61c6d53924a953f451e70a74c66778418a43b07e0a72d4837fc +size 55480032 diff --git a/checkpoint-2000/optimizer_0/__6_0.distcp b/checkpoint-2000/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a70438719f50fc5e1ea0be69c19c2f7e197761be --- /dev/null +++ b/checkpoint-2000/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75dd182d21f0d83d4f2e1d017b7ad1bc81d1cdd6b5f05c411eb7156db6af5b54 +size 55480032 diff --git a/checkpoint-2000/optimizer_0/__7_0.distcp b/checkpoint-2000/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..4067e6b6297a14d27c1833bf1e4a0ca6e38f3d18 --- /dev/null +++ b/checkpoint-2000/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1887e9892bd4a005864e6de571616f9850350f98993f8ddf36fcfddca8d360e4 +size 55480032 diff --git a/checkpoint-2000/pytorch_model_fsdp_0/.metadata b/checkpoint-2000/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..60f91e315a24dbbb19c43df9013db337acab12c7 --- /dev/null +++ b/checkpoint-2000/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a19ab021f1ba6fc33e10c18810aae15bcdcc44f398f902265ff6fd94a02d4df +size 1064888 diff --git a/checkpoint-2000/pytorch_model_fsdp_0/__0_0.distcp b/checkpoint-2000/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..d2f850772e7a28ed224bc74b23b7aeef6a081806 --- /dev/null +++ b/checkpoint-2000/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:275c922a12364010c488d511c3b5e5de1bfe494ef4dbb54b1892668b1d4bfaac +size 27702864 diff --git a/checkpoint-2000/pytorch_model_fsdp_0/__1_0.distcp b/checkpoint-2000/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..123337505f571d51498fe25ab0b827aede8c6a38 --- /dev/null +++ b/checkpoint-2000/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b96915d94e47d988a4d4c1fd5a228abcba8bb73c05b155cff6c893a0ebf85b09 +size 27702864 diff --git a/checkpoint-2000/pytorch_model_fsdp_0/__2_0.distcp b/checkpoint-2000/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c874c2ac2e2027ecbcce4474644d9e8c7b7151be --- /dev/null +++ b/checkpoint-2000/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c852e5098594f95316cb3a0b872de32d839d418dc744165865805834e452b2a6 +size 27702864 diff --git a/checkpoint-2000/pytorch_model_fsdp_0/__3_0.distcp b/checkpoint-2000/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..2ef59677466d9563e33896187eac71afb76d38f0 --- /dev/null +++ b/checkpoint-2000/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df3993232148d686149b082f51d8dbdc8c96c898b3d4b399724f87c614527e80 +size 27702864 diff --git a/checkpoint-2000/pytorch_model_fsdp_0/__4_0.distcp b/checkpoint-2000/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..5a951ccc1d50c564ce2d237de73b46e06b708990 --- /dev/null +++ b/checkpoint-2000/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b68262b2ed6d2b9c1b691f92a6ad19522c13477f38044762eccfd58b25d39963 +size 27702864 diff --git a/checkpoint-2000/pytorch_model_fsdp_0/__5_0.distcp b/checkpoint-2000/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1e3b51acdd8f36bdd1bf29f53c86f650ad76ea04 --- /dev/null +++ b/checkpoint-2000/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f12638b582d1293d83d0d56e4ca8036a67a4bf47d1e68fb4260ce7492b4820d +size 27702864 diff --git a/checkpoint-2000/pytorch_model_fsdp_0/__6_0.distcp b/checkpoint-2000/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..47f27ccbbe2830acf19f50302325fde57ad82dfe --- /dev/null +++ b/checkpoint-2000/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e9745a4bdc41fb2deb28b003850b909d73fa094c02afef0ad14e955dfe9b153 +size 27702864 diff --git a/checkpoint-2000/pytorch_model_fsdp_0/__7_0.distcp b/checkpoint-2000/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a4277a57155087ca27cb6048a68cea77f21230cb --- /dev/null +++ b/checkpoint-2000/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5b433f984a2fbda86bfb4194be437e3ca37d25649ae5ce1ea1a70a13965cea7 +size 27702864 diff --git a/checkpoint-2000/rng_state_0.pth b/checkpoint-2000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..6fbba2ce2c8ca03eaf5e32e92db2a698f21e2f3f --- /dev/null +++ b/checkpoint-2000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8204337523c9526120d9dbd77c1b85e83685a646168ef1aa5614cc1cc72b52f5 +size 15984 diff --git a/checkpoint-2000/rng_state_1.pth b/checkpoint-2000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..a4baa1306e5d1a49ad6c01807f28fc6b3807c6d9 --- /dev/null +++ b/checkpoint-2000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fcac6b4bd212354a8c95c4fd130500378c409f8c520e7ec730ba272ddee284d +size 15984 diff --git a/checkpoint-2000/rng_state_2.pth b/checkpoint-2000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..fe01e3748fad42756c4101c080950530342db307 --- /dev/null +++ b/checkpoint-2000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a9bb45dc4e4d1accd280a5304c4c6b48809c92185e462b3df1633b8c69321c1 +size 15984 diff --git a/checkpoint-2000/rng_state_3.pth b/checkpoint-2000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..a8333bb7bea075fe9e6076b9f0706610eeb8d062 --- /dev/null +++ b/checkpoint-2000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef930e31a56ecd7a23c930248ec8112e80e94de69b129e269c40cdaafa2ef68f +size 15984 diff --git a/checkpoint-2000/rng_state_4.pth b/checkpoint-2000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..73059dc86285a2c3fd2ed6f2a96feb1d614efa0f --- /dev/null +++ b/checkpoint-2000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef8e6410b5a4f726cc91f2453f335b17e99411026c1b324c67fad5d0d01dbce3 +size 15984 diff --git a/checkpoint-2000/rng_state_5.pth b/checkpoint-2000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..815dba8f9e22e37561aebf83c41ab1e089859d1e --- /dev/null +++ b/checkpoint-2000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a6589c5ff22c2d7ecacc833982a6a48afdc7588f37d956a4fa112488eb0c20f +size 15984 diff --git a/checkpoint-2000/rng_state_6.pth b/checkpoint-2000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..63624f319a5aaf9c6dbb2967f785bd28b3f7062e --- /dev/null +++ b/checkpoint-2000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5afb3fca37370a1e2b010cddf6ff1b687cedc7aabdd303ea4733636c60e40f +size 15984 diff --git a/checkpoint-2000/rng_state_7.pth b/checkpoint-2000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..d7f57416dc59e284a0e9cf515124ef132b16d7a7 --- /dev/null +++ b/checkpoint-2000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f310f8000ca6cd9fee097b1cce6b8bc4f9bcb822d447cfe0949f1f23cd95a1e2 +size 15984 diff --git a/checkpoint-2000/scheduler.pt b/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..117ae28b91c3390c065d859101f699335a11ea1d --- /dev/null +++ b/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b50a2a52d019b88bc3a06bc9687818ff977d7e0dc9c08edb7e627171c97e6566 +size 1064 diff --git a/checkpoint-2000/trainer_state.json b/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..170677e1ffdc3abdf449df98727fbe077e2f4e38 --- /dev/null +++ b/checkpoint-2000/trainer_state.json @@ -0,0 +1,313 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2338062924120914, + "eval_steps": 500, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.030845157310302282, + "grad_norm": 2.8206074237823486, + "learning_rate": 1.0277492291880782e-05, + "loss": 1.8082, + "step": 50 + }, + { + "epoch": 0.061690314620604564, + "grad_norm": 3.4183013439178467, + "learning_rate": 2.0554984583761563e-05, + "loss": 0.6538, + "step": 100 + }, + { + "epoch": 0.09253547193090685, + "grad_norm": 2.170591354370117, + "learning_rate": 3.083247687564235e-05, + "loss": 0.4563, + "step": 150 + }, + { + "epoch": 0.12338062924120913, + "grad_norm": 1.4687080383300781, + "learning_rate": 4.110996916752313e-05, + "loss": 0.4263, + "step": 200 + }, + { + "epoch": 0.15422578655151142, + "grad_norm": 1.836676836013794, + "learning_rate": 5.1387461459403907e-05, + "loss": 0.3994, + "step": 250 + }, + { + "epoch": 0.1850709438618137, + "grad_norm": 1.2718663215637207, + "learning_rate": 6.16649537512847e-05, + "loss": 0.3665, + "step": 300 + }, + { + "epoch": 0.215916101172116, + "grad_norm": 1.6945191621780396, + "learning_rate": 7.194244604316547e-05, + "loss": 0.3577, + "step": 350 + }, + { + "epoch": 0.24676125848241826, + "grad_norm": 1.2829898595809937, + "learning_rate": 8.221993833504625e-05, + "loss": 0.347, + "step": 400 + }, + { + "epoch": 0.27760641579272055, + "grad_norm": 1.01521635055542, + "learning_rate": 9.249743062692704e-05, + "loss": 0.3288, + "step": 450 + }, + { + "epoch": 0.30845157310302285, + "grad_norm": 1.522111415863037, + "learning_rate": 0.00010277492291880781, + "loss": 0.3267, + "step": 500 + }, + { + "epoch": 0.3392967304133251, + "grad_norm": 0.9678927659988403, + "learning_rate": 0.00011305241521068859, + "loss": 0.3198, + "step": 550 + }, + { + "epoch": 0.3701418877236274, + "grad_norm": 1.2144405841827393, + "learning_rate": 0.0001233299075025694, + "loss": 0.3099, + "step": 600 + }, + { + "epoch": 0.4009870450339297, + "grad_norm": 1.3122639656066895, + "learning_rate": 0.00013360739979445017, + "loss": 0.2929, + "step": 650 + }, + { + "epoch": 0.431832202344232, + "grad_norm": 1.0934101343154907, + "learning_rate": 0.00014388489208633093, + "loss": 0.3003, + "step": 700 + }, + { + "epoch": 0.4626773596545342, + "grad_norm": 0.7938969731330872, + "learning_rate": 0.00015416238437821172, + "loss": 0.2956, + "step": 750 + }, + { + "epoch": 0.4935225169648365, + "grad_norm": 0.6571168303489685, + "learning_rate": 0.0001644398766700925, + "loss": 0.2736, + "step": 800 + }, + { + "epoch": 0.5243676742751388, + "grad_norm": 1.0073938369750977, + "learning_rate": 0.0001747173689619733, + "loss": 0.2892, + "step": 850 + }, + { + "epoch": 0.5552128315854411, + "grad_norm": 0.9874083399772644, + "learning_rate": 0.00018499486125385408, + "loss": 0.2723, + "step": 900 + }, + { + "epoch": 0.5860579888957433, + "grad_norm": 1.1770968437194824, + "learning_rate": 0.00019527235354573487, + "loss": 0.2855, + "step": 950 + }, + { + "epoch": 0.6169031462060457, + "grad_norm": 1.00326669216156, + "learning_rate": 0.00019997622717095418, + "loss": 0.2587, + "step": 1000 + }, + { + "epoch": 0.6477483035163479, + "grad_norm": 1.0380828380584717, + "learning_rate": 0.0001998067088192682, + "loss": 0.2764, + "step": 1050 + }, + { + "epoch": 0.6785934608266502, + "grad_norm": 1.430301547050476, + "learning_rate": 0.00019947447034120033, + "loss": 0.2565, + "step": 1100 + }, + { + "epoch": 0.7094386181369525, + "grad_norm": 1.0970648527145386, + "learning_rate": 0.00019898005340261433, + "loss": 0.2685, + "step": 1150 + }, + { + "epoch": 0.7402837754472548, + "grad_norm": 0.9110261797904968, + "learning_rate": 0.0001983242640774473, + "loss": 0.2489, + "step": 1200 + }, + { + "epoch": 0.7711289327575571, + "grad_norm": 1.0265332460403442, + "learning_rate": 0.00019750817153352506, + "loss": 0.2425, + "step": 1250 + }, + { + "epoch": 0.8019740900678594, + "grad_norm": 0.8820884823799133, + "learning_rate": 0.00019653310628944164, + "loss": 0.2402, + "step": 1300 + }, + { + "epoch": 0.8328192473781616, + "grad_norm": 0.933083713054657, + "learning_rate": 0.00019540065804534467, + "loss": 0.2444, + "step": 1350 + }, + { + "epoch": 0.863664404688464, + "grad_norm": 0.8229042887687683, + "learning_rate": 0.00019411267309116375, + "loss": 0.2172, + "step": 1400 + }, + { + "epoch": 0.8945095619987662, + "grad_norm": 0.9641085267066956, + "learning_rate": 0.00019267125129650688, + "loss": 0.2501, + "step": 1450 + }, + { + "epoch": 0.9253547193090684, + "grad_norm": 0.6993410587310791, + "learning_rate": 0.00019107874268713254, + "loss": 0.2246, + "step": 1500 + }, + { + "epoch": 0.9561998766193708, + "grad_norm": 1.0574674606323242, + "learning_rate": 0.00018933774361357917, + "loss": 0.2265, + "step": 1550 + }, + { + "epoch": 0.987045033929673, + "grad_norm": 1.0701500177383423, + "learning_rate": 0.0001874510925181983, + "loss": 0.2116, + "step": 1600 + }, + { + "epoch": 1.0178901912399754, + "grad_norm": 0.44411325454711914, + "learning_rate": 0.0001854218653074927, + "loss": 0.1978, + "step": 1650 + }, + { + "epoch": 1.0487353485502775, + "grad_norm": 0.8408161401748657, + "learning_rate": 0.0001832533703373043, + "loss": 0.1701, + "step": 1700 + }, + { + "epoch": 1.0795805058605799, + "grad_norm": 0.7417210936546326, + "learning_rate": 0.0001809491430190276, + "loss": 0.1873, + "step": 1750 + }, + { + "epoch": 1.1104256631708822, + "grad_norm": 0.859341025352478, + "learning_rate": 0.00017851294005564254, + "loss": 0.1717, + "step": 1800 + }, + { + "epoch": 1.1412708204811843, + "grad_norm": 0.6794934272766113, + "learning_rate": 0.0001759487333169642, + "loss": 0.1732, + "step": 1850 + }, + { + "epoch": 1.1721159777914867, + "grad_norm": 0.5129622220993042, + "learning_rate": 0.00017326070336409427, + "loss": 0.172, + "step": 1900 + }, + { + "epoch": 1.202961135101789, + "grad_norm": 0.5867941379547119, + "learning_rate": 0.00017045323263363272, + "loss": 0.1724, + "step": 1950 + }, + { + "epoch": 1.2338062924120914, + "grad_norm": 0.887195348739624, + "learning_rate": 0.0001675308982927608, + "loss": 0.1655, + "step": 2000 + } + ], + "logging_steps": 50, + "max_steps": 4863, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4223873847328768e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3000/optimizer_0/.metadata b/checkpoint-3000/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..37ca2d696b69fb21b0457a7ab8ed247bceb2181a --- /dev/null +++ b/checkpoint-3000/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:748668c95191b65562b974348ff46cab001eb750c2621623b6058016605c3e32 +size 2626018 diff --git a/checkpoint-3000/optimizer_0/__0_0.distcp b/checkpoint-3000/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..f05a1c6db7c4d0fb4277f0019e802e450a76a544 --- /dev/null +++ b/checkpoint-3000/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca48008f373772e1741eeb4d36bef0d7297c823a1447bd2978fb988e692f16bb +size 55406592 diff --git a/checkpoint-3000/optimizer_0/__1_0.distcp b/checkpoint-3000/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..704492698962e08d04cf561afc7bcb55945cfaf8 --- /dev/null +++ b/checkpoint-3000/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6013b3a6bc635d76777e16cbd177d4f7d4c1b395710711107fd5622c64a38f57 +size 55526656 diff --git a/checkpoint-3000/optimizer_0/__2_0.distcp b/checkpoint-3000/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..34bb4b4090369a218e6615e8add5bcbf620b5cbc --- /dev/null +++ b/checkpoint-3000/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c93d7511f7a39add5b137e48e000282b7812533cbfdc3efe90f6729c16461c95 +size 55480896 diff --git a/checkpoint-3000/optimizer_0/__3_0.distcp b/checkpoint-3000/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c682a80f100f87c120077ac3e9f187a818fbdddc --- /dev/null +++ b/checkpoint-3000/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:427d0f56bbecf078e0e6b425a1f79cb61dc4fbcccece107186eef0a1742f572d +size 55480896 diff --git a/checkpoint-3000/optimizer_0/__4_0.distcp b/checkpoint-3000/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6ed6191baae32395e4c90ada82d102cc770eb955 --- /dev/null +++ b/checkpoint-3000/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bc67d3145dd2f6b111d0474cf3c60ded686150503f0fe2f64c33962765e2dd7 +size 55480032 diff --git a/checkpoint-3000/optimizer_0/__5_0.distcp b/checkpoint-3000/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..190854c91470dd04546d589bb2c3e0185acfbd12 --- /dev/null +++ b/checkpoint-3000/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e52e3a8b1a88c6dffd9a2ecd404a0fa6b8774820ae24eb595b20313e2697675 +size 55480032 diff --git a/checkpoint-3000/optimizer_0/__6_0.distcp b/checkpoint-3000/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..215ad01abfd6c7721c90c581776b06ce7abb8d87 --- /dev/null +++ b/checkpoint-3000/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd85370b5a75d754b176ec54ec00aada88dca7573a4793f5c5664283ceae2e8e +size 55480032 diff --git a/checkpoint-3000/optimizer_0/__7_0.distcp b/checkpoint-3000/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..9a04b86aa639c22bf1d314a9fad712d2fd457bd5 --- /dev/null +++ b/checkpoint-3000/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94257614c2c98373055cfb3a2c8cb93669acdcddab4b7eefff1c31abb1f2c152 +size 55480032 diff --git a/checkpoint-3000/pytorch_model_fsdp_0/.metadata b/checkpoint-3000/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..d935f87f4aec3ac5b946691f02f618ea7bb61e62 --- /dev/null +++ b/checkpoint-3000/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:003e3a2a9fdb97f4600072d3be9d3e229736edc88d952b24ae72c4f61bec728b +size 1064888 diff --git a/checkpoint-3000/pytorch_model_fsdp_0/__0_0.distcp b/checkpoint-3000/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..85cc515f509bd76e719443857eeec815cca6a918 --- /dev/null +++ b/checkpoint-3000/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4202fec8ab23ca2c5153cddaf6d839c7d5889212f9559be6fcb275aa3d3f9d9b +size 27702864 diff --git a/checkpoint-3000/pytorch_model_fsdp_0/__1_0.distcp b/checkpoint-3000/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8d56d6c18732101ba519cd08f01883791d9fc790 --- /dev/null +++ b/checkpoint-3000/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caeb7dc0531f33c123ca588af9d3babb2b0686f12b96184d3ebba6adde3af359 +size 27702864 diff --git a/checkpoint-3000/pytorch_model_fsdp_0/__2_0.distcp b/checkpoint-3000/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..effe37b2a3c922151b8aeb63c3b399ce2f452816 --- /dev/null +++ b/checkpoint-3000/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b797fcc6072c38623b13b327a3a950d74abec027b7c8e7da5ae4b100f965fdc +size 27702864 diff --git a/checkpoint-3000/pytorch_model_fsdp_0/__3_0.distcp b/checkpoint-3000/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..fb3bbc15aeca109bd5084346efd4fa9f5a2ebf86 --- /dev/null +++ b/checkpoint-3000/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b84dc035b8a6ffb02a74dbb8d4928f1af7828a0453ab615c9df39be4b57989b +size 27702864 diff --git a/checkpoint-3000/pytorch_model_fsdp_0/__4_0.distcp b/checkpoint-3000/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..db1ac85c7495d50fafd74e091947f43d8b43574d --- /dev/null +++ b/checkpoint-3000/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bba6f51255ab5ff2bb6c2121829f12fa9356202fd41fb9bbfafd132f6d71e78 +size 27702864 diff --git a/checkpoint-3000/pytorch_model_fsdp_0/__5_0.distcp b/checkpoint-3000/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..83f1ccaf645f5af58f41bdf6009137cc86d92fc7 --- /dev/null +++ b/checkpoint-3000/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e69acc47113551573a0cfbf4d1724077c876a3d8c84da4ac1f9c6d6a05d5d5f2 +size 27702864 diff --git a/checkpoint-3000/pytorch_model_fsdp_0/__6_0.distcp b/checkpoint-3000/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..010c0c23c59ffdb8f7ab3e105da6de98dbfa6f5f --- /dev/null +++ b/checkpoint-3000/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d118833f16ea9bff6c6ef6f3ec1bfe83542618745af94776955c443d48f8702f +size 27702864 diff --git a/checkpoint-3000/pytorch_model_fsdp_0/__7_0.distcp b/checkpoint-3000/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8a44962b4b4a4569054989f9c42fe79913a758cb --- /dev/null +++ b/checkpoint-3000/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2707aef405a19f1a8eb966e0bab6a873b854fe0234c9aef563b5b774ab0a4c77 +size 27702864 diff --git a/checkpoint-3000/rng_state_0.pth b/checkpoint-3000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..de5f8d04463b2e8a322d607d35285caebde6b708 --- /dev/null +++ b/checkpoint-3000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eca25bb322aead17a6e263780102b1ce1d5bda15adb680ec5e9903734d1775f0 +size 15984 diff --git a/checkpoint-3000/rng_state_1.pth b/checkpoint-3000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..ae1dcb1048552c8c007a3061a2644f170c6256b7 --- /dev/null +++ b/checkpoint-3000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05b8b65bc3bb99850198e79a1bc480e1bd929c3fc32fa8c0712a81898aa2e3ce +size 15984 diff --git a/checkpoint-3000/rng_state_2.pth b/checkpoint-3000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..c453c1c794ec3178cb0a736a931eaac82fa59269 --- /dev/null +++ b/checkpoint-3000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ed8c34dda7d7d77615a76132fa6c0e1d4b88e8810417cc99f85c3103757b2c +size 15984 diff --git a/checkpoint-3000/rng_state_3.pth b/checkpoint-3000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..fc741c72424a032f0a09c235821b3fc8387fd0e8 --- /dev/null +++ b/checkpoint-3000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5b725351c87c578b0a3419b1b57cbcbaf254424b0c250eb8a5ae10f69f9b6e7 +size 15984 diff --git a/checkpoint-3000/rng_state_4.pth b/checkpoint-3000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..bbea7558e4a23f6cd1224052beb0aca329e4ce2d --- /dev/null +++ b/checkpoint-3000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:263ebdd0f07ec0f7c3e6c614fb14978f91c43d07facabafdd56c5601a11e8e3e +size 15984 diff --git a/checkpoint-3000/rng_state_5.pth b/checkpoint-3000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..d4b36574448bedbb4dcf98dca7a7ce037f86df91 --- /dev/null +++ b/checkpoint-3000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e36719277cb11d3501943a798709b2cff38b2434c4dfcedc2dc40944af70ee4 +size 15984 diff --git a/checkpoint-3000/rng_state_6.pth b/checkpoint-3000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..5279af3b4e27e521831e34e39e89e093bb83abb4 --- /dev/null +++ b/checkpoint-3000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca07c8047502cf7cf1b00fa640b254d3d3fb558b43aa6c067b949cb89616581a +size 15984 diff --git a/checkpoint-3000/rng_state_7.pth b/checkpoint-3000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..d466cac794b02e122d7bff04cb29412e9e8a8463 --- /dev/null +++ b/checkpoint-3000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ed7f35db6314c7a0e9d088bb933e9b58e7ab5338d787dc6263737581f36c31f +size 15984 diff --git a/checkpoint-3000/scheduler.pt b/checkpoint-3000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..05e3a9027e3080999b69608dd232315fd6104bd8 --- /dev/null +++ b/checkpoint-3000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:405b010a3b9e35670a4b0d15effd73f4af3c887c46d143657bcff6553bf6726f +size 1064 diff --git a/checkpoint-3000/trainer_state.json b/checkpoint-3000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0c3505462eb711522fde119a8135db68101638fb --- /dev/null +++ b/checkpoint-3000/trainer_state.json @@ -0,0 +1,453 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.850709438618137, + "eval_steps": 500, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.030845157310302282, + "grad_norm": 2.8206074237823486, + "learning_rate": 1.0277492291880782e-05, + "loss": 1.8082, + "step": 50 + }, + { + "epoch": 0.061690314620604564, + "grad_norm": 3.4183013439178467, + "learning_rate": 2.0554984583761563e-05, + "loss": 0.6538, + "step": 100 + }, + { + "epoch": 0.09253547193090685, + "grad_norm": 2.170591354370117, + "learning_rate": 3.083247687564235e-05, + "loss": 0.4563, + "step": 150 + }, + { + "epoch": 0.12338062924120913, + "grad_norm": 1.4687080383300781, + "learning_rate": 4.110996916752313e-05, + "loss": 0.4263, + "step": 200 + }, + { + "epoch": 0.15422578655151142, + "grad_norm": 1.836676836013794, + "learning_rate": 5.1387461459403907e-05, + "loss": 0.3994, + "step": 250 + }, + { + "epoch": 0.1850709438618137, + "grad_norm": 1.2718663215637207, + "learning_rate": 6.16649537512847e-05, + "loss": 0.3665, + "step": 300 + }, + { + "epoch": 0.215916101172116, + "grad_norm": 1.6945191621780396, + "learning_rate": 7.194244604316547e-05, + "loss": 0.3577, + "step": 350 + }, + { + "epoch": 0.24676125848241826, + "grad_norm": 1.2829898595809937, + "learning_rate": 8.221993833504625e-05, + "loss": 0.347, + "step": 400 + }, + { + "epoch": 0.27760641579272055, + "grad_norm": 1.01521635055542, + "learning_rate": 9.249743062692704e-05, + "loss": 0.3288, + "step": 450 + }, + { + "epoch": 0.30845157310302285, + "grad_norm": 1.522111415863037, + "learning_rate": 0.00010277492291880781, + "loss": 0.3267, + "step": 500 + }, + { + "epoch": 0.3392967304133251, + "grad_norm": 0.9678927659988403, + "learning_rate": 0.00011305241521068859, + "loss": 0.3198, + "step": 550 + }, + { + "epoch": 0.3701418877236274, + "grad_norm": 1.2144405841827393, + "learning_rate": 0.0001233299075025694, + "loss": 0.3099, + "step": 600 + }, + { + "epoch": 0.4009870450339297, + "grad_norm": 1.3122639656066895, + "learning_rate": 0.00013360739979445017, + "loss": 0.2929, + "step": 650 + }, + { + "epoch": 0.431832202344232, + "grad_norm": 1.0934101343154907, + "learning_rate": 0.00014388489208633093, + "loss": 0.3003, + "step": 700 + }, + { + "epoch": 0.4626773596545342, + "grad_norm": 0.7938969731330872, + "learning_rate": 0.00015416238437821172, + "loss": 0.2956, + "step": 750 + }, + { + "epoch": 0.4935225169648365, + "grad_norm": 0.6571168303489685, + "learning_rate": 0.0001644398766700925, + "loss": 0.2736, + "step": 800 + }, + { + "epoch": 0.5243676742751388, + "grad_norm": 1.0073938369750977, + "learning_rate": 0.0001747173689619733, + "loss": 0.2892, + "step": 850 + }, + { + "epoch": 0.5552128315854411, + "grad_norm": 0.9874083399772644, + "learning_rate": 0.00018499486125385408, + "loss": 0.2723, + "step": 900 + }, + { + "epoch": 0.5860579888957433, + "grad_norm": 1.1770968437194824, + "learning_rate": 0.00019527235354573487, + "loss": 0.2855, + "step": 950 + }, + { + "epoch": 0.6169031462060457, + "grad_norm": 1.00326669216156, + "learning_rate": 0.00019997622717095418, + "loss": 0.2587, + "step": 1000 + }, + { + "epoch": 0.6477483035163479, + "grad_norm": 1.0380828380584717, + "learning_rate": 0.0001998067088192682, + "loss": 0.2764, + "step": 1050 + }, + { + "epoch": 0.6785934608266502, + "grad_norm": 1.430301547050476, + "learning_rate": 0.00019947447034120033, + "loss": 0.2565, + "step": 1100 + }, + { + "epoch": 0.7094386181369525, + "grad_norm": 1.0970648527145386, + "learning_rate": 0.00019898005340261433, + "loss": 0.2685, + "step": 1150 + }, + { + "epoch": 0.7402837754472548, + "grad_norm": 0.9110261797904968, + "learning_rate": 0.0001983242640774473, + "loss": 0.2489, + "step": 1200 + }, + { + "epoch": 0.7711289327575571, + "grad_norm": 1.0265332460403442, + "learning_rate": 0.00019750817153352506, + "loss": 0.2425, + "step": 1250 + }, + { + "epoch": 0.8019740900678594, + "grad_norm": 0.8820884823799133, + "learning_rate": 0.00019653310628944164, + "loss": 0.2402, + "step": 1300 + }, + { + "epoch": 0.8328192473781616, + "grad_norm": 0.933083713054657, + "learning_rate": 0.00019540065804534467, + "loss": 0.2444, + "step": 1350 + }, + { + "epoch": 0.863664404688464, + "grad_norm": 0.8229042887687683, + "learning_rate": 0.00019411267309116375, + "loss": 0.2172, + "step": 1400 + }, + { + "epoch": 0.8945095619987662, + "grad_norm": 0.9641085267066956, + "learning_rate": 0.00019267125129650688, + "loss": 0.2501, + "step": 1450 + }, + { + "epoch": 0.9253547193090684, + "grad_norm": 0.6993410587310791, + "learning_rate": 0.00019107874268713254, + "loss": 0.2246, + "step": 1500 + }, + { + "epoch": 0.9561998766193708, + "grad_norm": 1.0574674606323242, + "learning_rate": 0.00018933774361357917, + "loss": 0.2265, + "step": 1550 + }, + { + "epoch": 0.987045033929673, + "grad_norm": 1.0701500177383423, + "learning_rate": 0.0001874510925181983, + "loss": 0.2116, + "step": 1600 + }, + { + "epoch": 1.0178901912399754, + "grad_norm": 0.44411325454711914, + "learning_rate": 0.0001854218653074927, + "loss": 0.1978, + "step": 1650 + }, + { + "epoch": 1.0487353485502775, + "grad_norm": 0.8408161401748657, + "learning_rate": 0.0001832533703373043, + "loss": 0.1701, + "step": 1700 + }, + { + "epoch": 1.0795805058605799, + "grad_norm": 0.7417210936546326, + "learning_rate": 0.0001809491430190276, + "loss": 0.1873, + "step": 1750 + }, + { + "epoch": 1.1104256631708822, + "grad_norm": 0.859341025352478, + "learning_rate": 0.00017851294005564254, + "loss": 0.1717, + "step": 1800 + }, + { + "epoch": 1.1412708204811843, + "grad_norm": 0.6794934272766113, + "learning_rate": 0.0001759487333169642, + "loss": 0.1732, + "step": 1850 + }, + { + "epoch": 1.1721159777914867, + "grad_norm": 0.5129622220993042, + "learning_rate": 0.00017326070336409427, + "loss": 0.172, + "step": 1900 + }, + { + "epoch": 1.202961135101789, + "grad_norm": 0.5867941379547119, + "learning_rate": 0.00017045323263363272, + "loss": 0.1724, + "step": 1950 + }, + { + "epoch": 1.2338062924120914, + "grad_norm": 0.887195348739624, + "learning_rate": 0.0001675308982927608, + "loss": 0.1655, + "step": 2000 + }, + { + "epoch": 1.2646514497223935, + "grad_norm": 0.4721340835094452, + "learning_rate": 0.0001644984647768447, + "loss": 0.1642, + "step": 2050 + }, + { + "epoch": 1.2954966070326959, + "grad_norm": 0.551078200340271, + "learning_rate": 0.00016136087602172582, + "loss": 0.1678, + "step": 2100 + }, + { + "epoch": 1.3263417643429982, + "grad_norm": 0.97613525390625, + "learning_rate": 0.00015812324740336248, + "loss": 0.159, + "step": 2150 + }, + { + "epoch": 1.3571869216533003, + "grad_norm": 0.7279055714607239, + "learning_rate": 0.00015479085739796328, + "loss": 0.1612, + "step": 2200 + }, + { + "epoch": 1.3880320789636027, + "grad_norm": 0.9861264228820801, + "learning_rate": 0.0001513691389762097, + "loss": 0.1578, + "step": 2250 + }, + { + "epoch": 1.418877236273905, + "grad_norm": 0.7998865246772766, + "learning_rate": 0.00014786367074559828, + "loss": 0.1569, + "step": 2300 + }, + { + "epoch": 1.4497223935842074, + "grad_norm": 0.851041853427887, + "learning_rate": 0.0001442801678553436, + "loss": 0.1486, + "step": 2350 + }, + { + "epoch": 1.4805675508945095, + "grad_norm": 0.42780816555023193, + "learning_rate": 0.00014062447267866986, + "loss": 0.1486, + "step": 2400 + }, + { + "epoch": 1.5114127082048119, + "grad_norm": 0.7399270534515381, + "learning_rate": 0.00013690254528768225, + "loss": 0.1364, + "step": 2450 + }, + { + "epoch": 1.542257865515114, + "grad_norm": 1.0372874736785889, + "learning_rate": 0.0001331204537363485, + "loss": 0.1393, + "step": 2500 + }, + { + "epoch": 1.5731030228254164, + "grad_norm": 1.0774208307266235, + "learning_rate": 0.00012928436416743098, + "loss": 0.1468, + "step": 2550 + }, + { + "epoch": 1.6039481801357187, + "grad_norm": 0.849371075630188, + "learning_rate": 0.00012540053075949987, + "loss": 0.1357, + "step": 2600 + }, + { + "epoch": 1.634793337446021, + "grad_norm": 0.9047374725341797, + "learning_rate": 0.00012147528553041718, + "loss": 0.1292, + "step": 2650 + }, + { + "epoch": 1.6656384947563234, + "grad_norm": 0.46791982650756836, + "learning_rate": 0.00011751502801391479, + "loss": 0.1308, + "step": 2700 + }, + { + "epoch": 1.6964836520666255, + "grad_norm": 0.8372901082038879, + "learning_rate": 0.00011352621482609807, + "loss": 0.1401, + "step": 2750 + }, + { + "epoch": 1.7273288093769277, + "grad_norm": 0.7648535966873169, + "learning_rate": 0.00010951534913888515, + "loss": 0.1245, + "step": 2800 + }, + { + "epoch": 1.75817396668723, + "grad_norm": 0.4626932442188263, + "learning_rate": 0.00010548897007754374, + "loss": 0.1288, + "step": 2850 + }, + { + "epoch": 1.7890191239975324, + "grad_norm": 0.5342707633972168, + "learning_rate": 0.00010145364205961125, + "loss": 0.1237, + "step": 2900 + }, + { + "epoch": 1.8198642813078347, + "grad_norm": 0.7241202592849731, + "learning_rate": 9.74159440925796e-05, + "loss": 0.1168, + "step": 2950 + }, + { + "epoch": 1.850709438618137, + "grad_norm": 0.48138442635536194, + "learning_rate": 9.338245904779345e-05, + "loss": 0.1149, + "step": 3000 + } + ], + "logging_steps": 50, + "max_steps": 4863, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.1332533710946304e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4000/optimizer_0/.metadata b/checkpoint-4000/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..972df240e2e891d9e11e317859225a0993ede0d4 --- /dev/null +++ b/checkpoint-4000/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9322e9dd7c71ba94efa40c466e9b54939667bbfb2701a2c40531b897a2433c94 +size 2626018 diff --git a/checkpoint-4000/optimizer_0/__0_0.distcp b/checkpoint-4000/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..83b3e0b2c3af4f97ebf81a0ac58f141f9bedbb38 --- /dev/null +++ b/checkpoint-4000/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19825c2d4809d9c9698a68c9cfa1b72d2c3cd60ca1c7a4661e33f92316fb038f +size 55406592 diff --git a/checkpoint-4000/optimizer_0/__1_0.distcp b/checkpoint-4000/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..445bed14be2561c5d099d87995a55d0be501eaea --- /dev/null +++ b/checkpoint-4000/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4ca98957223762ee6d00a486ccd7e131e55687fc48dd20368ced71a216cbce6 +size 55526656 diff --git a/checkpoint-4000/optimizer_0/__2_0.distcp b/checkpoint-4000/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..9881084cfc9351bd0f56c355ecf408ba164f0155 --- /dev/null +++ b/checkpoint-4000/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d73c321efe7d457727b51c2e549815e2c70184ab4b9991d1d2227dc80cc9b730 +size 55480896 diff --git a/checkpoint-4000/optimizer_0/__3_0.distcp b/checkpoint-4000/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c698547e333cfe18cc06c7edd41243a1b9ae6a79 --- /dev/null +++ b/checkpoint-4000/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aeb4cbd0c358b1f758cc8b1d7417ed6072f02222e6ad73a453b3c938370bbab +size 55480896 diff --git a/checkpoint-4000/optimizer_0/__4_0.distcp b/checkpoint-4000/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8eb4453f98abb88a1d4fe1751060d8e2f596d37b --- /dev/null +++ b/checkpoint-4000/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e973ccd48d4cf8c1bec651f12a9ee118f9055637756a7b6402a241b80e0b3894 +size 55480032 diff --git a/checkpoint-4000/optimizer_0/__5_0.distcp b/checkpoint-4000/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7026c2fe606644302d0fcd54c4d95368b9b4ff34 --- /dev/null +++ b/checkpoint-4000/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c710821d5c401d4627e324f08e3e6708925a7f37ae3251824c919bb23db40cb0 +size 55480032 diff --git a/checkpoint-4000/optimizer_0/__6_0.distcp b/checkpoint-4000/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a3e737ddfc426d9596aeda1b1e7c5cecef547426 --- /dev/null +++ b/checkpoint-4000/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5640075fd5aa97dabc664ba7dee1f9f96d93887f6b68ac3761f04b12ec804249 +size 55480032 diff --git a/checkpoint-4000/optimizer_0/__7_0.distcp b/checkpoint-4000/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..87e7072ab2924711991e3e7d1dcfe99a8ad66006 --- /dev/null +++ b/checkpoint-4000/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93faf9de563a4939d4c8c298ab9faf84e10947ade68bb7adc8f5e618d7427827 +size 55480032 diff --git a/checkpoint-4000/pytorch_model_fsdp_0/.metadata b/checkpoint-4000/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..520a9103e4eb33aa6e92abdab7bfce9f052e49c1 --- /dev/null +++ b/checkpoint-4000/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64db2a01fabe26ebd65bf5e36de8b24fa77dc654c82fb6c3e3718561dbee9a5e +size 1064888 diff --git a/checkpoint-4000/pytorch_model_fsdp_0/__0_0.distcp b/checkpoint-4000/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..53f1af0f5516b5a139d1942d09d0a2874ca9a877 --- /dev/null +++ b/checkpoint-4000/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4753f4ea88d8c36a857983ff62a8ca5160fa0a5f173c7145e86380c9cebebb4e +size 27702864 diff --git a/checkpoint-4000/pytorch_model_fsdp_0/__1_0.distcp b/checkpoint-4000/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..695643bea840f11f245dba7a35c5343cffceccb0 --- /dev/null +++ b/checkpoint-4000/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fa9c40359c40f6a3a4a6c54e258d2a5c34cbe014737072d7d766df69d5df589 +size 27702864 diff --git a/checkpoint-4000/pytorch_model_fsdp_0/__2_0.distcp b/checkpoint-4000/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1a1cc1c20ee4d41cff1ca2b837d725c2210ff0a3 --- /dev/null +++ b/checkpoint-4000/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c7ebf6e41bcd0f3d06c1d389ebce3891563d5cf3d93ad04aa402a97bcb48ea +size 27702864 diff --git a/checkpoint-4000/pytorch_model_fsdp_0/__3_0.distcp b/checkpoint-4000/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..1756925e673c2e764e7ba8685498dfc7211fc721 --- /dev/null +++ b/checkpoint-4000/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea65ca9128696f22a9eaf08ff8f57e703e801846a5875b8eb338a65085c93680 +size 27702864 diff --git a/checkpoint-4000/pytorch_model_fsdp_0/__4_0.distcp b/checkpoint-4000/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..44aa8eeecb845ca05c462af496dd32ee42e7071c --- /dev/null +++ b/checkpoint-4000/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd049def058808174b25f7d7e8fe1cbef3bec606066cb02489fce2f1df7b37a6 +size 27702864 diff --git a/checkpoint-4000/pytorch_model_fsdp_0/__5_0.distcp b/checkpoint-4000/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..8ecd5ca796119b1320009a4c1a0e9b66b08fc096 --- /dev/null +++ b/checkpoint-4000/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:374ef6e8b2266662e7de9a142c9092cea9a19ab169171d74039b3364f433a380 +size 27702864 diff --git a/checkpoint-4000/pytorch_model_fsdp_0/__6_0.distcp b/checkpoint-4000/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..700d4d30fe6f8f52e31a841dba75b0cedacde297 --- /dev/null +++ b/checkpoint-4000/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05bb29fe17e429e3024036164ff69f981ee6d0702de28192bd4edc5c54764712 +size 27702864 diff --git a/checkpoint-4000/pytorch_model_fsdp_0/__7_0.distcp b/checkpoint-4000/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..dcfa104f5d2984db644eafb2ce70bff7baae7aca --- /dev/null +++ b/checkpoint-4000/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95dd1a4b1b80326a8b3d43d5d205b59ed03fbc0f9bbd857564f4c52d01ea748f +size 27702864 diff --git a/checkpoint-4000/rng_state_0.pth b/checkpoint-4000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..f9dbb881e0122b6cc93a79c6b1eaf87d5bb4998c --- /dev/null +++ b/checkpoint-4000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d86506be69d6b5008a1c4730d11db100f6bfa6e4e6746904133cbd98e05c8140 +size 15984 diff --git a/checkpoint-4000/rng_state_1.pth b/checkpoint-4000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..efbe65e41d46f9a753f12c910e49bf295719504a --- /dev/null +++ b/checkpoint-4000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96abc72caae2543fd42121f793128f0e338e1d278a7be1484bd51835becd71da +size 15984 diff --git a/checkpoint-4000/rng_state_2.pth b/checkpoint-4000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..af638ad12c8e2741f3b7fac625d4677d1c22b181 --- /dev/null +++ b/checkpoint-4000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b71bf190b6d660361aa4387f876444d32f12d831aa90172906f9257641a37d8 +size 15984 diff --git a/checkpoint-4000/rng_state_3.pth b/checkpoint-4000/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..946df35c07667abc1a3d97ce7147b71abcbd9eb6 --- /dev/null +++ b/checkpoint-4000/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24c8270851f0b09f50d687d42602cfb19d7719d7c1d5f2cb4a8ddc75d93cf6b6 +size 15984 diff --git a/checkpoint-4000/rng_state_4.pth b/checkpoint-4000/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..23d797d8d70b176ff6776c4444c3fd9f5e311a08 --- /dev/null +++ b/checkpoint-4000/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faf08e27c8257c9011a7bf626eab1cc890253c1b508c8d057982c9ef85b0b676 +size 15984 diff --git a/checkpoint-4000/rng_state_5.pth b/checkpoint-4000/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..8d4415523940aec35a77f93084e5d8973f0ea787 --- /dev/null +++ b/checkpoint-4000/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9d7fd451419d345af3b71e83fe38a4585c9f6c939ef43fee0485c173ad70da4 +size 15984 diff --git a/checkpoint-4000/rng_state_6.pth b/checkpoint-4000/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..824f5ae45aa1d8e9b90624036172ccd3af50a5c8 --- /dev/null +++ b/checkpoint-4000/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29d95c31901119f92dc964632c22588848bd1eb88907442995c32a91896dee2a +size 15984 diff --git a/checkpoint-4000/rng_state_7.pth b/checkpoint-4000/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..08f1e10851da2a9fcb4ebceb65359c6ae20e8e5d --- /dev/null +++ b/checkpoint-4000/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5afe8189fce1de46fbdfcbf5d74380ecddd3cf7467e6afdd3075efa77bed0ad5 +size 15984 diff --git a/checkpoint-4000/scheduler.pt b/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..425415ecd897996c6e6f53b9455c7859576993cd --- /dev/null +++ b/checkpoint-4000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44069a7ff09a1ccfdde9a13816338abfb57d4782e6a325028474e61dd6c56b0e +size 1064 diff --git a/checkpoint-4000/trainer_state.json b/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4966cce8762e3fac2391853ff5fc157acac04722 --- /dev/null +++ b/checkpoint-4000/trainer_state.json @@ -0,0 +1,593 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.4676125848241828, + "eval_steps": 500, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.030845157310302282, + "grad_norm": 2.8206074237823486, + "learning_rate": 1.0277492291880782e-05, + "loss": 1.8082, + "step": 50 + }, + { + "epoch": 0.061690314620604564, + "grad_norm": 3.4183013439178467, + "learning_rate": 2.0554984583761563e-05, + "loss": 0.6538, + "step": 100 + }, + { + "epoch": 0.09253547193090685, + "grad_norm": 2.170591354370117, + "learning_rate": 3.083247687564235e-05, + "loss": 0.4563, + "step": 150 + }, + { + "epoch": 0.12338062924120913, + "grad_norm": 1.4687080383300781, + "learning_rate": 4.110996916752313e-05, + "loss": 0.4263, + "step": 200 + }, + { + "epoch": 0.15422578655151142, + "grad_norm": 1.836676836013794, + "learning_rate": 5.1387461459403907e-05, + "loss": 0.3994, + "step": 250 + }, + { + "epoch": 0.1850709438618137, + "grad_norm": 1.2718663215637207, + "learning_rate": 6.16649537512847e-05, + "loss": 0.3665, + "step": 300 + }, + { + "epoch": 0.215916101172116, + "grad_norm": 1.6945191621780396, + "learning_rate": 7.194244604316547e-05, + "loss": 0.3577, + "step": 350 + }, + { + "epoch": 0.24676125848241826, + "grad_norm": 1.2829898595809937, + "learning_rate": 8.221993833504625e-05, + "loss": 0.347, + "step": 400 + }, + { + "epoch": 0.27760641579272055, + "grad_norm": 1.01521635055542, + "learning_rate": 9.249743062692704e-05, + "loss": 0.3288, + "step": 450 + }, + { + "epoch": 0.30845157310302285, + "grad_norm": 1.522111415863037, + "learning_rate": 0.00010277492291880781, + "loss": 0.3267, + "step": 500 + }, + { + "epoch": 0.3392967304133251, + "grad_norm": 0.9678927659988403, + "learning_rate": 0.00011305241521068859, + "loss": 0.3198, + "step": 550 + }, + { + "epoch": 0.3701418877236274, + "grad_norm": 1.2144405841827393, + "learning_rate": 0.0001233299075025694, + "loss": 0.3099, + "step": 600 + }, + { + "epoch": 0.4009870450339297, + "grad_norm": 1.3122639656066895, + "learning_rate": 0.00013360739979445017, + "loss": 0.2929, + "step": 650 + }, + { + "epoch": 0.431832202344232, + "grad_norm": 1.0934101343154907, + "learning_rate": 0.00014388489208633093, + "loss": 0.3003, + "step": 700 + }, + { + "epoch": 0.4626773596545342, + "grad_norm": 0.7938969731330872, + "learning_rate": 0.00015416238437821172, + "loss": 0.2956, + "step": 750 + }, + { + "epoch": 0.4935225169648365, + "grad_norm": 0.6571168303489685, + "learning_rate": 0.0001644398766700925, + "loss": 0.2736, + "step": 800 + }, + { + "epoch": 0.5243676742751388, + "grad_norm": 1.0073938369750977, + "learning_rate": 0.0001747173689619733, + "loss": 0.2892, + "step": 850 + }, + { + "epoch": 0.5552128315854411, + "grad_norm": 0.9874083399772644, + "learning_rate": 0.00018499486125385408, + "loss": 0.2723, + "step": 900 + }, + { + "epoch": 0.5860579888957433, + "grad_norm": 1.1770968437194824, + "learning_rate": 0.00019527235354573487, + "loss": 0.2855, + "step": 950 + }, + { + "epoch": 0.6169031462060457, + "grad_norm": 1.00326669216156, + "learning_rate": 0.00019997622717095418, + "loss": 0.2587, + "step": 1000 + }, + { + "epoch": 0.6477483035163479, + "grad_norm": 1.0380828380584717, + "learning_rate": 0.0001998067088192682, + "loss": 0.2764, + "step": 1050 + }, + { + "epoch": 0.6785934608266502, + "grad_norm": 1.430301547050476, + "learning_rate": 0.00019947447034120033, + "loss": 0.2565, + "step": 1100 + }, + { + "epoch": 0.7094386181369525, + "grad_norm": 1.0970648527145386, + "learning_rate": 0.00019898005340261433, + "loss": 0.2685, + "step": 1150 + }, + { + "epoch": 0.7402837754472548, + "grad_norm": 0.9110261797904968, + "learning_rate": 0.0001983242640774473, + "loss": 0.2489, + "step": 1200 + }, + { + "epoch": 0.7711289327575571, + "grad_norm": 1.0265332460403442, + "learning_rate": 0.00019750817153352506, + "loss": 0.2425, + "step": 1250 + }, + { + "epoch": 0.8019740900678594, + "grad_norm": 0.8820884823799133, + "learning_rate": 0.00019653310628944164, + "loss": 0.2402, + "step": 1300 + }, + { + "epoch": 0.8328192473781616, + "grad_norm": 0.933083713054657, + "learning_rate": 0.00019540065804534467, + "loss": 0.2444, + "step": 1350 + }, + { + "epoch": 0.863664404688464, + "grad_norm": 0.8229042887687683, + "learning_rate": 0.00019411267309116375, + "loss": 0.2172, + "step": 1400 + }, + { + "epoch": 0.8945095619987662, + "grad_norm": 0.9641085267066956, + "learning_rate": 0.00019267125129650688, + "loss": 0.2501, + "step": 1450 + }, + { + "epoch": 0.9253547193090684, + "grad_norm": 0.6993410587310791, + "learning_rate": 0.00019107874268713254, + "loss": 0.2246, + "step": 1500 + }, + { + "epoch": 0.9561998766193708, + "grad_norm": 1.0574674606323242, + "learning_rate": 0.00018933774361357917, + "loss": 0.2265, + "step": 1550 + }, + { + "epoch": 0.987045033929673, + "grad_norm": 1.0701500177383423, + "learning_rate": 0.0001874510925181983, + "loss": 0.2116, + "step": 1600 + }, + { + "epoch": 1.0178901912399754, + "grad_norm": 0.44411325454711914, + "learning_rate": 0.0001854218653074927, + "loss": 0.1978, + "step": 1650 + }, + { + "epoch": 1.0487353485502775, + "grad_norm": 0.8408161401748657, + "learning_rate": 0.0001832533703373043, + "loss": 0.1701, + "step": 1700 + }, + { + "epoch": 1.0795805058605799, + "grad_norm": 0.7417210936546326, + "learning_rate": 0.0001809491430190276, + "loss": 0.1873, + "step": 1750 + }, + { + "epoch": 1.1104256631708822, + "grad_norm": 0.859341025352478, + "learning_rate": 0.00017851294005564254, + "loss": 0.1717, + "step": 1800 + }, + { + "epoch": 1.1412708204811843, + "grad_norm": 0.6794934272766113, + "learning_rate": 0.0001759487333169642, + "loss": 0.1732, + "step": 1850 + }, + { + "epoch": 1.1721159777914867, + "grad_norm": 0.5129622220993042, + "learning_rate": 0.00017326070336409427, + "loss": 0.172, + "step": 1900 + }, + { + "epoch": 1.202961135101789, + "grad_norm": 0.5867941379547119, + "learning_rate": 0.00017045323263363272, + "loss": 0.1724, + "step": 1950 + }, + { + "epoch": 1.2338062924120914, + "grad_norm": 0.887195348739624, + "learning_rate": 0.0001675308982927608, + "loss": 0.1655, + "step": 2000 + }, + { + "epoch": 1.2646514497223935, + "grad_norm": 0.4721340835094452, + "learning_rate": 0.0001644984647768447, + "loss": 0.1642, + "step": 2050 + }, + { + "epoch": 1.2954966070326959, + "grad_norm": 0.551078200340271, + "learning_rate": 0.00016136087602172582, + "loss": 0.1678, + "step": 2100 + }, + { + "epoch": 1.3263417643429982, + "grad_norm": 0.97613525390625, + "learning_rate": 0.00015812324740336248, + "loss": 0.159, + "step": 2150 + }, + { + "epoch": 1.3571869216533003, + "grad_norm": 0.7279055714607239, + "learning_rate": 0.00015479085739796328, + "loss": 0.1612, + "step": 2200 + }, + { + "epoch": 1.3880320789636027, + "grad_norm": 0.9861264228820801, + "learning_rate": 0.0001513691389762097, + "loss": 0.1578, + "step": 2250 + }, + { + "epoch": 1.418877236273905, + "grad_norm": 0.7998865246772766, + "learning_rate": 0.00014786367074559828, + "loss": 0.1569, + "step": 2300 + }, + { + "epoch": 1.4497223935842074, + "grad_norm": 0.851041853427887, + "learning_rate": 0.0001442801678553436, + "loss": 0.1486, + "step": 2350 + }, + { + "epoch": 1.4805675508945095, + "grad_norm": 0.42780816555023193, + "learning_rate": 0.00014062447267866986, + "loss": 0.1486, + "step": 2400 + }, + { + "epoch": 1.5114127082048119, + "grad_norm": 0.7399270534515381, + "learning_rate": 0.00013690254528768225, + "loss": 0.1364, + "step": 2450 + }, + { + "epoch": 1.542257865515114, + "grad_norm": 1.0372874736785889, + "learning_rate": 0.0001331204537363485, + "loss": 0.1393, + "step": 2500 + }, + { + "epoch": 1.5731030228254164, + "grad_norm": 1.0774208307266235, + "learning_rate": 0.00012928436416743098, + "loss": 0.1468, + "step": 2550 + }, + { + "epoch": 1.6039481801357187, + "grad_norm": 0.849371075630188, + "learning_rate": 0.00012540053075949987, + "loss": 0.1357, + "step": 2600 + }, + { + "epoch": 1.634793337446021, + "grad_norm": 0.9047374725341797, + "learning_rate": 0.00012147528553041718, + "loss": 0.1292, + "step": 2650 + }, + { + "epoch": 1.6656384947563234, + "grad_norm": 0.46791982650756836, + "learning_rate": 0.00011751502801391479, + "loss": 0.1308, + "step": 2700 + }, + { + "epoch": 1.6964836520666255, + "grad_norm": 0.8372901082038879, + "learning_rate": 0.00011352621482609807, + "loss": 0.1401, + "step": 2750 + }, + { + "epoch": 1.7273288093769277, + "grad_norm": 0.7648535966873169, + "learning_rate": 0.00010951534913888515, + "loss": 0.1245, + "step": 2800 + }, + { + "epoch": 1.75817396668723, + "grad_norm": 0.4626932442188263, + "learning_rate": 0.00010548897007754374, + "loss": 0.1288, + "step": 2850 + }, + { + "epoch": 1.7890191239975324, + "grad_norm": 0.5342707633972168, + "learning_rate": 0.00010145364205961125, + "loss": 0.1237, + "step": 2900 + }, + { + "epoch": 1.8198642813078347, + "grad_norm": 0.7241202592849731, + "learning_rate": 9.74159440925796e-05, + "loss": 0.1168, + "step": 2950 + }, + { + "epoch": 1.850709438618137, + "grad_norm": 0.48138442635536194, + "learning_rate": 9.338245904779345e-05, + "loss": 0.1149, + "step": 3000 + }, + { + "epoch": 1.8815545959284392, + "grad_norm": 0.5214439630508423, + "learning_rate": 8.93597629280487e-05, + "loss": 0.1175, + "step": 3050 + }, + { + "epoch": 1.9123997532387416, + "grad_norm": 0.5231944918632507, + "learning_rate": 8.535441414638937e-05, + "loss": 0.1168, + "step": 3100 + }, + { + "epoch": 1.9432449105490437, + "grad_norm": 0.7800565958023071, + "learning_rate": 8.13729428335819e-05, + "loss": 0.1081, + "step": 3150 + }, + { + "epoch": 1.974090067859346, + "grad_norm": 0.592022716999054, + "learning_rate": 7.742184019169945e-05, + "loss": 0.115, + "step": 3200 + }, + { + "epoch": 2.0049352251696484, + "grad_norm": 0.4546678960323334, + "learning_rate": 7.350754791117384e-05, + "loss": 0.1002, + "step": 3250 + }, + { + "epoch": 2.0357803824799507, + "grad_norm": 0.4683123826980591, + "learning_rate": 6.963644766856894e-05, + "loss": 0.0696, + "step": 3300 + }, + { + "epoch": 2.066625539790253, + "grad_norm": 0.4901474416255951, + "learning_rate": 6.581485072219755e-05, + "loss": 0.0696, + "step": 3350 + }, + { + "epoch": 2.097470697100555, + "grad_norm": 0.7581807971000671, + "learning_rate": 6.204898762254524e-05, + "loss": 0.0705, + "step": 3400 + }, + { + "epoch": 2.1283158544108574, + "grad_norm": 0.5819096565246582, + "learning_rate": 5.8344998054276115e-05, + "loss": 0.0695, + "step": 3450 + }, + { + "epoch": 2.1591610117211597, + "grad_norm": 0.7029954791069031, + "learning_rate": 5.4708920826382035e-05, + "loss": 0.0683, + "step": 3500 + }, + { + "epoch": 2.190006169031462, + "grad_norm": 0.6168348789215088, + "learning_rate": 5.114668402679472e-05, + "loss": 0.0675, + "step": 3550 + }, + { + "epoch": 2.2208513263417644, + "grad_norm": 0.5696262717247009, + "learning_rate": 4.766409535751225e-05, + "loss": 0.0678, + "step": 3600 + }, + { + "epoch": 2.2516964836520668, + "grad_norm": 0.7154285907745361, + "learning_rate": 4.426683266599702e-05, + "loss": 0.0655, + "step": 3650 + }, + { + "epoch": 2.2825416409623687, + "grad_norm": 0.6194272041320801, + "learning_rate": 4.0960434688282515e-05, + "loss": 0.0623, + "step": 3700 + }, + { + "epoch": 2.313386798272671, + "grad_norm": 0.4507332444190979, + "learning_rate": 3.775029201888051e-05, + "loss": 0.0645, + "step": 3750 + }, + { + "epoch": 2.3442319555829734, + "grad_norm": 0.42285481095314026, + "learning_rate": 3.4641638322211456e-05, + "loss": 0.0623, + "step": 3800 + }, + { + "epoch": 2.3750771128932757, + "grad_norm": 0.4432896077632904, + "learning_rate": 3.1639541799886083e-05, + "loss": 0.0625, + "step": 3850 + }, + { + "epoch": 2.405922270203578, + "grad_norm": 0.4708622097969055, + "learning_rate": 2.874889692774978e-05, + "loss": 0.063, + "step": 3900 + }, + { + "epoch": 2.4367674275138804, + "grad_norm": 0.30538269877433777, + "learning_rate": 2.5974416476161167e-05, + "loss": 0.06, + "step": 3950 + }, + { + "epoch": 2.4676125848241828, + "grad_norm": 0.7072311639785767, + "learning_rate": 2.3320623826514897e-05, + "loss": 0.0623, + "step": 4000 + } + ], + "logging_steps": 50, + "max_steps": 4863, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.8423096729862144e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4863/optimizer_0/.metadata b/checkpoint-4863/optimizer_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..010160821ededc434705c3f720d2c8d80c7cfbab --- /dev/null +++ b/checkpoint-4863/optimizer_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27378502f7a970eecbcb6e2f6200d23d0ea9c1e8a0d45bd1a8fdb1ac02a6994a +size 2626018 diff --git a/checkpoint-4863/optimizer_0/__0_0.distcp b/checkpoint-4863/optimizer_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7521f228d9e735e19fb4dd7a9336ad710d261ac4 --- /dev/null +++ b/checkpoint-4863/optimizer_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57113c86b12620e176c6ea79f71c1a46b1846858a3ae5ecd230bb6beb36a4fa1 +size 55406592 diff --git a/checkpoint-4863/optimizer_0/__1_0.distcp b/checkpoint-4863/optimizer_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..296961f0ca85cfe9bcbd46f8b62b4d90e2f86c2c --- /dev/null +++ b/checkpoint-4863/optimizer_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c83adefb200d9587dff16e252bc5af691723b858aa949fa74c1597830f1b866d +size 55526656 diff --git a/checkpoint-4863/optimizer_0/__2_0.distcp b/checkpoint-4863/optimizer_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..765bf667cd4b2a7255d718e79b19b316341c4bd6 --- /dev/null +++ b/checkpoint-4863/optimizer_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:290283a303874485eec53e27aa654a8f828ac62357d193e6d3904cf05e05a8d2 +size 55480896 diff --git a/checkpoint-4863/optimizer_0/__3_0.distcp b/checkpoint-4863/optimizer_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..272586037009cbc2d03cd54c8b0358ff5a615e5b --- /dev/null +++ b/checkpoint-4863/optimizer_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b579190aabfb44e13b70973b40e8d44eeda3cd064dee3a1c3caae815eff99829 +size 55480896 diff --git a/checkpoint-4863/optimizer_0/__4_0.distcp b/checkpoint-4863/optimizer_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..5b65f8ee709653504860fbedb7d103ee5e1a065b --- /dev/null +++ b/checkpoint-4863/optimizer_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15778e2ac4fe2d51b157c7e823ec025019cf7c7bf026d327c47c319acd902b11 +size 55480032 diff --git a/checkpoint-4863/optimizer_0/__5_0.distcp b/checkpoint-4863/optimizer_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a0607d972ebd1dfd7cabd7f15f8f8ae68aff36f3 --- /dev/null +++ b/checkpoint-4863/optimizer_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c3d2518ce345da2fd871b20483cb1b73a0744c757834775158d71bb82b559f1 +size 55480032 diff --git a/checkpoint-4863/optimizer_0/__6_0.distcp b/checkpoint-4863/optimizer_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..cb75faebfdd8c2afb06cd1fdecefd1868f83a7e7 --- /dev/null +++ b/checkpoint-4863/optimizer_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f83d9cc1d6500e842b6991db7fdb6fa9ea2b36f8989a479579ce6a5b4650dfe +size 55480032 diff --git a/checkpoint-4863/optimizer_0/__7_0.distcp b/checkpoint-4863/optimizer_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7a52e55c5e7fd76e60bbd1f8248d3fd43588c3f8 --- /dev/null +++ b/checkpoint-4863/optimizer_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d391bc682490268b6a32abb5e74423c408f7fce769569aa0e79672fe7c0f910c +size 55480032 diff --git a/checkpoint-4863/pytorch_model_fsdp_0/.metadata b/checkpoint-4863/pytorch_model_fsdp_0/.metadata new file mode 100644 index 0000000000000000000000000000000000000000..1c7815f4879f595a735061ab8a3f67e979a32fe8 --- /dev/null +++ b/checkpoint-4863/pytorch_model_fsdp_0/.metadata @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:266f81efdff43641e4b32feaebc54844f92fad01fcb57f19c882f718f70c0050 +size 1064888 diff --git a/checkpoint-4863/pytorch_model_fsdp_0/__0_0.distcp b/checkpoint-4863/pytorch_model_fsdp_0/__0_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..bea39d58e58a057aea6cac05c69ff8112f84ba2c --- /dev/null +++ b/checkpoint-4863/pytorch_model_fsdp_0/__0_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2b1c1736e66f8a558380cfbf35f726b076391df4bd9651b39fe6e06f4e50a5e +size 27702864 diff --git a/checkpoint-4863/pytorch_model_fsdp_0/__1_0.distcp b/checkpoint-4863/pytorch_model_fsdp_0/__1_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..6d8dd7edfd9866176bc46a8efc2528d003bb1310 --- /dev/null +++ b/checkpoint-4863/pytorch_model_fsdp_0/__1_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d65e04ea450a9c53dfbcd86b461715564bc6784c7f4ea569c2f1779210e6a25 +size 27702864 diff --git a/checkpoint-4863/pytorch_model_fsdp_0/__2_0.distcp b/checkpoint-4863/pytorch_model_fsdp_0/__2_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7e81f5fb78df67dea401f1b14c21fc3b08756ff6 --- /dev/null +++ b/checkpoint-4863/pytorch_model_fsdp_0/__2_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baafffa91875798d4c38139cccfa484a31d1933ca2d25b58c0801fc11924db3f +size 27702864 diff --git a/checkpoint-4863/pytorch_model_fsdp_0/__3_0.distcp b/checkpoint-4863/pytorch_model_fsdp_0/__3_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..5971b319505fd0450adaa77fba67f555ef5c1680 --- /dev/null +++ b/checkpoint-4863/pytorch_model_fsdp_0/__3_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ace2e4d4de019c3b000b27b9bbbc85f6270273a45590a324fb17cd40c9b8f1cb +size 27702864 diff --git a/checkpoint-4863/pytorch_model_fsdp_0/__4_0.distcp b/checkpoint-4863/pytorch_model_fsdp_0/__4_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..7449152150c71ee2b039d747321a3398984d3350 --- /dev/null +++ b/checkpoint-4863/pytorch_model_fsdp_0/__4_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce7d63cb4197b75a4f52bd9c35340d5853b4ded78e816140a278532c5d5f2d13 +size 27702864 diff --git a/checkpoint-4863/pytorch_model_fsdp_0/__5_0.distcp b/checkpoint-4863/pytorch_model_fsdp_0/__5_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..a948b2adda408ed28e162094c1c67831e020c1aa --- /dev/null +++ b/checkpoint-4863/pytorch_model_fsdp_0/__5_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27cb902f77b13e5b2b1042829bcb1985f5ef5f5beec8f25806f18ec9711f8f49 +size 27702864 diff --git a/checkpoint-4863/pytorch_model_fsdp_0/__6_0.distcp b/checkpoint-4863/pytorch_model_fsdp_0/__6_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..42212129c19121c7e6d292d353c879dbd2385752 --- /dev/null +++ b/checkpoint-4863/pytorch_model_fsdp_0/__6_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f5ba23d8045382880d0d3240072c65304b2621ba5486d781478ad89cecfadb1 +size 27702864 diff --git a/checkpoint-4863/pytorch_model_fsdp_0/__7_0.distcp b/checkpoint-4863/pytorch_model_fsdp_0/__7_0.distcp new file mode 100644 index 0000000000000000000000000000000000000000..c6e72fb9cf6d5650f0acf28e6469fce288888968 --- /dev/null +++ b/checkpoint-4863/pytorch_model_fsdp_0/__7_0.distcp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94e133263af7bbe190b7a3009c8b2d109ebc581a6f7c1134e1827f349c1cbeb3 +size 27702864 diff --git a/checkpoint-4863/rng_state_0.pth b/checkpoint-4863/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..2d92240c8ece96fa5b9cafa3847fd9993a6f714e --- /dev/null +++ b/checkpoint-4863/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:963a888980d912f39ceaa63e96be99ed9eaa2b65a918200a6ec746d26717e328 +size 15984 diff --git a/checkpoint-4863/rng_state_1.pth b/checkpoint-4863/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..50a0c53dc48427c016f6355f78b2b335fd67a7d9 --- /dev/null +++ b/checkpoint-4863/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c887add6f9e1324ae7522d9d44be8cddc3aee818f2bbbfcfd740f032d59f2711 +size 15984 diff --git a/checkpoint-4863/rng_state_2.pth b/checkpoint-4863/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..3a59a4df0c5ab961f05fe6f87ac8259cde1241f7 --- /dev/null +++ b/checkpoint-4863/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0559f88471aa8d99b1d68d0b50f74fd31bf0fd8cd2492881f2c9318f91a360f9 +size 15984 diff --git a/checkpoint-4863/rng_state_3.pth b/checkpoint-4863/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..8cb14b84bff385705ebf0754ff1ecc0a3d04fc9d --- /dev/null +++ b/checkpoint-4863/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b5e66cfc47f4bce3d355fd4e6a94fccce574aacb351e7c3535b94e09003fa27 +size 15984 diff --git a/checkpoint-4863/rng_state_4.pth b/checkpoint-4863/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..9565ff6a0ffc9dab3cfad0500a6f82d45a0058c2 --- /dev/null +++ b/checkpoint-4863/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94911f72db334e37aaf993b63a45eea16bd1bdbe9e22ac0ee20a8a1c09131363 +size 15984 diff --git a/checkpoint-4863/rng_state_5.pth b/checkpoint-4863/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..2fa31c8d1a2aec5f4f80c4f75139cdfd885ac31a --- /dev/null +++ b/checkpoint-4863/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b80454c8c1a42455cede14e6b11c60bd8e37008f028ae8fa221b8af2baaec8e8 +size 15984 diff --git a/checkpoint-4863/rng_state_6.pth b/checkpoint-4863/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..4e7b777c8b16c96582b1c3152fbf67f8ec3e4ac5 --- /dev/null +++ b/checkpoint-4863/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c7562f85ad4c383c46c521a327860f7369bc5596d8658ae24e7c5d7e6310c96 +size 15984 diff --git a/checkpoint-4863/rng_state_7.pth b/checkpoint-4863/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..e50aafc5109215dd5f3a73cf315672d62075fde3 --- /dev/null +++ b/checkpoint-4863/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8d724ba84d599dde870ac406284f5550a2109fc51e3f24cca99658379d3fa45 +size 15984 diff --git a/checkpoint-4863/scheduler.pt b/checkpoint-4863/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fca7733ca8ede6b6b03aa2ee841b827c0d0497e1 --- /dev/null +++ b/checkpoint-4863/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed612c5444de7ebcf86fa50d052e74a66eb9bb74f52652af5a64371a32164ff7 +size 1064 diff --git a/checkpoint-4863/trainer_state.json b/checkpoint-4863/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..613f92d9dbf90f4430ea099cf6b1a5ad9d27ebbc --- /dev/null +++ b/checkpoint-4863/trainer_state.json @@ -0,0 +1,712 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 4863, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.030845157310302282, + "grad_norm": 2.8206074237823486, + "learning_rate": 1.0277492291880782e-05, + "loss": 1.8082, + "step": 50 + }, + { + "epoch": 0.061690314620604564, + "grad_norm": 3.4183013439178467, + "learning_rate": 2.0554984583761563e-05, + "loss": 0.6538, + "step": 100 + }, + { + "epoch": 0.09253547193090685, + "grad_norm": 2.170591354370117, + "learning_rate": 3.083247687564235e-05, + "loss": 0.4563, + "step": 150 + }, + { + "epoch": 0.12338062924120913, + "grad_norm": 1.4687080383300781, + "learning_rate": 4.110996916752313e-05, + "loss": 0.4263, + "step": 200 + }, + { + "epoch": 0.15422578655151142, + "grad_norm": 1.836676836013794, + "learning_rate": 5.1387461459403907e-05, + "loss": 0.3994, + "step": 250 + }, + { + "epoch": 0.1850709438618137, + "grad_norm": 1.2718663215637207, + "learning_rate": 6.16649537512847e-05, + "loss": 0.3665, + "step": 300 + }, + { + "epoch": 0.215916101172116, + "grad_norm": 1.6945191621780396, + "learning_rate": 7.194244604316547e-05, + "loss": 0.3577, + "step": 350 + }, + { + "epoch": 0.24676125848241826, + "grad_norm": 1.2829898595809937, + "learning_rate": 8.221993833504625e-05, + "loss": 0.347, + "step": 400 + }, + { + "epoch": 0.27760641579272055, + "grad_norm": 1.01521635055542, + "learning_rate": 9.249743062692704e-05, + "loss": 0.3288, + "step": 450 + }, + { + "epoch": 0.30845157310302285, + "grad_norm": 1.522111415863037, + "learning_rate": 0.00010277492291880781, + "loss": 0.3267, + "step": 500 + }, + { + "epoch": 0.3392967304133251, + "grad_norm": 0.9678927659988403, + "learning_rate": 0.00011305241521068859, + "loss": 0.3198, + "step": 550 + }, + { + "epoch": 0.3701418877236274, + "grad_norm": 1.2144405841827393, + "learning_rate": 0.0001233299075025694, + "loss": 0.3099, + "step": 600 + }, + { + "epoch": 0.4009870450339297, + "grad_norm": 1.3122639656066895, + "learning_rate": 0.00013360739979445017, + "loss": 0.2929, + "step": 650 + }, + { + "epoch": 0.431832202344232, + "grad_norm": 1.0934101343154907, + "learning_rate": 0.00014388489208633093, + "loss": 0.3003, + "step": 700 + }, + { + "epoch": 0.4626773596545342, + "grad_norm": 0.7938969731330872, + "learning_rate": 0.00015416238437821172, + "loss": 0.2956, + "step": 750 + }, + { + "epoch": 0.4935225169648365, + "grad_norm": 0.6571168303489685, + "learning_rate": 0.0001644398766700925, + "loss": 0.2736, + "step": 800 + }, + { + "epoch": 0.5243676742751388, + "grad_norm": 1.0073938369750977, + "learning_rate": 0.0001747173689619733, + "loss": 0.2892, + "step": 850 + }, + { + "epoch": 0.5552128315854411, + "grad_norm": 0.9874083399772644, + "learning_rate": 0.00018499486125385408, + "loss": 0.2723, + "step": 900 + }, + { + "epoch": 0.5860579888957433, + "grad_norm": 1.1770968437194824, + "learning_rate": 0.00019527235354573487, + "loss": 0.2855, + "step": 950 + }, + { + "epoch": 0.6169031462060457, + "grad_norm": 1.00326669216156, + "learning_rate": 0.00019997622717095418, + "loss": 0.2587, + "step": 1000 + }, + { + "epoch": 0.6477483035163479, + "grad_norm": 1.0380828380584717, + "learning_rate": 0.0001998067088192682, + "loss": 0.2764, + "step": 1050 + }, + { + "epoch": 0.6785934608266502, + "grad_norm": 1.430301547050476, + "learning_rate": 0.00019947447034120033, + "loss": 0.2565, + "step": 1100 + }, + { + "epoch": 0.7094386181369525, + "grad_norm": 1.0970648527145386, + "learning_rate": 0.00019898005340261433, + "loss": 0.2685, + "step": 1150 + }, + { + "epoch": 0.7402837754472548, + "grad_norm": 0.9110261797904968, + "learning_rate": 0.0001983242640774473, + "loss": 0.2489, + "step": 1200 + }, + { + "epoch": 0.7711289327575571, + "grad_norm": 1.0265332460403442, + "learning_rate": 0.00019750817153352506, + "loss": 0.2425, + "step": 1250 + }, + { + "epoch": 0.8019740900678594, + "grad_norm": 0.8820884823799133, + "learning_rate": 0.00019653310628944164, + "loss": 0.2402, + "step": 1300 + }, + { + "epoch": 0.8328192473781616, + "grad_norm": 0.933083713054657, + "learning_rate": 0.00019540065804534467, + "loss": 0.2444, + "step": 1350 + }, + { + "epoch": 0.863664404688464, + "grad_norm": 0.8229042887687683, + "learning_rate": 0.00019411267309116375, + "loss": 0.2172, + "step": 1400 + }, + { + "epoch": 0.8945095619987662, + "grad_norm": 0.9641085267066956, + "learning_rate": 0.00019267125129650688, + "loss": 0.2501, + "step": 1450 + }, + { + "epoch": 0.9253547193090684, + "grad_norm": 0.6993410587310791, + "learning_rate": 0.00019107874268713254, + "loss": 0.2246, + "step": 1500 + }, + { + "epoch": 0.9561998766193708, + "grad_norm": 1.0574674606323242, + "learning_rate": 0.00018933774361357917, + "loss": 0.2265, + "step": 1550 + }, + { + "epoch": 0.987045033929673, + "grad_norm": 1.0701500177383423, + "learning_rate": 0.0001874510925181983, + "loss": 0.2116, + "step": 1600 + }, + { + "epoch": 1.0178901912399754, + "grad_norm": 0.44411325454711914, + "learning_rate": 0.0001854218653074927, + "loss": 0.1978, + "step": 1650 + }, + { + "epoch": 1.0487353485502775, + "grad_norm": 0.8408161401748657, + "learning_rate": 0.0001832533703373043, + "loss": 0.1701, + "step": 1700 + }, + { + "epoch": 1.0795805058605799, + "grad_norm": 0.7417210936546326, + "learning_rate": 0.0001809491430190276, + "loss": 0.1873, + "step": 1750 + }, + { + "epoch": 1.1104256631708822, + "grad_norm": 0.859341025352478, + "learning_rate": 0.00017851294005564254, + "loss": 0.1717, + "step": 1800 + }, + { + "epoch": 1.1412708204811843, + "grad_norm": 0.6794934272766113, + "learning_rate": 0.0001759487333169642, + "loss": 0.1732, + "step": 1850 + }, + { + "epoch": 1.1721159777914867, + "grad_norm": 0.5129622220993042, + "learning_rate": 0.00017326070336409427, + "loss": 0.172, + "step": 1900 + }, + { + "epoch": 1.202961135101789, + "grad_norm": 0.5867941379547119, + "learning_rate": 0.00017045323263363272, + "loss": 0.1724, + "step": 1950 + }, + { + "epoch": 1.2338062924120914, + "grad_norm": 0.887195348739624, + "learning_rate": 0.0001675308982927608, + "loss": 0.1655, + "step": 2000 + }, + { + "epoch": 1.2646514497223935, + "grad_norm": 0.4721340835094452, + "learning_rate": 0.0001644984647768447, + "loss": 0.1642, + "step": 2050 + }, + { + "epoch": 1.2954966070326959, + "grad_norm": 0.551078200340271, + "learning_rate": 0.00016136087602172582, + "loss": 0.1678, + "step": 2100 + }, + { + "epoch": 1.3263417643429982, + "grad_norm": 0.97613525390625, + "learning_rate": 0.00015812324740336248, + "loss": 0.159, + "step": 2150 + }, + { + "epoch": 1.3571869216533003, + "grad_norm": 0.7279055714607239, + "learning_rate": 0.00015479085739796328, + "loss": 0.1612, + "step": 2200 + }, + { + "epoch": 1.3880320789636027, + "grad_norm": 0.9861264228820801, + "learning_rate": 0.0001513691389762097, + "loss": 0.1578, + "step": 2250 + }, + { + "epoch": 1.418877236273905, + "grad_norm": 0.7998865246772766, + "learning_rate": 0.00014786367074559828, + "loss": 0.1569, + "step": 2300 + }, + { + "epoch": 1.4497223935842074, + "grad_norm": 0.851041853427887, + "learning_rate": 0.0001442801678553436, + "loss": 0.1486, + "step": 2350 + }, + { + "epoch": 1.4805675508945095, + "grad_norm": 0.42780816555023193, + "learning_rate": 0.00014062447267866986, + "loss": 0.1486, + "step": 2400 + }, + { + "epoch": 1.5114127082048119, + "grad_norm": 0.7399270534515381, + "learning_rate": 0.00013690254528768225, + "loss": 0.1364, + "step": 2450 + }, + { + "epoch": 1.542257865515114, + "grad_norm": 1.0372874736785889, + "learning_rate": 0.0001331204537363485, + "loss": 0.1393, + "step": 2500 + }, + { + "epoch": 1.5731030228254164, + "grad_norm": 1.0774208307266235, + "learning_rate": 0.00012928436416743098, + "loss": 0.1468, + "step": 2550 + }, + { + "epoch": 1.6039481801357187, + "grad_norm": 0.849371075630188, + "learning_rate": 0.00012540053075949987, + "loss": 0.1357, + "step": 2600 + }, + { + "epoch": 1.634793337446021, + "grad_norm": 0.9047374725341797, + "learning_rate": 0.00012147528553041718, + "loss": 0.1292, + "step": 2650 + }, + { + "epoch": 1.6656384947563234, + "grad_norm": 0.46791982650756836, + "learning_rate": 0.00011751502801391479, + "loss": 0.1308, + "step": 2700 + }, + { + "epoch": 1.6964836520666255, + "grad_norm": 0.8372901082038879, + "learning_rate": 0.00011352621482609807, + "loss": 0.1401, + "step": 2750 + }, + { + "epoch": 1.7273288093769277, + "grad_norm": 0.7648535966873169, + "learning_rate": 0.00010951534913888515, + "loss": 0.1245, + "step": 2800 + }, + { + "epoch": 1.75817396668723, + "grad_norm": 0.4626932442188263, + "learning_rate": 0.00010548897007754374, + "loss": 0.1288, + "step": 2850 + }, + { + "epoch": 1.7890191239975324, + "grad_norm": 0.5342707633972168, + "learning_rate": 0.00010145364205961125, + "loss": 0.1237, + "step": 2900 + }, + { + "epoch": 1.8198642813078347, + "grad_norm": 0.7241202592849731, + "learning_rate": 9.74159440925796e-05, + "loss": 0.1168, + "step": 2950 + }, + { + "epoch": 1.850709438618137, + "grad_norm": 0.48138442635536194, + "learning_rate": 9.338245904779345e-05, + "loss": 0.1149, + "step": 3000 + }, + { + "epoch": 1.8815545959284392, + "grad_norm": 0.5214439630508423, + "learning_rate": 8.93597629280487e-05, + "loss": 0.1175, + "step": 3050 + }, + { + "epoch": 1.9123997532387416, + "grad_norm": 0.5231944918632507, + "learning_rate": 8.535441414638937e-05, + "loss": 0.1168, + "step": 3100 + }, + { + "epoch": 1.9432449105490437, + "grad_norm": 0.7800565958023071, + "learning_rate": 8.13729428335819e-05, + "loss": 0.1081, + "step": 3150 + }, + { + "epoch": 1.974090067859346, + "grad_norm": 0.592022716999054, + "learning_rate": 7.742184019169945e-05, + "loss": 0.115, + "step": 3200 + }, + { + "epoch": 2.0049352251696484, + "grad_norm": 0.4546678960323334, + "learning_rate": 7.350754791117384e-05, + "loss": 0.1002, + "step": 3250 + }, + { + "epoch": 2.0357803824799507, + "grad_norm": 0.4683123826980591, + "learning_rate": 6.963644766856894e-05, + "loss": 0.0696, + "step": 3300 + }, + { + "epoch": 2.066625539790253, + "grad_norm": 0.4901474416255951, + "learning_rate": 6.581485072219755e-05, + "loss": 0.0696, + "step": 3350 + }, + { + "epoch": 2.097470697100555, + "grad_norm": 0.7581807971000671, + "learning_rate": 6.204898762254524e-05, + "loss": 0.0705, + "step": 3400 + }, + { + "epoch": 2.1283158544108574, + "grad_norm": 0.5819096565246582, + "learning_rate": 5.8344998054276115e-05, + "loss": 0.0695, + "step": 3450 + }, + { + "epoch": 2.1591610117211597, + "grad_norm": 0.7029954791069031, + "learning_rate": 5.4708920826382035e-05, + "loss": 0.0683, + "step": 3500 + }, + { + "epoch": 2.190006169031462, + "grad_norm": 0.6168348789215088, + "learning_rate": 5.114668402679472e-05, + "loss": 0.0675, + "step": 3550 + }, + { + "epoch": 2.2208513263417644, + "grad_norm": 0.5696262717247009, + "learning_rate": 4.766409535751225e-05, + "loss": 0.0678, + "step": 3600 + }, + { + "epoch": 2.2516964836520668, + "grad_norm": 0.7154285907745361, + "learning_rate": 4.426683266599702e-05, + "loss": 0.0655, + "step": 3650 + }, + { + "epoch": 2.2825416409623687, + "grad_norm": 0.6194272041320801, + "learning_rate": 4.0960434688282515e-05, + "loss": 0.0623, + "step": 3700 + }, + { + "epoch": 2.313386798272671, + "grad_norm": 0.4507332444190979, + "learning_rate": 3.775029201888051e-05, + "loss": 0.0645, + "step": 3750 + }, + { + "epoch": 2.3442319555829734, + "grad_norm": 0.42285481095314026, + "learning_rate": 3.4641638322211456e-05, + "loss": 0.0623, + "step": 3800 + }, + { + "epoch": 2.3750771128932757, + "grad_norm": 0.4432896077632904, + "learning_rate": 3.1639541799886083e-05, + "loss": 0.0625, + "step": 3850 + }, + { + "epoch": 2.405922270203578, + "grad_norm": 0.4708622097969055, + "learning_rate": 2.874889692774978e-05, + "loss": 0.063, + "step": 3900 + }, + { + "epoch": 2.4367674275138804, + "grad_norm": 0.30538269877433777, + "learning_rate": 2.5974416476161167e-05, + "loss": 0.06, + "step": 3950 + }, + { + "epoch": 2.4676125848241828, + "grad_norm": 0.7072311639785767, + "learning_rate": 2.3320623826514897e-05, + "loss": 0.0623, + "step": 4000 + }, + { + "epoch": 2.4984577421344847, + "grad_norm": 0.8356983661651611, + "learning_rate": 2.079184559653483e-05, + "loss": 0.0572, + "step": 4050 + }, + { + "epoch": 2.529302899444787, + "grad_norm": 0.4603017568588257, + "learning_rate": 1.8392204586361784e-05, + "loss": 0.0615, + "step": 4100 + }, + { + "epoch": 2.5601480567550894, + "grad_norm": 0.7272722125053406, + "learning_rate": 1.612561305693555e-05, + "loss": 0.0543, + "step": 4150 + }, + { + "epoch": 2.5909932140653917, + "grad_norm": 0.5553550124168396, + "learning_rate": 1.3995766351630313e-05, + "loss": 0.0594, + "step": 4200 + }, + { + "epoch": 2.621838371375694, + "grad_norm": 0.6362884640693665, + "learning_rate": 1.200613687154164e-05, + "loss": 0.0535, + "step": 4250 + }, + { + "epoch": 2.6526835286859964, + "grad_norm": 0.5222265124320984, + "learning_rate": 1.0159968414248778e-05, + "loss": 0.0569, + "step": 4300 + }, + { + "epoch": 2.683528685996299, + "grad_norm": 0.7063607573509216, + "learning_rate": 8.460270885280585e-06, + "loss": 0.0548, + "step": 4350 + }, + { + "epoch": 2.7143738433066007, + "grad_norm": 0.5181366205215454, + "learning_rate": 6.9098153909083366e-06, + "loss": 0.0534, + "step": 4400 + }, + { + "epoch": 2.745219000616903, + "grad_norm": 0.5155737400054932, + "learning_rate": 5.511129720265396e-06, + "loss": 0.0572, + "step": 4450 + }, + { + "epoch": 2.7760641579272054, + "grad_norm": 0.6396743059158325, + "learning_rate": 4.266494224159778e-06, + "loss": 0.0541, + "step": 4500 + }, + { + "epoch": 2.8069093152375078, + "grad_norm": 0.6049082279205322, + "learning_rate": 3.1779380972981163e-06, + "loss": 0.055, + "step": 4550 + }, + { + "epoch": 2.83775447254781, + "grad_norm": 0.5717052817344666, + "learning_rate": 2.2472360699829387e-06, + "loss": 0.0506, + "step": 4600 + }, + { + "epoch": 2.868599629858112, + "grad_norm": 0.7880396246910095, + "learning_rate": 1.4759055146764789e-06, + "loss": 0.0491, + "step": 4650 + }, + { + "epoch": 2.899444787168415, + "grad_norm": 0.5519769191741943, + "learning_rate": 8.652039721485539e-07, + "loss": 0.0499, + "step": 4700 + }, + { + "epoch": 2.9302899444787167, + "grad_norm": 0.5851826071739197, + "learning_rate": 4.161271012417256e-07, + "loss": 0.0522, + "step": 4750 + }, + { + "epoch": 2.961135101789019, + "grad_norm": 0.41791629791259766, + "learning_rate": 1.2940705559629783e-07, + "loss": 0.052, + "step": 4800 + }, + { + "epoch": 2.9919802590993214, + "grad_norm": 0.46808063983917236, + "learning_rate": 5.511289981730006e-09, + "loss": 0.0514, + "step": 4850 + } + ], + "logging_steps": 50, + "max_steps": 4863, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3.4562212328636416e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3d313ff90abc4445b68a507f98195a7c589e15bd --- /dev/null +++ b/config.json @@ -0,0 +1,34 @@ +{ + "_name_or_path": "google/gemma-2-9b-it", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 3584, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 16, + "num_hidden_layers": 42, + "num_key_value_heads": 8, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "sliding_window_size": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 256000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8703b38fbfec50f6e9abf4621bb96f1a9c62d51b --- /dev/null +++ b/generation_config.json @@ -0,0 +1,8 @@ +{ + "_from_model_config": true, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.46.1" +} diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a87ded161ba707ff7ce0f176ae9a7f206aebb7d5 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c71132d902906e7ce6ec710f1cade7d96a0399a2e7f3fbba06eac58c4f9eb0b +size 4903351912 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27637d2a5b2dfe2ec73fcbbe39a086b81f16c8d0 --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:475dde4aa603fdc7a835869e6def9220f9821f19115aff0ef09ac75e2f682a44 +size 4947570872 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b00c3b62889b4bacef50347a02366305e98e5c45 --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e64f524baf134f7480b4e47f9b6bfd3cccf93b40ee0b3a218f0bf9585a78c395 +size 4962221464 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97f06c7826e501b1378b926616a8c7126aa69544 --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e820fff4bbf84bd4d654e54ef74d2a20d3d20227be45c733f5a1d5983ea351c +size 3670322200 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..43bf0e426e9bf12945463de98856c221831faf66 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,471 @@ +{ + "metadata": { + "total_size": 18483411968 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_feedforward_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.pre_feedforward_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_feedforward_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.pre_feedforward_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_feedforward_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.pre_feedforward_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_feedforward_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.pre_feedforward_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.pre_feedforward_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.32.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.32.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.33.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.33.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.33.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.34.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.34.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.34.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.36.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.36.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.36.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.37.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.37.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.37.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.38.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.38.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.38.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.39.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.39.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.39.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_feedforward_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.pre_feedforward_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.40.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.40.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.40.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.41.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.41.post_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.41.pre_feedforward_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_feedforward_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.pre_feedforward_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_feedforward_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.pre_feedforward_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.7.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.7.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.8.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.8.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.pre_feedforward_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..a169013d289a376495faa781c49ca6913ab5d050 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9afc4612a55c9c4b6223058eb59ae051813e1a51cb3f95b0f29ad864d50e4781 +size 34362972 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}