diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07e23822fa82e683efaf5d09c99bcf2955759e48 --- /dev/null +++ b/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": ".", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 24576, + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen2", + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 64, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 152064 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ac982f486fb097e80b78b83d704bf9378c50548d --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "bos_token_id": 151644, + "do_sample": true, + "eos_token_id": 151645, + "max_new_tokens": 2048, + "transformers_version": "4.37.2" +} diff --git a/model-00001-of-00063.safetensors b/model-00001-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..244a9a3328b66b25cd71a8c1717f8ef4f1a881ea --- /dev/null +++ b/model-00001-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acccd2158300774c94db715aa61c266a601ed864c2858e90b401cae7979a1c96 +size 4982833288 diff --git a/model-00002-of-00063.safetensors b/model-00002-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58bee5264c51579093ba5f4c95d75453310a2098 --- /dev/null +++ b/model-00002-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e38f6f6e2b1686a69d0d8163e624b492dc8c1c974917f4210205708ec22b0c84 +size 4563667072 diff --git a/model-00003-of-00063.safetensors b/model-00003-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e9dd1afeae773c083f628462a8bc1cbb9a04a77 --- /dev/null +++ b/model-00003-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63d9b7734f9876b2bdf108bca5ebb67b55c1043e642d9b7588a3b32fa8e0afa5 +size 4295132632 diff --git a/model-00004-of-00063.safetensors b/model-00004-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e65957ab24f26ec70d1d3cbe67522babb5aaf18 --- /dev/null +++ b/model-00004-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d1433fb9524a406d4e7576b4e0bf3690b18d9e49d8e108c08c5c9d3693ed1b3 +size 4295132632 diff --git a/model-00005-of-00063.safetensors b/model-00005-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a26df37c32d8d22aa11985475ff392a38795638 --- /dev/null +++ b/model-00005-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f139ede515f5f6ba1c4ad82e4c00b1c5f01b46db92f964202861affe9a36c66 +size 4832135288 diff --git a/model-00006-of-00063.safetensors b/model-00006-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d8259649249e18f8c6e714ee030535982ba890ac --- /dev/null +++ b/model-00006-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b24e3f97958053b683b79fd0e73e148b3cfe6e948e43512f8db48e4a8e05f592 +size 4832036656 diff --git a/model-00007-of-00063.safetensors b/model-00007-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e330df3692f064a7022d6d37e23a4b3c4ba8871b --- /dev/null +++ b/model-00007-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6695940df7a810b72b7f93ac9a23970d015335d3a6a1f5db6e21d97cdecab59 +size 4295132632 diff --git a/model-00008-of-00063.safetensors b/model-00008-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20e171107c602281256d213029953ddfe63d9a19 --- /dev/null +++ b/model-00008-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be660444edd459e04bb5bcbc46feb44670bcbf4ced2efb97e5b2d7fdac1e4e50 +size 4832135288 diff --git a/model-00009-of-00063.safetensors b/model-00009-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c955f731acd8f9bf32856bd09f3381493a905839 --- /dev/null +++ b/model-00009-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea649dfb0fcdb6bf57d41d03ea88e48bfbc433f948cff20f653e16e6a3eb1700 +size 4832036672 diff --git a/model-00010-of-00063.safetensors b/model-00010-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cea650fa89d83edeb83414ba078fb58c1441a604 --- /dev/null +++ b/model-00010-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9bab17a643499fb5b44c9e5cf2b18d5b7d8d85df7f54d6e14cea4cdbf09ef47 +size 4295132640 diff --git a/model-00011-of-00063.safetensors b/model-00011-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..47013d025d1f807c2ff0582c288a8a0c42ba6dd3 --- /dev/null +++ b/model-00011-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be9529e6d40a4e3da086b43c9b2e19da8f0774145cc949e24bf8621a358d2090 +size 4832135312 diff --git a/model-00012-of-00063.safetensors b/model-00012-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3669e599c0abe59f5fae4a059e7c11d8a6b2739e --- /dev/null +++ b/model-00012-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f0307453a0553d8f78b8a72656e02ffdcbe176cb871f88cac9a6da6e70cd173 +size 4832036672 diff --git a/model-00013-of-00063.safetensors b/model-00013-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d723adbb294607b75c3e89a07599fd4c1cac6d2e --- /dev/null +++ b/model-00013-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9e389e0ad4cabaaf5edb18fc7d80d205f6fca5f560862a2292fb0c475a2d54a +size 4295132640 diff --git a/model-00014-of-00063.safetensors b/model-00014-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..765dc1e08032f2416da50fe48dde6c960701c93f --- /dev/null +++ b/model-00014-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d82bd3a9ff4683856d8b9b260372a8696ed38704a8d145185e33fa893d6a52f2 +size 4832135312 diff --git a/model-00015-of-00063.safetensors b/model-00015-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b1d0d5ddf71dd9050a31fb64e1a3587b0db2dd8 --- /dev/null +++ b/model-00015-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d03b79a2a5ee055c5aad168ca107e46b7681335785b6a76581145a1c7bf2271b +size 4832036672 diff --git a/model-00016-of-00063.safetensors b/model-00016-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df8adceff57d17a9b6502fdbf9169bf2d6137506 --- /dev/null +++ b/model-00016-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63a13704103d3cda1f6707c9e52d2d2d6fd27acc39acb23852fcc5e01cc0dedd +size 4295132640 diff --git a/model-00017-of-00063.safetensors b/model-00017-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a5e3e4e64bd8c2fbcd60ca835050ba98c26ed11f --- /dev/null +++ b/model-00017-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:918cdbd486b051df26d28fe4225baff3b4328472a5ce35d11f3a0ce6dabf8299 +size 4832135312 diff --git a/model-00018-of-00063.safetensors b/model-00018-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e266d878665eb70d19210ed89c6b57d7088a184c --- /dev/null +++ b/model-00018-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23ed293bd1f5898e66acd50bcd4d03f1aea5b831cbe516132de2244dcbec4768 +size 4832036672 diff --git a/model-00019-of-00063.safetensors b/model-00019-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..692668ef2a6a24126b987bb23fed6abb5cb3ccba --- /dev/null +++ b/model-00019-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1706fd1d4ac47182ed2c6409deedfcdb1bdfacaf464dacd7f1b800011ef3db8b +size 4295132640 diff --git a/model-00020-of-00063.safetensors b/model-00020-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f38847868dcd4e9479e1ca9c8344233de77287a2 --- /dev/null +++ b/model-00020-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4832f5514a2f22422f84552e60bc373d167137f97521e196360d0114ce579f4 +size 4832135312 diff --git a/model-00021-of-00063.safetensors b/model-00021-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea097290e85fcc3db8b511d07eb021454fabefae --- /dev/null +++ b/model-00021-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:383915a5323a3453c94f0dbf7861d75b92f485eac346a7f9049cf7da28d05ab2 +size 4832036672 diff --git a/model-00022-of-00063.safetensors b/model-00022-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ebb2e3caeb3b2e520cb2b8f7b7e4a93c4e1691f9 --- /dev/null +++ b/model-00022-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb885b65e06b4ad43c248d00e8a989477a2f1f7f13d79ac771c065572106846d +size 4295132640 diff --git a/model-00023-of-00063.safetensors b/model-00023-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd663945789b0539cb015085ae2dc9112f86997c --- /dev/null +++ b/model-00023-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a87a8522fb3a3ebb62a33f5ef73841bb45987e59029c93c7cd4e94e002f79e4 +size 4832135312 diff --git a/model-00024-of-00063.safetensors b/model-00024-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4666e104830fc44062582fff3900476cb534dd2 --- /dev/null +++ b/model-00024-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62cc63c5523d24d4d165ee205c5d3c18c9d2f4bb7819faf682b5d3f28f3155ed +size 4832036672 diff --git a/model-00025-of-00063.safetensors b/model-00025-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83b7a7b46fbbba875dc4171cadc3295912489b04 --- /dev/null +++ b/model-00025-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd6d61314bfcec194cc6aede78337f0a898973b38c60d1ee1aee6e7ec9d86e4f +size 4295132640 diff --git a/model-00026-of-00063.safetensors b/model-00026-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff815dba3785b9a200b114a0f9f7e878cde59a5d --- /dev/null +++ b/model-00026-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6dbfca645db1dacf4a5736ca7ea9ea2803770c9141d9c21b779c2ca31a43896 +size 4832135312 diff --git a/model-00027-of-00063.safetensors b/model-00027-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8da2becf0884d59abe7246f52a3f93b4edb23b4b --- /dev/null +++ b/model-00027-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68e9768bdf01a0dddeb97bb058d34c474caebb3c10b188fbfaa2efef64223da7 +size 4832036672 diff --git a/model-00028-of-00063.safetensors b/model-00028-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..083ea029b61cfdd9704e23e1639290e66d53dfbb --- /dev/null +++ b/model-00028-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9caf90e7138c169cf0be804d8aac8c621d7e85f0f3f5c754fa5af8fd40af27c +size 4295132640 diff --git a/model-00029-of-00063.safetensors b/model-00029-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1d8a2acd3a91c9980adb531dde774e1609b96b5 --- /dev/null +++ b/model-00029-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91dedf5693253877f2ad587ec81b5dc40781752327bbafcdae0ecc018ced2671 +size 4832135312 diff --git a/model-00030-of-00063.safetensors b/model-00030-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0337673310ae9d49b18cae0c306d2bcff1834c96 --- /dev/null +++ b/model-00030-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:032d5d7e08f2240229b94ed0d99daa2124eedad8b9fcfe1d1a46dca675d1db28 +size 4832036672 diff --git a/model-00031-of-00063.safetensors b/model-00031-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2921dfff49f99284214acd84f1575d81ba67f09 --- /dev/null +++ b/model-00031-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ca2a2be18f3235197fd941694c4fcf4092c6c725762e63291d604d8e0d2a991 +size 4295132640 diff --git a/model-00032-of-00063.safetensors b/model-00032-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50ff5e8635b23d964f6142f4411797eeed34d32c --- /dev/null +++ b/model-00032-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a483848832f6a31976c1acba2425e95b01a83bdd4755317a0ddafa2aea992b2a +size 4832135312 diff --git a/model-00033-of-00063.safetensors b/model-00033-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..270409e97e4464afe19bb675717be204da01f366 --- /dev/null +++ b/model-00033-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f04c029b52afed6d70ce5b635854eb716fc7c43cc8465c032ed2012bec1aff1 +size 4832036672 diff --git a/model-00034-of-00063.safetensors b/model-00034-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91ca7d11de3b42dfe5f1f8a9d690547b0137a9e6 --- /dev/null +++ b/model-00034-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c77cb928c54414331275fdb42fcd39a5d098b59e22d6b57ec0cbce4de2604cd +size 4295132640 diff --git a/model-00035-of-00063.safetensors b/model-00035-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d05d42ddd848e4cc80fa43581fe6ad0f5b4f41d --- /dev/null +++ b/model-00035-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0107caaac168e435a9b265c7594fddb33a8272db0ce4241b4861aa0f660b2040 +size 4832135312 diff --git a/model-00036-of-00063.safetensors b/model-00036-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a8d8b7ef189f152903706f34ccfb6c1b90b4194 --- /dev/null +++ b/model-00036-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abc1bc4835ecc4e0f243d813dee7915fb9e91a26d792db62fa5cda53de6fdce4 +size 4832036672 diff --git a/model-00037-of-00063.safetensors b/model-00037-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4eeb63dc292b73bf23d4992eda60d040a385c62e --- /dev/null +++ b/model-00037-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11f47bee7a2bb05b120a328057a21fbe6e69831ab53fceaaf3535c3d13ecb0f0 +size 4295132640 diff --git a/model-00038-of-00063.safetensors b/model-00038-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59cadf07b86ae9b573f6ef6c2ba8ef590bb86713 --- /dev/null +++ b/model-00038-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9988436ccd2bec881b7652c63e9816e999e97c6bab048e9f4a6999fe75ba303b +size 4832135312 diff --git a/model-00039-of-00063.safetensors b/model-00039-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fdb393eba2b6c067679ce6df8134c784ed1167c6 --- /dev/null +++ b/model-00039-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c40bf9ef0b51c99f13dc2a746b36f548a3848cc71f99ed3c0f47ac9936b9d0b +size 4832036672 diff --git a/model-00040-of-00063.safetensors b/model-00040-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0ef499b502412ae9c2256d21eb337ce1c9ed2e5 --- /dev/null +++ b/model-00040-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbf1a7469d59e3a32cae58c2394f132c407e54a3f60065e2c055f98ea7ab9a04 +size 4295132640 diff --git a/model-00041-of-00063.safetensors b/model-00041-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..981d5e7aa9f76a87dcc84a5e849db691156c1b32 --- /dev/null +++ b/model-00041-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f8c9919d6148707ed3a49a8e51c99c2d24b47885fa5f9b6e91b86f443f5d421 +size 4832135312 diff --git a/model-00042-of-00063.safetensors b/model-00042-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f24261c70208489002d4409b11d6660d6a86d87 --- /dev/null +++ b/model-00042-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e967742dfab0c959e27d657947004d12d919427859ccaf8d1af425cbbdc16877 +size 4832036672 diff --git a/model-00043-of-00063.safetensors b/model-00043-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..759fcc5faee1e2c2511b842628cf99d2929159bb --- /dev/null +++ b/model-00043-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2c2592ba1390380c2836d6d7a1d06c7a9c245820751bc4ce16ddd858693da9 +size 4295132640 diff --git a/model-00044-of-00063.safetensors b/model-00044-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..564c7c11612e6988c5f5b1134529bc4d8d1e2728 --- /dev/null +++ b/model-00044-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdbd234aa0d32be71fb9cb61813be9ec606cefdff8dc5d891fffdc6a289b2321 +size 4832135312 diff --git a/model-00045-of-00063.safetensors b/model-00045-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb251ad6a280a1c69e77708f7617769f891c4dff --- /dev/null +++ b/model-00045-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a623f9d940bad8d1345569dfd5302d531e30dceb99a713a686b9256532ff243e +size 4832036672 diff --git a/model-00046-of-00063.safetensors b/model-00046-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b33675a9ffafbe7fd8ccc0332a3bac2543a030dd --- /dev/null +++ b/model-00046-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d6e1bca38153123fda6c0be181b0337d68155ae296ba617c365e500fe66c326 +size 4295132640 diff --git a/model-00047-of-00063.safetensors b/model-00047-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f928a04bf6bc5ea6b482198dff00c9edcd47b1e2 --- /dev/null +++ b/model-00047-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc47f4ac851abf42703e5e9c0f812f9a25848478c15a1f08443bb83e91d60145 +size 4832135312 diff --git a/model-00048-of-00063.safetensors b/model-00048-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5322a55d94f5467e793d3ec74af13d6abf8588dc --- /dev/null +++ b/model-00048-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e0c04190e90606beb8b0ad235454df11ab1050323c7a3e55c3c05fa76ffe99d +size 4832036672 diff --git a/model-00049-of-00063.safetensors b/model-00049-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a085fa8de4078715bae68c7187231721cec50097 --- /dev/null +++ b/model-00049-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ffbdeb0b00ec79ffe70834b3a96083ccf57c9bd4f08be9769fc6a47e556f13 +size 4295132640 diff --git a/model-00050-of-00063.safetensors b/model-00050-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..808e8c199bafa57a0b806a8c35c414a736b9b1a8 --- /dev/null +++ b/model-00050-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12971cb15f7cd21cc208140045f9dd746e39a3f94af263d02b01bf1f1a057c57 +size 4832135312 diff --git a/model-00051-of-00063.safetensors b/model-00051-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c52e5f041faaca4e3268e446a2a1deeb2fa011e --- /dev/null +++ b/model-00051-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010de28cd1b9658780cbcc538f5bd41a5ca161b0295c15b7a2a5d28220dd767a +size 4832036672 diff --git a/model-00052-of-00063.safetensors b/model-00052-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c631f310ae954153182b4ad41ba2f6287af3bed4 --- /dev/null +++ b/model-00052-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6798a51a9c561d368f45954259d9a8fb8f248f4cc2b08cf932483c6aef250c00 +size 4295132640 diff --git a/model-00053-of-00063.safetensors b/model-00053-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69f84ec6bc7772364af11353382104a2a5ce471f --- /dev/null +++ b/model-00053-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b45587f4da25b418c7fea7b479d1a95da730875ee51d71d2958a0a07062acd53 +size 4832135312 diff --git a/model-00054-of-00063.safetensors b/model-00054-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4557a270d41441a4220db254d582b8f08b5fa07 --- /dev/null +++ b/model-00054-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b0c582718ee92733b632e17b036202b5050b0820e01a9a6c87d84aa71119224 +size 4832036672 diff --git a/model-00055-of-00063.safetensors b/model-00055-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29413ba525d97b500c4013bcd5faf97506a9b4dd --- /dev/null +++ b/model-00055-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7bfe21c7da1cbe9db0b9aa636e2a79675e2c8b82238b71fd85a93566569731a +size 4295132640 diff --git a/model-00056-of-00063.safetensors b/model-00056-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc1ef9af2668e271fcb8ebc1b61f7e035f6107de --- /dev/null +++ b/model-00056-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e29c17c8a5e06f4fa42e0d92e9c18bc48574d2498066a019ee016e1c9418c99 +size 4832135312 diff --git a/model-00057-of-00063.safetensors b/model-00057-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92322d283690159bc6833b875dfb4d853b304c55 --- /dev/null +++ b/model-00057-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fb5795fdcfdbb3a3f97c28b093a378387436894d9443855badddf822eb59a4c +size 4832036672 diff --git a/model-00058-of-00063.safetensors b/model-00058-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6bddbdc26c6a5791b691bdcd6113f661610691f8 --- /dev/null +++ b/model-00058-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa136a95b230a2e0bdded5ecf8dbb5553a57bad7f66955d93f91cc163ecd3bf3 +size 4295132640 diff --git a/model-00059-of-00063.safetensors b/model-00059-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..deb3fa15e62e3ad6f24888ac555412845a5008db --- /dev/null +++ b/model-00059-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a92675896313cea65cd08f3bdf450f0c97bbf7be732fe0703900a72e47f1ae64 +size 4832135312 diff --git a/model-00060-of-00063.safetensors b/model-00060-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42166c2c0342180b00ecab2a375f720e4f6e6df1 --- /dev/null +++ b/model-00060-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fad26ed4f3b3057ea75a8b2a3c00c73ce8794e2fafb10309f3dd3bb65f2091f4 +size 4832036672 diff --git a/model-00061-of-00063.safetensors b/model-00061-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53b727c62f520f9da5ac22937b1e224869d3a5e4 --- /dev/null +++ b/model-00061-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ed0c628d02f336ad45c2e011c6edff354c2154fb350c01ce152b1b829a15350 +size 4295132640 diff --git a/model-00062-of-00063.safetensors b/model-00062-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..854b78902c946dd9a3282637e04479ce790a7dc4 --- /dev/null +++ b/model-00062-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5262899a04f14e875103b6fba91a8b3810faa399bb6f12dcbdc122a4b29045d9 +size 805405128 diff --git a/model-00063-of-00063.safetensors b/model-00063-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4bbf7cbc04b6039631a4a3293c8590750bb9d935 --- /dev/null +++ b/model-00063-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a64d3a3e9ddfeb07d3d11d62bf755eb799612c0c8026700cd21a7333ad7e626 +size 4982833280 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..045d3004b159f9fd77c32812bf5ecd51cd4edd7e --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,970 @@ +{ + "metadata": { + "total_size": 289151680512 + }, + "weight_map": { + "lm_head.weight": "model-00063-of-00063.safetensors", + "model.embed_tokens.weight": "model-00001-of-00063.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00063.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.k_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.q_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.v_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.1.input_layernorm.weight": "model-00003-of-00063.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00003-of-00063.safetensors", + "model.layers.1.self_attn.k_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.q_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.v_proj.bias": "model-00002-of-00063.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00063.safetensors", + "model.layers.10.input_layernorm.weight": "model-00010-of-00063.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00010-of-00063.safetensors", + "model.layers.10.self_attn.k_proj.bias": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.q_proj.bias": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.v_proj.bias": "model-00009-of-00063.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.11.input_layernorm.weight": "model-00011-of-00063.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00011-of-00063.safetensors", + "model.layers.11.self_attn.k_proj.bias": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.q_proj.bias": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.v_proj.bias": "model-00010-of-00063.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00010-of-00063.safetensors", + "model.layers.12.input_layernorm.weight": "model-00011-of-00063.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.k_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.q_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.v_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.13.input_layernorm.weight": "model-00012-of-00063.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00012-of-00063.safetensors", + "model.layers.13.self_attn.k_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.13.self_attn.q_proj.bias": "model-00011-of-00063.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00011-of-00063.safetensors", + "model.layers.13.self_attn.v_proj.bias": "model-00012-of-00063.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.14.input_layernorm.weight": "model-00013-of-00063.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00013-of-00063.safetensors", + "model.layers.14.self_attn.k_proj.bias": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.q_proj.bias": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.v_proj.bias": "model-00012-of-00063.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00012-of-00063.safetensors", + "model.layers.15.input_layernorm.weight": "model-00014-of-00063.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00014-of-00063.safetensors", + "model.layers.15.self_attn.k_proj.bias": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.q_proj.bias": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.v_proj.bias": "model-00013-of-00063.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00013-of-00063.safetensors", + "model.layers.16.input_layernorm.weight": "model-00014-of-00063.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.k_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.q_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.v_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.17.input_layernorm.weight": "model-00015-of-00063.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00015-of-00063.safetensors", + "model.layers.17.self_attn.k_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.17.self_attn.q_proj.bias": "model-00014-of-00063.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00014-of-00063.safetensors", + "model.layers.17.self_attn.v_proj.bias": "model-00015-of-00063.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.18.input_layernorm.weight": "model-00016-of-00063.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00016-of-00063.safetensors", + "model.layers.18.self_attn.k_proj.bias": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.q_proj.bias": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.v_proj.bias": "model-00015-of-00063.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00015-of-00063.safetensors", + "model.layers.19.input_layernorm.weight": "model-00017-of-00063.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00017-of-00063.safetensors", + "model.layers.19.self_attn.k_proj.bias": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.q_proj.bias": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.v_proj.bias": "model-00016-of-00063.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00016-of-00063.safetensors", + "model.layers.2.input_layernorm.weight": "model-00004-of-00063.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00004-of-00063.safetensors", + "model.layers.2.self_attn.k_proj.bias": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.q_proj.bias": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.v_proj.bias": "model-00003-of-00063.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00003-of-00063.safetensors", + "model.layers.20.input_layernorm.weight": "model-00017-of-00063.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.k_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.q_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.v_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.21.input_layernorm.weight": "model-00018-of-00063.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00018-of-00063.safetensors", + "model.layers.21.self_attn.k_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.21.self_attn.q_proj.bias": "model-00017-of-00063.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00017-of-00063.safetensors", + "model.layers.21.self_attn.v_proj.bias": "model-00018-of-00063.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.22.input_layernorm.weight": "model-00019-of-00063.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00019-of-00063.safetensors", + "model.layers.22.self_attn.k_proj.bias": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.q_proj.bias": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.v_proj.bias": "model-00018-of-00063.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00018-of-00063.safetensors", + "model.layers.23.input_layernorm.weight": "model-00020-of-00063.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00020-of-00063.safetensors", + "model.layers.23.self_attn.k_proj.bias": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.q_proj.bias": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.v_proj.bias": "model-00019-of-00063.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00019-of-00063.safetensors", + "model.layers.24.input_layernorm.weight": "model-00020-of-00063.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.k_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.q_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.v_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.25.input_layernorm.weight": "model-00021-of-00063.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00021-of-00063.safetensors", + "model.layers.25.self_attn.k_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.25.self_attn.q_proj.bias": "model-00020-of-00063.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00020-of-00063.safetensors", + "model.layers.25.self_attn.v_proj.bias": "model-00021-of-00063.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.26.input_layernorm.weight": "model-00022-of-00063.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00022-of-00063.safetensors", + "model.layers.26.self_attn.k_proj.bias": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.q_proj.bias": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.v_proj.bias": "model-00021-of-00063.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00021-of-00063.safetensors", + "model.layers.27.input_layernorm.weight": "model-00023-of-00063.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00023-of-00063.safetensors", + "model.layers.27.self_attn.k_proj.bias": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.q_proj.bias": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.v_proj.bias": "model-00022-of-00063.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00022-of-00063.safetensors", + "model.layers.28.input_layernorm.weight": "model-00023-of-00063.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.k_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.q_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.v_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.29.input_layernorm.weight": "model-00024-of-00063.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00024-of-00063.safetensors", + "model.layers.29.self_attn.k_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.29.self_attn.q_proj.bias": "model-00023-of-00063.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00023-of-00063.safetensors", + "model.layers.29.self_attn.v_proj.bias": "model-00024-of-00063.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.3.input_layernorm.weight": "model-00005-of-00063.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00005-of-00063.safetensors", + "model.layers.3.self_attn.k_proj.bias": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.q_proj.bias": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.v_proj.bias": "model-00004-of-00063.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00004-of-00063.safetensors", + "model.layers.30.input_layernorm.weight": "model-00025-of-00063.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00025-of-00063.safetensors", + "model.layers.30.self_attn.k_proj.bias": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.q_proj.bias": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.v_proj.bias": "model-00024-of-00063.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00024-of-00063.safetensors", + "model.layers.31.input_layernorm.weight": "model-00026-of-00063.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00026-of-00063.safetensors", + "model.layers.31.self_attn.k_proj.bias": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.q_proj.bias": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.v_proj.bias": "model-00025-of-00063.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00025-of-00063.safetensors", + "model.layers.32.input_layernorm.weight": "model-00026-of-00063.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.k_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.q_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.v_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.33.input_layernorm.weight": "model-00027-of-00063.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00027-of-00063.safetensors", + "model.layers.33.self_attn.k_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.33.self_attn.q_proj.bias": "model-00026-of-00063.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00026-of-00063.safetensors", + "model.layers.33.self_attn.v_proj.bias": "model-00027-of-00063.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.34.input_layernorm.weight": "model-00028-of-00063.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00028-of-00063.safetensors", + "model.layers.34.self_attn.k_proj.bias": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.q_proj.bias": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.v_proj.bias": "model-00027-of-00063.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00027-of-00063.safetensors", + "model.layers.35.input_layernorm.weight": "model-00029-of-00063.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00029-of-00063.safetensors", + "model.layers.35.self_attn.k_proj.bias": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.q_proj.bias": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.v_proj.bias": "model-00028-of-00063.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00028-of-00063.safetensors", + "model.layers.36.input_layernorm.weight": "model-00029-of-00063.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.k_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.q_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.v_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.37.input_layernorm.weight": "model-00030-of-00063.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00030-of-00063.safetensors", + "model.layers.37.self_attn.k_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.37.self_attn.q_proj.bias": "model-00029-of-00063.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00029-of-00063.safetensors", + "model.layers.37.self_attn.v_proj.bias": "model-00030-of-00063.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.38.input_layernorm.weight": "model-00031-of-00063.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00031-of-00063.safetensors", + "model.layers.38.self_attn.k_proj.bias": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.q_proj.bias": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.v_proj.bias": "model-00030-of-00063.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00030-of-00063.safetensors", + "model.layers.39.input_layernorm.weight": "model-00032-of-00063.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00032-of-00063.safetensors", + "model.layers.39.self_attn.k_proj.bias": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.q_proj.bias": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.v_proj.bias": "model-00031-of-00063.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00031-of-00063.safetensors", + "model.layers.4.input_layernorm.weight": "model-00005-of-00063.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.k_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.q_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.v_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.40.input_layernorm.weight": "model-00032-of-00063.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.k_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.q_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.v_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.41.input_layernorm.weight": "model-00033-of-00063.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00033-of-00063.safetensors", + "model.layers.41.self_attn.k_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.41.self_attn.q_proj.bias": "model-00032-of-00063.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00032-of-00063.safetensors", + "model.layers.41.self_attn.v_proj.bias": "model-00033-of-00063.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.42.input_layernorm.weight": "model-00034-of-00063.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00034-of-00063.safetensors", + "model.layers.42.self_attn.k_proj.bias": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.q_proj.bias": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.v_proj.bias": "model-00033-of-00063.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00033-of-00063.safetensors", + "model.layers.43.input_layernorm.weight": "model-00035-of-00063.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00035-of-00063.safetensors", + "model.layers.43.self_attn.k_proj.bias": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.q_proj.bias": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.v_proj.bias": "model-00034-of-00063.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00034-of-00063.safetensors", + "model.layers.44.input_layernorm.weight": "model-00035-of-00063.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.k_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.q_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.v_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.45.input_layernorm.weight": "model-00036-of-00063.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00036-of-00063.safetensors", + "model.layers.45.self_attn.k_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.45.self_attn.q_proj.bias": "model-00035-of-00063.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00035-of-00063.safetensors", + "model.layers.45.self_attn.v_proj.bias": "model-00036-of-00063.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.46.input_layernorm.weight": "model-00037-of-00063.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00037-of-00063.safetensors", + "model.layers.46.self_attn.k_proj.bias": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.q_proj.bias": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.v_proj.bias": "model-00036-of-00063.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00036-of-00063.safetensors", + "model.layers.47.input_layernorm.weight": "model-00038-of-00063.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00038-of-00063.safetensors", + "model.layers.47.self_attn.k_proj.bias": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.q_proj.bias": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.v_proj.bias": "model-00037-of-00063.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00037-of-00063.safetensors", + "model.layers.48.input_layernorm.weight": "model-00038-of-00063.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.k_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.q_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.v_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.49.input_layernorm.weight": "model-00039-of-00063.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00039-of-00063.safetensors", + "model.layers.49.self_attn.k_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.49.self_attn.q_proj.bias": "model-00038-of-00063.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00038-of-00063.safetensors", + "model.layers.49.self_attn.v_proj.bias": "model-00039-of-00063.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.5.input_layernorm.weight": "model-00006-of-00063.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00006-of-00063.safetensors", + "model.layers.5.self_attn.k_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.5.self_attn.q_proj.bias": "model-00005-of-00063.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00005-of-00063.safetensors", + "model.layers.5.self_attn.v_proj.bias": "model-00006-of-00063.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.50.input_layernorm.weight": "model-00040-of-00063.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00040-of-00063.safetensors", + "model.layers.50.self_attn.k_proj.bias": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.q_proj.bias": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.v_proj.bias": "model-00039-of-00063.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00039-of-00063.safetensors", + "model.layers.51.input_layernorm.weight": "model-00041-of-00063.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00041-of-00063.safetensors", + "model.layers.51.self_attn.k_proj.bias": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.q_proj.bias": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.v_proj.bias": "model-00040-of-00063.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00040-of-00063.safetensors", + "model.layers.52.input_layernorm.weight": "model-00041-of-00063.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.k_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.q_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.v_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.53.input_layernorm.weight": "model-00042-of-00063.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00042-of-00063.safetensors", + "model.layers.53.self_attn.k_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.53.self_attn.q_proj.bias": "model-00041-of-00063.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00041-of-00063.safetensors", + "model.layers.53.self_attn.v_proj.bias": "model-00042-of-00063.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.54.input_layernorm.weight": "model-00043-of-00063.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00043-of-00063.safetensors", + "model.layers.54.self_attn.k_proj.bias": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.q_proj.bias": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.v_proj.bias": "model-00042-of-00063.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00042-of-00063.safetensors", + "model.layers.55.input_layernorm.weight": "model-00044-of-00063.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00044-of-00063.safetensors", + "model.layers.55.self_attn.k_proj.bias": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.q_proj.bias": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.v_proj.bias": "model-00043-of-00063.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00043-of-00063.safetensors", + "model.layers.56.input_layernorm.weight": "model-00044-of-00063.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.k_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.q_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.v_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.57.input_layernorm.weight": "model-00045-of-00063.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00045-of-00063.safetensors", + "model.layers.57.self_attn.k_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.57.self_attn.q_proj.bias": "model-00044-of-00063.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00044-of-00063.safetensors", + "model.layers.57.self_attn.v_proj.bias": "model-00045-of-00063.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.58.input_layernorm.weight": "model-00046-of-00063.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00046-of-00063.safetensors", + "model.layers.58.self_attn.k_proj.bias": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.q_proj.bias": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.v_proj.bias": "model-00045-of-00063.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00045-of-00063.safetensors", + "model.layers.59.input_layernorm.weight": "model-00047-of-00063.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00047-of-00063.safetensors", + "model.layers.59.self_attn.k_proj.bias": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.q_proj.bias": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.v_proj.bias": "model-00046-of-00063.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00046-of-00063.safetensors", + "model.layers.6.input_layernorm.weight": "model-00007-of-00063.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00007-of-00063.safetensors", + "model.layers.6.self_attn.k_proj.bias": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.q_proj.bias": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.v_proj.bias": "model-00006-of-00063.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00006-of-00063.safetensors", + "model.layers.60.input_layernorm.weight": "model-00047-of-00063.safetensors", + "model.layers.60.mlp.down_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.k_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.q_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.v_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.61.input_layernorm.weight": "model-00048-of-00063.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00048-of-00063.safetensors", + "model.layers.61.self_attn.k_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.61.self_attn.q_proj.bias": "model-00047-of-00063.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00047-of-00063.safetensors", + "model.layers.61.self_attn.v_proj.bias": "model-00048-of-00063.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.62.input_layernorm.weight": "model-00049-of-00063.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00049-of-00063.safetensors", + "model.layers.62.self_attn.k_proj.bias": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.q_proj.bias": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.v_proj.bias": "model-00048-of-00063.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00048-of-00063.safetensors", + "model.layers.63.input_layernorm.weight": "model-00050-of-00063.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00050-of-00063.safetensors", + "model.layers.63.self_attn.k_proj.bias": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.q_proj.bias": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.v_proj.bias": "model-00049-of-00063.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00049-of-00063.safetensors", + "model.layers.64.input_layernorm.weight": "model-00050-of-00063.safetensors", + "model.layers.64.mlp.down_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.mlp.gate_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.mlp.up_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.k_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.q_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.v_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.65.input_layernorm.weight": "model-00051-of-00063.safetensors", + "model.layers.65.mlp.down_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.65.mlp.gate_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.65.mlp.up_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00051-of-00063.safetensors", + "model.layers.65.self_attn.k_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.65.self_attn.q_proj.bias": "model-00050-of-00063.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00050-of-00063.safetensors", + "model.layers.65.self_attn.v_proj.bias": "model-00051-of-00063.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.66.input_layernorm.weight": "model-00052-of-00063.safetensors", + "model.layers.66.mlp.down_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.66.mlp.gate_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.66.mlp.up_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00052-of-00063.safetensors", + "model.layers.66.self_attn.k_proj.bias": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.q_proj.bias": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.v_proj.bias": "model-00051-of-00063.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00051-of-00063.safetensors", + "model.layers.67.input_layernorm.weight": "model-00053-of-00063.safetensors", + "model.layers.67.mlp.down_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.67.mlp.gate_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.67.mlp.up_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00053-of-00063.safetensors", + "model.layers.67.self_attn.k_proj.bias": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.q_proj.bias": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.v_proj.bias": "model-00052-of-00063.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00052-of-00063.safetensors", + "model.layers.68.input_layernorm.weight": "model-00053-of-00063.safetensors", + "model.layers.68.mlp.down_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.mlp.gate_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.mlp.up_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.k_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.q_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.v_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.69.input_layernorm.weight": "model-00054-of-00063.safetensors", + "model.layers.69.mlp.down_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.69.mlp.gate_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.69.mlp.up_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00054-of-00063.safetensors", + "model.layers.69.self_attn.k_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.69.self_attn.q_proj.bias": "model-00053-of-00063.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00053-of-00063.safetensors", + "model.layers.69.self_attn.v_proj.bias": "model-00054-of-00063.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.7.input_layernorm.weight": "model-00008-of-00063.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00008-of-00063.safetensors", + "model.layers.7.self_attn.k_proj.bias": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.q_proj.bias": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.v_proj.bias": "model-00007-of-00063.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00007-of-00063.safetensors", + "model.layers.70.input_layernorm.weight": "model-00055-of-00063.safetensors", + "model.layers.70.mlp.down_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.70.mlp.gate_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.70.mlp.up_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00055-of-00063.safetensors", + "model.layers.70.self_attn.k_proj.bias": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.q_proj.bias": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.v_proj.bias": "model-00054-of-00063.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00054-of-00063.safetensors", + "model.layers.71.input_layernorm.weight": "model-00056-of-00063.safetensors", + "model.layers.71.mlp.down_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.71.mlp.gate_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.71.mlp.up_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00056-of-00063.safetensors", + "model.layers.71.self_attn.k_proj.bias": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.q_proj.bias": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.v_proj.bias": "model-00055-of-00063.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00055-of-00063.safetensors", + "model.layers.72.input_layernorm.weight": "model-00056-of-00063.safetensors", + "model.layers.72.mlp.down_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.mlp.gate_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.mlp.up_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.k_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.q_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.v_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.73.input_layernorm.weight": "model-00057-of-00063.safetensors", + "model.layers.73.mlp.down_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.73.mlp.gate_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.73.mlp.up_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00057-of-00063.safetensors", + "model.layers.73.self_attn.k_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.73.self_attn.q_proj.bias": "model-00056-of-00063.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00056-of-00063.safetensors", + "model.layers.73.self_attn.v_proj.bias": "model-00057-of-00063.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.74.input_layernorm.weight": "model-00058-of-00063.safetensors", + "model.layers.74.mlp.down_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.74.mlp.gate_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.74.mlp.up_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00058-of-00063.safetensors", + "model.layers.74.self_attn.k_proj.bias": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.q_proj.bias": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.v_proj.bias": "model-00057-of-00063.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00057-of-00063.safetensors", + "model.layers.75.input_layernorm.weight": "model-00059-of-00063.safetensors", + "model.layers.75.mlp.down_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.75.mlp.gate_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.75.mlp.up_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00059-of-00063.safetensors", + "model.layers.75.self_attn.k_proj.bias": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.q_proj.bias": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.v_proj.bias": "model-00058-of-00063.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00058-of-00063.safetensors", + "model.layers.76.input_layernorm.weight": "model-00059-of-00063.safetensors", + "model.layers.76.mlp.down_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.mlp.gate_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.mlp.up_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.k_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.q_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.v_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.77.input_layernorm.weight": "model-00060-of-00063.safetensors", + "model.layers.77.mlp.down_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.77.mlp.gate_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.77.mlp.up_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00060-of-00063.safetensors", + "model.layers.77.self_attn.k_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.77.self_attn.q_proj.bias": "model-00059-of-00063.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00059-of-00063.safetensors", + "model.layers.77.self_attn.v_proj.bias": "model-00060-of-00063.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.78.input_layernorm.weight": "model-00061-of-00063.safetensors", + "model.layers.78.mlp.down_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.78.mlp.gate_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.78.mlp.up_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00061-of-00063.safetensors", + "model.layers.78.self_attn.k_proj.bias": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.q_proj.bias": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.v_proj.bias": "model-00060-of-00063.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00060-of-00063.safetensors", + "model.layers.79.input_layernorm.weight": "model-00062-of-00063.safetensors", + "model.layers.79.mlp.down_proj.weight": "model-00062-of-00063.safetensors", + "model.layers.79.mlp.gate_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.79.mlp.up_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00062-of-00063.safetensors", + "model.layers.79.self_attn.k_proj.bias": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.q_proj.bias": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.v_proj.bias": "model-00061-of-00063.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00061-of-00063.safetensors", + "model.layers.8.input_layernorm.weight": "model-00008-of-00063.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.k_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.q_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.v_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.9.input_layernorm.weight": "model-00009-of-00063.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00009-of-00063.safetensors", + "model.layers.9.self_attn.k_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00009-of-00063.safetensors", + "model.layers.9.self_attn.q_proj.bias": "model-00008-of-00063.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00008-of-00063.safetensors", + "model.layers.9.self_attn.v_proj.bias": "model-00009-of-00063.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00009-of-00063.safetensors", + "model.norm.weight": "model-00062-of-00063.safetensors" + } +}