diff --git a/.ipynb_checkpoints/config-checkpoint.json b/.ipynb_checkpoints/config-checkpoint.json new file mode 100644 index 0000000000000000000000000000000000000000..83d1345ae079449678e9057df5b0c9e249700cc9 --- /dev/null +++ b/.ipynb_checkpoints/config-checkpoint.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "v2ray/Mixtral-8x22B-v0.1", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 32000, + "hidden_act": "silu", + "hidden_size": 6144, + "initializer_range": 0.02, + "intermediate_size": 16384, + "max_position_embeddings": 65536, + "model_type": "mixtral", + "num_attention_heads": 48, + "num_experts_per_tok": 2, + "num_hidden_layers": 56, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.40.0.dev0", + "use_cache": false, + "vocab_size": 32002 +} diff --git a/.ipynb_checkpoints/tokenizer_config-checkpoint.json b/.ipynb_checkpoints/tokenizer_config-checkpoint.json new file mode 100644 index 0000000000000000000000000000000000000000..1a00b7631fa5bf71e294fba19eafd227f29cdc89 --- /dev/null +++ b/.ipynb_checkpoints/tokenizer_config-checkpoint.json @@ -0,0 +1,61 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "use_fast": true +} diff --git a/adapter/.ipynb_checkpoints/config-checkpoint.json b/adapter/.ipynb_checkpoints/config-checkpoint.json new file mode 100644 index 0000000000000000000000000000000000000000..5f6f5881dbf72402281a6fa6b6be1dcfe2e6a620 --- /dev/null +++ b/adapter/.ipynb_checkpoints/config-checkpoint.json @@ -0,0 +1,46 @@ +{ + "_name_or_path": "v2ray/Mixtral-8x22B-v0.1", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 32000, + "hidden_act": "silu", + "hidden_size": 6144, + "initializer_range": 0.02, + "intermediate_size": 16384, + "max_position_embeddings": 65536, + "model_type": "mixtral", + "num_attention_heads": 48, + "num_experts_per_tok": 2, + "num_hidden_layers": 56, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "quantization_config": { + "_load_in_4bit": true, + "_load_in_8bit": false, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_storage": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "llm_int8_enable_fp32_cpu_offload": false, + "llm_int8_has_fp16_weight": false, + "llm_int8_skip_modules": null, + "llm_int8_threshold": 6.0, + "load_in_4bit": true, + "load_in_8bit": false, + "quant_method": "bitsandbytes" + }, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.40.0.dev0", + "use_cache": false, + "vocab_size": 32002 +} diff --git a/adapter/.ipynb_checkpoints/tokenizer_config-checkpoint.json b/adapter/.ipynb_checkpoints/tokenizer_config-checkpoint.json new file mode 100644 index 0000000000000000000000000000000000000000..1a00b7631fa5bf71e294fba19eafd227f29cdc89 --- /dev/null +++ b/adapter/.ipynb_checkpoints/tokenizer_config-checkpoint.json @@ -0,0 +1,61 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "use_fast": true +} diff --git a/adapter/adapter_config.json b/adapter/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..aa6d5710f1c86a8455cbf4e864f216d03b37ec8c --- /dev/null +++ b/adapter/adapter_config.json @@ -0,0 +1,38 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "v2ray/Mixtral-8x22B-v0.1", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "embed_tokens", + "lm_head" + ], + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "w2", + "w3", + "o_proj", + "v_proj", + "k_proj", + "gate", + "w1" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter/adapter_model.safetensors b/adapter/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3f9ba0208289b484bddebd80719a2a9bd3cfaaf --- /dev/null +++ b/adapter/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7c3e6eac571b2034dd287ea2c037b3187e9ea2a77b00a0166a08708fc006e87 +size 3409573984 diff --git a/adapter/added_tokens.json b/adapter/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..e36863df2bc13b20909d6711019409e777802fb5 --- /dev/null +++ b/adapter/added_tokens.json @@ -0,0 +1,4 @@ +{ + "<|im_end|>": 32000, + "<|im_start|>": 32001 +} diff --git a/adapter/config.json b/adapter/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5f6f5881dbf72402281a6fa6b6be1dcfe2e6a620 --- /dev/null +++ b/adapter/config.json @@ -0,0 +1,46 @@ +{ + "_name_or_path": "v2ray/Mixtral-8x22B-v0.1", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 32000, + "hidden_act": "silu", + "hidden_size": 6144, + "initializer_range": 0.02, + "intermediate_size": 16384, + "max_position_embeddings": 65536, + "model_type": "mixtral", + "num_attention_heads": 48, + "num_experts_per_tok": 2, + "num_hidden_layers": 56, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "quantization_config": { + "_load_in_4bit": true, + "_load_in_8bit": false, + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_storage": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "llm_int8_enable_fp32_cpu_offload": false, + "llm_int8_has_fp16_weight": false, + "llm_int8_skip_modules": null, + "llm_int8_threshold": 6.0, + "load_in_4bit": true, + "load_in_8bit": false, + "quant_method": "bitsandbytes" + }, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.40.0.dev0", + "use_cache": false, + "vocab_size": 32002 +} diff --git a/adapter/special_tokens_map.json b/adapter/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..40b1c6dadc2aed5b9e61dc7f9c7299e0aee16069 --- /dev/null +++ b/adapter/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/adapter/tokenizer.model b/adapter/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/adapter/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/adapter/tokenizer_config.json b/adapter/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1a00b7631fa5bf71e294fba19eafd227f29cdc89 --- /dev/null +++ b/adapter/tokenizer_config.json @@ -0,0 +1,61 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "use_fast": true +} diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..e36863df2bc13b20909d6711019409e777802fb5 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,4 @@ +{ + "<|im_end|>": 32000, + "<|im_start|>": 32001 +} diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..83d1345ae079449678e9057df5b0c9e249700cc9 --- /dev/null +++ b/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "v2ray/Mixtral-8x22B-v0.1", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 32000, + "hidden_act": "silu", + "hidden_size": 6144, + "initializer_range": 0.02, + "intermediate_size": 16384, + "max_position_embeddings": 65536, + "model_type": "mixtral", + "num_attention_heads": 48, + "num_experts_per_tok": 2, + "num_hidden_layers": 56, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.40.0.dev0", + "use_cache": false, + "vocab_size": 32002 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..16dd90acbcc482b30661bf1c48c719fec177f4a8 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "do_sample": true, + "eos_token_id": 2, + "transformers_version": "4.40.0.dev0" +} diff --git a/model-00001-of-00117.safetensors b/model-00001-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43b1543d520ebefb83c47e7049c72256bbf07699 --- /dev/null +++ b/model-00001-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed9ccee4b063e10c87cc2dfceff015e0a3e678e2fb90f3abbfb1fc8e7915aa9c +size 4762879840 diff --git a/model-00002-of-00117.safetensors b/model-00002-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..341ab4a892dc8bea900788a4e28b27e8ad4e751f --- /dev/null +++ b/model-00002-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f22d549ed12f075f97104a759327e1f500f3d22693c2d1fde680317b123223ac +size 4831839800 diff --git a/model-00003-of-00117.safetensors b/model-00003-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84dec9b2340d0bf00ca5ea80d729595c2abcf996 --- /dev/null +++ b/model-00003-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69c6b76c9cce92c00ba519b615a8f06fbe59dafc4e3de4b79a06408c2a9c1514 +size 4781754592 diff --git a/model-00004-of-00117.safetensors b/model-00004-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b21af08054aa797805194d1167515fcc785e76f7 --- /dev/null +++ b/model-00004-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:243be14b5542d668490df8848ff2b1589c6486165bf69c38cf8b48e47cb5d8f3 +size 4831839800 diff --git a/model-00005-of-00117.safetensors b/model-00005-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69e93f70217e1c2fa7bf713ed83bb847f3fab8ca --- /dev/null +++ b/model-00005-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:570b13e32ac883b39d079ac4c34c836f13d681e67963d21f5b3f9c6404a39b22 +size 4781754592 diff --git a/model-00006-of-00117.safetensors b/model-00006-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06243b52ac3d0058dd7ae9edcdcccc984d417d6c --- /dev/null +++ b/model-00006-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a8a5c88af32c56a7ca1dc2082402effcae75e4ea341241cbc56009820614845 +size 4831839800 diff --git a/model-00007-of-00117.safetensors b/model-00007-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b213450fcce9b6bb6f6b17e6fbf0b2dc12eb740c --- /dev/null +++ b/model-00007-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ca84f92b0c4bb27248d8e51a02cc5664fe3dc3f4121eed732b25a9ec53e3220 +size 4781754592 diff --git a/model-00008-of-00117.safetensors b/model-00008-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1de4e7b1b3246d000873a5bab4b91960b986c17b --- /dev/null +++ b/model-00008-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5045952f7053f129a938a4c1f92f271d8662bf3302dc2544a309d015a244717 +size 4831839800 diff --git a/model-00009-of-00117.safetensors b/model-00009-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d8a1ee291f8a256bedc61c683150efbccbdc2610 --- /dev/null +++ b/model-00009-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d49115ba7f3eb498d8e90f46ee39fca11b8e8cd46e3b3650f192291a49e1e79 +size 4781754592 diff --git a/model-00010-of-00117.safetensors b/model-00010-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d550dee0be7f0c8b329135edf1656e0cf7bfaaec --- /dev/null +++ b/model-00010-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c24577203ef245bf9c9c4733263ff803955aa45198ac695ef058e5223ec50c8 +size 4831839800 diff --git a/model-00011-of-00117.safetensors b/model-00011-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51fdeb441fbb6fd99e4f288defcee18cf43e5b91 --- /dev/null +++ b/model-00011-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61eb41a21b93c2762da0381320e8a2815ef22c72da0204217d05c064105b606b +size 4781754592 diff --git a/model-00012-of-00117.safetensors b/model-00012-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18729893e34e5e1e84ba26a2bef25a52ec21da6b --- /dev/null +++ b/model-00012-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb9dbb091b3bff6126fd5cb482672d64df73f549e2f35e0ee0a0b05b6cef6114 +size 4831839800 diff --git a/model-00013-of-00117.safetensors b/model-00013-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..449afd0f1d0eebc6aa0bd5bd375931f28c391a4d --- /dev/null +++ b/model-00013-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50fadd1d36aeaf2c6c97157c3668695113e86bc823a9510d29eff3d26685920d +size 4781754592 diff --git a/model-00014-of-00117.safetensors b/model-00014-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1c5fe401d092d7f167ffa3c62921329670b86e9 --- /dev/null +++ b/model-00014-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63553794ae97e292ccabcf8c279aa58e96fa7ffc366888f6fd28b61bf728094 +size 4831839800 diff --git a/model-00015-of-00117.safetensors b/model-00015-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8eb33b475cdc2913679e1b3aab6cd41b773b318a --- /dev/null +++ b/model-00015-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9836e419e59e2997b020c82fabf89431def9fbeb30454fe472f1b245f9c280cf +size 4781754592 diff --git a/model-00016-of-00117.safetensors b/model-00016-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9ef4cb6d13606252405f74e9d559e187a79af3d --- /dev/null +++ b/model-00016-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33bf7f116407b576afcaf16a7aef0a7daa43694ff49a90e6444ffb78d65819be +size 4831839800 diff --git a/model-00017-of-00117.safetensors b/model-00017-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24e76bb4da85c9d4cf4ced36e7903bc75d4efd84 --- /dev/null +++ b/model-00017-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29a9d2cf8345f044fba69fa5e3db916b922f23de90b33f1ca20af7c9a5888ed3 +size 4781754592 diff --git a/model-00018-of-00117.safetensors b/model-00018-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee7c827231411d6767696a867250cae9af5aab0b --- /dev/null +++ b/model-00018-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aadacd71c89329cbb00956f5184a44e2b00ec70d990186fee188942faae39e3 +size 4831839800 diff --git a/model-00019-of-00117.safetensors b/model-00019-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..121138080086f723c666024788288c7b1a4b5902 --- /dev/null +++ b/model-00019-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa2aa34ae8d24e7b01224c11e4d5d8fc68d824d56e948ac9329d670394442107 +size 4781754592 diff --git a/model-00020-of-00117.safetensors b/model-00020-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e868f26755a9321826abde1753c3a51c714feb0 --- /dev/null +++ b/model-00020-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e6e48d5f00e00a9722ade9d3aca8b11f984314c724fd1ed6f46009af47dd687 +size 4831839800 diff --git a/model-00021-of-00117.safetensors b/model-00021-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c1fd7eb2a5cefab05d1165a327ea709b9ec47f6 --- /dev/null +++ b/model-00021-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:044656ef715db71f006c4e9cdd6a790f0b90017d3378dc0ef44a75aa7dfa62f8 +size 4982884240 diff --git a/model-00022-of-00117.safetensors b/model-00022-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf81e66ba8517a358aa04629b417d46f0d698b5b --- /dev/null +++ b/model-00022-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc0bfa03e064a941ad41dfff8359414bce4ba670c065b42bd52bdcec3d18dfcd +size 4630710168 diff --git a/model-00023-of-00117.safetensors b/model-00023-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1563c72e55ac5ccd40e5f3f85142d3238cd04fc --- /dev/null +++ b/model-00023-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b43e20babd6871217657425e7690a64407f62740150529ca6c7e27774a531c19 +size 4831839808 diff --git a/model-00024-of-00117.safetensors b/model-00024-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3fff7f1453ba811eb898f6f63183bae06c417d35 --- /dev/null +++ b/model-00024-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcfff7ffae614189f50adff5519690df74290d122b241afd6de26319e051f011 +size 4781754608 diff --git a/model-00025-of-00117.safetensors b/model-00025-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29801d44c62c312e35ec85cedb4110a613cd44b9 --- /dev/null +++ b/model-00025-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86ebf3ad56678d6860cac36cfcf9c42061129e91ebe8af6ac34c68ebefdece0b +size 4831839808 diff --git a/model-00026-of-00117.safetensors b/model-00026-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..275410f3b93340154ab216535ac7d464eb85a832 --- /dev/null +++ b/model-00026-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3743756ea336fec881893a1236f70fc75d635783e42a574bfac763a7495f7d01 +size 4781754608 diff --git a/model-00027-of-00117.safetensors b/model-00027-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..952c515176b86091be37caae7000801729a387a3 --- /dev/null +++ b/model-00027-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41a7d915254a7191916791c7d26a700a3c9043f87e82b7244bdb88d554aff7f9 +size 4831839808 diff --git a/model-00028-of-00117.safetensors b/model-00028-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b2fe85049f840126152159633afdd2ae325fb3d --- /dev/null +++ b/model-00028-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:804c38ba54981e0cc73ab557ed2e61676dc66802865b70d4363001ad53898ccf +size 4781754608 diff --git a/model-00029-of-00117.safetensors b/model-00029-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..405f1389b00286962141eddb5fc1db65298fb616 --- /dev/null +++ b/model-00029-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f9a91e9b92885c8f316f8a5bb966fdc7293bc82ebeb0f3e867f9d2ea8870623 +size 4831839808 diff --git a/model-00030-of-00117.safetensors b/model-00030-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f89dfca0e7eca3c241c86ffa835dc2fdfb31b18e --- /dev/null +++ b/model-00030-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd9a6501726d7f0616eef570b17a26c9ff30a1295b7b2ffb7ffebb635f54347f +size 4781754608 diff --git a/model-00031-of-00117.safetensors b/model-00031-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5fa747ccf224e041f9ce9e1cb231f411a0787675 --- /dev/null +++ b/model-00031-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f3ce74a0727d90631ffe4aa90a5c3c43caa32d3e8c7f0400f0bf18ed4ad6caf +size 4831839808 diff --git a/model-00032-of-00117.safetensors b/model-00032-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..796d861e9dbfada66d4c627c75d6fcf787487551 --- /dev/null +++ b/model-00032-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd28605ac4b054904ed535da531cbd965e812b2b2d0a6a8fa5825a8973d4d11 +size 4781754608 diff --git a/model-00033-of-00117.safetensors b/model-00033-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b003d5df49a9e02f20c006889d66581b72055f1 --- /dev/null +++ b/model-00033-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8ec7fee4a70220c3dae069c26858b6a8fa0ea8da756199ea005c77274800452 +size 4831839808 diff --git a/model-00034-of-00117.safetensors b/model-00034-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f20a25e010a5b5ba6fb67d41724513a66f3d4d2 --- /dev/null +++ b/model-00034-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cc54c2e044db500106bfff731c7d1ae0aa3c5cfa38e366f7b999c8fbf8f4cf5 +size 4781754608 diff --git a/model-00035-of-00117.safetensors b/model-00035-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c09c03fefd8e2bb58ab37642fbbb80834b01708 --- /dev/null +++ b/model-00035-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:725e9151be7acebc2d1f40e10d8e11688254f540cbfa2d06effb0d19c43410e4 +size 4831839808 diff --git a/model-00036-of-00117.safetensors b/model-00036-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a260b650ba3fdba6f45b2c2d52b261c8e91b341 --- /dev/null +++ b/model-00036-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdbc7a37fa36bea7902e2af4f7846f1be91b13900a5f6646589b2d16ac376058 +size 4781754608 diff --git a/model-00037-of-00117.safetensors b/model-00037-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00f309d6ab24efe113a43e34a7de3e8f13136551 --- /dev/null +++ b/model-00037-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a35439c786572df3894619e205335af2cf2e1a8e048d1b21591e49f5ed850c8 +size 4831839808 diff --git a/model-00038-of-00117.safetensors b/model-00038-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9396940762c0d5cc05ffccf8880cb2104fb84f9f --- /dev/null +++ b/model-00038-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d28f9941ee17425604a60baaabb4984f153d1ba0f3ec4e2de7d68335a7adf2fc +size 4781754608 diff --git a/model-00039-of-00117.safetensors b/model-00039-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..86170dcd5d78f91ef07e9860b1127a50fda527f8 --- /dev/null +++ b/model-00039-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ddc705a73b3ebce9ca29d33a7cc7dc0743b5e8c933d1c90197df174a850e0cc +size 4831839808 diff --git a/model-00040-of-00117.safetensors b/model-00040-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..03bfb579560d3999f611de318538317b99e19777 --- /dev/null +++ b/model-00040-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f3878184f65b8bc564508993fc30dda46d8d784e868f0c23f177d3299e308fb +size 4781754608 diff --git a/model-00041-of-00117.safetensors b/model-00041-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ce6f5b91b6eddbd7d2c87edd516f785bec6ba0e --- /dev/null +++ b/model-00041-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7cac09374e76bd9c1f9f80d80cfbe4b64d090b1cb5eaf168fcf0f40e141ad81 +size 4831839808 diff --git a/model-00042-of-00117.safetensors b/model-00042-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0101537d91875056f34cb2403838a24e593ef10d --- /dev/null +++ b/model-00042-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cda386bcc8c334fa0dfa7138047328c725cdad7a4545aa264e73cb03eb86f3d0 +size 4781754608 diff --git a/model-00043-of-00117.safetensors b/model-00043-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32c45d768fe782059d596872eaa24e1d888db5c8 --- /dev/null +++ b/model-00043-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589fff7f9a88219454b4c97e6dd193454689e2fbf0ff5981a1a300a27d37ef60 +size 4831839808 diff --git a/model-00044-of-00117.safetensors b/model-00044-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e3a2a5bf74c3347b2831dde68e0cf7092d465d1 --- /dev/null +++ b/model-00044-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42661daae5c7c390451023b2d437f881a3c39e706a503187b990a62a099ae8e2 +size 4781754608 diff --git a/model-00045-of-00117.safetensors b/model-00045-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ed5f9bf9992e7fbf27b6492b8ce42850176f6a7 --- /dev/null +++ b/model-00045-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b0ad62cf5602162fd32c5e8ccd4d1350a9701a338c8bb09671596e328e9ff7d +size 4831839808 diff --git a/model-00046-of-00117.safetensors b/model-00046-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0db31a0fe05f52f74410618284fb025ea67ab6c --- /dev/null +++ b/model-00046-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bd723ead7e89c235808eb87de2309a0dc530d8dd45a9de31203fc4ba9ca4966 +size 4982884256 diff --git a/model-00047-of-00117.safetensors b/model-00047-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0335edaf637c3a1b8e2c5739593f8ef6c20535f --- /dev/null +++ b/model-00047-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:112ec0208367672b71264d32e85a6b641f2702dfbe8e7752f861971d7325b21c +size 4630710168 diff --git a/model-00048-of-00117.safetensors b/model-00048-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b06dc67acc15632a35795ddb94ee560877999d84 --- /dev/null +++ b/model-00048-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e77dd122ceac479849c8f63995ae6952e15ae18a56040ed27c5a513abf3927 +size 4831839808 diff --git a/model-00049-of-00117.safetensors b/model-00049-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..13005aab1485f0c4e16b5f0a0862202cc88d4c97 --- /dev/null +++ b/model-00049-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3d3a5109f50d34d0d32ba5d3dd4ef450cfb728d1ad9ee493bf3826c554531d1 +size 4781754608 diff --git a/model-00050-of-00117.safetensors b/model-00050-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74752e1306613d489aa5cdbd91c479179a93af30 --- /dev/null +++ b/model-00050-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc7ca1d1e8a35b03a9ccc803dab40a1581e965b26e81286d665da8ae670e4924 +size 4831839808 diff --git a/model-00051-of-00117.safetensors b/model-00051-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e3172f695d9d00e4643706cea4700dc1bedf4ddb --- /dev/null +++ b/model-00051-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ed305f4b70d7ba39bf3029fda9ed310699548e7e563107507cad15a5edd8f75 +size 4781754608 diff --git a/model-00052-of-00117.safetensors b/model-00052-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8726dcaf13fbfa35adb9597e7f44c28caa9782bf --- /dev/null +++ b/model-00052-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27113c3ae202971eede1a4b0f9a285adeb2a9825b59587b7b3bdc2e739bb012e +size 4831839808 diff --git a/model-00053-of-00117.safetensors b/model-00053-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a35deab54026fc467c39afef1474f8326884f7f5 --- /dev/null +++ b/model-00053-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5698c3037c9e66acd41ae562d174fc942da4fbb6e1ae5d38d122466aac2b6313 +size 4781754608 diff --git a/model-00054-of-00117.safetensors b/model-00054-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1ec791e245327fff5c572f6e10029d906cf3f66 --- /dev/null +++ b/model-00054-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c539691fb551c0ff9406d7efccdad924de38e44fe1c3dde7b8e2ac4e713e47 +size 4831839808 diff --git a/model-00055-of-00117.safetensors b/model-00055-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01f60e6c1996839c1c13585a98fafe9ec5ca298e --- /dev/null +++ b/model-00055-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ced66bc91d5f1c2982537ebcefd3f63972f242cf16dee117c8bfee2f376761e2 +size 4781754608 diff --git a/model-00056-of-00117.safetensors b/model-00056-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc6074d683918db3ad529c764f9c0f504dacd8d7 --- /dev/null +++ b/model-00056-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616bb97d8c912b74348a68dfed91ea917942095a036f1c24deea87fe166eb69c +size 4831839808 diff --git a/model-00057-of-00117.safetensors b/model-00057-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a430f65d0abc9451ce68c89008c84331fc85ede --- /dev/null +++ b/model-00057-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8df159d59cc14e95ef6fab112aae41942b93e834d8ba69230606cc40f1ef3ed +size 4781754608 diff --git a/model-00058-of-00117.safetensors b/model-00058-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c09bc1ef3c57db83d1faca5969adcd8691d49548 --- /dev/null +++ b/model-00058-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5136729ac1b5bf30d86eb60be57ed4194933cbfaf1073d7bcd303967fb68986a +size 4831839808 diff --git a/model-00059-of-00117.safetensors b/model-00059-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95df1c3e40bdea8028d529e6193ddb6aa87ceafc --- /dev/null +++ b/model-00059-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:327a00eba0426677ef5774a37ec0f27c0ee360f7b20120157bd013210c0d0008 +size 4781754608 diff --git a/model-00060-of-00117.safetensors b/model-00060-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5b5dad656e9977910238e2c927d7daa20a90c914 --- /dev/null +++ b/model-00060-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199355d37eec989a64dbabaea1136d1e580dff83be47b9410bb21f52c4e776c3 +size 4831839808 diff --git a/model-00061-of-00117.safetensors b/model-00061-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..266dc38fd90bdf2b0a74055b18030b26d868cefd --- /dev/null +++ b/model-00061-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c36be2c27a0c85c3f62edd2b43c6b6ee80fb20089a046f0f89f4b5f0437ce955 +size 4781754608 diff --git a/model-00062-of-00117.safetensors b/model-00062-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e658c48d9c49f193916f7eb229ba638b95c1eb9c --- /dev/null +++ b/model-00062-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de1d0a94786bffbbdf2a4e7d4d9f0e255db2b25687c321990cbd714f65ca5cb3 +size 4831839808 diff --git a/model-00063-of-00117.safetensors b/model-00063-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..989bd8798817751b4aa0901436b42b60fd6c4ace --- /dev/null +++ b/model-00063-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec444ac6c07aed4bb4c5f5248824001c2c4b6b38bd19b4cc8c9e66e565966973 +size 4781754608 diff --git a/model-00064-of-00117.safetensors b/model-00064-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d7d03c8cd1f1664d3674226741c4ce08a0091ca --- /dev/null +++ b/model-00064-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f647c05f9bc7fc0605a57e523d25a6f20d60ddfa2878de2d25318b3891b0f827 +size 4831839808 diff --git a/model-00065-of-00117.safetensors b/model-00065-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd0b3efb0c96872ee13ba77d9bce25dc71b7e2a1 --- /dev/null +++ b/model-00065-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54e06f9879b8e7c3d23fae6f54faf312cdb86b9d5aace708f0048c6f11c0e721 +size 4781754608 diff --git a/model-00066-of-00117.safetensors b/model-00066-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9fb2bf69fe995f6bace33e90d9db69df478f8b7 --- /dev/null +++ b/model-00066-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81a8af1ff2a3876a12a9249f01a0f4f5d739763320a1b2128fde43a20c4f212a +size 4831839808 diff --git a/model-00067-of-00117.safetensors b/model-00067-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e825183f3b7e1ea52930546820e197dc13cb6723 --- /dev/null +++ b/model-00067-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:956c85ffffc49162751d029a8a65487a31be3b8f343e7585eb904f566ae18dc4 +size 4781754608 diff --git a/model-00068-of-00117.safetensors b/model-00068-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..87ab7821e4312c7d1e71e077e2a0835a751f3418 --- /dev/null +++ b/model-00068-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b123d1e0e12242827329dc680859793d507231e2b2628768e852a10b9d49056b +size 4831839808 diff --git a/model-00069-of-00117.safetensors b/model-00069-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97ce10ff48370d3f909bcfabf48cd0f47921a2eb --- /dev/null +++ b/model-00069-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b916a5e6c31d7beed1badfba77e3fff0ead61843758faa8bd0a38107b658a39 +size 4781754608 diff --git a/model-00070-of-00117.safetensors b/model-00070-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d01a011e59dc2718d30d893414730589cf11f213 --- /dev/null +++ b/model-00070-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8232e505a56c08ad74f003a249a94910c8c2cd206b5f9c0d741b059964d04ff +size 4831839808 diff --git a/model-00071-of-00117.safetensors b/model-00071-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc78c2940340bd4600a90b7c44364c2b891b383f --- /dev/null +++ b/model-00071-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d537bf65941eec23391806e1362b4ac25963ec6edef9dc30095dfbd6e82d58de +size 4982884256 diff --git a/model-00072-of-00117.safetensors b/model-00072-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52c28e60e02f88d24709696bd8cafb313316a1ff --- /dev/null +++ b/model-00072-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c1fa2b54ada67f45125582448b60c17725c03808dd878ad6a8b8a98af6284e7 +size 4630710168 diff --git a/model-00073-of-00117.safetensors b/model-00073-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1b8e4b4829fd581501463a675d6547e16e11fdf --- /dev/null +++ b/model-00073-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7a3c56a3d50fe133b585bf8dcc09b54ff5e9c243444ff8eb21be45a75880b43 +size 4831839808 diff --git a/model-00074-of-00117.safetensors b/model-00074-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aae92bbc70dd349a2b17c587116ef8cb8fffe4d1 --- /dev/null +++ b/model-00074-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ccd0e8ebcc78508ddf91abb430664069fbebc218c871b0d2a8b153ca8035de +size 4781754608 diff --git a/model-00075-of-00117.safetensors b/model-00075-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca4b23af16f8c8028665780e9d8bd2058851c020 --- /dev/null +++ b/model-00075-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6c52b58f8f38edea4f6705efa2679c2d7223cc3e0e3069e99d46c6caeab1702 +size 4831839808 diff --git a/model-00076-of-00117.safetensors b/model-00076-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..62b2111e5c6b0d109f62ec9b58c595a0bd719aa5 --- /dev/null +++ b/model-00076-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c8539a58d1db169ba05fd7d0f8755d502ddfd2adcb587facdf5f427f7252cca +size 4781754608 diff --git a/model-00077-of-00117.safetensors b/model-00077-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9c1f3d13222a8f30372317f62fa1ee60bb7bc98 --- /dev/null +++ b/model-00077-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4de8352cbbc966ecded8da36a2b930c766cdf6fb20c87e7e57d29c326d8487d0 +size 4831839808 diff --git a/model-00078-of-00117.safetensors b/model-00078-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90ac8a40d34d92d8026d5ef92202b76fe502aa5a --- /dev/null +++ b/model-00078-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6032e83a23d2c574cf8de3397191312413df0b7fc286fbf8ef6bafe503545fcd +size 4781754608 diff --git a/model-00079-of-00117.safetensors b/model-00079-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c20167b065743dacdaff871067020b4f13d86e95 --- /dev/null +++ b/model-00079-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62946f66cf38492d8cb10b78cc070f724f8ed840b7e6e3a6ade46b2ba84a6519 +size 4831839808 diff --git a/model-00080-of-00117.safetensors b/model-00080-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e344d473b365f3869bb5c587b9710c9ad2dbf657 --- /dev/null +++ b/model-00080-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96d9208b64f40c3b74ab7a810b340f6630915ccb357f4fbece5054f06ed646ca +size 4781754608 diff --git a/model-00081-of-00117.safetensors b/model-00081-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e57dcaece4390d1822326a1ba4e0a15c58d193d9 --- /dev/null +++ b/model-00081-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aa734eeb65f199347af77dc4c49ce025aafcb54c32723f5dfc068c3f3a4e7db +size 4831839808 diff --git a/model-00082-of-00117.safetensors b/model-00082-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85f49155f6d03a512623ee5cd3451472a8d0e258 --- /dev/null +++ b/model-00082-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dfd67e612237f125ba13d1e89ec9ac92633995cd9f72344baa8d75cbd959ab9 +size 4781754608 diff --git a/model-00083-of-00117.safetensors b/model-00083-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f00a292b881b343e2dcca7b70664510f802729a7 --- /dev/null +++ b/model-00083-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d789dc6167885a5a28addfc55dd6ee1c51a0d3e63b8ca17ffbcf5a549a8f35f +size 4831839808 diff --git a/model-00084-of-00117.safetensors b/model-00084-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1729132ccbe11abc04367ec949001a2390f3955 --- /dev/null +++ b/model-00084-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b454a23ac6a8de917939ba33212a58304d7f4c3ac58e2e615c80c426d3ae9445 +size 4781754608 diff --git a/model-00085-of-00117.safetensors b/model-00085-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fcf82ff410a9c1619c2089d25fcaab368cc55c8e --- /dev/null +++ b/model-00085-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6c92e6b194fd518f4d52fe940906cdac6bb3a020ae00d738bc4f289ea591f7 +size 4831839808 diff --git a/model-00086-of-00117.safetensors b/model-00086-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c622fbd13ae04480e4e5cb05fb705361ab8498e8 --- /dev/null +++ b/model-00086-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb2f6c53bb9728ab5bdf61b664d42ea2f7f09cf13134e0e15de93eca1337261d +size 4781754608 diff --git a/model-00087-of-00117.safetensors b/model-00087-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe704f0496acc9ca169446fd2678ae0b0e24c7f3 --- /dev/null +++ b/model-00087-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1133cabde9849afee05d6973652321790be026860ebf55a38664e6ccf33106f1 +size 4831839808 diff --git a/model-00088-of-00117.safetensors b/model-00088-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3613771e366279abbd906ba89093da46e44265fb --- /dev/null +++ b/model-00088-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c12e879ad84a76141484e5cddea8b7eec19dfc2936a32e71b8b1bf855d34b7cb +size 4781754608 diff --git a/model-00089-of-00117.safetensors b/model-00089-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c81362dc64fed490dda385a52bc5e23e17b6e7b --- /dev/null +++ b/model-00089-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f8ef3e84c628e3d8861564439c6d2747ed4ccdbd4c1475bc4c39ae213ee0d1f +size 4831839808 diff --git a/model-00090-of-00117.safetensors b/model-00090-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6ea20cce2323c035faf417a7bf5a69548299ce8 --- /dev/null +++ b/model-00090-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06c1484c19e1f5126a0fb725014a2081913322483dd88df5470a25cea8528b73 +size 4781754608 diff --git a/model-00091-of-00117.safetensors b/model-00091-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c3ff840f872209b68d352701187fd3ad56c2a30 --- /dev/null +++ b/model-00091-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31f6cf943179e1b2a18431b524749d5b62474c738755df6cd01459cd111e0d03 +size 4831839808 diff --git a/model-00092-of-00117.safetensors b/model-00092-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..13a9abf8426471de82f7b9fdfbd969d278ed785f --- /dev/null +++ b/model-00092-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed9929ef7862da009439a7dbdcfbddc021a0e9f68b4f027742a70e95d06fb428 +size 4781754608 diff --git a/model-00093-of-00117.safetensors b/model-00093-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33ab17279b6fc824aad0dc11853223dfb812c2b4 --- /dev/null +++ b/model-00093-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99294f50a415c199682af4cf41d09eaf079f53abdc3b984b7dac1bf79a64ba81 +size 4831839808 diff --git a/model-00094-of-00117.safetensors b/model-00094-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ff08bc08f9075966baa5f7988fba50d899ee028 --- /dev/null +++ b/model-00094-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fed5b39174dbab5c0460e5de5919033cf69cb1252bc3b4afd43ada2f8af84fc +size 4781754608 diff --git a/model-00095-of-00117.safetensors b/model-00095-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b404dc647ab6823b34b9559f7a4da3598e9c4f65 --- /dev/null +++ b/model-00095-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48f02de469341ef7bec68a395481c464b8aa8252fb2cdb88d375b14f933a576c +size 4831839808 diff --git a/model-00096-of-00117.safetensors b/model-00096-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77566d44e21a2e826cef5d1df8900db27b26737d --- /dev/null +++ b/model-00096-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ff58321cb1044e97451a430b48eb924ed79978c14da65f8e4c12685ec072975 +size 4982884256 diff --git a/model-00097-of-00117.safetensors b/model-00097-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d6b24175b77ccc8faa91e5fddefd7473573594b4 --- /dev/null +++ b/model-00097-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ef31a301510e7250211d3844c7895f711b888366f42391a7b717bc130425fb9 +size 4630710168 diff --git a/model-00098-of-00117.safetensors b/model-00098-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..54758f2ad88929cb686727606a5ce469644c5177 --- /dev/null +++ b/model-00098-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb6d408c158423fa499ec85545194e4f4f23f6e77016e3a8cb581f4a974c188 +size 4831839808 diff --git a/model-00099-of-00117.safetensors b/model-00099-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8fc41671170d9e0ce3bf29e828b4bb73db663c29 --- /dev/null +++ b/model-00099-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f17557e689a4708d0d19efa2b56913787f0582bc11b6ceb09ea7ba4ab75143ec +size 4781754608 diff --git a/model-00100-of-00117.safetensors b/model-00100-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..771c42fb03b6fd9d0e396845f09a07e237d2708a --- /dev/null +++ b/model-00100-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f8c1db36a1410741690ddc788a2e8f6a9b1eace6521bc8511d43bb26edc74e6 +size 4831839808 diff --git a/model-00101-of-00117.safetensors b/model-00101-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..113906114fbed5d4281a36960b0a68e42ef9bd59 --- /dev/null +++ b/model-00101-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a5145d3317dfd907cef9ca025b626c16807f9a2b469d2b905d71e20a6383562 +size 4781754608 diff --git a/model-00102-of-00117.safetensors b/model-00102-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4a93db14604ae4399f8573be53958e22517128a --- /dev/null +++ b/model-00102-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc8a4a05ab4beda3a9ee4b842ff639956561172620031fc0e6bad0c3dd7a69f5 +size 4831839808 diff --git a/model-00103-of-00117.safetensors b/model-00103-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1fb5ed9216eff006d4c3d0df30cda63d8237109 --- /dev/null +++ b/model-00103-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b4533ee420817108906f2f2ec863d8c643e46e1c0a31e06b95dd10755a1ceb +size 4781754608 diff --git a/model-00104-of-00117.safetensors b/model-00104-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1308221de6635231cf85d18b7822acfdb7525b78 --- /dev/null +++ b/model-00104-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce80ef2d4d9e6885419f8dfd29bce5b9ae8984c6b96d59d906c6750a81d4cc8b +size 4831839808 diff --git a/model-00105-of-00117.safetensors b/model-00105-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6222faec9ee5f824be8ac8da44b281b51ec3043 --- /dev/null +++ b/model-00105-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a205df079c2d674b2dfbffec60049989f7f0557a7876369e85a990a10f64f40 +size 4781754608 diff --git a/model-00106-of-00117.safetensors b/model-00106-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16263e57f6477edd7dfd3bcae167696e46acd116 --- /dev/null +++ b/model-00106-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efaf9aea3f7bd52e46c69c895a742d649449b5514f374ab54de026222bbb6bba +size 4831839808 diff --git a/model-00107-of-00117.safetensors b/model-00107-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02698ec3ab414c5d29a4b2a94b17366a55541f59 --- /dev/null +++ b/model-00107-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d90fed792da7caf9f4fc20982bdb39c8a1950277d50b698c04628f41b9becda +size 4781754608 diff --git a/model-00108-of-00117.safetensors b/model-00108-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85ce69ee111178277dcb5d428e35952fe3a9d32b --- /dev/null +++ b/model-00108-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5c8aca1fede85b155e6c17e64dd29c32c83f7a8f5ec14c771e243136503565e +size 4831839808 diff --git a/model-00109-of-00117.safetensors b/model-00109-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08b9bfa6e1ff7ea3c4987608ca64e1ccbd10169e --- /dev/null +++ b/model-00109-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c58eea7411d6dda53a9265402244005fa59a77596811dd05dbc43ada7f21f08 +size 4781754608 diff --git a/model-00110-of-00117.safetensors b/model-00110-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2c557c3ed60d212b06ccfcf1675a477286288cc --- /dev/null +++ b/model-00110-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3768ebcca16bd651956b7e2f2d5cd45d336821860bb2b5976831cbc5760f67fc +size 4831839808 diff --git a/model-00111-of-00117.safetensors b/model-00111-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0683c78e301300f915ceaca842e8d2fecab4ee30 --- /dev/null +++ b/model-00111-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62ac7c9a9fbf91687a42a886ee3395d002f088b00004e9f2b96b4485b526852c +size 4781754608 diff --git a/model-00112-of-00117.safetensors b/model-00112-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2cea592ae9d13a752b3feec351db241020106d21 --- /dev/null +++ b/model-00112-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb9dc310517855da3d3bed4b388bb6f138f4f3d6903984dc2515c9c269dc5ce1 +size 4831839808 diff --git a/model-00113-of-00117.safetensors b/model-00113-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dffe2652e0bc32072bbcddbe79d470368859d226 --- /dev/null +++ b/model-00113-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3a62b6277eccf6e36ed5d9bf209db51008d318b7f2e510322b1e266eba1f2ac +size 4781754608 diff --git a/model-00114-of-00117.safetensors b/model-00114-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..205abf50a113b95c6772c79e0fd537d9f1ad5ab6 --- /dev/null +++ b/model-00114-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ebadb669bd3567689d75083aac5554362da1db61594cad1d597c62c8b1a3af9 +size 4831839808 diff --git a/model-00115-of-00117.safetensors b/model-00115-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea19d20c77a5a5e581c755cc6edd423ba5259b1d --- /dev/null +++ b/model-00115-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d07d069c9a233421997d54806818e0dc5b541ec2521652ebcab7b2d8bf790dc2 +size 4781754608 diff --git a/model-00116-of-00117.safetensors b/model-00116-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df804c1b65e49ee211c37b0573ed9940a45c240b --- /dev/null +++ b/model-00116-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:383857ebe74f03b39914d1778f95bb11a553c9dbc0957b2d4cbf20f59ee61c9b +size 4831839808 diff --git a/model-00117-of-00117.safetensors b/model-00117-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c8682ce9ab83ac9be04bdfc9168aed99bb6dad0 --- /dev/null +++ b/model-00117-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e1dd817c457cee15ad195028f79602c0a29d9f241febda6226c8a95804459fb +size 4813088480 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..873380b7bd029dfe7f5b976438deaca1c7947cbe --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1746 @@ +{ + "metadata": { + "total_size": 562482634752 + }, + "weight_map": { + "lm_head.weight": "model-00117-of-00117.safetensors", + "model.embed_tokens.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00117.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00117.safetensors", + "model.layers.0.input_layernorm.weight": "model-00003-of-00117.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00003-of-00117.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00117.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00117.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00117.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00117.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00003-of-00117.safetensors", + "model.layers.1.input_layernorm.weight": "model-00005-of-00117.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00005-of-00117.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00003-of-00117.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00003-of-00117.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00003-of-00117.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00003-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00117.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00022-of-00117.safetensors", + "model.layers.10.input_layernorm.weight": "model-00024-of-00117.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00024-of-00117.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00022-of-00117.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00022-of-00117.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00021-of-00117.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00022-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00117.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00024-of-00117.safetensors", + "model.layers.11.input_layernorm.weight": "model-00026-of-00117.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00026-of-00117.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00024-of-00117.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00024-of-00117.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00024-of-00117.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00024-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00028-of-00117.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00026-of-00117.safetensors", + "model.layers.12.input_layernorm.weight": "model-00028-of-00117.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00028-of-00117.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00026-of-00117.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00026-of-00117.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00026-of-00117.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00026-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00030-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00030-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00117.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00028-of-00117.safetensors", + "model.layers.13.input_layernorm.weight": "model-00030-of-00117.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00030-of-00117.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00028-of-00117.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00028-of-00117.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00028-of-00117.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00028-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00032-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00032-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00032-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00032-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00032-of-00117.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00030-of-00117.safetensors", + "model.layers.14.input_layernorm.weight": "model-00032-of-00117.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00032-of-00117.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00030-of-00117.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00030-of-00117.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00030-of-00117.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00030-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00117.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00032-of-00117.safetensors", + "model.layers.15.input_layernorm.weight": "model-00034-of-00117.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00034-of-00117.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00032-of-00117.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00032-of-00117.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00032-of-00117.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00032-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00034-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00034-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00034-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00034-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00034-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00036-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00036-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00036-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00036-of-00117.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00034-of-00117.safetensors", + "model.layers.16.input_layernorm.weight": "model-00036-of-00117.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00036-of-00117.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00034-of-00117.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00034-of-00117.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00034-of-00117.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00034-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00036-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00036-of-00117.safetensors", + "model.layers.17.input_layernorm.weight": "model-00038-of-00117.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00038-of-00117.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00036-of-00117.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00036-of-00117.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00036-of-00117.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00036-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00038-of-00117.safetensors", + "model.layers.18.input_layernorm.weight": "model-00040-of-00117.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00040-of-00117.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00038-of-00117.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00038-of-00117.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00038-of-00117.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00038-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00040-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00040-of-00117.safetensors", + "model.layers.19.input_layernorm.weight": "model-00042-of-00117.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00042-of-00117.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00040-of-00117.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00040-of-00117.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00040-of-00117.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00040-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00007-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00117.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00005-of-00117.safetensors", + "model.layers.2.input_layernorm.weight": "model-00007-of-00117.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00007-of-00117.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00005-of-00117.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00005-of-00117.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00005-of-00117.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00005-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00042-of-00117.safetensors", + "model.layers.20.input_layernorm.weight": "model-00044-of-00117.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00044-of-00117.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00042-of-00117.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00042-of-00117.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00042-of-00117.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00042-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00044-of-00117.safetensors", + "model.layers.21.input_layernorm.weight": "model-00046-of-00117.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00046-of-00117.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00044-of-00117.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00044-of-00117.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00044-of-00117.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00044-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00049-of-00117.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00047-of-00117.safetensors", + "model.layers.22.input_layernorm.weight": "model-00049-of-00117.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00049-of-00117.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00047-of-00117.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00047-of-00117.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00046-of-00117.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00047-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00051-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00051-of-00117.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00049-of-00117.safetensors", + "model.layers.23.input_layernorm.weight": "model-00051-of-00117.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00051-of-00117.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00049-of-00117.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00049-of-00117.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00049-of-00117.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00049-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00053-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00053-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00053-of-00117.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00051-of-00117.safetensors", + "model.layers.24.input_layernorm.weight": "model-00053-of-00117.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00053-of-00117.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00051-of-00117.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00051-of-00117.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00051-of-00117.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00051-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00055-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00055-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00055-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00055-of-00117.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00053-of-00117.safetensors", + "model.layers.25.input_layernorm.weight": "model-00055-of-00117.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00055-of-00117.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00053-of-00117.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00053-of-00117.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00053-of-00117.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00053-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00055-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00055-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00055-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00055-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00055-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00055-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00055-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00057-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00057-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00057-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00057-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00057-of-00117.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00055-of-00117.safetensors", + "model.layers.26.input_layernorm.weight": "model-00057-of-00117.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00057-of-00117.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00055-of-00117.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00055-of-00117.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00055-of-00117.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00055-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00057-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00057-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00057-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00057-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00057-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00057-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00059-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00059-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00059-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00059-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00059-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00059-of-00117.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00057-of-00117.safetensors", + "model.layers.27.input_layernorm.weight": "model-00059-of-00117.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00059-of-00117.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00057-of-00117.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00057-of-00117.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00057-of-00117.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00057-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00059-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00059-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00059-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00059-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00059-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00061-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00061-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00061-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00061-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00061-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00061-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00061-of-00117.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00059-of-00117.safetensors", + "model.layers.28.input_layernorm.weight": "model-00061-of-00117.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00061-of-00117.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00059-of-00117.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00059-of-00117.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00059-of-00117.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00059-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00061-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00061-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00061-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00061-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00061-of-00117.safetensors", + "model.layers.29.input_layernorm.weight": "model-00063-of-00117.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00063-of-00117.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00061-of-00117.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00061-of-00117.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00061-of-00117.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00061-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00117.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00007-of-00117.safetensors", + "model.layers.3.input_layernorm.weight": "model-00009-of-00117.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00009-of-00117.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00007-of-00117.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00007-of-00117.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00007-of-00117.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00007-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00063-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00063-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00063-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00063-of-00117.safetensors", + "model.layers.30.input_layernorm.weight": "model-00065-of-00117.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00065-of-00117.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00063-of-00117.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00063-of-00117.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00063-of-00117.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00063-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00065-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00065-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00065-of-00117.safetensors", + "model.layers.31.input_layernorm.weight": "model-00067-of-00117.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00067-of-00117.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00065-of-00117.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00065-of-00117.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00065-of-00117.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00065-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00067-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w1.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w2.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w3.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w1.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w2.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w3.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w1.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w2.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w3.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w1.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w2.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w3.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.gate.weight": "model-00067-of-00117.safetensors", + "model.layers.32.input_layernorm.weight": "model-00069-of-00117.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00069-of-00117.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00067-of-00117.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00067-of-00117.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00067-of-00117.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00067-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w1.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w2.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w3.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w1.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w2.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w3.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w1.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w2.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w3.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w1.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w2.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w3.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.gate.weight": "model-00069-of-00117.safetensors", + "model.layers.33.input_layernorm.weight": "model-00071-of-00117.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00071-of-00117.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00069-of-00117.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00069-of-00117.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00069-of-00117.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00069-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w1.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w2.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w3.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w1.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w2.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w3.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w1.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w2.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w3.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w1.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w2.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w3.weight": "model-00074-of-00117.safetensors", + "model.layers.34.block_sparse_moe.gate.weight": "model-00072-of-00117.safetensors", + "model.layers.34.input_layernorm.weight": "model-00074-of-00117.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00074-of-00117.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00072-of-00117.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00072-of-00117.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00071-of-00117.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00072-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w1.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w2.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w3.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w1.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w2.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w3.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w1.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w2.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w3.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w1.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w2.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w3.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w1.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w2.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w3.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w1.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w2.weight": "model-00076-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w3.weight": "model-00076-of-00117.safetensors", + "model.layers.35.block_sparse_moe.gate.weight": "model-00074-of-00117.safetensors", + "model.layers.35.input_layernorm.weight": "model-00076-of-00117.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00076-of-00117.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00074-of-00117.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00074-of-00117.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00074-of-00117.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00074-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w1.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w2.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w3.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w1.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w2.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w3.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w1.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w2.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w3.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w1.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w2.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w3.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w1.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w2.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w3.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w1.weight": "model-00078-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w2.weight": "model-00078-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w3.weight": "model-00078-of-00117.safetensors", + "model.layers.36.block_sparse_moe.gate.weight": "model-00076-of-00117.safetensors", + "model.layers.36.input_layernorm.weight": "model-00078-of-00117.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00078-of-00117.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00076-of-00117.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00076-of-00117.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00076-of-00117.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00076-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w1.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w2.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w3.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w1.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w2.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w3.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w1.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w2.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w3.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w1.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w2.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w3.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w1.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w2.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w3.weight": "model-00080-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w1.weight": "model-00080-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w2.weight": "model-00080-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w3.weight": "model-00080-of-00117.safetensors", + "model.layers.37.block_sparse_moe.gate.weight": "model-00078-of-00117.safetensors", + "model.layers.37.input_layernorm.weight": "model-00080-of-00117.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00080-of-00117.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00078-of-00117.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00078-of-00117.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00078-of-00117.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00078-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00080-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00080-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00080-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00080-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00080-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00080-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w1.weight": "model-00080-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w2.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w3.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w1.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w2.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w3.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w1.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w2.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w3.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w1.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w2.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w3.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w1.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w2.weight": "model-00082-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w3.weight": "model-00082-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w1.weight": "model-00082-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w2.weight": "model-00082-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w3.weight": "model-00082-of-00117.safetensors", + "model.layers.38.block_sparse_moe.gate.weight": "model-00080-of-00117.safetensors", + "model.layers.38.input_layernorm.weight": "model-00082-of-00117.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00082-of-00117.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00080-of-00117.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00080-of-00117.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00080-of-00117.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00080-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00082-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00082-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00082-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00082-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00082-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00082-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w1.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w2.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w3.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w1.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w2.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w3.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w1.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w2.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w3.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w1.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w2.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w3.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w1.weight": "model-00084-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w2.weight": "model-00084-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w3.weight": "model-00084-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w1.weight": "model-00084-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w2.weight": "model-00084-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w3.weight": "model-00084-of-00117.safetensors", + "model.layers.39.block_sparse_moe.gate.weight": "model-00082-of-00117.safetensors", + "model.layers.39.input_layernorm.weight": "model-00084-of-00117.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00084-of-00117.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00082-of-00117.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00082-of-00117.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00082-of-00117.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00082-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00009-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00009-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00009-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00117.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00009-of-00117.safetensors", + "model.layers.4.input_layernorm.weight": "model-00011-of-00117.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00011-of-00117.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00009-of-00117.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00009-of-00117.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00009-of-00117.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00009-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w1.weight": "model-00084-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w2.weight": "model-00084-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w3.weight": "model-00084-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w1.weight": "model-00084-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w2.weight": "model-00084-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w3.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w1.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w2.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w3.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w1.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w2.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w3.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w1.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w2.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w3.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w1.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w2.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w3.weight": "model-00086-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w1.weight": "model-00086-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w2.weight": "model-00086-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w3.weight": "model-00086-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w1.weight": "model-00086-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w2.weight": "model-00086-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w3.weight": "model-00086-of-00117.safetensors", + "model.layers.40.block_sparse_moe.gate.weight": "model-00084-of-00117.safetensors", + "model.layers.40.input_layernorm.weight": "model-00086-of-00117.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00086-of-00117.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00084-of-00117.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00084-of-00117.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00084-of-00117.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00084-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w1.weight": "model-00086-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w2.weight": "model-00086-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w3.weight": "model-00086-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w1.weight": "model-00086-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w2.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w3.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w1.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w2.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w3.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w1.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w2.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w3.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w1.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w2.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w3.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w1.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w2.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w3.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w1.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w2.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w3.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w1.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w2.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w3.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.gate.weight": "model-00086-of-00117.safetensors", + "model.layers.41.input_layernorm.weight": "model-00088-of-00117.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00088-of-00117.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00086-of-00117.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00086-of-00117.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00086-of-00117.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00086-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w1.weight": "model-00088-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w2.weight": "model-00088-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w3.weight": "model-00088-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w1.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w2.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w3.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w1.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w2.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w3.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w1.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w2.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w3.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w1.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w2.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w3.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w1.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w2.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w3.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w1.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w2.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w3.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w1.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w2.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w3.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.gate.weight": "model-00088-of-00117.safetensors", + "model.layers.42.input_layernorm.weight": "model-00090-of-00117.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00090-of-00117.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00088-of-00117.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00088-of-00117.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00088-of-00117.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00088-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w1.weight": "model-00090-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w2.weight": "model-00090-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w3.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w1.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w2.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w3.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w1.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w2.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w3.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w1.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w2.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w3.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w1.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w2.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w3.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w1.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w2.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w3.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w1.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w2.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w3.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w1.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w2.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w3.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.gate.weight": "model-00090-of-00117.safetensors", + "model.layers.43.input_layernorm.weight": "model-00092-of-00117.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00092-of-00117.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00090-of-00117.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00090-of-00117.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00090-of-00117.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00090-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w1.weight": "model-00092-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w2.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w3.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w1.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w2.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w3.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w1.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w2.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w3.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w1.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w2.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w3.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w1.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w2.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w3.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w1.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w2.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w3.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w1.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w2.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w3.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w1.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w2.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w3.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.gate.weight": "model-00092-of-00117.safetensors", + "model.layers.44.input_layernorm.weight": "model-00094-of-00117.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00094-of-00117.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00092-of-00117.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00092-of-00117.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00092-of-00117.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00092-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w1.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w2.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w3.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w1.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w2.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w3.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w1.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w2.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w3.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w1.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w2.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w3.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w1.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w2.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w3.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w1.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w2.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w3.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w1.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w2.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w3.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w1.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w2.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w3.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.gate.weight": "model-00094-of-00117.safetensors", + "model.layers.45.input_layernorm.weight": "model-00096-of-00117.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00096-of-00117.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00094-of-00117.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00094-of-00117.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00094-of-00117.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00094-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w1.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w2.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w3.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w1.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w2.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w3.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w1.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w2.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w3.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w1.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w2.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w3.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w1.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w2.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w3.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w1.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w2.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w3.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w1.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w2.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w3.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w1.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w2.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w3.weight": "model-00099-of-00117.safetensors", + "model.layers.46.block_sparse_moe.gate.weight": "model-00097-of-00117.safetensors", + "model.layers.46.input_layernorm.weight": "model-00099-of-00117.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00099-of-00117.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00097-of-00117.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00097-of-00117.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00096-of-00117.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00097-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w1.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w2.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w3.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w1.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w2.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w3.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w1.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w2.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w3.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w1.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w2.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w3.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w1.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w2.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w3.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w1.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w2.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w3.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w1.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w2.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w3.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w1.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w2.weight": "model-00101-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w3.weight": "model-00101-of-00117.safetensors", + "model.layers.47.block_sparse_moe.gate.weight": "model-00099-of-00117.safetensors", + "model.layers.47.input_layernorm.weight": "model-00101-of-00117.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00101-of-00117.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00099-of-00117.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00099-of-00117.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00099-of-00117.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00099-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w1.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w2.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w3.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w1.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w2.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w3.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w1.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w2.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w3.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w1.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w2.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w3.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w1.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w2.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w3.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w1.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w2.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w3.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w1.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w2.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w3.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w1.weight": "model-00103-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w2.weight": "model-00103-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w3.weight": "model-00103-of-00117.safetensors", + "model.layers.48.block_sparse_moe.gate.weight": "model-00101-of-00117.safetensors", + "model.layers.48.input_layernorm.weight": "model-00103-of-00117.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00103-of-00117.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00101-of-00117.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00101-of-00117.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00101-of-00117.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00101-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w1.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w2.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w3.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w1.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w2.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w3.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w1.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w2.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w3.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w1.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w2.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w3.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w1.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w2.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w3.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w1.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w2.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w3.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w1.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w2.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w3.weight": "model-00105-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w1.weight": "model-00105-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w2.weight": "model-00105-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w3.weight": "model-00105-of-00117.safetensors", + "model.layers.49.block_sparse_moe.gate.weight": "model-00103-of-00117.safetensors", + "model.layers.49.input_layernorm.weight": "model-00105-of-00117.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00105-of-00117.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00103-of-00117.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00103-of-00117.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00103-of-00117.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00103-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00011-of-00117.safetensors", + "model.layers.5.input_layernorm.weight": "model-00013-of-00117.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00013-of-00117.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00011-of-00117.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00011-of-00117.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00011-of-00117.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00011-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w1.weight": "model-00105-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w2.weight": "model-00105-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w3.weight": "model-00105-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w1.weight": "model-00105-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w2.weight": "model-00105-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w3.weight": "model-00105-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w1.weight": "model-00105-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w2.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w3.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w1.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w2.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w3.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w1.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w2.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w3.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w1.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w2.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w3.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w1.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w2.weight": "model-00107-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w3.weight": "model-00107-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w1.weight": "model-00107-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w2.weight": "model-00107-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w3.weight": "model-00107-of-00117.safetensors", + "model.layers.50.block_sparse_moe.gate.weight": "model-00105-of-00117.safetensors", + "model.layers.50.input_layernorm.weight": "model-00107-of-00117.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00107-of-00117.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00105-of-00117.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00105-of-00117.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00105-of-00117.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00105-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w1.weight": "model-00107-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w2.weight": "model-00107-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w3.weight": "model-00107-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w1.weight": "model-00107-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w2.weight": "model-00107-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w3.weight": "model-00107-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w1.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w2.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w3.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w1.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w2.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w3.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w1.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w2.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w3.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w1.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w2.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w3.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w1.weight": "model-00109-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w2.weight": "model-00109-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w3.weight": "model-00109-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w1.weight": "model-00109-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w2.weight": "model-00109-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w3.weight": "model-00109-of-00117.safetensors", + "model.layers.51.block_sparse_moe.gate.weight": "model-00107-of-00117.safetensors", + "model.layers.51.input_layernorm.weight": "model-00109-of-00117.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00109-of-00117.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00107-of-00117.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00107-of-00117.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00107-of-00117.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00107-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w1.weight": "model-00109-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w2.weight": "model-00109-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w3.weight": "model-00109-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w1.weight": "model-00109-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w2.weight": "model-00109-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w3.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w1.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w2.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w3.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w1.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w2.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w3.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w1.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w2.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w3.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w1.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w2.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w3.weight": "model-00111-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w1.weight": "model-00111-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w2.weight": "model-00111-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w3.weight": "model-00111-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w1.weight": "model-00111-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w2.weight": "model-00111-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w3.weight": "model-00111-of-00117.safetensors", + "model.layers.52.block_sparse_moe.gate.weight": "model-00109-of-00117.safetensors", + "model.layers.52.input_layernorm.weight": "model-00111-of-00117.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00111-of-00117.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00109-of-00117.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00109-of-00117.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00109-of-00117.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00109-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w1.weight": "model-00111-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w2.weight": "model-00111-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w3.weight": "model-00111-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w1.weight": "model-00111-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w2.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w3.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w1.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w2.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w3.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w1.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w2.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w3.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w1.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w2.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w3.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w1.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w2.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w3.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w1.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w2.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w3.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w1.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w2.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w3.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.gate.weight": "model-00111-of-00117.safetensors", + "model.layers.53.input_layernorm.weight": "model-00113-of-00117.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00113-of-00117.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00111-of-00117.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00111-of-00117.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00111-of-00117.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00111-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w1.weight": "model-00113-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w2.weight": "model-00113-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w3.weight": "model-00113-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w1.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w2.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w3.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w1.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w2.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w3.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w1.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w2.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w3.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w1.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w2.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w3.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w1.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w2.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w3.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w1.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w2.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w3.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w1.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w2.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w3.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.gate.weight": "model-00113-of-00117.safetensors", + "model.layers.54.input_layernorm.weight": "model-00115-of-00117.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00115-of-00117.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00113-of-00117.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00113-of-00117.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00113-of-00117.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00113-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w1.weight": "model-00115-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w2.weight": "model-00115-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w3.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w1.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w2.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w3.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w1.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w2.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w3.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w1.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w2.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w3.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w1.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w2.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w3.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w1.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w2.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w3.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w1.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w2.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w3.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w1.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w2.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w3.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.gate.weight": "model-00115-of-00117.safetensors", + "model.layers.55.input_layernorm.weight": "model-00117-of-00117.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00117-of-00117.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00115-of-00117.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00115-of-00117.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00115-of-00117.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00115-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00013-of-00117.safetensors", + "model.layers.6.input_layernorm.weight": "model-00015-of-00117.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00015-of-00117.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00013-of-00117.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00013-of-00117.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00013-of-00117.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00013-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00015-of-00117.safetensors", + "model.layers.7.input_layernorm.weight": "model-00017-of-00117.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00017-of-00117.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00015-of-00117.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00015-of-00117.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00015-of-00117.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00015-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00017-of-00117.safetensors", + "model.layers.8.input_layernorm.weight": "model-00019-of-00117.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00019-of-00117.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00017-of-00117.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00017-of-00117.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00017-of-00117.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00017-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00019-of-00117.safetensors", + "model.layers.9.input_layernorm.weight": "model-00021-of-00117.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00021-of-00117.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00019-of-00117.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00019-of-00117.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00019-of-00117.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00019-of-00117.safetensors", + "model.norm.weight": "model-00117-of-00117.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..40b1c6dadc2aed5b9e61dc7f9c7299e0aee16069 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1a00b7631fa5bf71e294fba19eafd227f29cdc89 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,61 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "use_fast": true +}