GPTSAN-japanese / pytorch_model.bin.index.json
Tanrei's picture
Upload GPTSANJapaneseForConditionalGeneration
5ce809b
raw
history blame contribute delete
No virus
37.9 kB
{
"metadata": {
"total_size": 11116003968
},
"weight_map": {
"blocks.0.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin",
"blocks.0.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin",
"blocks.0.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin",
"blocks.0.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.0.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin",
"blocks.1.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin",
"blocks.1.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin",
"blocks.1.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.1.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin",
"blocks.2.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin",
"blocks.2.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin",
"blocks.2.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.2.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin",
"blocks.3.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin",
"blocks.3.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin",
"blocks.3.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.3.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin",
"blocks.4.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin",
"blocks.4.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin",
"blocks.4.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.4.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin",
"blocks.5.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin",
"blocks.5.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin",
"blocks.5.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.5.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin",
"blocks.6.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin",
"blocks.6.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin",
"blocks.6.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.6.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00001-of-00002.bin",
"blocks.7.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin",
"blocks.7.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin",
"blocks.7.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.7.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.mlp.router.classifier.weight": "pytorch_model-00001-of-00002.bin",
"blocks.8.FeedForward.norm.bias": "pytorch_model-00002-of-00002.bin",
"blocks.8.FeedForward.norm.weight": "pytorch_model-00002-of-00002.bin",
"blocks.8.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00002-of-00002.bin",
"blocks.8.SelfAttention.SelfAttention.o": "pytorch_model-00001-of-00002.bin",
"blocks.8.SelfAttention.SelfAttention.qkv": "pytorch_model-00001-of-00002.bin",
"blocks.8.SelfAttention.norm.bias": "pytorch_model-00001-of-00002.bin",
"blocks.8.SelfAttention.norm.weight": "pytorch_model-00001-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_0.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_0.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_1.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_1.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_10.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_10.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_11.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_11.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_12.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_12.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_13.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_13.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_14.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_14.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_15.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_15.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_2.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_2.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_3.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_3.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_4.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_4.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_5.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_5.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_6.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_6.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_7.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_7.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_8.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_8.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_9.wi.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.experts.expert_9.wo.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.mlp.router.classifier.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.norm.bias": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.norm.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.FeedForward.soft_bypass_mlp.weight": "pytorch_model-00002-of-00002.bin",
"blocks.9.SelfAttention.SelfAttention.o": "pytorch_model-00002-of-00002.bin",
"blocks.9.SelfAttention.SelfAttention.qkv": "pytorch_model-00002-of-00002.bin",
"blocks.9.SelfAttention.norm.bias": "pytorch_model-00002-of-00002.bin",
"blocks.9.SelfAttention.norm.weight": "pytorch_model-00002-of-00002.bin",
"embed_tokens.weight": "pytorch_model-00001-of-00002.bin",
"logits.bias": "pytorch_model-00001-of-00002.bin",
"logits.weight": "pytorch_model-00001-of-00002.bin",
"position_embeddings.weight": "pytorch_model-00001-of-00002.bin",
"spout.0.weight": "pytorch_model-00002-of-00002.bin",
"spout.10.weight": "pytorch_model-00002-of-00002.bin",
"spout.12.weight": "pytorch_model-00002-of-00002.bin",
"spout.14.weight": "pytorch_model-00002-of-00002.bin",
"spout.16.weight": "pytorch_model-00002-of-00002.bin",
"spout.2.weight": "pytorch_model-00002-of-00002.bin",
"spout.4.weight": "pytorch_model-00002-of-00002.bin",
"spout.6.weight": "pytorch_model-00002-of-00002.bin",
"spout.8.weight": "pytorch_model-00002-of-00002.bin",
"token_bias": "pytorch_model-00001-of-00002.bin"
}
}