OpenBA-InstructGen / pytorch_model.bin.index.json
OpenBA
first commit
ec736cc
{
"metadata": {
"total_size": 29139601408
},
"weight_map": {
"decoder.block.0.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.inter_attn.kv.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.inter_attn.kv.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.inter_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.inter_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.inter_attn.q.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.inter_attn.q.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.0.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.post_inter_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.post_inter_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.0.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.inter_attn.kv.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.inter_attn.kv.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.inter_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.inter_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.inter_attn.q.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.inter_attn.q.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.1.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.post_inter_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.post_inter_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.1.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.10.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.10.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.10.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.11.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.11.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.12.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.12.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.13.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.13.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.14.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.14.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.15.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.15.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.16.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.16.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.17.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.17.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.18.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.18.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.19.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.19.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.2.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.inter_attn.kv.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.inter_attn.kv.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.inter_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.inter_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.inter_attn.q.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.inter_attn.q.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.2.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.post_inter_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.post_inter_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.2.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.20.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.20.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.20.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.21.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.21.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.22.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.22.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.23.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.23.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.24.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.24.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.25.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.25.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.26.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.26.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.27.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.27.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.28.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.28.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.29.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.29.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.29.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.29.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.29.inter_attn.o.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.29.inter_attn.o.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.29.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.29.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.29.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.29.mlp.fc_out.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.29.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.29.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.29.post_inter_attn_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.29.post_inter_attn_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.29.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.29.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.29.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.29.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.3.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.inter_attn.kv.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.inter_attn.kv.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.inter_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.inter_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.inter_attn.q.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.inter_attn.q.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.3.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.post_inter_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.post_inter_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.3.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.30.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.inter_attn.kv.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.inter_attn.kv.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.inter_attn.o.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.inter_attn.o.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.inter_attn.q.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.inter_attn.q.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.mlp.fc_out.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.post_attn_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.post_attn_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.post_inter_attn_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.post_inter_attn_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.self_attn.o.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.self_attn.o.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.self_attn.qkv.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.30.self_attn.qkv.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.inter_attn.kv.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.inter_attn.kv.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.inter_attn.o.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.inter_attn.o.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.inter_attn.q.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.inter_attn.q.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.mlp.fc_out.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.post_attn_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.post_attn_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.post_inter_attn_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.post_inter_attn_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.self_attn.o.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.self_attn.o.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.self_attn.qkv.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.31.self_attn.qkv.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.inter_attn.kv.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.inter_attn.kv.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.inter_attn.o.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.inter_attn.o.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.inter_attn.q.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.inter_attn.q.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.mlp.fc_out.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.post_attn_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.post_attn_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.post_inter_attn_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.post_inter_attn_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.self_attn.o.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.self_attn.o.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.self_attn.qkv.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.32.self_attn.qkv.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.inter_attn.kv.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.inter_attn.kv.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.inter_attn.o.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.inter_attn.o.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.inter_attn.q.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.inter_attn.q.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.mlp.fc_out.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.post_attn_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.post_attn_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.post_inter_attn_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.post_inter_attn_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.self_attn.o.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.self_attn.o.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.self_attn.qkv.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.33.self_attn.qkv.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.inter_attn.kv.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.inter_attn.kv.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.inter_attn.o.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.inter_attn.o.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.inter_attn.q.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.inter_attn.q.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.mlp.fc_out.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.post_attn_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.post_attn_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.post_inter_attn_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.post_inter_attn_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.self_attn.o.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.self_attn.o.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.self_attn.qkv.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.34.self_attn.qkv.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.inter_attn.kv.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.inter_attn.kv.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.inter_attn.o.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.inter_attn.o.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.inter_attn.q.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.inter_attn.q.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.mlp.fc_out.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.post_attn_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.post_attn_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.post_inter_attn_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.post_inter_attn_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.self_attn.o.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.self_attn.o.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.self_attn.qkv.bias": "pytorch_model-00003-of-00003.bin",
"decoder.block.35.self_attn.qkv.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.4.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.inter_attn.kv.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.inter_attn.kv.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.inter_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.inter_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.inter_attn.q.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.inter_attn.q.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.4.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.post_inter_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.post_inter_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.4.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.inter_attn.kv.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.inter_attn.kv.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.inter_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.inter_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.inter_attn.q.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.inter_attn.q.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.5.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.post_inter_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.post_inter_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.5.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.6.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.6.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.6.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.6.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.6.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.6.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.6.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.6.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.6.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.6.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.6.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.6.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.6.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.6.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.6.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.6.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.6.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"decoder.block.6.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"decoder.block.7.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.7.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.7.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.8.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.8.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.inter_attn.kv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.inter_attn.kv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.inter_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.inter_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.inter_attn.q.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.inter_attn.q.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.mlp.fc_in.weight": "pytorch_model-00003-of-00003.bin",
"decoder.block.9.mlp.fc_out.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.post_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.post_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.post_inter_attn_layernorm.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.post_inter_attn_layernorm.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.self_attn.o.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.self_attn.o.weight": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.self_attn.qkv.bias": "pytorch_model-00002-of-00003.bin",
"decoder.block.9.self_attn.qkv.weight": "pytorch_model-00002-of-00003.bin",
"decoder.final_layernorm.bias": "pytorch_model-00003-of-00003.bin",
"decoder.final_layernorm.weight": "pytorch_model-00003-of-00003.bin",
"encoder.block.0.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.0.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.0.mlp.fc_in.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.0.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.0.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.0.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.0.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.0.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.0.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.0.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.1.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.1.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.1.mlp.fc_in.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.1.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.1.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.1.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.1.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.1.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.1.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.1.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.10.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.10.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.10.mlp.fc_in.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.10.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.10.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.10.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.10.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.10.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.10.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.10.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.11.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.11.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.11.mlp.fc_in.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.11.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.11.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.11.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.11.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.11.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.11.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.11.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.2.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.2.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.2.mlp.fc_in.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.2.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.2.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.2.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.2.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.2.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.2.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.2.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.3.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.3.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.3.mlp.fc_in.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.3.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.3.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.3.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.3.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.3.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.3.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.3.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.4.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.4.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.4.mlp.fc_in.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.4.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.4.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.4.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.4.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.4.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.4.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.4.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.5.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.5.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.5.mlp.fc_in.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.5.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.5.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.5.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.5.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.5.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.5.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.5.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.6.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.6.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.6.mlp.fc_in.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.6.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.6.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.6.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.6.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.6.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.6.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.6.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.7.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.7.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.7.mlp.fc_in.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.7.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.7.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.7.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.7.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.7.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.7.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.7.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.8.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.8.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.8.mlp.fc_in.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.8.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.8.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.8.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.8.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.8.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.8.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.8.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.9.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.9.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.9.mlp.fc_in.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.9.mlp.fc_out.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.9.post_attn_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.9.post_attn_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.9.self_attn.o.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.9.self_attn.o.weight": "pytorch_model-00001-of-00003.bin",
"encoder.block.9.self_attn.qkv.bias": "pytorch_model-00001-of-00003.bin",
"encoder.block.9.self_attn.qkv.weight": "pytorch_model-00001-of-00003.bin",
"encoder.final_layernorm.bias": "pytorch_model-00001-of-00003.bin",
"encoder.final_layernorm.weight": "pytorch_model-00001-of-00003.bin",
"lm_head.bias": "pytorch_model-00003-of-00003.bin",
"lm_head.weight": "pytorch_model-00001-of-00003.bin",
"shared_embedding.weight": "pytorch_model-00001-of-00003.bin"
}
}