{ "metadata": { "total_size": 9524621312 }, "weight_map": { "encoder/block/0/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/0/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/0/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/0/layer/0/SelfAttention/relative_attention_bias/embedding": "flax_model-00001-of-00002.msgpack", "encoder/block/0/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/0/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/0/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/0/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/0/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/0/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/1/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/1/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/1/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/1/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/1/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/1/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/1/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/1/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/1/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/10/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/10/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/10/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/10/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/10/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/10/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/10/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/10/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/10/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/11/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/11/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/11/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/11/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/11/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/11/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/11/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/11/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/11/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/12/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/12/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/12/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/12/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/12/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/12/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/12/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/12/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/12/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/13/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/13/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/13/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/13/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/13/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/13/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/13/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/13/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/13/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/14/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/14/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/14/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/14/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/14/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/14/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/14/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/14/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/14/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/15/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/15/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/15/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/15/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/15/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/15/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/15/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/15/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/15/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/16/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/16/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/16/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/16/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/16/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/16/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/16/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/16/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/16/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/17/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/17/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/17/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/17/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/17/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/17/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/17/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/17/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/17/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/18/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/18/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/18/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/18/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/18/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/18/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/18/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/18/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/18/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/19/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/19/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/19/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/19/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/19/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/19/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/19/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/19/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/19/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/2/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/2/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/2/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/2/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/2/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", "encoder/block/2/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/2/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", "encoder/block/2/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/2/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/20/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/20/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/20/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/20/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/20/layer/0/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/20/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/20/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/20/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/20/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/21/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/21/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/21/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/21/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/21/layer/0/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/21/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/21/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/21/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/21/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/22/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/22/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/22/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/22/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/22/layer/0/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/22/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/22/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/22/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/22/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/23/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/23/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/23/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/23/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/23/layer/0/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/23/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/23/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/23/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/23/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/3/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/3/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/3/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/3/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/3/layer/0/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/3/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/3/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/3/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/3/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/4/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/4/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/4/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/4/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/4/layer/0/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/4/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/4/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/4/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/4/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/5/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/5/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/5/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/5/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/5/layer/0/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/5/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/5/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/5/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/5/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/6/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/6/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/6/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/6/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/6/layer/0/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/6/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/6/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/6/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/6/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/7/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/7/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/7/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/7/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/7/layer/0/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/7/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/7/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/7/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/7/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/8/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/8/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/8/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/8/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/8/layer/0/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/8/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/8/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/8/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/8/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/9/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/9/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/9/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/9/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/9/layer/0/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/block/9/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/9/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/9/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", "encoder/block/9/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", "encoder/final_layer_norm/weight": "flax_model-00002-of-00002.msgpack", "shared/embedding": "flax_model-00002-of-00002.msgpack" } }