Add TF weights

Browse files

Fix weights after [#17785](https://github.com/huggingface/transformers/commit/abc400b06a8ab26cd438b6e9add3aad082ffc48f)

Files changed (4) hide show

tf_model-00001-of-00003.h5 +3 -0
tf_model-00002-of-00003.h5 +3 -0
tf_model-00003-of-00003.h5 +3 -0
tf_model.h5.index.json +651 -0

tf_model-00001-of-00003.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f1abdee1f888a33d9366f0c4c263c3cdd0c4aa6b88007d6fb8e422b7d242c91
+size 9975229760

tf_model-00002-of-00003.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6044db30527267d1bc072d816fa64cbc81efd0b149dcbbd1c41b245b997ad384
+size 9858987768

tf_model-00003-of-00003.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65aea614891119664aa51188867b5a11e89f82d14849b1c23935b7572e4e70c2
+size 5873414216

tf_model.h5.index.json ADDED Viewed

	@@ -0,0 +1,651 @@

+{
+  "metadata": {
+    "total_size": 25706946560
+  },
+  "weight_map": {
+    "tfopt_for_causal_lm/model/decoder/embed_positions/weight:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/embed_tokens/weight:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/fc1/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/fc1/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/fc2/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/fc2/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/k_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/k_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/out_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/out_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/q_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/q_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/v_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/v_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/fc1/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/fc1/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/fc2/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/fc2/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/k_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/k_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/out_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/out_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/q_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/q_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/v_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/v_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/fc1/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/fc1/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/fc2/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/fc2/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/k_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/k_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/out_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/out_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/q_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/q_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/v_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/v_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/fc1/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/fc1/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/fc2/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/fc2/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/k_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/k_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/out_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/out_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/q_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/q_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/v_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/v_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/fc1/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/fc1/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/fc2/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/fc2/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/k_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/k_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/out_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/out_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/q_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/q_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/v_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/v_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/fc1/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/fc1/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/fc2/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/fc2/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/k_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/k_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/out_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/out_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/q_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/q_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/v_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/v_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/fc1/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/fc1/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/fc2/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/fc2/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/k_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/k_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/out_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/out_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/q_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/q_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/v_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/v_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/fc2/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/fc2/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/final_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/final_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/fc2/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/fc2/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/final_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/final_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/fc2/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/fc2/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/final_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/final_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/fc2/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/fc2/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/final_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/final_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/fc2/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/fc2/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/final_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/final_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/fc1/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/fc1/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/fc2/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/fc2/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/k_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/k_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/out_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/out_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/q_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/q_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/v_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/v_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/fc2/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/fc2/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/final_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/final_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/fc2/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/fc2/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/final_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/final_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/fc2/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/fc2/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/final_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/final_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/fc2/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/fc2/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/final_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/final_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/fc2/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/fc2/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/final_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/final_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/fc2/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/fc2/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/final_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/final_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/fc2/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/fc2/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/final_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/final_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/fc2/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/fc2/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/final_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/final_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/fc2/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/fc2/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/final_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/final_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/fc2/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/fc2/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/final_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/final_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/fc1/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/fc1/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/fc2/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/fc2/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/k_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/k_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/out_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/out_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/q_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/q_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/v_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/v_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/fc1/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/fc1/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/fc2/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/fc2/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/final_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/final_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/k_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/k_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/out_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/out_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/q_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/q_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/v_proj/bias:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/v_proj/kernel:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn_layer_norm/beta:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/fc1/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/fc1/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/fc2/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/fc2/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/final_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/final_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/k_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/k_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/out_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/out_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/q_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/q_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/v_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/v_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/fc1/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/fc1/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/fc2/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/fc2/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/final_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/final_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/k_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/k_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/out_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/out_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/q_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/q_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/v_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/v_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/fc1/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/fc1/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/fc2/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/fc2/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/final_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/final_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/k_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/k_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/out_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/out_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/q_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/q_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/v_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/v_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/fc1/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/fc1/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/fc2/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/fc2/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/final_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/final_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/k_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/k_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/out_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/out_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/q_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/q_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/v_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/v_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/fc1/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/fc1/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/fc2/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/fc2/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/final_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/final_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/k_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/k_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/out_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/out_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/q_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/q_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/v_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/v_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/fc1/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/fc1/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/fc2/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/fc2/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/final_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/final_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/k_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/k_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/out_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/out_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/q_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/q_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/v_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/v_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/fc1/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/fc1/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/fc2/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/fc2/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/final_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/final_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/k_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/k_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/out_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/out_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/q_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/q_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/v_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/v_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/fc1/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/fc1/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/fc2/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/fc2/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/final_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/final_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/k_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/k_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/out_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/out_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/q_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/q_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/v_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/v_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/fc1/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/fc1/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/fc2/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/fc2/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/final_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/final_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/k_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/k_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/out_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/out_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/q_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/q_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/v_proj/bias:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/v_proj/kernel:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn_layer_norm/beta:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/fc1/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/fc1/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/fc2/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/fc2/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/k_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/k_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/out_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/out_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/q_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/q_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/v_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/v_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/fc1/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/fc1/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/fc2/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/fc2/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/k_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/k_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/out_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/out_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/q_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/q_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/v_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/v_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/fc1/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/fc1/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/fc2/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/fc2/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/k_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/k_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/out_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/out_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/q_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/q_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/v_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/v_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/fc1/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/fc1/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/fc2/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/fc2/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/k_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/k_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/out_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/out_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/q_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/q_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/v_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/v_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/fc1/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/fc1/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/fc2/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/fc2/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/k_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/k_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/out_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/out_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/q_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/q_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/v_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/v_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/fc1/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/fc1/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/fc2/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/fc2/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/final_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/final_layer_norm/gamma:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/k_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/k_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/out_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/out_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/q_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/q_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/v_proj/bias:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/v_proj/kernel:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn_layer_norm/beta:0": "tf_model-00001-of-00003.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00003.h5"
+  }
+}