Commit 5b4c301 by aapot
Parent(s): 2cc762b

Add 50k train step model

Files changed:
- config.gin (+1, -4)
- config.json (+2, -2)
- convert_t5x_checkpoint_to_flax.py (+44, -43)
- flax_model.msgpack (+1, -1)
- model-info.txt (+0, -0)
- pytorch_model.bin (+1, -1)
- small_nl16.gin (+1, -1)
- small_nl16_pretrain.gin (+0, -1)
- train/{events.out.tfevents.1661710468.t1v-n-12f94ad0-w-0.60675.0.v2 → events.out.tfevents.1664039578.t1v-n-12f94ad0-w-0.2066226.0.v2} (+2, -2)
- training_eval/pretrain_finnish/{events.out.tfevents.1661710468.t1v-n-12f94ad0-w-0.60675.1.v2 → events.out.tfevents.1664039578.t1v-n-12f94ad0-w-0.2066226.1.v2} (+2, -2)
config.gin CHANGED
@@ -2,7 +2,7 @@ from __gin__ import dynamic_registration
 import __main__ as train_script
 import seqio
 from t5x import adafactor
-from t5x.examples.
+from t5x.examples.t5 import network
 from t5x import gin_utils
 from t5x import models
 from t5x import partitioning
@@ -116,8 +116,6 @@ network.T5Config.mlp_dim = 2048
 network.T5Config.num_decoder_layers = 16
 network.T5Config.num_encoder_layers = 16
 network.T5Config.num_heads = 8
-network.T5Config.remat_policy = 'minimal'
-network.T5Config.scan_layers = True
 network.T5Config.vocab_size = 32128
 
 # Parameters for train_script.train:
@@ -135,7 +133,6 @@ train_script.train.total_steps = %TRAIN_STEPS
 train_script.train.train_dataset_cfg = @train/utils.DatasetConfig()
 train_script.train.train_eval_dataset_cfg = @train_eval/utils.DatasetConfig()
 train_script.train.trainer_cls = @trainer.Trainer
-train_script.train.use_gda = False
 train_script.train.use_hardware_rng = %USE_HARDWARE_RNG
 
 # Parameters for trainer.Trainer:
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "./",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
@@ -26,7 +26,7 @@
   "relative_attention_num_buckets": 32,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.
+  "transformers_version": "4.22.1",
   "use_cache": true,
   "vocab_size": 32128
 }
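The updated config declares "T5ForConditionalGeneration", and the commit refreshes both flax_model.msgpack and pytorch_model.bin, so the checkpoint should load through either backend. A minimal loading sketch, assuming the repository is checked out locally (the "./" path mirrors the "_name_or_path" value above):

    # Hedged sketch: load the converted 50k-step checkpoint from the local repo.
    from transformers import FlaxT5ForConditionalGeneration, T5ForConditionalGeneration

    pt_model = T5ForConditionalGeneration.from_pretrained("./")        # reads pytorch_model.bin
    flax_model = FlaxT5ForConditionalGeneration.from_pretrained("./")  # reads flax_model.msgpack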
convert_t5x_checkpoint_to_flax.py CHANGED
@@ -12,36 +12,37 @@ def convert_t5x_checkpoint_to_flax(t5x_checkpoint_path, config_name, flax_dump_f
     flax_model = FlaxT5ForConditionalGeneration(config=config)
     t5x_model = checkpoints.load_t5x_checkpoint(t5x_checkpoint_path)
 
-    split_mlp_wi = "wi_0" in t5x_model["target"]["encoder"]["
+    split_mlp_wi = "wi_0" in t5x_model["target"]["encoder"]["layers_0"]["mlp"]
 
     # Encoder
     for layer_index in range(config.num_layers):
+        layer_name = f"layers_{str(layer_index)}"
 
         # Self-Attention
-        t5x_attention_key = t5x_model["target"]["encoder"][
-        t5x_attention_out = t5x_model["target"]["encoder"][
-        t5x_attention_query = t5x_model["target"]["encoder"][
-        t5x_attention_value = t5x_model["target"]["encoder"][
+        t5x_attention_key = t5x_model["target"]["encoder"][layer_name]["attention"]["key"]["kernel"]
+        t5x_attention_out = t5x_model["target"]["encoder"][layer_name]["attention"]["out"]["kernel"]
+        t5x_attention_query = t5x_model["target"]["encoder"][layer_name]["attention"]["query"]["kernel"]
+        t5x_attention_value = t5x_model["target"]["encoder"][layer_name]["attention"]["value"]["kernel"]
 
         ## Layer Normalization
-        t5x_attention_layer_norm = t5x_model["target"]["encoder"][
+        t5x_attention_layer_norm = t5x_model["target"]["encoder"][layer_name]["pre_attention_layer_norm"]["scale"]
 
         if split_mlp_wi:
-            t5x_mlp_wi_0 = t5x_model["target"]["encoder"][
-            t5x_mlp_wi_1 = t5x_model["target"]["encoder"][
+            t5x_mlp_wi_0 = t5x_model["target"]["encoder"][layer_name]["mlp"]["wi_0"]["kernel"]
+            t5x_mlp_wi_1 = t5x_model["target"]["encoder"][layer_name]["mlp"]["wi_1"]["kernel"]
         else:
-            t5x_mlp_wi = t5x_model["target"]["encoder"][
+            t5x_mlp_wi = t5x_model["target"]["encoder"][layer_name]["mlp"]["wi"]["kernel"]
 
-        t5x_mlp_wo = t5x_model["target"]["encoder"][
+        t5x_mlp_wo = t5x_model["target"]["encoder"][layer_name]["mlp"]["wo"]["kernel"]
 
         ## Layer Normalization
-        t5x_mlp_layer_norm = t5x_model["target"]["encoder"][
+        t5x_mlp_layer_norm = t5x_model["target"]["encoder"][layer_name]["pre_mlp_layer_norm"]["scale"]
 
         # Assigning
-        flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["k"]["kernel"] = t5x_attention_key
-        flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["o"]["kernel"] = t5x_attention_out
-        flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["q"]["kernel"] = t5x_attention_query
-        flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["v"]["kernel"] = t5x_attention_value
+        flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["k"]["kernel"] = t5x_attention_key
+        flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["o"]["kernel"] = t5x_attention_out
+        flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["q"]["kernel"] = t5x_attention_query
+        flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["v"]["kernel"] = t5x_attention_value
 
         flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["layer_norm"]["weight"] = t5x_attention_layer_norm
 
@@ -55,8 +56,8 @@ def convert_t5x_checkpoint_to_flax(t5x_checkpoint_path, config_name, flax_dump_f
         flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["1"]["layer_norm"]["weight"] = t5x_mlp_layer_norm
 
     # Only for layer 0:
-    t5x_encoder_rel_embedding = t5x_model["target"]["encoder"]["
-    flax_model.params["encoder"]["block"]["0"]["layer"]["0"]["SelfAttention"]["relative_attention_bias"]["embedding"] = t5x_encoder_rel_embedding
+    t5x_encoder_rel_embedding = t5x_model["target"]["encoder"]["relpos_bias"]["rel_embedding"].T
+    flax_model.params["encoder"]["block"]["0"]["layer"]["0"]["SelfAttention"]["relative_attention_bias"]["embedding"] = t5x_encoder_rel_embedding
 
     # Assigning
    t5x_encoder_norm = t5x_model["target"]["encoder"]["encoder_norm"]["scale"]
@@ -64,49 +65,50 @@ def convert_t5x_checkpoint_to_flax(t5x_checkpoint_path, config_name, flax_dump_f
 
     # Decoder
     for layer_index in range(config.num_decoder_layers):
+        layer_name = f"layers_{str(layer_index)}"
 
         # Self-Attention
-        t5x_attention_key = t5x_model["target"]["decoder"][
-        t5x_attention_out = t5x_model["target"]["decoder"][
-        t5x_attention_query = t5x_model["target"]["decoder"][
-        t5x_attention_value = t5x_model["target"]["decoder"][
+        t5x_attention_key = t5x_model["target"]["decoder"][layer_name]["self_attention"]["key"]["kernel"]
+        t5x_attention_out = t5x_model["target"]["decoder"][layer_name]["self_attention"]["out"]["kernel"]
+        t5x_attention_query = t5x_model["target"]["decoder"][layer_name]["self_attention"]["query"]["kernel"]
+        t5x_attention_value = t5x_model["target"]["decoder"][layer_name]["self_attention"]["value"]["kernel"]
 
         ## Layer Normalization
-        t5x_pre_attention_layer_norm = t5x_model["target"]["decoder"][
+        t5x_pre_attention_layer_norm = t5x_model["target"]["decoder"][layer_name]["pre_self_attention_layer_norm"]["scale"]
 
         # Encoder-Decoder-Attention
-        t5x_enc_dec_attention_key = t5x_model["target"]["decoder"][
-        t5x_enc_dec_attention_out = t5x_model["target"]["decoder"][
-        t5x_enc_dec_attention_query = t5x_model["target"]["decoder"][
-        t5x_enc_dec_attention_value = t5x_model["target"]["decoder"][
+        t5x_enc_dec_attention_key = t5x_model["target"]["decoder"][layer_name]["encoder_decoder_attention"]["key"]["kernel"]
+        t5x_enc_dec_attention_out = t5x_model["target"]["decoder"][layer_name]["encoder_decoder_attention"]["out"]["kernel"]
+        t5x_enc_dec_attention_query = t5x_model["target"]["decoder"][layer_name]["encoder_decoder_attention"]["query"]["kernel"]
+        t5x_enc_dec_attention_value = t5x_model["target"]["decoder"][layer_name]["encoder_decoder_attention"]["value"]["kernel"]
 
         ## Layer Normalization
-        t5x_cross_layer_norm = t5x_model["target"]["decoder"][
+        t5x_cross_layer_norm = t5x_model["target"]["decoder"][layer_name]["pre_cross_attention_layer_norm"]["scale"]
 
         # MLP
         if split_mlp_wi:
-            t5x_mlp_wi_0 = t5x_model["target"]["decoder"][
-            t5x_mlp_wi_1 = t5x_model["target"]["decoder"][
+            t5x_mlp_wi_0 = t5x_model["target"]["decoder"][layer_name]["mlp"]["wi_0"]["kernel"]
+            t5x_mlp_wi_1 = t5x_model["target"]["decoder"][layer_name]["mlp"]["wi_1"]["kernel"]
         else:
-            t5x_mlp_wi = t5x_model["target"]["decoder"][
+            t5x_mlp_wi = t5x_model["target"]["decoder"][layer_name]["mlp"]["wi"]["kernel"]
 
-        t5x_mlp_wo = t5x_model["target"]["decoder"][
+        t5x_mlp_wo = t5x_model["target"]["decoder"][layer_name]["mlp"]["wo"]["kernel"]
 
         ## Layer Normalization
-        tx5_mlp_layer_norm = t5x_model["target"]["decoder"][
+        tx5_mlp_layer_norm = t5x_model["target"]["decoder"][layer_name]["pre_mlp_layer_norm"]["scale"]
 
         # Assigning
-        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["k"]["kernel"] = t5x_attention_key
-        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["o"]["kernel"] = t5x_attention_out
-        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["q"]["kernel"] = t5x_attention_query
-        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["v"]["kernel"] = t5x_attention_value
+        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["k"]["kernel"] = t5x_attention_key
+        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["o"]["kernel"] = t5x_attention_out
+        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["q"]["kernel"] = t5x_attention_query
+        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["v"]["kernel"] = t5x_attention_value
 
         flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["layer_norm"]["weight"] = t5x_pre_attention_layer_norm
 
-        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["k"]["kernel"] = t5x_enc_dec_attention_key
-        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["o"]["kernel"] = t5x_enc_dec_attention_out
-        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["q"]["kernel"] = t5x_enc_dec_attention_query
-        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["v"]["kernel"] = t5x_enc_dec_attention_value
+        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["k"]["kernel"] = t5x_enc_dec_attention_key
+        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["o"]["kernel"] = t5x_enc_dec_attention_out
+        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["q"]["kernel"] = t5x_enc_dec_attention_query
+        flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["v"]["kernel"] = t5x_enc_dec_attention_value
 
         flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["layer_norm"]["weight"] = t5x_cross_layer_norm
 
@@ -125,8 +127,8 @@ def convert_t5x_checkpoint_to_flax(t5x_checkpoint_path, config_name, flax_dump_f
     flax_model.params["decoder"]["final_layer_norm"]["weight"] = tx5_decoder_norm
 
     # Only for layer 0:
-    t5x_decoder_rel_embedding = t5x_model["target"]["decoder"]["
-    flax_model.params["decoder"]["block"]["0"]["layer"]["0"]["SelfAttention"]["relative_attention_bias"]["embedding"] = t5x_decoder_rel_embedding
+    t5x_decoder_rel_embedding = t5x_model["target"]["decoder"]["relpos_bias"]["rel_embedding"].T
+    flax_model.params["decoder"]["block"]["0"]["layer"]["0"]["SelfAttention"]["relative_attention_bias"]["embedding"] = t5x_decoder_rel_embedding
 
     # Token Embeddings
     tx5_token_embeddings = t5x_model["target"]["token_embedder"]["embedding"]
@@ -158,5 +160,4 @@ if __name__ == "__main__":
     args = parser.parse_args()
     convert_t5x_checkpoint_to_flax(args.t5x_checkpoint_path, args.config_name, args.flax_dump_folder_path)
     convert_flax_to_pytorch(args.flax_dump_folder_path, args.flax_dump_folder_path)
-
 
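For orientation, the __main__ block above chains the two conversion steps: T5X checkpoint → Flax params → PyTorch weights. A sketch of an equivalent direct call, assuming a step-50000 T5X checkpoint directory (the checkpoint path is hypothetical; only the function signatures come from the script):

    # Hedged sketch mirroring the script's __main__ block; the checkpoint
    # path below is a placeholder, not a path from this repository.
    from convert_t5x_checkpoint_to_flax import (
        convert_flax_to_pytorch,
        convert_t5x_checkpoint_to_flax,
    )

    convert_t5x_checkpoint_to_flax(
        t5x_checkpoint_path="/path/to/t5x/checkpoint_50000",  # hypothetical
        config_name="./",            # directory holding config.json
        flax_dump_folder_path="./",  # writes flax_model.msgpack here
    )
    convert_flax_to_pytorch("./", "./")  # writes pytorch_model.bin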
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d82a40e2a628be30c18aec9a0e0a7dd7e73eef7ec43ec213ad0c883f616c1b69
 size 735762207
model-info.txt CHANGED
The diff for this file is too large to render.
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:94b61f6dc4f056da83ffa8f0786219a3eb1a297e88096524ed6a2c003200249b
 size 735867349
small_nl16.gin CHANGED
@@ -1,7 +1,7 @@
 # T5.1.1 Efficient small nl16 model.
 
 import seqio
-include 't5x/examples/
+include 't5x/examples/t5/t5_1_1/base.gin' # imports vocab, optimizer and model.
 
 # ------------------- Network specification overrides --------------------------
 network.Transformer.config = @network.T5Config()
small_nl16_pretrain.gin CHANGED
@@ -12,7 +12,6 @@ include 't5x/configs/runs/pretrain.gin'
 # ------------------- Training specification overrides --------------------------
 train_script.train:
   eval_period = 10000
-  use_gda = False
 
 utils.SaveCheckpointConfig:
   period = 10000
train/{events.out.tfevents.1661710468.t1v-n-12f94ad0-w-0.60675.0.v2 → events.out.tfevents.1664039578.t1v-n-12f94ad0-w-0.2066226.0.v2} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:efe928bebef1c949dca5e29eec1f2b4b26d042761ef3f6aa77f029a6d9538624
+size 16734
training_eval/pretrain_finnish/{events.out.tfevents.1661710468.t1v-n-12f94ad0-w-0.60675.1.v2 → events.out.tfevents.1664039578.t1v-n-12f94ad0-w-0.2066226.1.v2} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:cb328b235587d5a36430d35207227a5c66091fc9e2b52bcb127b089b112215ca
+size 9244