aapot committed on
Commit
bab84ab
·
1 Parent(s): 752f635

Saving weights and logs of step 10000

Browse files
config.json CHANGED
@@ -1,12 +1,13 @@
1
  {
2
- "_name_or_path": "/researchdisk/roberta-large-finnish-wechsel",
3
  "architectures": [
4
- "RobertaModel"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "bos_token_id": 0,
8
  "classifier_dropout": null,
9
  "eos_token_id": 2,
 
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
  "hidden_size": 1024,
@@ -19,7 +20,7 @@
19
  "num_hidden_layers": 24,
20
  "pad_token_id": 1,
21
  "position_embedding_type": "absolute",
22
- "torch_dtype": "float64",
23
  "transformers_version": "4.13.0.dev0",
24
  "type_vocab_size": 1,
25
  "use_cache": true,
 
1
  {
2
+ "_name_or_path": "./",
3
  "architectures": [
4
+ "RobertaForMaskedLM"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "bos_token_id": 0,
8
  "classifier_dropout": null,
9
  "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
  "hidden_act": "gelu",
12
  "hidden_dropout_prob": 0.1,
13
  "hidden_size": 1024,
 
20
  "num_hidden_layers": 24,
21
  "pad_token_id": 1,
22
  "position_embedding_type": "absolute",
23
+ "torch_dtype": "float32",
24
  "transformers_version": "4.13.0.dev0",
25
  "type_vocab_size": 1,
26
  "use_cache": true,
events.out.tfevents.1639865567.t1v-n-8eba1090-w-0.1317510.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90044db50f3fbfe6af4870c6eb29f5f290fbda578d2fffb342644aff2fa5a1cf
3
+ size 1471447
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9a123577826ae147f24d257b4f877eaa05fd6b67d294bef5786cd5b174f7eb7
3
- size 1421452955
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed2451f3cd9fff0108476a448868d5478995e86c8bac4935e33645d62109b5de
3
+ size 1421662309
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:741983451ebd3f767044f9f28f8ad4621e946e22b9dac19ea0612e304300c307
3
+ size 1421807019
run_mlm_flax.py CHANGED
@@ -164,6 +164,10 @@ class ModelArguments:
164
  "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
165
  },
166
  )
 
 
 
 
167
 
168
 
169
  @dataclass
@@ -608,7 +612,7 @@ def main():
608
 
609
  if model_args.model_name_or_path:
610
  model = FlaxAutoModelForMaskedLM.from_pretrained(
611
- model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
612
  )
613
  else:
614
  model = FlaxAutoModelForMaskedLM.from_config(
 
164
  "help": "Floating-point format in which the model weights should be initialized and trained. Choose one of `[float32, float16, bfloat16]`."
165
  },
166
  )
167
+ from_pytorch: bool = field(
168
+ default=False,
169
+ metadata={"help": "Whether to use Pytorch model checkpoint for weight initialization or not."},
170
+ )
171
 
172
 
173
  @dataclass
 
612
 
613
  if model_args.model_name_or_path:
614
  model = FlaxAutoModelForMaskedLM.from_pretrained(
615
+ model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype), from_pt=model_args.from_pytorch
616
  )
617
  else:
618
  model = FlaxAutoModelForMaskedLM.from_config(
run_wechsel.py CHANGED
@@ -1,10 +1,10 @@
1
  import torch
2
- from transformers import AutoModel, AutoTokenizer, FlaxAutoModel
3
  from datasets import load_dataset
4
  from wechsel import WECHSEL, load_embeddings
5
 
6
  source_tokenizer = AutoTokenizer.from_pretrained("roberta-large")
7
- model = AutoModel.from_pretrained("roberta-large")
8
 
9
  target_tokenizer = AutoTokenizer.from_pretrained("./")
10
 
@@ -20,9 +20,8 @@ target_embeddings, info = wechsel.apply(
20
  model.get_input_embeddings().weight.detach().numpy(),
21
  )
22
 
23
- model.get_input_embeddings().weight.data = torch.from_numpy(target_embeddings)
24
-
25
  model.save_pretrained("./")
26
 
27
- flax_model = FlaxAutoModel.from_pretrained("./", from_pt=True)
28
- flax_model.save_pretrained("./")
 
1
  import torch
2
+ from transformers import AutoModelForMaskedLM, AutoTokenizer, FlaxAutoModelForMaskedLM
3
  from datasets import load_dataset
4
  from wechsel import WECHSEL, load_embeddings
5
 
6
  source_tokenizer = AutoTokenizer.from_pretrained("roberta-large")
7
+ model = AutoModelForMaskedLM.from_pretrained("roberta-large")
8
 
9
  target_tokenizer = AutoTokenizer.from_pretrained("./")
10
 
 
20
  model.get_input_embeddings().weight.detach().numpy(),
21
  )
22
 
23
+ model.get_input_embeddings().weight.data = torch.from_numpy(target_embeddings).to(torch.float32)
 
24
  model.save_pretrained("./")
25
 
26
+ # flax_model = FlaxAutoModelForMaskedLM.from_pretrained("./", from_pt=True)
27
+ # flax_model.save_pretrained("./")
start_train.sh CHANGED
@@ -5,9 +5,10 @@ export USE_TORCH=0
5
  python3 run_mlm_flax.py \
6
  --output_dir="./" \
7
  --model_name_or_path="./" \
 
8
  --config_name="./" \
9
  --tokenizer_name="./" \
10
- --dataset_filepath="/researchdisk/training_dataset_full" \
11
  --max_seq_length="128" \
12
  --pad_to_max_length \
13
  --preprocessing_num_workers="64" \
@@ -25,5 +26,6 @@ python3 run_mlm_flax.py \
25
  --eval_steps="10000" \
26
  --logging_steps="500" \
27
  --dtype="bfloat16" \
 
28
  --push_to_hub \
29
  --hub_model_id="Finnish-NLP/roberta-large-wechsel-finnish"
 
5
  python3 run_mlm_flax.py \
6
  --output_dir="./" \
7
  --model_name_or_path="./" \
8
+ --from_pytorch \
9
  --config_name="./" \
10
  --tokenizer_name="./" \
11
+ --dataset_filepath="/researchdisk/training_dataset_full_deduplicated" \
12
  --max_seq_length="128" \
13
  --pad_to_max_length \
14
  --preprocessing_num_workers="64" \
 
26
  --eval_steps="10000" \
27
  --logging_steps="500" \
28
  --dtype="bfloat16" \
29
+ --adafactor \
30
  --push_to_hub \
31
  --hub_model_id="Finnish-NLP/roberta-large-wechsel-finnish"