diff --git a/eng-nah-svo-translation/README.md b/eng-nah-svo-translation/README.md index ce0eb17a7e3f64e535bde10aecf80f6e420ae0d5..595cd1c823e82d0fb941138ae5d1c2db2bcf9fcf 100644 --- a/eng-nah-svo-translation/README.md +++ b/eng-nah-svo-translation/README.md @@ -16,8 +16,9 @@ should probably proofread and complete it, then remove this comment. --> This model was trained from scratch on an unknown dataset. It achieves the following results on the evaluation set: -- Loss: 0.3041 -- Bleu: 0.0 +- Loss: 0.2640 +- Bleu: 0.0231 +- Chrf: 26.4891 ## Model description diff --git a/eng-nah-svo-translation/checkpoint-228/optimizer.pt b/eng-nah-svo-translation/checkpoint-228/optimizer.pt index 367852daa5323185ad585a47eaf94868eb9f51bc..731d033565bbb414bd9c0752c8e7822a64441f5e 100644 --- a/eng-nah-svo-translation/checkpoint-228/optimizer.pt +++ b/eng-nah-svo-translation/checkpoint-228/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5190a17f19275c1af2d846140e4909e9d083e4c4f44fbfb42acad0530b174290 +oid sha256:b8c16511ba92e2355ca80c70207aa4e9e2f7fbdd9342fc1670e5380107108d33 size 597088389 diff --git a/eng-nah-svo-translation/checkpoint-228/pytorch_model.bin b/eng-nah-svo-translation/checkpoint-228/pytorch_model.bin index 6817a00807fe7434691386a71277c0602a3079f3..13aa597fce04805efbaf720d316390e64f4060b4 100644 --- a/eng-nah-svo-translation/checkpoint-228/pytorch_model.bin +++ b/eng-nah-svo-translation/checkpoint-228/pytorch_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:140fc7491625eee2830a2bfde50a63b78cd6939bed1b4abeb189be50ab10980f +oid sha256:37bcbd2252fb522e3c5ed42fb094b45769438bea8b0c374cc08a19277b5576cc size 298763205 diff --git a/eng-nah-svo-translation/checkpoint-228/trainer_state.json b/eng-nah-svo-translation/checkpoint-228/trainer_state.json index 5b8815d4d0a1ead69943d491ff585e2e38488a38..85c3124b21752a2e98fb61253c068c3d94a6ab51 100644 --- a/eng-nah-svo-translation/checkpoint-228/trainer_state.json +++ b/eng-nah-svo-translation/checkpoint-228/trainer_state.json @@ -12,7 +12,7 @@ "max_steps": 684, "num_train_epochs": 3, "save_steps": 500, - "total_flos": 50445004308480.0, + "total_flos": 52376151785472.0, "trial_name": null, "trial_params": null } diff --git a/eng-nah-svo-translation/checkpoint-228/training_args.bin b/eng-nah-svo-translation/checkpoint-228/training_args.bin index 9994891ab867aee0045265c08dc1422280fe4490..bc1c45f9050b16d948a8f770690c1c16f5b66a30 100644 --- a/eng-nah-svo-translation/checkpoint-228/training_args.bin +++ b/eng-nah-svo-translation/checkpoint-228/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:06a45c37f0e7284c095b4b503ffa422c6b8f2791990e008912c3c06d6db7d538 +oid sha256:e056894aace19b2ad0614ec4c70099b961a9aaa0c0c2b1edfbe8e70c015294d2 size 4219 diff --git a/eng-nah-svo-translation/checkpoint-456/optimizer.pt b/eng-nah-svo-translation/checkpoint-456/optimizer.pt index 0a13ad28b840b4765c6190add7bcfe58651153ef..7e7dd0eaf15d89ca16f3a94f75a5661a6827ce95 100644 --- a/eng-nah-svo-translation/checkpoint-456/optimizer.pt +++ b/eng-nah-svo-translation/checkpoint-456/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7de8c09d2c5d4f0096b953c55e83345013e4bcfb30df1f8899d8a579b549de65 +oid sha256:4f2fb78ceb69469a9888669142a52a24a22dfc567e2e35859abb89dcefc5c87f size 597088389 diff --git a/eng-nah-svo-translation/checkpoint-456/pytorch_model.bin b/eng-nah-svo-translation/checkpoint-456/pytorch_model.bin index 7f4475ff8a860ad9d05afc5340714285ace04577..3c35017098b5c3d3e1d43d8f4b4e2373bb0ea919 100644 --- a/eng-nah-svo-translation/checkpoint-456/pytorch_model.bin +++ b/eng-nah-svo-translation/checkpoint-456/pytorch_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ef15720547c6e860442101e376eaa81dc981ef8521399ce751cfaf29fc229213 +oid sha256:46c8cd75372092eb227b14656a6abb4aede43d054867626ed0b59437dba56b84 size 298763205 diff --git a/eng-nah-svo-translation/checkpoint-456/trainer_state.json b/eng-nah-svo-translation/checkpoint-456/trainer_state.json index 1d8cdbc8c30e3bac7473146876de9199049ac3b0..cfb44e6f5d588e13471b5de0d1289d0c5409197d 100644 --- a/eng-nah-svo-translation/checkpoint-456/trainer_state.json +++ b/eng-nah-svo-translation/checkpoint-456/trainer_state.json @@ -12,7 +12,7 @@ "max_steps": 684, "num_train_epochs": 3, "save_steps": 500, - "total_flos": 100891067940864.0, + "total_flos": 104753362894848.0, "trial_name": null, "trial_params": null } diff --git a/eng-nah-svo-translation/checkpoint-456/training_args.bin b/eng-nah-svo-translation/checkpoint-456/training_args.bin index 9994891ab867aee0045265c08dc1422280fe4490..bc1c45f9050b16d948a8f770690c1c16f5b66a30 100644 --- a/eng-nah-svo-translation/checkpoint-456/training_args.bin +++ b/eng-nah-svo-translation/checkpoint-456/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:06a45c37f0e7284c095b4b503ffa422c6b8f2791990e008912c3c06d6db7d538 +oid sha256:e056894aace19b2ad0614ec4c70099b961a9aaa0c0c2b1edfbe8e70c015294d2 size 4219 diff --git a/eng-nah-svo-translation/checkpoint-684/optimizer.pt b/eng-nah-svo-translation/checkpoint-684/optimizer.pt index 1841d2af2b5d6fd70b37fcbe0ee34ce7dcd2760b..a97d5ecb97e4cd85f70ba786ddc66aea39423b88 100644 --- a/eng-nah-svo-translation/checkpoint-684/optimizer.pt +++ b/eng-nah-svo-translation/checkpoint-684/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:19578c8bd50d0d5922742f052fdb7391eec55dd2ab281226cdcae8cd78356dd9 +oid sha256:330d85b27bca67820bebe7c7bd87bbfd73edfde6172890ffd2c8298d597a3c57 size 597088389 diff --git a/eng-nah-svo-translation/checkpoint-684/pytorch_model.bin b/eng-nah-svo-translation/checkpoint-684/pytorch_model.bin index 8cfef71fafeaa52d0e82916072a440084dce6704..afcd596c8b8632dcae71b3dc25fdd3e7a2ea3bb4 100644 --- a/eng-nah-svo-translation/checkpoint-684/pytorch_model.bin +++ b/eng-nah-svo-translation/checkpoint-684/pytorch_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f62408ea2e275aed7548ea31e37c534dd035154b3999483e0ed481c2dd1f9e2 +oid sha256:174c26a830bb38c835122d896e568f06abcf2e72976220ff513f32f769be682a size 298763205 diff --git a/eng-nah-svo-translation/checkpoint-684/trainer_state.json b/eng-nah-svo-translation/checkpoint-684/trainer_state.json index e96724abfb0fd39ea715ff1d174d2912aed6c2dd..74a625dfe2e14489496fc7f4b120721df0419c53 100644 --- a/eng-nah-svo-translation/checkpoint-684/trainer_state.json +++ b/eng-nah-svo-translation/checkpoint-684/trainer_state.json @@ -11,7 +11,7 @@ { "epoch": 2.19, "learning_rate": 5.380116959064328e-06, - "loss": 0.2276, + "loss": 0.1428, "step": 500 } ], @@ -19,7 +19,7 @@ "max_steps": 684, "num_train_epochs": 3, "save_steps": 500, - "total_flos": 151058529386496.0, + "total_flos": 156851971817472.0, "trial_name": null, "trial_params": null } diff --git a/eng-nah-svo-translation/checkpoint-684/training_args.bin b/eng-nah-svo-translation/checkpoint-684/training_args.bin index 9994891ab867aee0045265c08dc1422280fe4490..bc1c45f9050b16d948a8f770690c1c16f5b66a30 100644 --- a/eng-nah-svo-translation/checkpoint-684/training_args.bin +++ b/eng-nah-svo-translation/checkpoint-684/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:06a45c37f0e7284c095b4b503ffa422c6b8f2791990e008912c3c06d6db7d538 +oid sha256:e056894aace19b2ad0614ec4c70099b961a9aaa0c0c2b1edfbe8e70c015294d2 size 4219 diff --git a/eng-nah-svo-translation/pytorch_model.bin b/eng-nah-svo-translation/pytorch_model.bin index 8cfef71fafeaa52d0e82916072a440084dce6704..afcd596c8b8632dcae71b3dc25fdd3e7a2ea3bb4 100644 --- a/eng-nah-svo-translation/pytorch_model.bin +++ b/eng-nah-svo-translation/pytorch_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f62408ea2e275aed7548ea31e37c534dd035154b3999483e0ed481c2dd1f9e2 +oid sha256:174c26a830bb38c835122d896e568f06abcf2e72976220ff513f32f769be682a size 298763205 diff --git a/eng-nah-svo-translation/training_args.bin b/eng-nah-svo-translation/training_args.bin index 9994891ab867aee0045265c08dc1422280fe4490..bc1c45f9050b16d948a8f770690c1c16f5b66a30 100644 --- a/eng-nah-svo-translation/training_args.bin +++ b/eng-nah-svo-translation/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:06a45c37f0e7284c095b4b503ffa422c6b8f2791990e008912c3c06d6db7d538 +oid sha256:e056894aace19b2ad0614ec4c70099b961a9aaa0c0c2b1edfbe8e70c015294d2 size 4219 diff --git a/myerrors_1551.out b/myerrors_1551.out deleted file mode 100644 index ab65a5a7401763e92b51f249db9d4702764cdce0..0000000000000000000000000000000000000000 --- a/myerrors_1551.out +++ /dev/null @@ -1,34 +0,0 @@ -You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding. -Using pad_token, but it is not set yet. -Traceback (most recent call last): - File "/mnt/storage/aatherton/hf_synth_trans/synth_translation.py", line 130, in - trainer.evaluate(max_length=max_length) - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 159, in evaluate - return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 2972, in evaluate - output = eval_loop( - ^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 3151, in evaluation_loop - for step, inputs in enumerate(dataloader): - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/accelerate/data_loader.py", line 384, in __iter__ - current_batch = next(dataloader_iter) - ^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 633, in __next__ - data = self._next_data() - ^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 677, in _next_data - data = self._dataset_fetcher.fetch(index) # may raise StopIteration - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch - return self.collate_fn(data) - ^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/data/data_collator.py", line 586, in __call__ - features = self.tokenizer.pad( - ^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 3059, in pad - padding_strategy, _, max_length, _ = self._get_padding_truncation_strategies( - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 2507, in _get_padding_truncation_strategies - raise ValueError( -ValueError: Asking to pad but the tokenizer does not have a padding token. Please select a token to use as `pad_token` `(tokenizer.pad_token = tokenizer.eos_token e.g.)` or add a new pad token via `tokenizer.add_special_tokens({'pad_token': '[PAD]'})`. diff --git a/myerrors_1552.out b/myerrors_1552.out deleted file mode 100644 index f6f885aa01e57e2d5410aa3d681f4a024427cb0c..0000000000000000000000000000000000000000 --- a/myerrors_1552.out +++ /dev/null @@ -1,29 +0,0 @@ - Map: 0%| | 0/7292 [00:00 - trainer.evaluate(max_length=max_length) - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 159, in evaluate - return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 2972, in evaluate - output = eval_loop( - ^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 3161, in evaluation_loop - loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 282, in prediction_step - generated_tokens = self.model.generate(**inputs, **gen_kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context - return func(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/generation/utils.py", line 1402, in generate - self._validate_model_class() - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/generation/utils.py", line 1197, in _validate_model_class - raise TypeError(exception_message) -TypeError: The current model class (BertModel) is not compatible with `.generate()`, as it doesn't have a language model head. Please use one of the following classes instead: {'BertLMHeadModel'} diff --git a/myerrors_1553.out b/myerrors_1553.out deleted file mode 100644 index f0089959e2078c121c14d02c46798e99bb2f5bc7..0000000000000000000000000000000000000000 --- a/myerrors_1553.out +++ /dev/null @@ -1,25 +0,0 @@ - Map: 0%| | 0/1001 [00:00 - trainer.evaluate(max_length=max_length) - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 159, in evaluate - return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 2972, in evaluate - output = eval_loop( - ^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 3161, in evaluation_loop - loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 282, in prediction_step - generated_tokens = self.model.generate(**inputs, **gen_kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context - return func(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/generation/utils.py", line 1402, in generate - self._validate_model_class() - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/generation/utils.py", line 1197, in _validate_model_class - raise TypeError(exception_message) -TypeError: The current model class (BertModel) is not compatible with `.generate()`, as it doesn't have a language model head. Please use one of the following classes instead: {'BertLMHeadModel'} diff --git a/myerrors_1554.out b/myerrors_1554.out deleted file mode 100644 index 4c8a4883571c0064f7dd045ef0e867085b2f6fa5..0000000000000000000000000000000000000000 --- a/myerrors_1554.out +++ /dev/null @@ -1,165 +0,0 @@ - Map: 0%| | 0/1001 [00:00 - trainer.evaluate(max_length=max_length) - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 159, in evaluate - return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 2972, in evaluate - output = eval_loop( - ^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 3161, in evaluation_loop - loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 282, in prediction_step - generated_tokens = self.model.generate(**inputs, **gen_kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context - return func(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/generation/utils.py", line 1596, in generate - return self.greedy_search( - ^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/generation/utils.py", line 2444, in greedy_search - outputs = self( - ^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl - return forward_call(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 1235, in forward - outputs = self.bert( - ^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl - return forward_call(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 1022, in forward - encoder_outputs = self.encoder( - ^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl - return forward_call(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 612, in forward - layer_outputs = layer_module( - ^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl - return forward_call(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward - self_attention_outputs = self.attention( - ^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl - return forward_call(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward - self_outputs = self.self( - ^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl - return forward_call(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 286, in forward - mixed_query_layer = self.query(hidden_states) - ^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl - return forward_call(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/linear.py", line 114, in forward - return F.linear(input, self.weight, self.bias) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)` -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [96,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [97,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [98,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [99,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [100,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [101,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [102,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [103,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [104,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [105,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [106,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [107,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [108,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [109,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [110,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [111,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [112,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [113,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [114,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [115,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [116,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [117,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [118,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [119,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [120,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [121,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [122,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [123,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [124,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [125,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [126,0,0] Assertion `srcIndex < srcSelectDimSize` failed. -/opt/conda/conda-bld/pytorch_1682343995622/work/aten/src/ATen/native/cuda/Indexing.cu:1146: indexSelectLargeIndex: block: [161,0,0], thread: [127,0,0] Assertion `srcIndex < srcSelectDimSize` failed. diff --git a/myerrors_1555.out b/myerrors_1555.out deleted file mode 100644 index 85e68c9fc635ed6872b05f9a4dbb9af44f37d515..0000000000000000000000000000000000000000 --- a/myerrors_1555.out +++ /dev/null @@ -1,46 +0,0 @@ -You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding. - 0%| | 0/16 [00:00 - model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 482, in from_pretrained - config, kwargs = AutoConfig.from_pretrained( - ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/auto/configuration_auto.py", line 1007, in from_pretrained - config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/configuration_utils.py", line 620, in get_config_dict - config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/configuration_utils.py", line 675, in _get_config_dict - resolved_config_file = cached_file( - ^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/utils/hub.py", line 479, in cached_file - raise EnvironmentError( -OSError: aatherton2024/eng-nah-svo-cpt does not appear to have a file named config.json. Checkout 'https://huggingface.co/aatherton2024/eng-nah-svo-cpt/main' for available files. diff --git a/myerrors_1556.out b/myerrors_1556.out deleted file mode 100644 index a2a0dbf83f155ceaa5d8b2e02eee98ac9c32aab9..0000000000000000000000000000000000000000 --- a/myerrors_1556.out +++ /dev/null @@ -1,22 +0,0 @@ - Map: 0%| | 0/7292 [00:00 - model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 482, in from_pretrained - config, kwargs = AutoConfig.from_pretrained( - ^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/auto/configuration_auto.py", line 1007, in from_pretrained - config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/configuration_utils.py", line 620, in get_config_dict - config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/configuration_utils.py", line 675, in _get_config_dict - resolved_config_file = cached_file( - ^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/utils/hub.py", line 399, in cached_file - raise EnvironmentError( -OSError: eng-nah-svo-cpt does not appear to have a file named config.json. Checkout 'https://huggingface.co/eng-nah-svo-cpt/None' for available files. diff --git a/myerrors_1557.out b/myerrors_1557.out deleted file mode 100644 index c521abbc5051a589ac5f4c325089c8f44a320fe2..0000000000000000000000000000000000000000 --- a/myerrors_1557.out +++ /dev/null @@ -1,40 +0,0 @@ -Traceback (most recent call last): - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/huggingface_hub-0.17.1-py3.8.egg/huggingface_hub/utils/_errors.py", line 261, in hf_raise_for_status - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/requests/models.py", line 1021, in raise_for_status - raise HTTPError(http_error_msg, response=self) -requests.exceptions.HTTPError: 404 Client Error: Not Found for url: https://huggingface.co/eng-nah-svo-cpt/resolve/main/tokenizer_config.json - -The above exception was the direct cause of the following exception: - -Traceback (most recent call last): - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/utils/hub.py", line 428, in cached_file - resolved_file = hf_hub_download( - ^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/huggingface_hub-0.17.1-py3.8.egg/huggingface_hub/utils/_validators.py", line 118, in _inner_fn - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/huggingface_hub-0.17.1-py3.8.egg/huggingface_hub/file_download.py", line 1344, in hf_hub_download - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/huggingface_hub-0.17.1-py3.8.egg/huggingface_hub/file_download.py", line 1230, in hf_hub_download - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/huggingface_hub-0.17.1-py3.8.egg/huggingface_hub/utils/_validators.py", line 118, in _inner_fn - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/huggingface_hub-0.17.1-py3.8.egg/huggingface_hub/file_download.py", line 1606, in get_hf_file_metadata - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/huggingface_hub-0.17.1-py3.8.egg/huggingface_hub/utils/_errors.py", line 293, in hf_raise_for_status -huggingface_hub.utils._errors.RepositoryNotFoundError: 404 Client Error. (Request ID: Root=1-65078ad3-69f0105f790b8b472eceb3d6;305708a1-7612-4fce-92cd-fb302d6700cd) - -Repository Not Found for url: https://huggingface.co/eng-nah-svo-cpt/resolve/main/tokenizer_config.json. -Please make sure you specified the correct `repo_id` and `repo_type`. -If you are trying to access a private or gated repo, make sure you are authenticated. - -The above exception was the direct cause of the following exception: - -Traceback (most recent call last): - File "/mnt/storage/aatherton/hf_synth_trans/synth_translation.py", line 40, in - tokenizer = AutoTokenizer.from_pretrained(model_checkpoint) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/auto/tokenization_auto.py", line 677, in from_pretrained - tokenizer_config = get_tokenizer_config(pretrained_model_name_or_path, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/auto/tokenization_auto.py", line 510, in get_tokenizer_config - resolved_config_file = cached_file( - ^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/utils/hub.py", line 449, in cached_file - raise EnvironmentError( -OSError: eng-nah-svo-cpt is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models' -If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=` diff --git a/myerrors_1558.out b/myerrors_1558.out deleted file mode 100644 index ab4f661fb27c4a7ed0f771d2bb1c996028dd1f35..0000000000000000000000000000000000000000 --- a/myerrors_1558.out +++ /dev/null @@ -1,101 +0,0 @@ - Map: 0%| | 0/1001 [00:00 - trainer.evaluate(max_length=max_length) - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 159, in evaluate - return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 2972, in evaluate - output = eval_loop( - ^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer.py", line 3161, in evaluation_loop - loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/trainer_seq2seq.py", line 282, in prediction_step - generated_tokens = self.model.generate(**inputs, **gen_kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context - return func(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/generation/utils.py", line 1596, in generate - return self.greedy_search( - ^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/generation/utils.py", line 2444, in greedy_search - outputs = self( - ^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl - return forward_call(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 1235, in forward - outputs = self.bert( - ^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl - return forward_call(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 1022, in forward - encoder_outputs = self.encoder( - ^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl - return forward_call(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 612, in forward - layer_outputs = layer_module( - ^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl - return forward_call(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 497, in forward - self_attention_outputs = self.attention( - ^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl - return forward_call(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 427, in forward - self_outputs = self.self( - ^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl - return forward_call(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 286, in forward - mixed_query_layer = self.query(hidden_states) - ^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl - return forward_call(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/nn/modules/linear.py", line 114, in forward - return F.linear(input, self.weight, self.bias) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)` diff --git a/myerrors_1559.out b/myerrors_1559.out deleted file mode 100644 index ee64d0e7b9593bf5b85acf9fa09650cd78c5d49b..0000000000000000000000000000000000000000 --- a/myerrors_1559.out +++ /dev/null @@ -1,3 +0,0 @@ - Map: 0%| | 0/1001 [00:00 - for batch in tqdm(eval_dataloader): - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/tqdm/std.py", line 1178, in __iter__ - for obj in iterable: - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/accelerate/data_loader.py", line 394, in __iter__ - next_batch = next(dataloader_iter) - ^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 633, in __next__ - data = self._next_data() - ^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 677, in _next_data - data = self._dataset_fetcher.fetch(index) # may raise StopIteration - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch - return self.collate_fn(data) - ^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/data/data_collator.py", line 600, in __call__ - decoder_input_ids = self.model.prepare_decoder_input_ids_from_labels(labels=features["labels"]) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/marian/modeling_marian.py", line 1527, in prepare_decoder_input_ids_from_labels - return shift_tokens_right(labels, self.config.pad_token_id, self.config.decoder_start_token_id) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/marian/modeling_marian.py", line 66, in shift_tokens_right - shifted_input_ids[:, 0] = decoder_start_token_id - ~~~~~~~~~~~~~~~~~^^^^^^ -IndexError: index 0 is out of bounds for dimension 1 with size 0 - 33%|███▎ | 912/2736 [03:46<07:32, 4.03it/s] diff --git a/myerrors_1562.out b/myerrors_1562.out deleted file mode 100644 index 826ae5ca517836b878ef48a25bf7264e06a44b3b..0000000000000000000000000000000000000000 --- a/myerrors_1562.out +++ /dev/null @@ -1,8 +0,0 @@ -You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding. - 0%| | 0/16 [00:00 - for batch in tqdm(eval_dataloader): - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/tqdm/std.py", line 1178, in __iter__ - for obj in iterable: - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/accelerate/data_loader.py", line 394, in __iter__ - next_batch = next(dataloader_iter) - ^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 633, in __next__ - data = self._next_data() - ^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 677, in _next_data - data = self._dataset_fetcher.fetch(index) # may raise StopIteration - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch - return self.collate_fn(data) - ^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/data/data_collator.py", line 600, in __call__ - decoder_input_ids = self.model.prepare_decoder_input_ids_from_labels(labels=features["labels"]) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/marian/modeling_marian.py", line 1527, in prepare_decoder_input_ids_from_labels - return shift_tokens_right(labels, self.config.pad_token_id, self.config.decoder_start_token_id) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/marian/modeling_marian.py", line 66, in shift_tokens_right - shifted_input_ids[:, 0] = decoder_start_token_id - ~~~~~~~~~~~~~~~~~^^^^^^ -IndexError: index 0 is out of bounds for dimension 1 with size 0 - 33%|███▎ | 912/2736 [03:45<07:31, 4.04it/s] diff --git a/myerrors_1564.out b/myerrors_1564.out deleted file mode 100644 index 0e1bedd4886c3dc4cf46438aaf215abe70343958..0000000000000000000000000000000000000000 --- a/myerrors_1564.out +++ /dev/null @@ -1,161 +0,0 @@ -You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding. - 0%| | 0/16 [00:00 - for batch in tqdm(eval_dataloader): - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/tqdm/std.py", line 1178, in __iter__ - for obj in iterable: - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/accelerate/data_loader.py", line 394, in __iter__ - next_batch = next(dataloader_iter) - ^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 633, in __next__ - data = self._next_data() - ^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 677, in _next_data - data = self._dataset_fetcher.fetch(index) # may raise StopIteration - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch - return self.collate_fn(data) - ^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/data/data_collator.py", line 600, in __call__ - decoder_input_ids = self.model.prepare_decoder_input_ids_from_labels(labels=features["labels"]) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/marian/modeling_marian.py", line 1527, in prepare_decoder_input_ids_from_labels - return shift_tokens_right(labels, self.config.pad_token_id, self.config.decoder_start_token_id) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/marian/modeling_marian.py", line 66, in shift_tokens_right - shifted_input_ids[:, 0] = decoder_start_token_id - ~~~~~~~~~~~~~~~~~^^^^^^ -IndexError: index 0 is out of bounds for dimension 1 with size 0 - 33%|███▎ | 912/2736 [03:44<07:29, 4.06it/s] diff --git a/myerrors_1600.out b/myerrors_1600.out new file mode 100644 index 0000000000000000000000000000000000000000..f379eca7f6f68455bda5040f70e6ff68d7078d37 --- /dev/null +++ b/myerrors_1600.out @@ -0,0 +1,49 @@ +You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding. + 0%| | 0/16 [00:00 + translator = pipeline("translation", model=model_checkpoint) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/pipelines/__init__.py", line 724, in pipeline + config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **model_kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/models/auto/configuration_auto.py", line 1007, in from_pretrained + config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/configuration_utils.py", line 620, in get_config_dict + config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/configuration_utils.py", line 675, in _get_config_dict + resolved_config_file = cached_file( + ^^^^^^^^^^^^ + File "/mnt/storage/aatherton/anaconda3/envs/nmt/lib/python3.11/site-packages/transformers/utils/hub.py", line 479, in cached_file + raise EnvironmentError( +OSError: aatherton2024/hf_synth_trans does not appear to have a file named config.json. Checkout 'https://huggingface.co/aatherton2024/hf_synth_trans/main' for available files. diff --git a/myerrors_1601.out b/myerrors_1601.out new file mode 100644 index 0000000000000000000000000000000000000000..e6b212d430b00355805f9df87ae54b9e85efa500 --- /dev/null +++ b/myerrors_1601.out @@ -0,0 +1,16 @@ +You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding. + 0%| | 0/16 [00:00", + "eos_token": "<|endoftext|>", + "pad_token": "[PAD]", + "unk_token": "<|endoftext|>" +} diff --git a/output/tokenizer.json b/output/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..3e5da92f3a16a9f960b6c8e3e0f9508c7e607ca5 --- /dev/null +++ b/output/tokenizer.json @@ -0,0 +1,317 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 259, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": true + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": "", + "end_of_word_suffix": "", + "fuse_unk": false, + "vocab": { + "<|endoftext|>": 0, + "!": 1, + "\"": 2, + "#": 3, + "$": 4, + "%": 5, + "&": 6, + "'": 7, + "(": 8, + ")": 9, + "*": 10, + "+": 11, + ",": 12, + "-": 13, + ".": 14, + "/": 15, + "0": 16, + "1": 17, + "2": 18, + "3": 19, + "4": 20, + "5": 21, + "6": 22, + "7": 23, + "8": 24, + "9": 25, + ":": 26, + ";": 27, + "<": 28, + "=": 29, + ">": 30, + "?": 31, + "@": 32, + "A": 33, + "B": 34, + "C": 35, + "D": 36, + "E": 37, + "F": 38, + "G": 39, + "H": 40, + "I": 41, + "J": 42, + "K": 43, + "L": 44, + "M": 45, + "N": 46, + "O": 47, + "P": 48, + "Q": 49, + "R": 50, + "S": 51, + "T": 52, + "U": 53, + "V": 54, + "W": 55, + "X": 56, + "Y": 57, + "Z": 58, + "[": 59, + "\\": 60, + "]": 61, + "^": 62, + "_": 63, + "`": 64, + "a": 65, + "b": 66, + "c": 67, + "d": 68, + "e": 69, + "f": 70, + "g": 71, + "h": 72, + "i": 73, + "j": 74, + "k": 75, + "l": 76, + "m": 77, + "n": 78, + "o": 79, + "p": 80, + "q": 81, + "r": 82, + "s": 83, + "t": 84, + "u": 85, + "v": 86, + "w": 87, + "x": 88, + "y": 89, + "z": 90, + "{": 91, + "|": 92, + "}": 93, + "~": 94, + "¡": 95, + "¢": 96, + "£": 97, + "¤": 98, + "¥": 99, + "¦": 100, + "§": 101, + "¨": 102, + "©": 103, + "ª": 104, + "«": 105, + "¬": 106, + "®": 107, + "¯": 108, + "°": 109, + "±": 110, + "²": 111, + "³": 112, + "´": 113, + "µ": 114, + "¶": 115, + "·": 116, + "¸": 117, + "¹": 118, + "º": 119, + "»": 120, + "¼": 121, + "½": 122, + "¾": 123, + "¿": 124, + "À": 125, + "Á": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "È": 133, + "É": 134, + "Ê": 135, + "Ë": 136, + "Ì": 137, + "Í": 138, + "Î": 139, + "Ï": 140, + "Ð": 141, + "Ñ": 142, + "Ò": 143, + "Ó": 144, + "Ô": 145, + "Õ": 146, + "Ö": 147, + "×": 148, + "Ø": 149, + "Ù": 150, + "Ú": 151, + "Û": 152, + "Ü": 153, + "Ý": 154, + "Þ": 155, + "ß": 156, + "à": 157, + "á": 158, + "â": 159, + "ã": 160, + "ä": 161, + "å": 162, + "æ": 163, + "ç": 164, + "è": 165, + "é": 166, + "ê": 167, + "ë": 168, + "ì": 169, + "í": 170, + "î": 171, + "ï": 172, + "ð": 173, + "ñ": 174, + "ò": 175, + "ó": 176, + "ô": 177, + "õ": 178, + "ö": 179, + "÷": 180, + "ø": 181, + "ù": 182, + "ú": 183, + "û": 184, + "ü": 185, + "ý": 186, + "þ": 187, + "ÿ": 188, + "Ā": 189, + "ā": 190, + "Ă": 191, + "ă": 192, + "Ą": 193, + "ą": 194, + "Ć": 195, + "ć": 196, + "Ĉ": 197, + "ĉ": 198, + "Ċ": 199, + "ċ": 200, + "Č": 201, + "č": 202, + "Ď": 203, + "ď": 204, + "Đ": 205, + "đ": 206, + "Ē": 207, + "ē": 208, + "Ĕ": 209, + "ĕ": 210, + "Ė": 211, + "ė": 212, + "Ę": 213, + "ę": 214, + "Ě": 215, + "ě": 216, + "Ĝ": 217, + "ĝ": 218, + "Ğ": 219, + "ğ": 220, + "Ġ": 221, + "ġ": 222, + "Ģ": 223, + "ģ": 224, + "Ĥ": 225, + "ĥ": 226, + "Ħ": 227, + "ħ": 228, + "Ĩ": 229, + "ĩ": 230, + "Ī": 231, + "ī": 232, + "Ĭ": 233, + "ĭ": 234, + "Į": 235, + "į": 236, + "İ": 237, + "ı": 238, + "IJ": 239, + "ij": 240, + "Ĵ": 241, + "ĵ": 242, + "Ķ": 243, + "ķ": 244, + "ĸ": 245, + "Ĺ": 246, + "ĺ": 247, + "Ļ": 248, + "ļ": 249, + "Ľ": 250, + "ľ": 251, + "Ŀ": 252, + "ŀ": 253, + "Ł": 254, + "ł": 255, + "Ń": 256, + "en": 257, + "fr": 258 + }, + "merges": [ + "e n", + "f r" + ] + } +} \ No newline at end of file diff --git a/output/tokenizer_config.json b/output/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..546efe6d18ae2ad7758d0c9ef51cacdb81c8dc9d --- /dev/null +++ b/output/tokenizer_config.json @@ -0,0 +1,9 @@ +{ + "add_prefix_space": false, + "bos_token": "<|endoftext|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|endoftext|>", + "model_max_length": 1024, + "tokenizer_class": "GPT2Tokenizer", + "unk_token": "<|endoftext|>" +} diff --git a/output/vocab.json b/output/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..9cd5bf34d923be02a0eae563f9c595833f08bef3 --- /dev/null +++ b/output/vocab.json @@ -0,0 +1 @@ +{"<|endoftext|>":0,"!":1,"\"":2,"#":3,"$":4,"%":5,"&":6,"'":7,"(":8,")":9,"*":10,"+":11,",":12,"-":13,".":14,"/":15,"0":16,"1":17,"2":18,"3":19,"4":20,"5":21,"6":22,"7":23,"8":24,"9":25,":":26,";":27,"<":28,"=":29,">":30,"?":31,"@":32,"A":33,"B":34,"C":35,"D":36,"E":37,"F":38,"G":39,"H":40,"I":41,"J":42,"K":43,"L":44,"M":45,"N":46,"O":47,"P":48,"Q":49,"R":50,"S":51,"T":52,"U":53,"V":54,"W":55,"X":56,"Y":57,"Z":58,"[":59,"\\":60,"]":61,"^":62,"_":63,"`":64,"a":65,"b":66,"c":67,"d":68,"e":69,"f":70,"g":71,"h":72,"i":73,"j":74,"k":75,"l":76,"m":77,"n":78,"o":79,"p":80,"q":81,"r":82,"s":83,"t":84,"u":85,"v":86,"w":87,"x":88,"y":89,"z":90,"{":91,"|":92,"}":93,"~":94,"¡":95,"¢":96,"£":97,"¤":98,"¥":99,"¦":100,"§":101,"¨":102,"©":103,"ª":104,"«":105,"¬":106,"®":107,"¯":108,"°":109,"±":110,"²":111,"³":112,"´":113,"µ":114,"¶":115,"·":116,"¸":117,"¹":118,"º":119,"»":120,"¼":121,"½":122,"¾":123,"¿":124,"À":125,"Á":126,"Â":127,"Ã":128,"Ä":129,"Å":130,"Æ":131,"Ç":132,"È":133,"É":134,"Ê":135,"Ë":136,"Ì":137,"Í":138,"Î":139,"Ï":140,"Ð":141,"Ñ":142,"Ò":143,"Ó":144,"Ô":145,"Õ":146,"Ö":147,"×":148,"Ø":149,"Ù":150,"Ú":151,"Û":152,"Ü":153,"Ý":154,"Þ":155,"ß":156,"à":157,"á":158,"â":159,"ã":160,"ä":161,"å":162,"æ":163,"ç":164,"è":165,"é":166,"ê":167,"ë":168,"ì":169,"í":170,"î":171,"ï":172,"ð":173,"ñ":174,"ò":175,"ó":176,"ô":177,"õ":178,"ö":179,"÷":180,"ø":181,"ù":182,"ú":183,"û":184,"ü":185,"ý":186,"þ":187,"ÿ":188,"Ā":189,"ā":190,"Ă":191,"ă":192,"Ą":193,"ą":194,"Ć":195,"ć":196,"Ĉ":197,"ĉ":198,"Ċ":199,"ċ":200,"Č":201,"č":202,"Ď":203,"ď":204,"Đ":205,"đ":206,"Ē":207,"ē":208,"Ĕ":209,"ĕ":210,"Ė":211,"ė":212,"Ę":213,"ę":214,"Ě":215,"ě":216,"Ĝ":217,"ĝ":218,"Ğ":219,"ğ":220,"Ġ":221,"ġ":222,"Ģ":223,"ģ":224,"Ĥ":225,"ĥ":226,"Ħ":227,"ħ":228,"Ĩ":229,"ĩ":230,"Ī":231,"ī":232,"Ĭ":233,"ĭ":234,"Į":235,"į":236,"İ":237,"ı":238,"IJ":239,"ij":240,"Ĵ":241,"ĵ":242,"Ķ":243,"ķ":244,"ĸ":245,"Ĺ":246,"ĺ":247,"Ļ":248,"ļ":249,"Ľ":250,"ľ":251,"Ŀ":252,"ŀ":253,"Ł":254,"ł":255,"Ń":256,"en":257,"fr":258} \ No newline at end of file diff --git a/synth_translation.py b/synth_translation.py index 5d2546ae8bd36ecd7eceb891f376f71731c0e9ab..b1701309eba209b877f1c0f3a59a673464322e1a 100644 --- a/synth_translation.py +++ b/synth_translation.py @@ -1,6 +1,6 @@ import transformers import numpy as np -from datasets import load_dataset +from datasets import load_dataset, DatasetDict from transformers import AutoModelForSeq2SeqLM from transformers import AutoTokenizer from transformers import DataCollatorForSeq2Seq @@ -17,13 +17,24 @@ from huggingface_hub import Repository, get_full_repo_name from tqdm.auto import tqdm import torch from torch import Tensor +import os #load in dataset, setup tokenizer +def addperiod(entry): + entry['en'] += '.' + entry['fr'] += '.' + return entry + raw_datasets = load_dataset("aatherton2024/eng-nah-svo") +train_ds = raw_datasets['train'].map(addperiod) +validation_ds = raw_datasets['validation'].map(addperiod) +test_ds = raw_datasets['test'].map(addperiod) + +raw_datasets = DatasetDict({"train" : train_ds, "validation" : validation_ds, "test" : test_ds}) model_checkpoint = "eng-nah-svo-cpt" -if True: +if False: #data processing only needs to run once def get_training_corpus(raw_datasets): return ( raw_datasets["train"][i : i + 1000] @@ -37,10 +48,9 @@ if True: tokenizer.save_pretrained("eng-nah-svo-cpt") tokenizer.push_to_hub("eng-nah-svo-cpt") +max_length = 128 tokenizer = AutoTokenizer.from_pretrained(model_checkpoint) tokenizer.add_special_tokens({'pad_token': '[PAD]'}) -#contants -max_length = 128 #scan dataset, storing lists of english and french words then returning the tokenization of them def preprocess_function(examples): @@ -55,26 +65,27 @@ def preprocess_function(examples): tokenized_datasets = raw_datasets.map( preprocess_function, batched=True, - remove_columns=raw_datasets["train"].column_names, + remove_columns=raw_datasets["train"].column_names ) # #model choice for this problem -if False: - model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint) +if False: #load pretrained model + model = AutoModelForSeq2SeqLM.from_pretrained("eng-nah-svo-translation") else: from transformers import BertConfig, BertLMHeadModel from transformers import AutoModel model = AutoModelForSeq2SeqLM.from_pretrained("eng-nah-svo-translation") - print(model) - #model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-fr") + + #data collator takes tokenizer and the model to deal with padding for dynamic batching data_collator = DataCollatorForSeq2Seq(tokenizer, model=model) #Using BLEU as our metric for this problem -metric = evaluate.load("sacrebleu") +metric_bleu = evaluate.load("sacrebleu") +metric_chrf = evaluate.load("chrf") #simple method to return test metrics def compute_metrics(eval_preds): @@ -93,8 +104,9 @@ def compute_metrics(eval_preds): decoded_preds = [pred.strip() for pred in decoded_preds] decoded_labels = [[label.strip()] for label in decoded_labels] - result = metric.compute(predictions=decoded_preds, references=decoded_labels) - return {"bleu": result["score"]} + result_bleu = metric_bleu.compute(predictions=decoded_preds, references=decoded_labels) + result_chrf = metric_chrf.compute(predictions=decoded_preds, references=decoded_labels) + return {"bleu": result_bleu["score"], "chrf": result_chrf["score"]} ### We now enter the fine-tuning phase of our model structure ### @@ -133,7 +145,8 @@ trainer.train() print("evaluate 2") print(trainer.evaluate(max_length=max_length)) trainer.push_to_hub(tags="translation", commit_message="Training complete") - +print("training model now") +model.train() tokenized_datasets.set_format("torch") @@ -144,7 +157,7 @@ train_dataloader = DataLoader( batch_size=8, ) eval_dataloader = DataLoader( - tokenized_datasets["test"], collate_fn=data_collator, batch_size=8 + tokenized_datasets["test"], collate_fn=data_collator, batch_size=8, drop_last=True ) model = AutoModelForSeq2SeqLM.from_pretrained("eng-nah-svo-translation") @@ -197,63 +210,63 @@ def postprocess(predictions, labels): -progress_bar = tqdm(range(num_training_steps)) - -for epoch in range(num_train_epochs): - # Training - model.train() - for batch in train_dataloader: - outputs = model(**batch) - loss = outputs.loss - accelerator.backward(loss) - - optimizer.step() - lr_scheduler.step() - optimizer.zero_grad() - progress_bar.update(1) - - # Evaluation - model.eval() - for batch in tqdm(eval_dataloader): - with torch.no_grad(): - generated_tokens = accelerator.unwrap_model(model).generate( - batch["input_ids"], - attention_mask=batch["attention_mask"], - max_length=128, - ) - labels = batch["labels"] - - # Necessary to pad predictions and labels for being gathered - generated_tokens = accelerator.pad_across_processes( - generated_tokens, dim=1, pad_index=tokenizer.pad_token_id - ) - labels = accelerator.pad_across_processes(labels, dim=1, pad_index=-100) +# progress_bar = tqdm(range(num_training_steps)) - predictions_gathered = accelerator.gather(generated_tokens) - labels_gathered = accelerator.gather(labels) +# for epoch in range(num_train_epochs): +# # Training +# model.train() +# for batch in train_dataloader: +# outputs = model(**batch) +# loss = outputs.loss +# accelerator.backward(loss) - decoded_preds, decoded_labels = postprocess(predictions_gathered, labels_gathered) - metric.add_batch(predictions=decoded_preds, references=decoded_labels) +# optimizer.step() +# lr_scheduler.step() +# optimizer.zero_grad() +# progress_bar.update(1) - results = metric.compute() - print(f"epoch {epoch}, BLEU score: {results['score']:.2f}") +# # Evaluation +# model.eval() +# for batch in tqdm(eval_dataloader): +# with torch.no_grad(): +# generated_tokens = accelerator.unwrap_model(model).generate( +# batch["input_ids"], +# attention_mask=batch["attention_mask"], +# max_length=128, +# ) +# labels = batch["labels"] - # Save and upload - accelerator.wait_for_everyone() - unwrapped_model = accelerator.unwrap_model(model) - unwrapped_model.save_pretrained(output_dir, save_function=accelerator.save) - if accelerator.is_main_process: - tokenizer.save_pretrained(output_dir) - repo.push_to_hub( - commit_message=f"Training in progress epoch {epoch}", blocking=False - ) +# # Necessary to pad predictions and labels for being gathered +# generated_tokens = accelerator.pad_across_processes( +# generated_tokens, dim=1, pad_index=tokenizer.pad_token_id +# ) +# labels = accelerator.pad_across_processes(labels, dim=1, pad_index=-100) + +# predictions_gathered = accelerator.gather(generated_tokens) +# labels_gathered = accelerator.gather(labels) + +# decoded_preds, decoded_labels = postprocess(predictions_gathered, labels_gathered) +# metric_bleu.add_batch(predictions=decoded_preds, references=decoded_labels) + +# results = metric_bleu.compute() +# print(f"epoch {epoch}, BLEU score: {results['score']:.2f}") + +# # Save and upload +# accelerator.wait_for_everyone() +# unwrapped_model = accelerator.unwrap_model(model) +# unwrapped_model.save_pretrained(output_dir, save_function=accelerator.save) +# if accelerator.is_main_process: +# tokenizer.save_pretrained(output_dir) +# repo.push_to_hub( +# commit_message=f"Training in progress epoch {epoch}", blocking=False +# ) # Replace this with your own checkpoint -model_checkpoint = "aatherton2024/hf_eng_fra_reproduction" +model_checkpoint = "aatherton2024/eng-nah-svo-translation" translator = pipeline("translation", model=model_checkpoint) translator("Default to expanded threads") -translator( - "Unable to import %1 using the OFX importer plugin. This file is not the correct format." -) \ No newline at end of file +print(translator( + "you did not frichopize him" +)) \ No newline at end of file