---
checkpointing_steps: '15000'
config_name: null
context_size: 4
context_word_dropout: 0.0
dataset_config_name: iwslt2017-en-fr
dataset_name: gsarti/iwslt2017_context
gradient_accumulation_steps: 2
hub_model_id: context-mt/iwslt17-mbart50-1toM-target-ctx4-cwd0-en-fr
# SECURITY: a live Hugging Face access token was committed on this line.
# Revoke/rotate that token immediately; authenticate via `huggingface-cli login`
# or the HF_TOKEN environment variable instead of storing secrets in config.
hub_token: null
ignore_pad_token_for_loss: true
learning_rate: 5.0e-05
logging_steps: '600'
lr_scheduler_type: linear
max_length: 128
max_source_length: 512
max_target_length: 128
max_train_steps: 145520
model_name_or_path: facebook/mbart-large-50-one-to-many-mmt
model_type: null
num_beams: 5
num_train_epochs: 5
num_warmup_steps: 1000
output_dir: /scratch/p305238/iwslt17-mbart50-1toM-target-ctx4-cwd0-en-fr
overwrite_cache: false
pad_to_max_length: true
per_device_eval_batch_size: 8
per_device_train_batch_size: 4
predict_with_generate: true
preprocessing_num_workers: null
push_to_hub: true
report_to: tensorboard
resume_from_checkpoint: null
sample_context: true
seed: null
source_lang: en_XX
target_lang: fr_XX
tokenizer_name: null
train_file: null
use_slow_tokenizer: false
use_target_context: true
val_max_target_length: null
validation_file: null
weight_decay: 0.0
with_tracking: true