model_name_or_path: gpt2-xl
config_name: None
tokenizer_name: None
use_slow_tokenizer: False
per_device_train_batch_size: 1
per_device_eval_batch_size: 1
learning_rate: 5e-06
weight_decay: 0.1
num_train_epochs: 5
patience: 2
max_train_steps: None
gradient_accumulation_steps: 1
lr_scheduler_type: SchedulerType.LINEAR
num_warmup_steps: 0
output_dir: gpt_xl_squad_decay_0.1
seed: None
finetune_type: vanilla
beta: 0.1
model_type: None
max_seq_length: 600
max_context_length: 600
num_beams: 1
preprocessing_num_workers: 16
overwrite_cache: False
no_keep_linebreaks: False
push_to_hub: False
hub_model_id: None
hub_token: None
checkpointing_steps: epoch
resume_from_checkpoint: None
with_tracking: False
local_rank: -1