---
# Training configuration (GPT-2 XL fine-tuning on SQuAD, weight decay 0.1).
# Converted from a single-line Python argparse dump to valid block-style YAML:
# Python None/False mapped to YAML null/false; values otherwise unchanged.

# Model & tokenizer
model_name_or_path: gpt2-xl
config_name: null
tokenizer_name: null
use_slow_tokenizer: false

# Optimization
per_device_train_batch_size: 1
per_device_eval_batch_size: 1
# Written with an explicit mantissa dot: YAML 1.1 parsers (e.g. PyYAML)
# resolve "5e-06" as a string, but "5.0e-06" as a float on all parsers.
learning_rate: 5.0e-06
weight_decay: 0.1
num_train_epochs: 5
patience: 2
max_train_steps: null
gradient_accumulation_steps: 1
# NOTE(review): this is a Python enum repr; the consumer may expect the plain
# scheduler name "linear" — confirm against the code that loads this file.
lr_scheduler_type: SchedulerType.LINEAR
num_warmup_steps: 0

# Run identity & output
output_dir: gpt_xl_squad_decay_0.1
seed: null
finetune_type: vanilla
beta: 0.1
model_type: null

# Sequence / generation limits
max_seq_length: 600
max_context_length: 600
num_beams: 1

# Data preprocessing
preprocessing_num_workers: 16
overwrite_cache: false
no_keep_linebreaks: false

# Hub, checkpointing & tracking
push_to_hub: false
hub_model_id: null
hub_token: null
checkpointing_steps: epoch
resume_from_checkpoint: null
with_tracking: false
local_rank: -1