---
# Training configuration (GPT-2 XL fine-tuning on SQuAD, weight decay 0.1).
# Converted from a single-line Python argparse dump to valid block-style YAML:
# Python None/False mapped to YAML null/false; values otherwise unchanged.

# Model & tokenizer
model_name_or_path: gpt2-xl
config_name: null
tokenizer_name: null
use_slow_tokenizer: false

# Optimization
per_device_train_batch_size: 1
per_device_eval_batch_size: 1
# Written with an explicit mantissa dot: YAML 1.1 parsers (e.g. PyYAML)
# resolve "5e-06" as a string, but "5.0e-06" as a float on all parsers.
learning_rate: 5.0e-06
weight_decay: 0.1
num_train_epochs: 5
patience: 2
max_train_steps: null
gradient_accumulation_steps: 1
# NOTE(review): this is a Python enum repr; the consumer may expect the plain
# scheduler name "linear" — confirm against the code that loads this file.
lr_scheduler_type: SchedulerType.LINEAR
num_warmup_steps: 0

# Run identity & output
output_dir: gpt_xl_squad_decay_0.1
seed: null
finetune_type: vanilla
beta: 0.1
model_type: null

# Sequence / generation limits
max_seq_length: 600
max_context_length: 600
num_beams: 1

# Data preprocessing
preprocessing_num_workers: 16
overwrite_cache: false
no_keep_linebreaks: false

# Hub, checkpointing & tracking
push_to_hub: false
hub_model_id: null
hub_token: null
checkpointing_steps: epoch
resume_from_checkpoint: null
with_tracking: false
local_rank: -1