---
# DPO fine-tuning run configuration.
# Restored from a whitespace-mangled single-line dump: each `key: value`
# pair now sits on its own line (valid YAML block mapping). Keys and values
# are unchanged from the original; original alphabetical key order kept.
add_bos: true
checkpointing_steps: epoch
clip_grad_norm: -1  # NOTE(review): -1 presumably disables gradient clipping — confirm with the trainer
config_name: null
dataset_config_name: null
dataset_name: argilla/ultrafeedback-binarized-preferences-cleaned
dpo_beta: 0.1  # DPO KL-penalty coefficient (beta)
dpo_use_paged_optimizer: false
gradient_accumulation_steps: 4
gradient_checkpointing: true
learning_rate: 5.0e-07
logging_steps: 1
# LoRA hyperparameters below only take effect if use_lora/use_qlora is true
lora_alpha: 16
lora_dropout: 0.1
lora_rank: 64
low_cpu_mem_usage: false
lr_scheduler_type: linear
max_seq_length: 4096
max_train_samples: null
max_train_steps: 5712
model_name_or_path: /model
model_revision: main
num_train_epochs: 3
output_dir: /output
overwrite_cache: false
per_device_train_batch_size: 1
preprocessing_num_workers: 16
reduce_loss: mean
report_to: tensorboard
resume_from_checkpoint: null
seed: 42
timeout: 1800  # NOTE(review): units not shown here; presumably seconds — confirm with consumer
tokenizer_name: /model
tokenizer_revision: main
train_file: null
trust_remote_code: false
use_8bit_optimizer: false
use_flash_attn: true
use_lora: false
use_qlora: false
use_slow_tokenizer: true
wandb_entity: null
warmup_ratio: 0.1
weight_decay: 0.0
with_tracking: true