---
# DPO fine-tuning config for HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
# on the argilla/dpo-mix-7k preference dataset (16-bit run).

dataset_args:
  path: argilla/dpo-mix-7k

format_args:
  prompt_format: zephyr-gemma

model_args:
  pretrained_model_name_or_path: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
  torch_dtype: bfloat16

wandb_args:
  entity: argilla-io
  project: zephyr-gemma-dpo
  name: 16bit

training_args:
  # DPOTrainer
  beta: 0.05
  loss_type: sigmoid
  max_length: 1024
  max_prompt_length: 512
  # Trainer (train)
  bf16: true
  do_train: true
  gradient_accumulation_steps: 8
  gradient_checkpointing: true
  gradient_checkpointing_kwargs:
    use_reentrant: false
  learning_rate: 5.0e-7
  logging_steps: 10
  lr_scheduler_type: cosine
  num_train_epochs: 2
  optim: adamw_torch
  output_dir: data/gemma-7b-it-dpo
  per_device_train_batch_size: 2
  seed: 42
  # NOTE(review): both warmup_ratio and warmup_steps are set; in HF
  # TrainingArguments, warmup_steps (when > 0) overrides warmup_ratio —
  # confirm which of the two is intended and drop the other.
  warmup_ratio: 0.1
  warmup_steps: 100
  report_to:
    - wandb
    - tensorboard
  # Trainer (eval)
  do_eval: true
  evaluation_strategy: steps
  eval_steps: 100
  per_device_eval_batch_size: 4
  # Trainer (save)
  hub_model_id: alvarobartt/zephyr-gemma-dpo
  hub_private_repo: true
  push_to_hub: true
  save_strategy: "no"  # Quoted, otherwise is casted to `False`
  save_total_limit: null

use_accelerate: true
use_unsloth: false