{ "beta": 0.05, "bf16": true, "dataset_eval_split": "validation", "dataset_name": "mnoukhov/summarize_from_feedback_oai_preprocessing_1706381144_relabel_pythia1b", "ddp_find_unused_parameters": false, "eval_steps": 0.2, "evaluation_strategy": "steps", "git": "c5824b0", "gradient_accumulation_steps": 4, "gradient_checkpointing": false, "hub_model_id": "mnoukhov/pythia410m-dpo-tldr", "learning_rate": 3e-5, "logging_steps": 100, "lora_alpha": 32, "lora_r": 16, "lr_scheduler_type": "cosine", "max_length": 640, "max_prompt_length": 512, "max_target_length": 128, "model_name": "mnoukhov/pythia410m-sft-tldr", "name": "newdpo_pythia410m_tldr.yml", "num_train_epochs": 1, "per_device_eval_batch_size": 8, "per_device_train_batch_size": 16, "task_type": "tldr", "use_peft": true }