{
  "dpo_beta": 0.1,
  "finetuning_type": "lora",
  "lora_alpha": 32.0,
  "lora_dropout": 0.1,
  "lora_rank": 64,
  "lora_target": [
    "q_proj",
    "v_proj"
  ],
  "name_module_trainable": "mlp",
  "num_hidden_layers": 32,
  "num_layer_trainable": 3,
  "ppo_score_norm": false,
  "resume_lora_training": true
}