# KTO (Kahneman-Tversky Optimization) RLHF fine-tuning of aya-expanse-8b with
# ms-swift, using DoRA adapters (LoRA rank 32, alpha 64) on all linear modules.
# USE_HF=1 pulls the model/dataset from the Hugging Face Hub instead of ModelScope;
# HF_HUB_ENABLE_HF_TRANSFER=1 enables the accelerated hf_transfer download backend.
# NOTE(review): the original lines carried trailing " | |" residue after each
# backslash, which breaks shell line continuation — removed here.
USE_HF=1 \
HF_HUB_ENABLE_HF_TRANSFER=1 \
swift rlhf \
    --rlhf_type kto \
    --model_type aya-expanse-8b \
    --beta 0.1 \
    --desirable_weight 1.0 \
    --undesirable_weight 1.0 \
    --model_revision master \
    --sft_type lora \
    --tuner_backend peft \
    --template_type AUTO \
    --dtype AUTO \
    --output_dir output \
    --dataset Cossale/informal-to-professional-kto \
    --train_dataset_sample -1 \
    --num_train_epochs 1 \
    --max_length 8192 \
    --check_dataset_strategy warning \
    --lora_rank 32 \
    --lora_alpha 64 \
    --lora_dropout_p 0.05 \
    --lora_target_modules ALL \
    --gradient_checkpointing true \
    --batch_size 1 \
    --weight_decay 0.1 \
    --learning_rate 2e-4 \
    --use_dora True \
    --neftune_noise_alpha 5 \
    --gradient_accumulation_steps 4 \
    --max_grad_norm 0.5 \
    --warmup_ratio 0.03 \
    --eval_steps 100 \
    --save_steps 100 \
    --save_total_limit 2 \
    --logging_steps 10 \
    --use_flash_attn true
[INFO:swift] last_model_checkpoint: /root/llm-finetuning-setup/swift/output/aya-expanse-8b/v2-20241024-170858/checkpoint-35
[INFO:swift] best_model_checkpoint: /root/llm-finetuning-setup/swift/output/aya-expanse-8b/v2-20241024-170858/checkpoint-35
# Merge the trained DoRA/LoRA adapter weights from the best checkpoint back
# into the base model, producing a standalone "<ckpt_dir>-merged" directory
# that can be served without the adapter.
# NOTE(review): trailing " | |" residue removed — it broke the "\" continuations.
USE_HF=1 \
swift export \
    --model_type aya-expanse-8b \
    --ckpt_dir '/root/llm-finetuning-setup/swift/output/aya-expanse-8b/v2-20241024-170858/checkpoint-35' \
    --merge_lora true
# Serve the merged checkpoint with vLLM's OpenAI-compatible API server,
# exposing it under the model name "aya-expanse-8b-formal".
# NOTE(review): trailing " | |" residue removed — it would pipe into empty commands.
vllm serve /root/llm-finetuning-setup/swift/output/aya-expanse-8b/v2-20241024-170858/checkpoint-35-merged --served-model-name aya-expanse-8b-formal