# Pipeline: KTO (Kahneman-Tversky Optimization) preference fine-tuning of
# aya-expanse-8b with a DoRA-enabled LoRA adapter via ms-swift, then merging
# the adapter into the base weights, then serving the merged model with vLLM.
#
# USE_HF=1 makes ms-swift resolve model/dataset IDs against the Hugging Face
# Hub instead of ModelScope; HF_HUB_ENABLE_HF_TRANSFER=1 enables the
# accelerated hf_transfer download backend.
#
# NOTE(review): --model_revision master is ModelScope's default branch name;
# with USE_HF=1 the Hub default branch is usually "main" — confirm this
# revision exists for the HF repo.
USE_HF=1 \
HF_HUB_ENABLE_HF_TRANSFER=1 \
swift rlhf \
  --rlhf_type kto \
  --model_type aya-expanse-8b \
  --beta 0.1 \
  --desirable_weight 1.0 \
  --undesirable_weight 1.0 \
  --model_revision master \
  --sft_type lora \
  --tuner_backend peft \
  --template_type AUTO \
  --dtype AUTO \
  --output_dir output \
  --dataset Cossale/informal-to-professional-kto \
  --train_dataset_sample -1 \
  --num_train_epochs 1 \
  --max_length 8192 \
  --check_dataset_strategy warning \
  --lora_rank 32 \
  --lora_alpha 64 \
  --lora_dropout_p 0.05 \
  --lora_target_modules ALL \
  --gradient_checkpointing true \
  --batch_size 1 \
  --weight_decay 0.1 \
  --learning_rate 2e-4 \
  --use_dora True \
  --neftune_noise_alpha 5 \
  --gradient_accumulation_steps 4 \
  --max_grad_norm 0.5 \
  --warmup_ratio 0.03 \
  --eval_steps 100 \
  --save_steps 100 \
  --save_total_limit 2 \
  --logging_steps 10 \
  --use_flash_attn true

# Training output kept for reference (commented out — these are log lines,
# not shell commands, and would otherwise break the script):
# [INFO:swift] last_model_checkpoint: /root/llm-finetuning-setup/swift/output/aya-expanse-8b/v2-20241024-170858/checkpoint-35
# [INFO:swift] best_model_checkpoint: /root/llm-finetuning-setup/swift/output/aya-expanse-8b/v2-20241024-170858/checkpoint-35

# Merge the LoRA/DoRA adapter from the checkpoint above into the base model
# weights (--merge_lora true). The export writes a standalone merged model;
# the checkpoint path below matches the best_model_checkpoint logged above.
USE_HF=1 \
swift export \
  --model_type aya-expanse-8b \
  --ckpt_dir '/root/llm-finetuning-setup/swift/output/aya-expanse-8b/v2-20241024-170858/checkpoint-35' \
  --merge_lora true

# Serve the merged checkpoint with vLLM, exposing it under the model name
# "aya-expanse-8b-formal". The "-merged" suffix on the path is the directory
# produced by the swift export step above — presumably its default naming;
# verify it exists before serving.
vllm serve /root/llm-finetuning-setup/swift/output/aya-expanse-8b/v2-20241024-170858/checkpoint-35-merged \
  --served-model-name aya-expanse-8b-formal