# Pipeline: KTO (Kahneman-Tversky Optimization) preference fine-tuning of
# aya-expanse-8b with a DoRA-enabled LoRA adapter via ms-swift, then merging
# the adapter into the base weights, then serving the merged model with vLLM.
#
# USE_HF=1 makes ms-swift resolve model/dataset IDs against the Hugging Face
# Hub instead of ModelScope; HF_HUB_ENABLE_HF_TRANSFER=1 enables the
# accelerated hf_transfer download backend.
#
# NOTE(review): --model_revision master is ModelScope's default branch name;
# with USE_HF=1 the Hub default branch is usually "main" — confirm this
# revision exists for the HF repo.
USE_HF=1 \
HF_HUB_ENABLE_HF_TRANSFER=1 \
swift rlhf \
  --rlhf_type kto \
  --model_type aya-expanse-8b \
  --beta 0.1 \
  --desirable_weight 1.0 \
  --undesirable_weight 1.0 \
  --model_revision master \
  --sft_type lora \
  --tuner_backend peft \
  --template_type AUTO \
  --dtype AUTO \
  --output_dir output \
  --dataset Cossale/informal-to-professional-kto \
  --train_dataset_sample -1 \
  --num_train_epochs 1 \
  --max_length 8192 \
  --check_dataset_strategy warning \
  --lora_rank 32 \
  --lora_alpha 64 \
  --lora_dropout_p 0.05 \
  --lora_target_modules ALL \
  --gradient_checkpointing true \
  --batch_size 1 \
  --weight_decay 0.1 \
  --learning_rate 2e-4 \
  --use_dora True \
  --neftune_noise_alpha 5 \
  --gradient_accumulation_steps 4 \
  --max_grad_norm 0.5 \
  --warmup_ratio 0.03 \
  --eval_steps 100 \
  --save_steps 100 \
  --save_total_limit 2 \
  --logging_steps 10 \
  --use_flash_attn true

# Training output kept for reference (commented out — these are log lines,
# not shell commands, and would otherwise break the script):
# [INFO:swift] last_model_checkpoint: /root/llm-finetuning-setup/swift/output/aya-expanse-8b/v2-20241024-170858/checkpoint-35
# [INFO:swift] best_model_checkpoint: /root/llm-finetuning-setup/swift/output/aya-expanse-8b/v2-20241024-170858/checkpoint-35

# Merge the LoRA/DoRA adapter from the checkpoint above into the base model
# weights (--merge_lora true). The export writes a standalone merged model;
# the checkpoint path below matches the best_model_checkpoint logged above.
USE_HF=1 \
swift export \
  --model_type aya-expanse-8b \
  --ckpt_dir '/root/llm-finetuning-setup/swift/output/aya-expanse-8b/v2-20241024-170858/checkpoint-35' \
  --merge_lora true

# Serve the merged checkpoint with vLLM, exposing it under the model name
# "aya-expanse-8b-formal". The "-merged" suffix on the path is the directory
# produced by the swift export step above — presumably its default naming;
# verify it exists before serving.
vllm serve /root/llm-finetuning-setup/swift/output/aya-expanse-8b/v2-20241024-170858/checkpoint-35-merged \
  --served-model-name aya-expanse-8b-formal