yhyu13 committed on
Commit
94197a0
·
1 Parent(s): edc5215
Files changed (2) hide show
  1. README.md +2 -0
  2. scripts/local_ft_phi2_fn.sh +111 -0
README.md CHANGED
@@ -23,6 +23,8 @@ This model is a fine-tuned version of [cognitivecomputations/dolphin-2_6-phi-2](
23
  It achieves the following results on the evaluation set:
24
  - Loss: 0.3524
25
 
 
 
26
  ## Model description
27
 
28
  More information needed
 
23
  It achieves the following results on the evaluation set:
24
  - Loss: 0.3524
25
 
26
+ Training script is available at ./scripts/local_ft_phi2_fn.sh
27
+
28
  ## Model description
29
 
30
  More information needed
scripts/local_ft_phi2_fn.sh ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ eval "$(conda shell.bash hook)"
4
+ conda activate llama_factory
5
+
6
+ MODEL_NAME=dolphin-2_6-phi-2
7
+ STAGE=sft
8
+ EPOCH=1 #3.0
9
+ DATA=glaive-function-calling-v2
10
+
11
+ FT_TYPE=lora
12
+ LoRA_TARGET=Wqkv #q_proj,v_proj
13
+ TEMPLATE=default
14
+ PREDICTION_SAMPLES=20
15
+
16
+ MODEL_PATH=./models/$MODEL_NAME
17
+ if [ ! -d "$MODEL_PATH" ]; then
18
+ echo "Model not found: $MODEL_PATH"
19
+ return 1 2>/dev/null || exit 1 # abort: 'return' works when sourced; falls back to 'exit' when executed
20
+ fi
21
+
22
+ SAVE_PATH=./models/$STAGE/$MODEL_NAME-$STAGE-$DATA-ep$EPOCH-$FT_TYPE
23
+ if [ ! -d $SAVE_PATH ]; then
24
+ mkdir -p $SAVE_PATH
25
+ fi
26
+
27
+ DO_TRAIN=false
28
+ DO_PREDICT=false
29
+ DO_EXPORT=false
30
+
31
+ for arg in "$@"
32
+ do
33
+ if [[ "$arg" == "--train" ]]; then
34
+ echo "The '--train' argument is present in an argument: $arg"
35
+ DO_TRAIN=true
36
+ fi
37
+ if [[ "$arg" == "--pred" ]]; then
38
+ echo "The '--pred' argument is present in an argument: $arg"
39
+ DO_PREDICT=true
40
+ fi
41
+ if [[ "$arg" == "--exp" ]]; then
42
+ echo "The '--exp' argument is present in an argument: $arg"
43
+ DO_EXPORT=true
44
+ fi
45
+ done
46
+
47
+ if [ $DO_TRAIN == true ]; then
48
+ accelerate launch src/train_bash.py \
49
+ --seed 42 \
50
+ --stage $STAGE \
51
+ --model_name_or_path $MODEL_PATH \
52
+ --dataset $DATA \
53
+ --val_size .1 \
54
+ --template $TEMPLATE \
55
+ --finetuning_type $FT_TYPE \
56
+ --do_train \
57
+ --lora_target $LoRA_TARGET \
58
+ --output_dir $SAVE_PATH \
59
+ --overwrite_output_dir \
60
+ --overwrite_cache \
61
+ --per_device_train_batch_size 1 \
62
+ --gradient_accumulation_steps 4 \
63
+ --lr_scheduler_type cosine \
64
+ --logging_steps 10 \
65
+ --save_steps 1000 \
66
+ --learning_rate 5e-5 \
67
+ --num_train_epochs $EPOCH \
68
+ --do_eval \
69
+ --evaluation_strategy epoch \
70
+ --per_device_eval_batch_size 1 \
71
+ --prediction_loss_only \
72
+ --plot_loss \
73
+ --quantization_bit 4 \
74
+ --report_to tensorboard \
75
+ |& tee $SAVE_PATH/train_eval_log.txt
76
+ fi
77
+
78
+ if [ $DO_PREDICT == true ]; then
79
+ SAVE_PATH_PREDICT=$SAVE_PATH/Predict_$PREDICTION_SAMPLES
80
+ if [ ! -d $SAVE_PATH_PREDICT ]; then
81
+ mkdir -p $SAVE_PATH_PREDICT
82
+ fi
83
+ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
84
+ --stage $STAGE \
85
+ --model_name_or_path $MODEL_PATH \
86
+ --do_predict \
87
+ --max_samples $PREDICTION_SAMPLES \
88
+ --predict_with_generate \
89
+ --dataset $DATA \
90
+ --template $TEMPLATE \
91
+ --finetuning_type $FT_TYPE \
92
+ --adapter_name_or_path $SAVE_PATH \
93
+ --output_dir $SAVE_PATH_PREDICT \
94
+ --per_device_eval_batch_size 1 \
95
+ |& tee $SAVE_PATH_PREDICT/predict_log.txt
96
+ fi
97
+
98
+ if [ $DO_EXPORT == true ]; then
99
+ EXPORT_PATH=./models/export/$MODEL_NAME-$STAGE-$DATA-ep$EPOCH
100
+ if [ ! -d $EXPORT_PATH ]; then
101
+ mkdir -p $EXPORT_PATH
102
+ fi
103
+ CUDA_VISIBLE_DEVICES=0 python src/export_model.py \
104
+ --model_name_or_path $MODEL_PATH \
105
+ --adapter_name_or_path $SAVE_PATH \
106
+ --template $TEMPLATE \
107
+ --finetuning_type $FT_TYPE \
108
+ --export_dir $EXPORT_PATH \
109
+ --export_size 5 \
110
+ |& tee $EXPORT_PATH/export_log.txt
111
+ fi