dh-mc committed
Commit 0d2096e · 1 Parent(s): dc1ac39
llama-factory/config/gemma2_9b_lora_sft-p1.yaml ADDED
@@ -0,0 +1,47 @@
+ ### model
+ model_name_or_path: shenzhi-wang/Gemma-2-9B-Chinese-Chat
+
+ ### method
+ stage: sft
+ do_train: true
+ finetuning_type: lora
+ lora_target: all
+ # quantization_bit: 4 # use 4-bit QLoRA
+ loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
+ # use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training
+ upcast_layernorm: true
+
+ ### dataset
+ dataset: alpaca_mgtv_p1
+ template: gemma2
+ cutoff_len: 4096
+ max_samples: 25000
+ overwrite_cache: true
+ preprocessing_num_workers: 16
+
+ ### output
+ output_dir: saves/gemma2-9b/lora/sft_bf16_p1_full
+ logging_steps: 100
+ save_steps: 2109
+ plot_loss: true
+ overwrite_output_dir: true
+ # resume_from_checkpoint: true
+
+ ### train
+ per_device_train_batch_size: 16
+ gradient_accumulation_steps: 8
+ learning_rate: 1.0e-4
+ num_train_epochs: 3.0
+ lr_scheduler_type: cosine
+ warmup_ratio: 0.1
+ bf16: true
+ ddp_timeout: 180000000
+
+ ### eval
+ val_size: 0.1
+ per_device_eval_batch_size: 1
+ eval_strategy: steps
+ eval_steps: 2109
+
+ report_to: wandb
+ run_name: gemma2_9b_bf16_p1_full # optional
scripts/tune-mgtv-gemma2_9b.sh ADDED
@@ -0,0 +1,41 @@
+ #!/bin/sh
+
+ BASEDIR=$(dirname "$0")
+ cd "$BASEDIR/.." || exit 1
+ echo "Current Directory:"
+ pwd
+
+ BASEDIR=$(pwd)
+
+ # Log the GPU, OS, CPU, and memory environment for the run.
+ nvidia-smi
+ uname -a
+ cat /etc/os-release
+ lscpu
+ grep MemTotal /proc/meminfo
+
+ # pip install -r requirements.txt
+ # cd ../LLaMA-Factory && pip install -e .[torch,bitsandbytes]
+
+ export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
+
+ export MODEL_NAME=shenzhi-wang/Gemma-2-9B-Chinese-Chat
+ export MODEL_PREFIX=gemma2-9b_lora_sft_bf16
+
+ # p1 config added in this commit (its filename does not follow $MODEL_PREFIX)
+ export CONFIG_FILE=config/gemma2_9b_lora_sft-p1.yaml
+ echo "Tuning with $CONFIG_FILE"
+ "$BASEDIR/scripts/tune-lf.sh" "$CONFIG_FILE"
+
+ export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p1.csv
+ # Adapter checkpoints written by the p1 run (output_dir in the p1 config).
+ export ADAPTER_PATH_BASE=llama-factory/saves/gemma2-9b/lora/sft_bf16_p1_full
+ echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
+ python llm_toolkit/eval_logical_reasoning_all_epochs.py
+
+ export CONFIG_FILE=config/gemma2_9b_lora_sft-p2.yaml
+ echo "Tuning with $CONFIG_FILE"
+ "$BASEDIR/scripts/tune-lf.sh" "$CONFIG_FILE"
+
+ export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p2.csv
+ export ADAPTER_PATH_BASE=llama-factory/saves/gemma2-9b/lora/sft_bf16_p2_full
+ echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
+ python llm_toolkit/eval_logical_reasoning_all_epochs.py