HaotianHu committed
Commit 6dcb228 · Parent(s): ae296ad

Update qwen2_7b_lora_sft_4bit.yaml

llama-factory/config/qwen2_7b_lora_sft_4bit.yaml CHANGED
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: Qwen/Qwen2-7B-Instruct
+model_name_or_path: Qwen/Qwen2-7B
 
 ### method
 stage: sft
@@ -8,39 +8,39 @@ finetuning_type: lora
 lora_target: all
 quantization_bit: 4 # use 4-bit QLoRA
 loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
-# use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training
+upcast_layernorm: true
 
 ### dataset
-dataset: alpaca_mac
-template: chatml
-cutoff_len: 1024
-max_samples: 50
+dataset: mgtv_train
+template: qwen
+cutoff_len: 4096
+max_samples: 25000
 overwrite_cache: true
 preprocessing_num_workers: 16
 
 ### output
-output_dir: /content/qwen2-7b/
-logging_steps: 10
-save_steps: 10
+output_dir: saves/qwen2_7b/lora/sft_4bit
+logging_steps: 562
+save_steps: 562
 plot_loss: true
 overwrite_output_dir: true
 # resume_from_checkpoint: true
 
 ### train
-per_device_train_batch_size: 1
+per_device_train_batch_size: 32
 gradient_accumulation_steps: 8
 learning_rate: 1.0e-4
-num_train_epochs: 6.0
+num_train_epochs: 3.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
 bf16: true
 ddp_timeout: 180000000
 
 ### eval
-val_size: 0.02
+val_size: 0.1
 per_device_eval_batch_size: 1
 eval_strategy: steps
-eval_steps: 10
+eval_steps: 562
 
 report_to: wandb
-run_name: qwen2_7b_mac_colab # optional
+run_name: qwen2_7b # optional
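
For reference, LLaMA-Factory configs like this are launched through the llamafactory-cli entry point. A minimal sketch, assuming the file lives at the path shown in the diff header and that the mgtv_train dataset is already registered in data/dataset_info.json:

    # single-GPU QLoRA run; quantization, LoRA+, and eval cadence come from the YAML above
    CUDA_VISIBLE_DEVICES=0 llamafactory-cli train llama-factory/config/qwen2_7b_lora_sft_4bit.yaml

Note that per_device_train_batch_size: 32 combined with gradient_accumulation_steps: 8 yields an effective batch of 256 sequences per device per optimizer step.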