lzy510016411 committed on
Commit
dd27402
·
verified ·
1 Parent(s): 0ac5f40

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -4
README.md CHANGED
@@ -19,10 +19,11 @@ language:
19
 
20
  ### Model Sources [optional]
21
 
22
- 使用qwen1.5 14b作为基础,进行lora训练而成
23
 
24
  训练参数如下:
25
 
 
26
  quantization_bit: 4
27
 
28
  stage: sft
@@ -54,12 +55,11 @@ overwrite_output_dir: true
54
  flash_attn: fa2
55
  per_device_train_batch_size: 2
56
  gradient_accumulation_steps: 8
57
- #之前3e-4学习率疑似有点高了,loss震荡比较厉害
58
  learning_rate: 0.0001
59
  num_train_epochs: 3
60
  weight_decay: 0.01
61
  optim: adamw_torch
62
- #似乎8bit优化器存在问题
63
  lr_scheduler_type: cosine
64
  warmup_steps: 0.01
65
  bf16: true
@@ -69,7 +69,7 @@ val_size: 0.001
69
  per_device_eval_batch_size: 1
70
  evaluation_strategy: steps
71
  eval_steps: 250
72
-
73
 
74
  ## Uses
75
 
 
19
 
20
  ### Model Sources [optional]
21
 
22
+ 以 Qwen1.5-14B 为基础模型,使用 LLaMA-Factory 框架进行 LoRA 训练而成
23
 
24
  训练参数如下:
25
 
26
+ ```yaml
27
  quantization_bit: 4
28
 
29
  stage: sft
 
55
  flash_attn: fa2
56
  per_device_train_batch_size: 2
57
  gradient_accumulation_steps: 8
 
58
  learning_rate: 0.0001
59
  num_train_epochs: 3
60
  weight_decay: 0.01
61
  optim: adamw_torch
62
+ #8bit优化器似乎存在问题
63
  lr_scheduler_type: cosine
64
  warmup_steps: 0.01
65
  bf16: true
 
69
  per_device_eval_batch_size: 1
70
  evaluation_strategy: steps
71
  eval_steps: 250
72
+ ```
73
 
74
  ## Uses
75