Commit fcc0071 by dh-mc (parent: 93c36fb)

ready for llama3 r4
competition/00d_Llama3_Results.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
competition/11d_Llama-3_8b_p1_r3_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
competition/11d_Llama-3_8b_p2_r3_analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
llama-factory/config/llama3-8b_lora_sft_bf16-p1_r4.yaml ADDED
@@ -0,0 +1,47 @@
+ ### model
+ model_name_or_path: shenzhi-wang/Llama3-8B-Chinese-Chat
+ #model_name_or_path: FlagAlpha/Llama3-Chinese-8B-Instruct
+
+ ### method
+ stage: sft
+ do_train: true
+ finetuning_type: lora
+ lora_target: all
+ # quantization_bit: 4 # use 4-bit QLoRA
+ loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
+ # use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training
+ upcast_layernorm: true
+
+ ### dataset
+ dataset: alpaca_mgtv_p1
+ template: llama3
+ cutoff_len: 4096
+ max_samples: 25000
+ overwrite_cache: true
+ preprocessing_num_workers: 16
+
+ ### output
+ output_dir: saves/llama3-8b/lora/sft_bf16_p1_full_r4
+ logging_steps: 10
+ save_steps: 35
+ plot_loss: true
+ # overwrite_output_dir: true
+
+ ### train
+ per_device_train_batch_size: 16
+ gradient_accumulation_steps: 8
+ learning_rate: 1.0e-4
+ num_train_epochs: 2.0
+ lr_scheduler_type: cosine
+ warmup_ratio: 0.1
+ bf16: true
+ ddp_timeout: 180000000
+
+ ### eval
+ val_size: 0.1
+ per_device_eval_batch_size: 1
+ eval_strategy: steps
+ eval_steps: 35
+
+ report_to: wandb
+ run_name: llama3_8b_p1_full_r4 # optional
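For context: with per_device_train_batch_size 16 and gradient_accumulation_steps 8, each device processes an effective batch of 128 samples per optimizer step, and save_steps/eval_steps of 35 means a LoRA adapter checkpoint is written (and validated) every 35 steps. The repo's tune-lf.sh wrapper is not part of this commit; a minimal launch sketch using LLaMA-Factory's standard CLI (assuming llamafactory-cli is installed) would be:

    # Minimal sketch, not the repo's tune-lf.sh; assumes llamafactory-cli is on PATH
    cd llama-factory
    llamafactory-cli train config/llama3-8b_lora_sft_bf16-p1_r4.yaml
    # adapters and the loss plot land under saves/llama3-8b/lora/sft_bf16_p1_full_r4 (output_dir above)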
results/mgtv-llama3_p2_r3_full_metrics.csv CHANGED
@@ -2,3 +2,6 @@ epoch,model,accuracy,precision,recall,f1
  0.0,hfl/llama-3-chinese-8b-instruct-v3_torch.bfloat16_lf,0.25066666666666665,0.6852419041932336,0.25066666666666665,0.32636449818329016
  0.2,hfl/llama-3-chinese-8b-instruct-v3/checkpoint-35_torch.bfloat16_lf,0.7283333333333334,0.7722393813259697,0.7283333333333334,0.7426450360790026
  0.4,hfl/llama-3-chinese-8b-instruct-v3/checkpoint-70_torch.bfloat16_lf,0.741,0.7868300593752113,0.741,0.7514058688729928
+ 0.6,hfl/llama-3-chinese-8b-instruct-v3/checkpoint-105_torch.bfloat16_lf,0.6223333333333333,0.7771706776754249,0.6223333333333333,0.6762790454549326
+ 0.8,hfl/llama-3-chinese-8b-instruct-v3/checkpoint-140_torch.bfloat16_lf,0.7,0.7767966010489314,0.7,0.7298480873851099
+ 1.0,hfl/llama-3-chinese-8b-instruct-v3/checkpoint-175_torch.bfloat16_lf,0.697,0.78712001874989,0.697,0.7309586130328194
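The round-3 p2 metrics now cover all five saved checkpoints plus the untuned base model (epoch 0.0); in the rows above, both accuracy and F1 peak at epoch 0.4 (checkpoint-70) and decline afterwards. A throwaway helper like the following, not part of the repo and shown only for illustration, picks the best row by F1 (the last CSV column):

    # hypothetical one-liner: skip the header, sort numerically by F1 (column 6), print the best row
    tail -n +2 results/mgtv-llama3_p2_r3_full_metrics.csv | sort -t, -k6,6 -g | tail -n 1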
scripts/eval-mgtv-llama3_8b.sh CHANGED
@@ -21,19 +21,19 @@ export RESIZE_TOKEN_EMBEDDINGS=true
  export START_EPOCH=0
  export USING_LLAMA_FACTORY=true

- # export MODEL_NAME=shenzhi-wang/Llama3-8B-Chinese-Chat
- export MODEL_NAME=hfl/llama-3-chinese-8b-instruct-v3
+ export MODEL_NAME=shenzhi-wang/Llama3-8B-Chinese-Chat
+ # export MODEL_NAME=hfl/llama-3-chinese-8b-instruct-v3

  export MODEL_PREFIX=llama3-8b_lora_sft_bf16

- export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p1_r3.csv
- export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p1_full_r3
+ export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p1_r4.csv
+ export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p1_full_r4
  export USING_P1_PROMPT_TEMPLATE=true
  echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
  python llm_toolkit/eval_logical_reasoning_all_epochs.py

- export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p2_r3.csv
- export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p2_full_r3
- export USING_P1_PROMPT_TEMPLATE=false
- echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
- python llm_toolkit/eval_logical_reasoning_all_epochs.py
+ # export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p2_r4.csv
+ # export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p2_full_r4
+ # export USING_P1_PROMPT_TEMPLATE=false
+ # echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
+ # python llm_toolkit/eval_logical_reasoning_all_epochs.py
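eval_logical_reasoning_all_epochs.py is invoked with no arguments, so it appears to be configured entirely through the environment variables set above; the round-4 change switches the base model back to shenzhi-wang/Llama3-8B-Chinese-Chat, points the p1 pass at the r4 adapter directory, and comments out the p2 pass. A quick pre-flight check (hypothetical, not part of the repo) is to confirm the adapters the loop will iterate over actually exist:

    # hypothetical sanity check before launching the eval loop:
    # list the r4 LoRA checkpoints saved every 35 steps by the training config above
    ls -d llama-factory/saves/llama3-8b/lora/sft_bf16_p1_full_r4/checkpoint-* 2>/dev/null \
      || echo "no r4 checkpoints found yet - run the tuning script first"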
scripts/tune-mgtv-llama3_8b.sh CHANGED
@@ -20,19 +20,19 @@ grep MemTotal /proc/meminfo

  export LOGICAL_REASONING_DATA_PATH=datasets/mgtv

- #export MODEL_NAME=shenzhi-wang/Llama3-8B-Chinese-Chat
- export MODEL_NAME=hfl/llama-3-chinese-8b-instruct-v3
+ export MODEL_NAME=shenzhi-wang/Llama3-8B-Chinese-Chat
+ # export MODEL_NAME=hfl/llama-3-chinese-8b-instruct-v3

  export MODEL_PREFIX=llama3-8b_lora_sft_bf16

- export CONFIG_FILE=config/$MODEL_PREFIX-p1_r3.yaml
+ export CONFIG_FILE=config/$MODEL_PREFIX-p1_r4.yaml
  echo "Tuning $MODEL_NAME with $CONFIG_FILE"
  $BASEDIR/scripts/tune-lf.sh $CONFIG_FILE


- export CONFIG_FILE=config/$MODEL_PREFIX-p2_r3.yaml
- echo "Tuning $MODEL_NAME with $CONFIG_FILE"
- $BASEDIR/scripts/tune-lf.sh $CONFIG_FILE
+ # export CONFIG_FILE=config/$MODEL_PREFIX-p2_r4.yaml
+ # echo "Tuning $MODEL_NAME with $CONFIG_FILE"
+ # $BASEDIR/scripts/tune-lf.sh $CONFIG_FILE


  $BASEDIR/scripts/eval-mgtv-llama3_8b.sh
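With the p2 block commented out, the tune script now runs only the p1 round-4 config and then chains straight into the eval script above. A typical end-to-end invocation from the repository root might look like the sketch below; the script preamble is not shown in this diff, so exporting BASEDIR here is an assumption, and the tee log path is purely illustrative:

    # illustrative usage; BASEDIR and the log filename are assumptions, not repo-defined
    export BASEDIR=$(pwd)
    bash scripts/tune-mgtv-llama3_8b.sh 2>&1 | tee llama3_8b_p1_r4_tune.log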
scripts/tune-mgtv.sh CHANGED
@@ -1 +1 @@
- tune-mgtv-internlm.sh
+ tune-mgtv-llama3_8b.sh