inflaton committed on
Commit
46c075a
·
1 Parent(s): af1cdd3

llama3 r4 p2 results

Browse files
.gitattributes CHANGED
@@ -98,3 +98,4 @@ results/mgtv-results_internlm_nv4090.csv filter=lfs diff=lfs merge=lfs -text
98
  results/glm-4-9b_lora_sft_bf16-p1.csv filter=lfs diff=lfs merge=lfs -text
99
  results/llama3-8b_lora_sft_bf16-p1_en.csv filter=lfs diff=lfs merge=lfs -text
100
  results/mgtv-results_internlm_best.csv filter=lfs diff=lfs merge=lfs -text
 
 
98
  results/glm-4-9b_lora_sft_bf16-p1.csv filter=lfs diff=lfs merge=lfs -text
99
  results/llama3-8b_lora_sft_bf16-p1_en.csv filter=lfs diff=lfs merge=lfs -text
100
  results/mgtv-results_internlm_best.csv filter=lfs diff=lfs merge=lfs -text
101
+ results/test_b-results_r4.csv filter=lfs diff=lfs merge=lfs -text
llama-factory/config/llama3-8b_lora_sft_bf16-p1_en.yaml CHANGED
@@ -1,5 +1,6 @@
1
  ### model
2
- model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 
3
 
4
  ### method
5
  stage: sft
@@ -14,15 +15,15 @@ upcast_layernorm: true
14
  ### dataset
15
  dataset: alpaca_mgtv_p1_en
16
  template: llama3
17
- cutoff_len: 4096
18
  max_samples: 25000
19
  overwrite_cache: true
20
  preprocessing_num_workers: 16
21
 
22
  ### output
23
- output_dir: saves/llama3-8b/lora/sft_bf16_p1_full_en
24
  logging_steps: 10
25
- save_steps: 117
26
  plot_loss: true
27
  # overwrite_output_dir: true
28
 
@@ -40,7 +41,7 @@ ddp_timeout: 180000000
40
  val_size: 0.1
41
  per_device_eval_batch_size: 1
42
  eval_strategy: steps
43
- eval_steps: 175
44
 
45
  report_to: wandb
46
- run_name: llama3_8b_p1_en # optional
 
1
  ### model
2
+ #model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
3
+ model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
4
 
5
  ### method
6
  stage: sft
 
15
  ### dataset
16
  dataset: alpaca_mgtv_p1_en
17
  template: llama3
18
+ cutoff_len: 8192
19
  max_samples: 25000
20
  overwrite_cache: true
21
  preprocessing_num_workers: 16
22
 
23
  ### output
24
+ output_dir: saves/llama3-8b/lora/sft_bf16_p1_full_en_r2
25
  logging_steps: 10
26
+ save_steps: 35
27
  plot_loss: true
28
  # overwrite_output_dir: true
29
 
 
41
  val_size: 0.1
42
  per_device_eval_batch_size: 1
43
  eval_strategy: steps
44
+ eval_steps: 35
45
 
46
  report_to: wandb
47
+ run_name: llama3_8b_p1_en_r2 # optional
llama-factory/config/llama3-8b_lora_sft_bf16-p2_en.yaml CHANGED
@@ -1,5 +1,6 @@
1
  ### model
2
- model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 
3
 
4
  ### method
5
  stage: sft
@@ -14,15 +15,15 @@ upcast_layernorm: true
14
  ### dataset
15
  dataset: alpaca_mgtv_p2_en
16
  template: llama3
17
- cutoff_len: 4096
18
  max_samples: 25000
19
  overwrite_cache: true
20
  preprocessing_num_workers: 16
21
 
22
  ### output
23
- output_dir: saves/llama3-8b/lora/sft_bf16_p2_full_en
24
  logging_steps: 10
25
- save_steps: 117
26
  plot_loss: true
27
  # overwrite_output_dir: true
28
 
@@ -40,7 +41,7 @@ ddp_timeout: 180000000
40
  val_size: 0.1
41
  per_device_eval_batch_size: 1
42
  eval_strategy: steps
43
- eval_steps: 175
44
 
45
  report_to: wandb
46
- run_name: llama3_8b_p2_en # optional
 
1
  ### model
2
+ #model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
3
+ model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
4
 
5
  ### method
6
  stage: sft
 
15
  ### dataset
16
  dataset: alpaca_mgtv_p2_en
17
  template: llama3
18
+ cutoff_len: 8192
19
  max_samples: 25000
20
  overwrite_cache: true
21
  preprocessing_num_workers: 16
22
 
23
  ### output
24
+ output_dir: saves/llama3-8b/lora/sft_bf16_p2_full_en_r2
25
  logging_steps: 10
26
+ save_steps: 35
27
  plot_loss: true
28
  # overwrite_output_dir: true
29
 
 
41
  val_size: 0.1
42
  per_device_eval_batch_size: 1
43
  eval_strategy: steps
44
+ eval_steps: 35
45
 
46
  report_to: wandb
47
+ run_name: llama3_8b_p2_en_r2 # optional
results/test_b-results_r4.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8caa5c7f153782389b90356cd6a59bf1b5ba976d993df896b2d0bfe666d8b64
3
+ size 23128244
scripts/eval-mgtv-llama3_8b_en.sh CHANGED
@@ -24,17 +24,17 @@ export USING_LLAMA_FACTORY=true
24
  export USE_ENGLISH_DATASETS=true
25
  export USE_BF16_FOR_INFERENCE=true
26
 
27
- export MODEL_NAME=meta-llama/Meta-Llama-3-8B-Instruct
28
  export MODEL_PREFIX=llama3-8b_lora_sft_bf16
29
 
30
- export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p1_en.csv
31
- #export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p1_full_en
32
  export USING_P1_PROMPT_TEMPLATE=true
33
  echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
34
  python llm_toolkit/eval_logical_reasoning_all_epochs.py
35
 
36
- export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p2_en.csv
37
- #export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p2_full_en
38
  export USING_P1_PROMPT_TEMPLATE=false
39
  echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
40
  python llm_toolkit/eval_logical_reasoning_all_epochs.py
 
24
  export USE_ENGLISH_DATASETS=true
25
  export USE_BF16_FOR_INFERENCE=true
26
 
27
+ export MODEL_NAME=meta-llama/Meta-Llama-3.1-8B-Instruct
28
  export MODEL_PREFIX=llama3-8b_lora_sft_bf16
29
 
30
+ export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p1_en_r2.csv
31
+ export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p1_full_en_r2
32
  export USING_P1_PROMPT_TEMPLATE=true
33
  echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
34
  python llm_toolkit/eval_logical_reasoning_all_epochs.py
35
 
36
+ export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p2_en_r2.csv
37
+ export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p2_full_en_r2
38
  export USING_P1_PROMPT_TEMPLATE=false
39
  echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
40
  python llm_toolkit/eval_logical_reasoning_all_epochs.py
scripts/test-mgtv.sh CHANGED
@@ -13,16 +13,22 @@ cat /etc/os-release
13
  lscpu
14
  grep MemTotal /proc/meminfo
15
 
 
 
16
  export LOAD_IN_4BIT=false
17
  export USING_LLAMA_FACTORY=true
18
 
19
- export MODEL_NAME=internlm/internlm2_5-7b-chat-1m
20
  # export ADAPTER_NAME_OR_PATH=inflaton-ai/InternLM_2_5-7b_LoRA-Adapter
21
- export ADAPTER_NAME_OR_PATH=llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full_r3/checkpoint-140
 
 
 
 
22
  export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
23
 
24
  export TEST_DATA=test_b
25
- export LOGICAL_REASONING_RESULTS_PATH=results/$TEST_DATA-results_r3.csv
26
 
27
  echo "Eval $MODEL_NAME with $ADAPTER_NAME_OR_PATH"
28
  python llm_toolkit/eval_logical_reasoning.py
 
13
  lscpu
14
  grep MemTotal /proc/meminfo
15
 
16
+ pip install transformers==4.41.2
17
+
18
  export LOAD_IN_4BIT=false
19
  export USING_LLAMA_FACTORY=true
20
 
21
+ #export MODEL_NAME=internlm/internlm2_5-7b-chat-1m
22
  # export ADAPTER_NAME_OR_PATH=inflaton-ai/InternLM_2_5-7b_LoRA-Adapter
23
+ #export ADAPTER_NAME_OR_PATH=llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full_r3/checkpoint-140
24
+
25
+ export MODEL_NAME=shenzhi-wang/Llama3-8B-Chinese-Chat
26
+ export ADAPTER_NAME_OR_PATH=llama-factory/saves/llama3-8b/lora/sft_bf16_p1_full_r4/checkpoint-140
27
+
28
  export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
29
 
30
  export TEST_DATA=test_b
31
+ export LOGICAL_REASONING_RESULTS_PATH=results/$TEST_DATA-results_r4.csv
32
 
33
  echo "Eval $MODEL_NAME with $ADAPTER_NAME_OR_PATH"
34
  python llm_toolkit/eval_logical_reasoning.py
scripts/tune-mgtv-llama3_8b_en.sh CHANGED
@@ -18,9 +18,11 @@ grep MemTotal /proc/meminfo
18
  #pip install transformers==4.41.2
19
  #pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
20
 
 
 
21
  export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
22
 
23
- export MODEL_NAME=meta-llama/Meta-Llama-3-8B-Instruct
24
 
25
  export MODEL_PREFIX=llama3-8b_lora_sft_bf16
26
 
 
18
  #pip install transformers==4.41.2
19
  #pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
20
 
21
+ pip install -U transformers
22
+
23
  export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
24
 
25
+ export MODEL_NAME=meta-llama/Meta-Llama-3.1-8B-Instruct
26
 
27
  export MODEL_PREFIX=llama3-8b_lora_sft_bf16
28
 
scripts/tune-mgtv.sh CHANGED
@@ -1 +1 @@
1
- tune-mgtv-llama3_8b.sh
 
1
+ tune-mgtv-llama3_8b_en.sh