Spaces:
Build error
Build error
llama3 r4 p2 results
Browse files
- .gitattributes +1 -0
- llama-factory/config/llama3-8b_lora_sft_bf16-p1_en.yaml +7 -6
- llama-factory/config/llama3-8b_lora_sft_bf16-p2_en.yaml +7 -6
- results/test_b-results_r4.csv +3 -0
- scripts/eval-mgtv-llama3_8b_en.sh +5 -5
- scripts/test-mgtv.sh +9 -3
- scripts/tune-mgtv-llama3_8b_en.sh +3 -1
- scripts/tune-mgtv.sh +1 -1
.gitattributes
CHANGED
@@ -98,3 +98,4 @@ results/mgtv-results_internlm_nv4090.csv filter=lfs diff=lfs merge=lfs -text
|
|
98 |
results/glm-4-9b_lora_sft_bf16-p1.csv filter=lfs diff=lfs merge=lfs -text
|
99 |
results/llama3-8b_lora_sft_bf16-p1_en.csv filter=lfs diff=lfs merge=lfs -text
|
100 |
results/mgtv-results_internlm_best.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
98 |
results/glm-4-9b_lora_sft_bf16-p1.csv filter=lfs diff=lfs merge=lfs -text
|
99 |
results/llama3-8b_lora_sft_bf16-p1_en.csv filter=lfs diff=lfs merge=lfs -text
|
100 |
results/mgtv-results_internlm_best.csv filter=lfs diff=lfs merge=lfs -text
|
101 |
+
results/test_b-results_r4.csv filter=lfs diff=lfs merge=lfs -text
|
llama-factory/config/llama3-8b_lora_sft_bf16-p1_en.yaml
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
### model
|
2 |
-
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
|
|
3 |
|
4 |
### method
|
5 |
stage: sft
|
@@ -14,15 +15,15 @@ upcast_layernorm: true
|
|
14 |
### dataset
|
15 |
dataset: alpaca_mgtv_p1_en
|
16 |
template: llama3
|
17 |
-
cutoff_len:
|
18 |
max_samples: 25000
|
19 |
overwrite_cache: true
|
20 |
preprocessing_num_workers: 16
|
21 |
|
22 |
### output
|
23 |
-
output_dir: saves/llama3-8b/lora/
|
24 |
logging_steps: 10
|
25 |
-
save_steps:
|
26 |
plot_loss: true
|
27 |
# overwrite_output_dir: true
|
28 |
|
@@ -40,7 +41,7 @@ ddp_timeout: 180000000
|
|
40 |
val_size: 0.1
|
41 |
per_device_eval_batch_size: 1
|
42 |
eval_strategy: steps
|
43 |
-
eval_steps:
|
44 |
|
45 |
report_to: wandb
|
46 |
-
run_name:
|
|
|
1 |
### model
|
2 |
+
#model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
3 |
+
model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
|
4 |
|
5 |
### method
|
6 |
stage: sft
|
|
|
15 |
### dataset
|
16 |
dataset: alpaca_mgtv_p1_en
|
17 |
template: llama3
|
18 |
+
cutoff_len: 8192
|
19 |
max_samples: 25000
|
20 |
overwrite_cache: true
|
21 |
preprocessing_num_workers: 16
|
22 |
|
23 |
### output
|
24 |
+
output_dir: saves/llama3-8b/lora/sft_bf16_p1_full_en_r2
|
25 |
logging_steps: 10
|
26 |
+
save_steps: 35
|
27 |
plot_loss: true
|
28 |
# overwrite_output_dir: true
|
29 |
|
|
|
41 |
val_size: 0.1
|
42 |
per_device_eval_batch_size: 1
|
43 |
eval_strategy: steps
|
44 |
+
eval_steps: 35
|
45 |
|
46 |
report_to: wandb
|
47 |
+
run_name: llama3_8b_p1_en_r2 # optional
|
llama-factory/config/llama3-8b_lora_sft_bf16-p2_en.yaml
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
### model
|
2 |
-
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
|
|
3 |
|
4 |
### method
|
5 |
stage: sft
|
@@ -14,15 +15,15 @@ upcast_layernorm: true
|
|
14 |
### dataset
|
15 |
dataset: alpaca_mgtv_p2_en
|
16 |
template: llama3
|
17 |
-
cutoff_len:
|
18 |
max_samples: 25000
|
19 |
overwrite_cache: true
|
20 |
preprocessing_num_workers: 16
|
21 |
|
22 |
### output
|
23 |
-
output_dir: saves/llama3-8b/lora/
|
24 |
logging_steps: 10
|
25 |
-
save_steps:
|
26 |
plot_loss: true
|
27 |
# overwrite_output_dir: true
|
28 |
|
@@ -40,7 +41,7 @@ ddp_timeout: 180000000
|
|
40 |
val_size: 0.1
|
41 |
per_device_eval_batch_size: 1
|
42 |
eval_strategy: steps
|
43 |
-
eval_steps:
|
44 |
|
45 |
report_to: wandb
|
46 |
-
run_name:
|
|
|
1 |
### model
|
2 |
+
#model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
|
3 |
+
model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
|
4 |
|
5 |
### method
|
6 |
stage: sft
|
|
|
15 |
### dataset
|
16 |
dataset: alpaca_mgtv_p2_en
|
17 |
template: llama3
|
18 |
+
cutoff_len: 8192
|
19 |
max_samples: 25000
|
20 |
overwrite_cache: true
|
21 |
preprocessing_num_workers: 16
|
22 |
|
23 |
### output
|
24 |
+
output_dir: saves/llama3-8b/lora/sft_bf16_p2_full_en_r2
|
25 |
logging_steps: 10
|
26 |
+
save_steps: 35
|
27 |
plot_loss: true
|
28 |
# overwrite_output_dir: true
|
29 |
|
|
|
41 |
val_size: 0.1
|
42 |
per_device_eval_batch_size: 1
|
43 |
eval_strategy: steps
|
44 |
+
eval_steps: 35
|
45 |
|
46 |
report_to: wandb
|
47 |
+
run_name: llama3_8b_p2_en_r2 # optional
|
results/test_b-results_r4.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8caa5c7f153782389b90356cd6a59bf1b5ba976d993df896b2d0bfe666d8b64
|
3 |
+
size 23128244
|
scripts/eval-mgtv-llama3_8b_en.sh
CHANGED
@@ -24,17 +24,17 @@ export USING_LLAMA_FACTORY=true
|
|
24 |
export USE_ENGLISH_DATASETS=true
|
25 |
export USE_BF16_FOR_INFERENCE=true
|
26 |
|
27 |
-
export MODEL_NAME=meta-llama/Meta-Llama-3-8B-Instruct
|
28 |
export MODEL_PREFIX=llama3-8b_lora_sft_bf16
|
29 |
|
30 |
-
export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-
|
31 |
-
|
32 |
export USING_P1_PROMPT_TEMPLATE=true
|
33 |
echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
|
34 |
python llm_toolkit/eval_logical_reasoning_all_epochs.py
|
35 |
|
36 |
-
export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-
|
37 |
-
|
38 |
export USING_P1_PROMPT_TEMPLATE=false
|
39 |
echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
|
40 |
python llm_toolkit/eval_logical_reasoning_all_epochs.py
|
|
|
24 |
export USE_ENGLISH_DATASETS=true
|
25 |
export USE_BF16_FOR_INFERENCE=true
|
26 |
|
27 |
+
export MODEL_NAME=meta-llama/Meta-Llama-3.1-8B-Instruct
|
28 |
export MODEL_PREFIX=llama3-8b_lora_sft_bf16
|
29 |
|
30 |
+
export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p1_en_r2.csv
|
31 |
+
export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p1_full_en_r2
|
32 |
export USING_P1_PROMPT_TEMPLATE=true
|
33 |
echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
|
34 |
python llm_toolkit/eval_logical_reasoning_all_epochs.py
|
35 |
|
36 |
+
export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p2_en_r2.csv
|
37 |
+
export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p2_full_en_r2
|
38 |
export USING_P1_PROMPT_TEMPLATE=false
|
39 |
echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
|
40 |
python llm_toolkit/eval_logical_reasoning_all_epochs.py
|
scripts/test-mgtv.sh
CHANGED
@@ -13,16 +13,22 @@ cat /etc/os-release
|
|
13 |
lscpu
|
14 |
grep MemTotal /proc/meminfo
|
15 |
|
|
|
|
|
16 |
export LOAD_IN_4BIT=false
|
17 |
export USING_LLAMA_FACTORY=true
|
18 |
|
19 |
-
export MODEL_NAME=internlm/internlm2_5-7b-chat-1m
|
20 |
# export ADAPTER_NAME_OR_PATH=inflaton-ai/InternLM_2_5-7b_LoRA-Adapter
|
21 |
-
export ADAPTER_NAME_OR_PATH=llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full_r3/checkpoint-140
|
|
|
|
|
|
|
|
|
22 |
export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
|
23 |
|
24 |
export TEST_DATA=test_b
|
25 |
-
export LOGICAL_REASONING_RESULTS_PATH=results/$TEST_DATA-
|
26 |
|
27 |
echo "Eval $MODEL_NAME with $ADAPTER_NAME_OR_PATH"
|
28 |
python llm_toolkit/eval_logical_reasoning.py
|
|
|
13 |
lscpu
|
14 |
grep MemTotal /proc/meminfo
|
15 |
|
16 |
+
pip install transformers==4.41.2
|
17 |
+
|
18 |
export LOAD_IN_4BIT=false
|
19 |
export USING_LLAMA_FACTORY=true
|
20 |
|
21 |
+
#export MODEL_NAME=internlm/internlm2_5-7b-chat-1m
|
22 |
# export ADAPTER_NAME_OR_PATH=inflaton-ai/InternLM_2_5-7b_LoRA-Adapter
|
23 |
+
#export ADAPTER_NAME_OR_PATH=llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full_r3/checkpoint-140
|
24 |
+
|
25 |
+
export MODEL_NAME=shenzhi-wang/Llama3-8B-Chinese-Chat
|
26 |
+
export ADAPTER_NAME_OR_PATH=llama-factory/saves/llama3-8b/lora/sft_bf16_p1_full_r4/checkpoint-140
|
27 |
+
|
28 |
export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
|
29 |
|
30 |
export TEST_DATA=test_b
|
31 |
+
export LOGICAL_REASONING_RESULTS_PATH=results/$TEST_DATA-results_r4.csv
|
32 |
|
33 |
echo "Eval $MODEL_NAME with $ADAPTER_NAME_OR_PATH"
|
34 |
python llm_toolkit/eval_logical_reasoning.py
|
scripts/tune-mgtv-llama3_8b_en.sh
CHANGED
@@ -18,9 +18,11 @@ grep MemTotal /proc/meminfo
|
|
18 |
#pip install transformers==4.41.2
|
19 |
#pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
|
20 |
|
|
|
|
|
21 |
export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
|
22 |
|
23 |
-
export MODEL_NAME=meta-llama/Meta-Llama-3-8B-Instruct
|
24 |
|
25 |
export MODEL_PREFIX=llama3-8b_lora_sft_bf16
|
26 |
|
|
|
18 |
#pip install transformers==4.41.2
|
19 |
#pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
|
20 |
|
21 |
+
pip install -U transformers
|
22 |
+
|
23 |
export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
|
24 |
|
25 |
+
export MODEL_NAME=meta-llama/Meta-Llama-3.1-8B-Instruct
|
26 |
|
27 |
export MODEL_PREFIX=llama3-8b_lora_sft_bf16
|
28 |
|
scripts/tune-mgtv.sh
CHANGED
@@ -1 +1 @@
|
|
1 |
-
tune-mgtv-
|
|
|
1 |
+
tune-mgtv-llama3_8b_en.sh
|