train with 4gpu
llama-factory/config/mac_template_4gpu.yaml
ADDED
@@ -0,0 +1,42 @@
+### model
+model_name_or_path: ORG_NAME/MODEL_NAME
+
+### method
+stage: sft
+do_train: true
+finetuning_type: lora
+lora_target: all
+
+### dataset
+dataset: alpaca_mac
+template: CHAT_TEMPLATE
+cutoff_len: 1024
+max_samples: 4528
+overwrite_cache: true
+preprocessing_num_workers: 16
+
+### output
+output_dir: saves/MODEL_NAME
+logging_steps: 5
+save_steps: 70
+plot_loss: true
+# overwrite_output_dir: true
+
+### train
+per_device_train_batch_size: 8
+gradient_accumulation_steps: 8
+learning_rate: 1.0e-4
+num_train_epochs: 6.0
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
+
+### eval
+val_size: 0.01
+per_device_eval_batch_size: 1
+eval_strategy: steps
+eval_steps: 70
+
+report_to: wandb
+run_name: MODEL_NAME_lora_sft
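With per_device_train_batch_size: 8 and gradient_accumulation_steps: 8, each optimizer step sees 256 samples when the template is launched on the 4 GPUs the file name implies. A minimal sketch of that arithmetic (the GPU count is an assumption taken from the file name, not from the config itself):

# Sketch (not part of the commit): effective global batch size implied by the config above.
per_device_train_batch_size = 8
gradient_accumulation_steps = 8
num_gpus = 4  # assumption: "4gpu" in the file name, plain data parallelism

effective_batch_size = per_device_train_batch_size * gradient_accumulation_steps * num_gpus
print(effective_batch_size)  # 256 samples per optimizer step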
llm_toolkit/eval_epochs.py
ADDED
@@ -0,0 +1,149 @@
+import os
+import sys
+import subprocess
+
+import torch
+from dotenv import find_dotenv, load_dotenv
+
+from llm_toolkit.llm_utils import *
+from llm_toolkit.translation_utils import *
+
+
+def evaluate_model_all_epochs(
+    model,
+    tokenizer,
+    model_name,
+    adapter_path_base,
+    load_in_4bit=True,
+    num_of_entries=-1,
+    result_file=None,
+    start_epoch=0,
+    end_epoch=-1,
+):
+    new_env = os.environ.copy()
+    new_env["MODEL_NAME"] = model_name
+    new_env["LOAD_IN_4BIT"] = "true" if load_in_4bit else "false"
+    if result_file is not None:
+        new_env["RESULTS_PATH"] = result_file
+
+    model = model_name.split("/")[-1]
+
+    subdirs = []
+    if adapter_path_base is None:
+        num_train_epochs = 0
+        print(f"No adapter path provided. Running with base model: {model_name}")
+    else:
+        # find checkpoint subdirectories in adapter_path_base and sort them by step number
+        subdirs = [
+            d
+            for d in os.listdir(adapter_path_base)
+            if os.path.isdir(os.path.join(adapter_path_base, d))
+        ]
+        subdirs = sorted(subdirs, key=lambda x: int(x.split("-")[-1]))
+        print(f"found {len(subdirs)} checkpoints: {subdirs}")
+
+        if end_epoch >= 0:
+            num_train_epochs = end_epoch
+            print(f"Running from epoch {start_epoch} to {end_epoch}")
+        else:
+            num_train_epochs = len(subdirs)
+
+    for i in range(start_epoch, num_train_epochs + 1):
+        print(f"Epoch {i}")
+        if i == 0:
+            # epoch 0 evaluates the base model, so drop any adapter from the child env
+            new_env.pop("ADAPTER_NAME_OR_PATH", None)
+        else:
+            adapter_path = adapter_path_base + "/" + subdirs[i - 1]
+            new_env["ADAPTER_NAME_OR_PATH"] = adapter_path
+
+        print(f"adapter path: {new_env.get('ADAPTER_NAME_OR_PATH')}")
+
+        log_file = "./logs/{}_epoch_{}.txt".format(model, i)
+        with open(log_file, "w") as f_obj:
+            subprocess.run(
+                f"python llm_toolkit/eval_shots.py {num_of_entries}",
+                shell=True,
+                env=new_env,
+                stdout=f_obj,
+                text=True,
+            )
+
+
+if __name__ == "__main__":
+    found_dotenv = find_dotenv(".env")
+
+    if len(found_dotenv) == 0:
+        found_dotenv = find_dotenv(".env.example")
+    print(f"loading env vars from: {found_dotenv}")
+    load_dotenv(found_dotenv, override=False)
+
+    workding_dir = os.path.dirname(found_dotenv)
+    os.chdir(workding_dir)
+    print("workding dir:", workding_dir)
+    print(f"adding {workding_dir} to sys.path")
+    sys.path.append(workding_dir)
+
+    model_name = os.getenv("MODEL_NAME")
+    adapter_path_base = os.getenv("ADAPTER_PATH_BASE")
+    data_path = os.getenv("DATA_PATH")  # dataset path for load_translation_dataset below
+    start_epoch = int(os.getenv("START_EPOCH", 0))
+    end_epoch = int(os.getenv("END_EPOCH", -1))
+    load_in_4bit = os.getenv("LOAD_IN_4BIT", "true").lower() == "true"
+    result_file = os.getenv("RESULTS_PATH", None)
+
+    num_of_entries = int(sys.argv[1]) if len(sys.argv) > 1 else -1
+
+    print(
+        model_name,
+        adapter_path_base,
+        load_in_4bit,
+        start_epoch,
+        result_file,
+    )
+
+    device = check_gpu()
+    is_cuda = torch.cuda.is_available()
+
+    print(f"Evaluating model: {model_name} on {device}")
+
+    if is_cuda:
+        torch.cuda.empty_cache()
+        gpu_stats = torch.cuda.get_device_properties(0)
+        start_gpu_memory = round(
+            torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3
+        )
+        max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+        print(f"(0) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+        print(f"{start_gpu_memory} GB of memory reserved.")
+
+    model, tokenizer = load_model(model_name, load_in_4bit=load_in_4bit)
+
+    datasets = load_translation_dataset(data_path, tokenizer, num_shots=0)
+    print_row_details(datasets["test"].to_pandas())
+
+    if is_cuda:
+        gpu_stats = torch.cuda.get_device_properties(0)
+        start_gpu_memory = round(
+            torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3
+        )
+        max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+        print(f"(1) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+        print(f"{start_gpu_memory} GB of memory reserved.")
+
+    evaluate_model_all_epochs(
+        model,
+        tokenizer,
+        model_name,
+        adapter_path_base,
+        start_epoch=start_epoch,
+        end_epoch=end_epoch,
+        load_in_4bit=load_in_4bit,
+        num_of_entries=num_of_entries,
+        result_file=result_file,
+    )
+
+    if is_cuda:
+        gpu_stats = torch.cuda.get_device_properties(0)
+        start_gpu_memory = round(
+            torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3
+        )
+        max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+        print(f"(3) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+        print(f"{start_gpu_memory} GB of memory reserved.")
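Note that the script sorts checkpoint directories by their numeric suffix rather than lexicographically, so checkpoint-700 cannot land before checkpoint-70. A minimal sketch of the difference, using hypothetical checkpoint-<step> directory names of the kind the trainer typically writes under output_dir:

# Sketch (not part of the commit): numeric vs. lexicographic ordering of checkpoint dirs.
subdirs = ["checkpoint-140", "checkpoint-1120", "checkpoint-70", "checkpoint-700"]

print(sorted(subdirs))
# ['checkpoint-1120', 'checkpoint-140', 'checkpoint-70', 'checkpoint-700']  (lexicographic, wrong order)

print(sorted(subdirs, key=lambda x: int(x.split("-")[-1])))
# ['checkpoint-70', 'checkpoint-140', 'checkpoint-700', 'checkpoint-1120']  (by step, as eval_epochs.py does)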
llm_toolkit/{eval.py → eval_shots.py}
RENAMED
File without changes
notebooks/00b_Data Analysis_Few_Shots.ipynb
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:ab12535095ccc1615c5379d154c81142686e4fa3dc33fc51808b679fd6fa7116
+size 1234635
results/mac-results_few_shots_metrics.csv
CHANGED
@@ -1,41 +1,47 @@
 model,shots,meteor,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rap,num_max_output_tokens,eval_time
-Qwen/Qwen2-72B-Instruct,0,0.4003638205699929,0.12223832517678616,0.
-Qwen/Qwen2-72B-Instruct,1,0.4068727655718769,0.13151008586303575,0.
-Qwen/Qwen2-72B-Instruct,3,0.4086244766794449,0.13771788946915253,0.
-Qwen/Qwen2-72B-Instruct,5,0.4132330811975005,0.1439773872150899,0.
-Qwen/Qwen2-72B-Instruct,10,0.41598174489789025,0.14493475334416772,0.
-Qwen/Qwen2-72B-Instruct,50,0.4401536409204816,0.1538634893900684,0.
-Qwen/Qwen2-7B-Instruct,0,0.377477070949433,0.11783492823424507,0.
-Qwen/Qwen2-7B-Instruct,1,0.38000752971097884,0.11731917392837622,0.
-Qwen/Qwen2-7B-Instruct,3,0.38678180999660744,0.12368875746156333,0.
-Qwen/Qwen2-7B-Instruct,5,0.38784856371389564,0.1227725469820483,0.
-Qwen/Qwen2-7B-Instruct,10,0.38526484346757095,0.12535252418966952,0.
-Qwen/Qwen2-7B-Instruct,50,0.3953455943001352,0.12949951844499932,0.
-internlm/internlm2_5-7b-chat,0,0.36816799960793073,0.11360521358693174,0.
-internlm/internlm2_5-7b-chat,1,0.3719587471180722,0.1157707566176535,0.
-internlm/internlm2_5-7b-chat,3,0.3747105229822289,0.1154826016668525,0.
-internlm/internlm2_5-7b-chat,5,0.37285562384505977,0.11541534709366409,0.
-internlm/internlm2_5-7b-chat,10,0.3750895095392996,0.11696492920010637,0.
-internlm/internlm2_5-7b-chat,50,0.37213069871716603,0.11404688073207249,0.
-shenzhi-wang/Llama3.1-70B-Chinese-Chat,0,0.3638704024273502,0.10874677881601094,0.
-shenzhi-wang/Llama3.1-70B-Chinese-Chat,1,0.37956764543783084,0.11805442002282653,0.
-shenzhi-wang/Llama3.1-70B-Chinese-Chat,3,0.38622483411876246,0.12306660851355093,0.
-shenzhi-wang/Llama3.1-70B-Chinese-Chat,5,0.3895488616778815,0.12582029733797498,0.
-shenzhi-wang/
-shenzhi-wang/
-shenzhi-wang/
-shenzhi-wang/
-shenzhi-wang/
-shenzhi-wang/
-
-
-
-
-
-
-gpt-4o,0,0.
-gpt-4o,1,0.
-gpt-4o,3,0.
-gpt-4o,5,0.
-gpt-4o,10,0.
-gpt-4o,50,0.
+Qwen/Qwen2-72B-Instruct,0,0.4003638205699929,0.12223832517678616,0.38486660208459095,0.0,0.19593998234774934,0.19593998234774934,0.3970180421898014,1,8.894969108561341
+Qwen/Qwen2-72B-Instruct,1,0.4068727655718769,0.13151008586303575,0.3946283255752747,0.0,0.15798764342453664,0.15798764342453664,0.4041216347207881,1,8.983230361871138
+Qwen/Qwen2-72B-Instruct,3,0.4086244766794449,0.13771788946915253,0.3979712282250465,0.0,0.12709620476610767,0.12709620476610767,0.4063954239173824,0,11.657546337157987
+Qwen/Qwen2-72B-Instruct,5,0.4132330811975005,0.1439773872150899,0.40387035186513487,0.0,0.11915269196822595,0.11915269196822595,0.41111822769434864,0,17.167696381288614
+Qwen/Qwen2-72B-Instruct,10,0.41598174489789025,0.14493475334416772,0.4065933507975943,0.0,0.09620476610767872,0.09620476610767872,0.4142591929807702,0,29.728155339805824
+Qwen/Qwen2-72B-Instruct,50,0.4401536409204816,0.1538634893900684,0.41722880607716234,0.0,0.10150044130626655,0.10150044130626655,0.43823160654983345,0,112.50397175639894
+Qwen/Qwen2-7B-Instruct,0,0.377477070949433,0.11783492823424507,0.3678523300904837,0.0,0.07149161518093557,0.07149161518093557,0.3763128359886437,0,0.9805825242718447
+Qwen/Qwen2-7B-Instruct,1,0.38000752971097884,0.11731917392837622,0.371517786678723,0.0,0.07413945278022947,0.07413945278022947,0.37879237953430883,0,1.0529567519858782
+Qwen/Qwen2-7B-Instruct,3,0.38678180999660744,0.12368875746156333,0.3780278278830778,0.0,0.1412180052956752,0.1412180052956752,0.38444052153933106,0,1.6010591350397176
+Qwen/Qwen2-7B-Instruct,5,0.38784856371389564,0.1227725469820483,0.38246119910508375,0.0,0.09179170344218888,0.09179170344218888,0.38631555618548774,0,2.2894969108561343
+Qwen/Qwen2-7B-Instruct,10,0.38526484346757095,0.12535252418966952,0.38202725422893463,0.0,0.10326566637246248,0.10326566637246248,0.3835535147682633,0,4.006178287731686
+Qwen/Qwen2-7B-Instruct,50,0.3953455943001352,0.12949951844499932,0.3899754114871057,0.0,0.10061782877316858,0.10061782877316858,0.39363409715118836,0,17.46425419240953
+internlm/internlm2_5-7b-chat,0,0.36816799960793073,0.11360521358693174,0.3600058558701442,0.0,0.2144748455428067,0.2144748455428067,0.3648059323539847,0,1.2241835834068844
+internlm/internlm2_5-7b-chat,1,0.3719587471180722,0.1157707566176535,0.36379026028083117,0.0,0.14033539276257723,0.14033539276257723,0.36972107700643503,0,1.3124448367166814
+internlm/internlm2_5-7b-chat,3,0.3747105229822289,0.1154826016668525,0.36859373323449984,0.0,0.17740511915269197,0.17740511915269197,0.37187052462735126,0,1.8578993821712269
+internlm/internlm2_5-7b-chat,5,0.37285562384505977,0.11541534709366409,0.36845885184482197,0.0,0.14827890556045895,0.14827890556045895,0.37048732274065205,0,2.860547219770521
+internlm/internlm2_5-7b-chat,10,0.3750895095392996,0.11696492920010637,0.36774089220788087,0.0,0.13062665489849956,0.13062665489849956,0.37298723763770353,0,5.722859664607237
+internlm/internlm2_5-7b-chat,50,0.37213069871716603,0.11404688073207249,0.3627041392544321,0.0,0.16857899382171226,0.16857899382171226,0.3694484047441432,8,42.29214474845543
+shenzhi-wang/Llama3.1-70B-Chinese-Chat,0,0.3638704024273502,0.10874677881601094,0.35336472352140924,0.0,0.15445719329214475,0.15445719329214475,0.3614642386796342,0,7.8331862312444835
+shenzhi-wang/Llama3.1-70B-Chinese-Chat,1,0.37956764543783084,0.11805442002282653,0.36984338962652286,0.0,0.12533097969991175,0.12533097969991175,0.3775255236309064,0,8.307149161518094
+shenzhi-wang/Llama3.1-70B-Chinese-Chat,3,0.38622483411876246,0.12306660851355093,0.37461197525974343,0.0,0.14386584289496912,0.14386584289496912,0.38384366117983154,0,11.681376875551633
+shenzhi-wang/Llama3.1-70B-Chinese-Chat,5,0.3895488616778815,0.12582029733797498,0.37850976779334966,0.0,0.14386584289496912,0.14386584289496912,0.38714719527562863,0,17.23389232127096
+shenzhi-wang/Llama3.1-70B-Chinese-Chat,10,0.3897515010230098,0.12957008401715697,0.3804272354384455,0.0,0.1262135922330097,0.1262135922330097,0.3876399935245347,0,
+shenzhi-wang/Llama3.1-8B-Chinese-Chat,0,0.3476217085789916,0.09799438963103267,0.33493508618013,0.0,2.2162400706090026,2.2162400706090026,0.319817551404022,2,1.0750220653133276
+shenzhi-wang/Llama3.1-8B-Chinese-Chat,1,0.35430794534292803,0.10438367949419078,0.34360907692906495,0.0,0.19505736981465135,0.19505736981465135,0.3513601482457364,0,1.1571050308914386
+shenzhi-wang/Llama3.1-8B-Chinese-Chat,3,0.3600739839089376,0.10709900175348612,0.35151668174502293,0.0,0.14827890556045895,0.14827890556045895,0.3577868691137631,0,1.9814651368049427
+shenzhi-wang/Llama3.1-8B-Chinese-Chat,5,0.36231904915539526,0.11204735364530892,0.3555966636828387,0.0,0.15798764342453664,0.15798764342453664,0.3598691748988386,0,2.8146513680494265
+shenzhi-wang/Llama3.1-8B-Chinese-Chat,10,0.3615257973929306,0.10125226501021815,0.35395169595888565,0.0,0.8314210061782877,0.8314210061782877,0.34940648099916116,22,
+shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,0,0.3284997502705771,0.08313795089297474,0.31837381406868526,0.0,0.12797881729920565,0.12797881729920565,0.3266954815790356,0,1.204766107678729
+shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1,0.3342115436248988,0.08857909016110346,0.32376944492764814,0.0,0.6690203000882613,0.6690203000882613,0.3250691235789747,1,1.4819064430714917
+shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,3,0.3435621946945506,0.09605927100886698,0.33411105509944494,0.0,0.1262135922330097,0.1262135922330097,0.3417009211692762,0,2.262135922330097
+shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,5,0.34429078135481284,0.09638489591361771,0.33818535378281456,0.0,0.07590467784642542,0.07590467784642542,0.34316381414750663,2,3.3883495145631066
+shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,10,0.3408431061510028,0.09735670499814125,0.3331892932821734,0.0,0.10414827890556046,0.10414827890556046,0.339316280986861,11,6.558693733451015
+shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,50,0.36184201368489227,0.104864029030581,0.3457669052738412,0.0,0.5136804942630185,0.5136804942630185,0.35413782502473057,2,29.659311562224183
+gpt-4o-mini,0,0.3797696357415517,0.1208238389018596,0.37055778050320864,0.0,0.09532215357458076,0.09532215357458076,0.37821133607113916,0,1.5939982347749337
+gpt-4o-mini,1,0.37721414424357197,0.12013402254992751,0.36752595751803463,0.0,0.09179170344218888,0.09179170344218888,0.37572317024740703,0,1.5666372462488967
+gpt-4o-mini,3,0.3772985230936086,0.12400311006855895,0.3682965259271725,0.0,0.09179170344218888,0.09179170344218888,0.3758072155821894,0,1.2868490732568403
+gpt-4o-mini,5,0.35541821046691263,0.1202464326274801,0.34743907979125577,0.0,0.05030891438658429,0.05030891438658429,0.3546452926906339,0,1.203883495145631
+gpt-4o-mini,10,0.37335968903521094,0.1257600824824953,0.3655455159774728,0.0,0.0706090026478376,0.0706090026478376,0.37222227656264567,0,1.1879964695498677
+gpt-4o-mini,50,0.4044690970661121,0.13972883920222515,0.3915950550621088,0.0,0.08473080317740513,0.08473080317740513,0.4029924080114739,0,1.289496910856134
+gpt-4o,0,0.3797419877414444,0.12054600115274576,0.37050277223396,0.0,0.09532215357458076,0.09532215357458076,0.37818380151840997,0,1.528684907325684
+gpt-4o,1,0.37588586538591867,0.12049862468096047,0.36605424160788713,0.0,0.09179170344218888,0.09179170344218888,0.3744001415355042,0,1.203883495145631
+gpt-4o,3,0.3768512103553621,0.12408746322526747,0.3667929041403734,0.0,0.09355692850838482,0.09355692850838482,0.3753332737090981,0,2.05207413945278
+gpt-4o,5,0.35772544915145654,0.12169683347842021,0.3484913675543446,0.0,0.0353045013239188,0.0353045013239188,0.3571787674657609,0,1.6840247131509267
+gpt-4o,10,0.3746444651189953,0.12498238983123719,0.3667923043349673,0.0,0.0706090026478376,0.0706090026478376,0.37350313867182305,0,1.7899382171226832
+gpt-4o,50,0.40413933252744955,0.13782450337569063,0.39078212423794856,0.0,0.07590467784642542,0.07590467784642542,0.402816463024093,0,2.025595763459841
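The updated metrics file keeps one row per model and shot count, with the columns listed in the header line. A minimal sketch of slicing it with pandas, assuming it is read from the repository root:

# Sketch (not part of the commit): inspect the few-shot metrics with pandas.
import pandas as pd

df = pd.read_csv("results/mac-results_few_shots_metrics.csv")

# METEOR by model and shot count, one column per number of shots.
print(df.pivot(index="model", columns="shots", values="meteor").round(4))

# Best shot count per model according to the repetition-adjusted score (rap).
best = df.loc[df.groupby("model")["rap"].idxmax(), ["model", "shots", "rap"]]
print(best.to_string(index=False))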
scripts/tune-lf-4gpu.sh
ADDED
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+BASEDIR=$(dirname "$0")
+cd $BASEDIR/../llama-factory/
+echo Current Directory:
+pwd
+
+export ORG_NAME=$1
+export MODEL_NAME=$2
+export CHAT_TEMPLATE=$3
+export DATA_PATH=../datasets/mac/mac.tsv
+export YAML=config/mac_template_4gpu.yaml
+
+python ../llm_toolkit/setup_lf.py
+llamafactory-cli train config/models/$MODEL_NAME.yaml
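setup_lf.py itself is not part of this commit; presumably it fills the ORG_NAME, MODEL_NAME and CHAT_TEMPLATE placeholders in the template pointed to by $YAML and writes the per-model config that llamafactory-cli is then given. A hypothetical sketch of that substitution, with the paths taken from the script above and the implementation assumed:

# Hypothetical sketch of the placeholder substitution setup_lf.py presumably performs.
import os

template_path = os.getenv("YAML", "config/mac_template_4gpu.yaml")
with open(template_path) as f:
    config = f.read()

config = (
    config.replace("ORG_NAME", os.environ["ORG_NAME"])
    .replace("MODEL_NAME", os.environ["MODEL_NAME"])
    .replace("CHAT_TEMPLATE", os.environ["CHAT_TEMPLATE"])
)

os.makedirs("config/models", exist_ok=True)
with open(f"config/models/{os.environ['MODEL_NAME']}.yaml", "w") as f:
    f.write(config)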
scripts/tune-mac-4gpu.sh
CHANGED
@@ -14,6 +14,6 @@ grep MemTotal /proc/meminfo
 #pip install -r requirements.txt
 #cd ../LLaMA-Factory && pip install -e .[torch,metrics,vllm] && cd -
 
-./scripts/tune-lf.sh Qwen Qwen2-72B-Instruct qwen
+./scripts/tune-lf-4gpu.sh Qwen Qwen2-72B-Instruct qwen
 
-./scripts/tune-lf.sh shenzhi-wang Llama3.1-70B-Chinese-Chat llama3
+./scripts/tune-lf-4gpu.sh shenzhi-wang Llama3.1-70B-Chinese-Chat llama3