dh-mc committed
Commit a2100ac · 1 Parent(s): 22c212e

train with 4gpu

llama-factory/config/mac_template_4gpu.yaml ADDED
@@ -0,0 +1,42 @@
+ ### model
+ model_name_or_path: ORG_NAME/MODEL_NAME
+
+ ### method
+ stage: sft
+ do_train: true
+ finetuning_type: lora
+ lora_target: all
+
+ ### dataset
+ dataset: alpaca_mac
+ template: CHAT_TEMPLATE
+ cutoff_len: 1024
+ max_samples: 4528
+ overwrite_cache: true
+ preprocessing_num_workers: 16
+
+ ### output
+ output_dir: saves/MODEL_NAME
+ logging_steps: 5
+ save_steps: 70
+ plot_loss: true
+ # overwrite_output_dir: true
+
+ ### train
+ per_device_train_batch_size: 8
+ gradient_accumulation_steps: 8
+ learning_rate: 1.0e-4
+ num_train_epochs: 6.0
+ lr_scheduler_type: cosine
+ warmup_ratio: 0.1
+ bf16: true
+ ddp_timeout: 180000000
+
+ ### eval
+ val_size: 0.01
+ per_device_eval_batch_size: 1
+ eval_strategy: steps
+ eval_steps: 70
+
+ report_to: wandb
+ run_name: MODEL_NAME_lora_sft
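The commit title says this template targets a 4-GPU run, so the effective training schedule follows from the values above. A minimal back-of-the-envelope sketch; the GPU count is an assumption taken from the commit title, not from any training log:

```python
# Schedule math for mac_template_4gpu.yaml.
# Assumption: 4 GPUs (per the commit title "train with 4gpu").
num_gpus = 4
per_device_train_batch_size = 8
gradient_accumulation_steps = 8
max_samples = 4528
val_size = 0.01
num_train_epochs = 6.0

effective_batch_size = per_device_train_batch_size * gradient_accumulation_steps * num_gpus
train_samples = int(max_samples * (1 - val_size))        # ~4482 training samples
steps_per_epoch = train_samples // effective_batch_size  # optimizer steps per epoch
total_steps = int(steps_per_epoch * num_train_epochs)

print(f"effective batch size: {effective_batch_size}")  # 256
print(f"steps per epoch: ~{steps_per_epoch}")            # ~17
print(f"total optimizer steps: ~{total_steps}")          # ~102
```

With roughly 17 optimizer steps per epoch, `save_steps: 70` and `eval_steps: 70` land about once every four epochs rather than every epoch.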
llm_toolkit/eval_epochs.py ADDED
@@ -0,0 +1,155 @@
+ import os
+ import sys
+ import subprocess
+
+ import torch
+ from dotenv import find_dotenv, load_dotenv
+
+ from llm_toolkit.llm_utils import *
+ from llm_toolkit.translation_utils import *
+
+
+ def evaluate_model_all_epochs(
+     model,
+     tokenizer,
+     model_name,
+     adapter_path_base,
+     load_in_4bit=True,
+     num_of_entries=-1,
+     result_file=None,
+     start_epoch=0,
+     end_epoch=-1,
+ ):
+     new_env = os.environ.copy()
+     new_env["MODEL_NAME"] = model_name
+     # short model name (org prefix dropped) used for log file names
+     model = model_name.split("/")[-1]
+
+     new_env["LOAD_IN_4BIT"] = "true" if load_in_4bit else "false"
+     if result_file is not None:
+         new_env["RESULTS_PATH"] = result_file
+
+     subdirs = []
+     if adapter_path_base is None:
+         num_train_epochs = 0
+         print(f"No adapter path provided. Running with base model: {model_name}")
+     else:
+         # find checkpoint subdirectories in adapter_path_base
+         # and sort them numerically by step (checkpoint-70, checkpoint-140, ...)
+         subdirs = [
+             d
+             for d in os.listdir(adapter_path_base)
+             if os.path.isdir(os.path.join(adapter_path_base, d))
+         ]
+         subdirs = sorted(subdirs, key=lambda x: int(x.split("-")[-1]))
+
+         if end_epoch >= 0:
+             num_train_epochs = end_epoch
+             print(f"Running from epoch {start_epoch} to {end_epoch}")
+         else:
+             num_train_epochs = len(subdirs)
+             print(f"found {num_train_epochs} checkpoints: {subdirs}")
+
+     for i in range(start_epoch, num_train_epochs + 1):
+         print(f"Epoch {i}")
+         if i == 0:
+             # epoch 0 evaluates the base model: drop any adapter from the child env
+             new_env.pop("ADAPTER_NAME_OR_PATH", None)
+         else:
+             adapter_path = adapter_path_base + "/" + subdirs[i - 1]
+             new_env["ADAPTER_NAME_OR_PATH"] = adapter_path
+
+         print(f"adapter path: {new_env.get('ADAPTER_NAME_OR_PATH')}")
+
+         log_file = "./logs/{}_epoch_{}.txt".format(model, i)
+         with open(log_file, "w") as f_obj:
+             subprocess.run(
+                 f"python llm_toolkit/eval_shots.py {num_of_entries}",
+                 shell=True,
+                 env=new_env,
+                 stdout=f_obj,
+                 text=True,
+             )
+
+
+ if __name__ == "__main__":
+     found_dotenv = find_dotenv(".env")
+
+     if len(found_dotenv) == 0:
+         found_dotenv = find_dotenv(".env.example")
+     print(f"loading env vars from: {found_dotenv}")
+     load_dotenv(found_dotenv, override=False)
+
+     working_dir = os.path.dirname(found_dotenv)
+     os.chdir(working_dir)
+     print("working dir:", working_dir)
+     print(f"adding {working_dir} to sys.path")
+     sys.path.append(working_dir)
+
+     model_name = os.getenv("MODEL_NAME")
+     adapter_path_base = os.getenv("ADAPTER_PATH_BASE")
+     data_path = os.getenv("DATA_PATH")
+     start_epoch = int(os.getenv("START_EPOCH", 0))
+     end_epoch = int(os.getenv("END_EPOCH", -1))
+     load_in_4bit = os.getenv("LOAD_IN_4BIT", "true").lower() == "true"
+     result_file = os.getenv("RESULTS_PATH", None)
+
+     num_of_entries = int(sys.argv[1]) if len(sys.argv) > 1 else -1
+
+     print(
+         model_name,
+         adapter_path_base,
+         load_in_4bit,
+         start_epoch,
+         result_file,
+     )
+
+     device = check_gpu()
+     is_cuda = torch.cuda.is_available()
+
+     print(f"Evaluating model: {model_name} on {device}")
+
+     if is_cuda:
+         torch.cuda.empty_cache()
+         gpu_stats = torch.cuda.get_device_properties(0)
+         start_gpu_memory = round(
+             torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3
+         )
+         max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+         print(f"(0) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+         print(f"{start_gpu_memory} GB of memory reserved.")
+
+     model, tokenizer = load_model(model_name, load_in_4bit=load_in_4bit)
+
+     datasets = load_translation_dataset(data_path, tokenizer, num_shots=0)
+     print_row_details(datasets["test"].to_pandas())
+
+     if is_cuda:
+         gpu_stats = torch.cuda.get_device_properties(0)
+         start_gpu_memory = round(
+             torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3
+         )
+         max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+         print(f"(1) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+         print(f"{start_gpu_memory} GB of memory reserved.")
+
+     evaluate_model_all_epochs(
+         model,
+         tokenizer,
+         model_name,
+         adapter_path_base,
+         start_epoch=start_epoch,
+         end_epoch=end_epoch,
+         load_in_4bit=load_in_4bit,
+         num_of_entries=num_of_entries,
+         result_file=result_file,
+     )
+
+     if is_cuda:
+         gpu_stats = torch.cuda.get_device_properties(0)
+         start_gpu_memory = round(
+             torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3
+         )
+         max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+         print(f"(2) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+         print(f"{start_gpu_memory} GB of memory reserved.")
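For reference, a hypothetical way to drive eval_epochs.py. The env-var names and the positional argument come from the script itself; the paths and the checkpoint layout (directories named checkpoint-70, checkpoint-140, ... per the YAML's save_steps) are illustrative assumptions:

```python
# Hypothetical driver for eval_epochs.py; paths are illustrative assumptions,
# only the env-var names come from the script above.
import os
import subprocess

env = os.environ.copy()
env["MODEL_NAME"] = "Qwen/Qwen2-72B-Instruct"
env["ADAPTER_PATH_BASE"] = "llama-factory/saves/Qwen2-72B-Instruct"  # assumed layout
env["START_EPOCH"] = "0"   # epoch 0 = base model, no adapter
env["END_EPOCH"] = "-1"    # -1 = evaluate every checkpoint found
env["LOAD_IN_4BIT"] = "true"
env["RESULTS_PATH"] = "results/mac-results.csv"  # assumed file name

# optional positional arg limits evaluation to the first N test entries
subprocess.run("python llm_toolkit/eval_epochs.py 10", shell=True, env=env)
```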
llm_toolkit/{eval.py → eval_shots.py} RENAMED
File without changes
notebooks/00b_Data Analysis_Few_Shots.ipynb CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9659822963e465fcc321c5cc538b9e2eabefcc5492ddc1eaaafd510e854b7202
- size 835024
+ oid sha256:ab12535095ccc1615c5379d154c81142686e4fa3dc33fc51808b679fd6fa7116
+ size 1234635
results/mac-results_few_shots_metrics.csv CHANGED
@@ -1,41 +1,47 @@
  model,shots,meteor,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rap,num_max_output_tokens,eval_time
- Qwen/Qwen2-72B-Instruct,0,0.4003638205699929,0.12223832517678616,0.3847629463750985,0.0,0.19593998234774934,0.19593998234774934,0.3970180421898014,1,8.894969108561341
- Qwen/Qwen2-72B-Instruct,1,0.4068727655718769,0.13151008586303575,0.3944115167988717,0.0,0.15798764342453664,0.15798764342453664,0.4041216347207881,1,8.983230361871138
- Qwen/Qwen2-72B-Instruct,3,0.4086244766794449,0.13771788946915253,0.39794626590394355,0.0,0.12709620476610767,0.12709620476610767,0.4063954239173824,0,11.657546337157987
- Qwen/Qwen2-72B-Instruct,5,0.4132330811975005,0.1439773872150899,0.4036491307620504,0.0,0.11915269196822595,0.11915269196822595,0.41111822769434864,0,17.167696381288614
- Qwen/Qwen2-72B-Instruct,10,0.41598174489789025,0.14493475334416772,0.4065107334928856,0.0,0.09620476610767872,0.09620476610767872,0.4142591929807702,0,29.728155339805824
- Qwen/Qwen2-72B-Instruct,50,0.4401536409204816,0.1538634893900684,0.4172243423111057,0.0,0.10150044130626655,0.10150044130626655,0.43823160654983345,0,112.50397175639894
- Qwen/Qwen2-7B-Instruct,0,0.377477070949433,0.11783492823424507,0.3679428026833277,0.0,0.07149161518093557,0.07149161518093557,0.3763128359886437,0,0.9805825242718447
- Qwen/Qwen2-7B-Instruct,1,0.38000752971097884,0.11731917392837622,0.3716350861124088,0.0,0.07413945278022947,0.07413945278022947,0.37879237953430883,0,1.0529567519858782
- Qwen/Qwen2-7B-Instruct,3,0.38678180999660744,0.12368875746156333,0.3775321935602687,0.0,0.1412180052956752,0.1412180052956752,0.38444052153933106,0,1.6010591350397176
- Qwen/Qwen2-7B-Instruct,5,0.38784856371389564,0.1227725469820483,0.3821016566709109,0.0,0.09179170344218888,0.09179170344218888,0.38631555618548774,0,2.2894969108561343
- Qwen/Qwen2-7B-Instruct,10,0.38526484346757095,0.12535252418966952,0.3819282327854221,0.0,0.10326566637246248,0.10326566637246248,0.3835535147682633,0,4.006178287731686
- Qwen/Qwen2-7B-Instruct,50,0.3953455943001352,0.12949951844499932,0.3902417317381649,0.0,0.10061782877316858,0.10061782877316858,0.39363409715118836,0,17.46425419240953
- internlm/internlm2_5-7b-chat,0,0.36816799960793073,0.11360521358693174,0.3600976114330957,0.0,0.2144748455428067,0.2144748455428067,0.3648059323539847,0,1.2241835834068844
- internlm/internlm2_5-7b-chat,1,0.3719587471180722,0.1157707566176535,0.3636179646738983,0.0,0.14033539276257723,0.14033539276257723,0.36972107700643503,0,1.3124448367166814
- internlm/internlm2_5-7b-chat,3,0.3747105229822289,0.1154826016668525,0.3683821652101455,0.0,0.17740511915269197,0.17740511915269197,0.37187052462735126,0,1.8578993821712269
- internlm/internlm2_5-7b-chat,5,0.37285562384505977,0.11541534709366409,0.3683735908407485,0.0,0.14827890556045895,0.14827890556045895,0.37048732274065205,0,2.860547219770521
- internlm/internlm2_5-7b-chat,10,0.3750895095392996,0.11696492920010637,0.3679980919671262,0.0,0.13062665489849956,0.13062665489849956,0.37298723763770353,0,5.722859664607237
- internlm/internlm2_5-7b-chat,50,0.37213069871716603,0.11404688073207249,0.36275069101944124,0.0,0.16857899382171226,0.16857899382171226,0.3694484047441432,8,42.29214474845543
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,0,0.3638704024273502,0.10874677881601094,0.3532404744913337,0.0,0.15445719329214475,0.15445719329214475,0.3614642386796342,0,7.8331862312444835
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1,0.37956764543783084,0.11805442002282653,0.3695558384587787,0.0,0.12533097969991175,0.12533097969991175,0.3775255236309064,0,8.307149161518094
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,3,0.38622483411876246,0.12306660851355093,0.374537863679436,0.0,0.14386584289496912,0.14386584289496912,0.38384366117983154,0,11.681376875551633
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,5,0.3895488616778815,0.12582029733797498,0.37834594185588616,0.0,0.14386584289496912,0.14386584289496912,0.38714719527562863,0,17.23389232127096
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,0,0.3284997502705771,0.08313795089297474,0.31822074756329843,0.0,0.12797881729920565,0.12797881729920565,0.3266954815790356,0,1.204766107678729
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1,0.3342115436248988,0.08857909016110346,0.3235832621122292,0.0,0.6690203000882613,0.6690203000882613,0.3250691235789747,1,1.4819064430714917
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,3,0.3435621946945506,0.09605927100886698,0.3339835066382988,0.0,0.1262135922330097,0.1262135922330097,0.3417009211692762,0,2.262135922330097
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,5,0.34429078135481284,0.09638489591361771,0.33821501828549394,0.0,0.07590467784642542,0.07590467784642542,0.34316381414750663,2,3.3883495145631066
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,10,0.3408431061510028,0.09735670499814125,0.3331263575051163,0.0,0.10414827890556046,0.10414827890556046,0.339316280986861,11,6.558693733451015
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,50,0.36184201368489227,0.104864029030581,0.34553515243463534,0.0,0.5136804942630185,0.5136804942630185,0.35413782502473057,2,29.659311562224183
- gpt-4o-mini,0,0.3797696357415517,0.1208238389018596,0.37039368743666584,0.0,0.09532215357458076,0.09532215357458076,0.37821133607113916,0,1.5939982347749337
- gpt-4o-mini,1,0.37721414424357197,0.12013402254992751,0.36725632920414286,0.0,0.09179170344218888,0.09179170344218888,0.37572317024740703,0,1.5666372462488967
- gpt-4o-mini,3,0.3772985230936086,0.12400311006855895,0.36822411288225565,0.0,0.09179170344218888,0.09179170344218888,0.3758072155821894,0,1.2868490732568403
- gpt-4o-mini,5,0.35541821046691263,0.1202464326274801,0.3472080523493449,0.0,0.05030891438658429,0.05030891438658429,0.3546452926906339,0,1.203883495145631
- gpt-4o-mini,10,0.37335968903521094,0.1257600824824953,0.36566914304808296,0.0,0.0706090026478376,0.0706090026478376,0.37222227656264567,0,1.1879964695498677
- gpt-4o-mini,50,0.4044690970661121,0.13972883920222515,0.39117898438510224,0.0,0.08473080317740513,0.08473080317740513,0.4029924080114739,0,1.289496910856134
- gpt-4o,0,0.3797419877414444,0.12054600115274576,0.3704197875229349,0.0,0.09532215357458076,0.09532215357458076,0.37818380151840997,0,1.528684907325684
- gpt-4o,1,0.37588586538591867,0.12049862468096047,0.36572207722254313,0.0,0.09179170344218888,0.09179170344218888,0.3744001415355042,0,1.203883495145631
- gpt-4o,3,0.3768512103553621,0.12408746322526747,0.36677555510252513,0.0,0.09355692850838482,0.09355692850838482,0.3753332737090981,0,2.05207413945278
- gpt-4o,5,0.35772544915145654,0.12169683347842021,0.3480606673293163,0.0,0.0353045013239188,0.0353045013239188,0.3571787674657609,0,1.6840247131509267
- gpt-4o,10,0.3746444651189953,0.12498238983123719,0.36695107795536486,0.0,0.0706090026478376,0.0706090026478376,0.37350313867182305,0,1.7899382171226832
- gpt-4o,50,0.40413933252744955,0.13782450337569063,0.3905031632820436,0.0,0.07590467784642542,0.07590467784642542,0.402816463024093,0,2.025595763459841
+ Qwen/Qwen2-72B-Instruct,0,0.4003638205699929,0.12223832517678616,0.38486660208459095,0.0,0.19593998234774934,0.19593998234774934,0.3970180421898014,1,8.894969108561341
+ Qwen/Qwen2-72B-Instruct,1,0.4068727655718769,0.13151008586303575,0.3946283255752747,0.0,0.15798764342453664,0.15798764342453664,0.4041216347207881,1,8.983230361871138
+ Qwen/Qwen2-72B-Instruct,3,0.4086244766794449,0.13771788946915253,0.3979712282250465,0.0,0.12709620476610767,0.12709620476610767,0.4063954239173824,0,11.657546337157987
+ Qwen/Qwen2-72B-Instruct,5,0.4132330811975005,0.1439773872150899,0.40387035186513487,0.0,0.11915269196822595,0.11915269196822595,0.41111822769434864,0,17.167696381288614
+ Qwen/Qwen2-72B-Instruct,10,0.41598174489789025,0.14493475334416772,0.4065933507975943,0.0,0.09620476610767872,0.09620476610767872,0.4142591929807702,0,29.728155339805824
+ Qwen/Qwen2-72B-Instruct,50,0.4401536409204816,0.1538634893900684,0.41722880607716234,0.0,0.10150044130626655,0.10150044130626655,0.43823160654983345,0,112.50397175639894
+ Qwen/Qwen2-7B-Instruct,0,0.377477070949433,0.11783492823424507,0.3678523300904837,0.0,0.07149161518093557,0.07149161518093557,0.3763128359886437,0,0.9805825242718447
+ Qwen/Qwen2-7B-Instruct,1,0.38000752971097884,0.11731917392837622,0.371517786678723,0.0,0.07413945278022947,0.07413945278022947,0.37879237953430883,0,1.0529567519858782
+ Qwen/Qwen2-7B-Instruct,3,0.38678180999660744,0.12368875746156333,0.3780278278830778,0.0,0.1412180052956752,0.1412180052956752,0.38444052153933106,0,1.6010591350397176
+ Qwen/Qwen2-7B-Instruct,5,0.38784856371389564,0.1227725469820483,0.38246119910508375,0.0,0.09179170344218888,0.09179170344218888,0.38631555618548774,0,2.2894969108561343
+ Qwen/Qwen2-7B-Instruct,10,0.38526484346757095,0.12535252418966952,0.38202725422893463,0.0,0.10326566637246248,0.10326566637246248,0.3835535147682633,0,4.006178287731686
+ Qwen/Qwen2-7B-Instruct,50,0.3953455943001352,0.12949951844499932,0.3899754114871057,0.0,0.10061782877316858,0.10061782877316858,0.39363409715118836,0,17.46425419240953
+ internlm/internlm2_5-7b-chat,0,0.36816799960793073,0.11360521358693174,0.3600058558701442,0.0,0.2144748455428067,0.2144748455428067,0.3648059323539847,0,1.2241835834068844
+ internlm/internlm2_5-7b-chat,1,0.3719587471180722,0.1157707566176535,0.36379026028083117,0.0,0.14033539276257723,0.14033539276257723,0.36972107700643503,0,1.3124448367166814
+ internlm/internlm2_5-7b-chat,3,0.3747105229822289,0.1154826016668525,0.36859373323449984,0.0,0.17740511915269197,0.17740511915269197,0.37187052462735126,0,1.8578993821712269
+ internlm/internlm2_5-7b-chat,5,0.37285562384505977,0.11541534709366409,0.36845885184482197,0.0,0.14827890556045895,0.14827890556045895,0.37048732274065205,0,2.860547219770521
+ internlm/internlm2_5-7b-chat,10,0.3750895095392996,0.11696492920010637,0.36774089220788087,0.0,0.13062665489849956,0.13062665489849956,0.37298723763770353,0,5.722859664607237
+ internlm/internlm2_5-7b-chat,50,0.37213069871716603,0.11404688073207249,0.3627041392544321,0.0,0.16857899382171226,0.16857899382171226,0.3694484047441432,8,42.29214474845543
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,0,0.3638704024273502,0.10874677881601094,0.35336472352140924,0.0,0.15445719329214475,0.15445719329214475,0.3614642386796342,0,7.8331862312444835
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1,0.37956764543783084,0.11805442002282653,0.36984338962652286,0.0,0.12533097969991175,0.12533097969991175,0.3775255236309064,0,8.307149161518094
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,3,0.38622483411876246,0.12306660851355093,0.37461197525974343,0.0,0.14386584289496912,0.14386584289496912,0.38384366117983154,0,11.681376875551633
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,5,0.3895488616778815,0.12582029733797498,0.37850976779334966,0.0,0.14386584289496912,0.14386584289496912,0.38714719527562863,0,17.23389232127096
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,10,0.3897515010230098,0.12957008401715697,0.3804272354384455,0.0,0.1262135922330097,0.1262135922330097,0.3876399935245347,0,
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,0,0.3476217085789916,0.09799438963103267,0.33493508618013,0.0,2.2162400706090026,2.2162400706090026,0.319817551404022,2,1.0750220653133276
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1,0.35430794534292803,0.10438367949419078,0.34360907692906495,0.0,0.19505736981465135,0.19505736981465135,0.3513601482457364,0,1.1571050308914386
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,3,0.3600739839089376,0.10709900175348612,0.35151668174502293,0.0,0.14827890556045895,0.14827890556045895,0.3577868691137631,0,1.9814651368049427
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,5,0.36231904915539526,0.11204735364530892,0.3555966636828387,0.0,0.15798764342453664,0.15798764342453664,0.3598691748988386,0,2.8146513680494265
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,10,0.3615257973929306,0.10125226501021815,0.35395169595888565,0.0,0.8314210061782877,0.8314210061782877,0.34940648099916116,22,
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,0,0.3284997502705771,0.08313795089297474,0.31837381406868526,0.0,0.12797881729920565,0.12797881729920565,0.3266954815790356,0,1.204766107678729
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1,0.3342115436248988,0.08857909016110346,0.32376944492764814,0.0,0.6690203000882613,0.6690203000882613,0.3250691235789747,1,1.4819064430714917
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,3,0.3435621946945506,0.09605927100886698,0.33411105509944494,0.0,0.1262135922330097,0.1262135922330097,0.3417009211692762,0,2.262135922330097
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,5,0.34429078135481284,0.09638489591361771,0.33818535378281456,0.0,0.07590467784642542,0.07590467784642542,0.34316381414750663,2,3.3883495145631066
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,10,0.3408431061510028,0.09735670499814125,0.3331892932821734,0.0,0.10414827890556046,0.10414827890556046,0.339316280986861,11,6.558693733451015
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,50,0.36184201368489227,0.104864029030581,0.3457669052738412,0.0,0.5136804942630185,0.5136804942630185,0.35413782502473057,2,29.659311562224183
+ gpt-4o-mini,0,0.3797696357415517,0.1208238389018596,0.37055778050320864,0.0,0.09532215357458076,0.09532215357458076,0.37821133607113916,0,1.5939982347749337
+ gpt-4o-mini,1,0.37721414424357197,0.12013402254992751,0.36752595751803463,0.0,0.09179170344218888,0.09179170344218888,0.37572317024740703,0,1.5666372462488967
+ gpt-4o-mini,3,0.3772985230936086,0.12400311006855895,0.3682965259271725,0.0,0.09179170344218888,0.09179170344218888,0.3758072155821894,0,1.2868490732568403
+ gpt-4o-mini,5,0.35541821046691263,0.1202464326274801,0.34743907979125577,0.0,0.05030891438658429,0.05030891438658429,0.3546452926906339,0,1.203883495145631
+ gpt-4o-mini,10,0.37335968903521094,0.1257600824824953,0.3655455159774728,0.0,0.0706090026478376,0.0706090026478376,0.37222227656264567,0,1.1879964695498677
+ gpt-4o-mini,50,0.4044690970661121,0.13972883920222515,0.3915950550621088,0.0,0.08473080317740513,0.08473080317740513,0.4029924080114739,0,1.289496910856134
+ gpt-4o,0,0.3797419877414444,0.12054600115274576,0.37050277223396,0.0,0.09532215357458076,0.09532215357458076,0.37818380151840997,0,1.528684907325684
+ gpt-4o,1,0.37588586538591867,0.12049862468096047,0.36605424160788713,0.0,0.09179170344218888,0.09179170344218888,0.3744001415355042,0,1.203883495145631
+ gpt-4o,3,0.3768512103553621,0.12408746322526747,0.3667929041403734,0.0,0.09355692850838482,0.09355692850838482,0.3753332737090981,0,2.05207413945278
+ gpt-4o,5,0.35772544915145654,0.12169683347842021,0.3484913675543446,0.0,0.0353045013239188,0.0353045013239188,0.3571787674657609,0,1.6840247131509267
+ gpt-4o,10,0.3746444651189953,0.12498238983123719,0.3667923043349673,0.0,0.0706090026478376,0.0706090026478376,0.37350313867182305,0,1.7899382171226832
+ gpt-4o,50,0.40413933252744955,0.13782450337569063,0.39078212423794856,0.0,0.07590467784642542,0.07590467784642542,0.402816463024093,0,2.025595763459841
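To eyeball how the few-shot metrics scale with shot count across models, a minimal sketch assuming pandas is available; the column names are taken from the CSV header above:

```python
# Pivot the few-shot metrics so each row is a model and each column a shot
# count, showing how meteor changes as shots increase.
import pandas as pd

df = pd.read_csv("results/mac-results_few_shots_metrics.csv")
pivot = df.pivot(index="model", columns="shots", values="meteor")
print(pivot.round(3).to_string())
```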
scripts/tune-lf-4gpu.sh ADDED
@@ -0,0 +1,15 @@
+ #!/bin/sh
+
+ BASEDIR=$(dirname "$0")
+ cd "$BASEDIR/../llama-factory/"
+ echo Current Directory:
+ pwd
+
+ export ORG_NAME=$1
+ export MODEL_NAME=$2
+ export CHAT_TEMPLATE=$3
+ export DATA_PATH=../datasets/mac/mac.tsv
+ export YAML=config/mac_template_4gpu.yaml
+
+ python ../llm_toolkit/setup_lf.py
+ llamafactory-cli train config/models/$MODEL_NAME.yaml
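setup_lf.py is not part of this commit, but given the env vars the script exports and the per-model config it trains from, it presumably renders the YAML template into config/models/$MODEL_NAME.yaml. A hypothetical reconstruction, assuming plain string substitution of the ORG_NAME/MODEL_NAME/CHAT_TEMPLATE placeholders; the real implementation may differ:

```python
# Hypothetical sketch of setup_lf.py: substitute the placeholders in the YAML
# template and write config/models/<MODEL_NAME>.yaml. The placeholder and
# env-var names come from the files in this commit; the rest is an assumption.
import os

template_path = os.getenv("YAML", "config/mac_template_4gpu.yaml")
model_name = os.environ["MODEL_NAME"]

with open(template_path) as f:
    config = f.read()

config = (
    config.replace("ORG_NAME", os.environ["ORG_NAME"])
    .replace("CHAT_TEMPLATE", os.environ["CHAT_TEMPLATE"])
    .replace("MODEL_NAME", model_name)
)

out_path = f"config/models/{model_name}.yaml"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
with open(out_path, "w") as f:
    f.write(config)
print(f"wrote {out_path}")
```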
scripts/tune-mac-4gpu.sh CHANGED
@@ -14,6 +14,6 @@ grep MemTotal /proc/meminfo
  #pip install -r requirements.txt
  #cd ../LLaMA-Factory && pip install -e .[torch,metrics,vllm] && cd -

- ./scripts/tune-lf.sh Qwen Qwen2-72B-Instruct qwen
+ ./scripts/tune-lf-4gpu.sh Qwen Qwen2-72B-Instruct qwen

- ./scripts/tune-lf.sh shenzhi-wang Llama3.1-70B-Chinese-Chat llama3
+ ./scripts/tune-lf-4gpu.sh shenzhi-wang Llama3.1-70B-Chinese-Chat llama3