dh-mc committed
Commit 36cb2cb
1 Parent(s): 1323386

llama-factory finetuning on Google Colab

.gitignore CHANGED
@@ -148,3 +148,4 @@ dmypy.json
  /models
  /llama.cpp
  /huggingface_tokenizers_cache
+ /llama-factory/huggingface_tokenizers_cache
competition/03_EDA_en.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
config/qwen2_0.5b_lora_sft.yaml DELETED
@@ -1,39 +0,0 @@
- ### model
- model_name_or_path: Qwen/Qwen2-0.5B-Instruct
-
- ### method
- stage: sft
- do_train: true
- finetuning_type: lora
- lora_target: all
-
- ### dataset
- dataset: alpaca_mac
- template: chatml
- cutoff_len: 1024
- max_samples: 4528
- overwrite_cache: true
- preprocessing_num_workers: 16
-
- ### output
- output_dir: saves/qwen2-0.5b/lora/sft
- logging_steps: 10
- save_steps: 560
- plot_loss: true
- overwrite_output_dir: true
-
- ### train
- per_device_train_batch_size: 1
- gradient_accumulation_steps: 8
- learning_rate: 1.0e-4
- num_train_epochs: 10.0
- lr_scheduler_type: cosine
- warmup_ratio: 0.1
- bf16: true
- ddp_timeout: 180000000
-
- ### eval
- val_size: 0.01
- per_device_eval_batch_size: 1
- eval_strategy: steps
- eval_steps: 560
config/qwen2_1.5b_lora_sft.yaml DELETED
@@ -1,39 +0,0 @@
- ### model
- model_name_or_path: Qwen/Qwen2-1.5B-Instruct
-
- ### method
- stage: sft
- do_train: true
- finetuning_type: lora
- lora_target: all
-
- ### dataset
- dataset: alpaca_mac
- template: chatml
- cutoff_len: 1024
- max_samples: 4528
- overwrite_cache: true
- preprocessing_num_workers: 16
-
- ### output
- output_dir: saves/qwen2-1.5b/lora/sft
- logging_steps: 10
- save_steps: 560
- plot_loss: true
- overwrite_output_dir: true
-
- ### train
- per_device_train_batch_size: 1
- gradient_accumulation_steps: 8
- learning_rate: 1.0e-4
- num_train_epochs: 10.0
- lr_scheduler_type: cosine
- warmup_ratio: 0.1
- bf16: true
- ddp_timeout: 180000000
-
- ### eval
- val_size: 0.01
- per_device_eval_batch_size: 1
- eval_strategy: steps
- eval_steps: 560
config/qwen2_7b_lora_sft.yaml DELETED
@@ -1,39 +0,0 @@
- ### model
- model_name_or_path: Qwen/Qwen2-7B-Instruct
-
- ### method
- stage: sft
- do_train: true
- finetuning_type: lora
- lora_target: all
-
- ### dataset
- dataset: alpaca_mac
- template: chatml
- cutoff_len: 1024
- max_samples: 4528
- overwrite_cache: true
- preprocessing_num_workers: 16
-
- ### output
- output_dir: saves/qwen2-7b/lora/sft
- logging_steps: 10
- save_steps: 560
- plot_loss: true
- overwrite_output_dir: true
-
- ### train
- per_device_train_batch_size: 1
- gradient_accumulation_steps: 8
- learning_rate: 1.0e-4
- num_train_epochs: 10.0
- lr_scheduler_type: cosine
- warmup_ratio: 0.1
- bf16: true
- ddp_timeout: 180000000
-
- ### eval
- val_size: 0.01
- per_device_eval_batch_size: 1
- eval_strategy: steps
- eval_steps: 560
data/alpaca_mac.json DELETED
The diff for this file is too large to render. See raw diff
 
data/dataset_info.json DELETED
@@ -1,568 +0,0 @@
- {
-   "alpaca_mac": {
-     "file_name": "alpaca_mac.json"
-   },
-   "identity": {
-     "file_name": "identity.json"
-   },
-   "alpaca_en_demo": {
-     "file_name": "alpaca_en_demo.json"
-   },
-   "alpaca_zh_demo": {
-     "file_name": "alpaca_zh_demo.json"
-   },
-   "glaive_toolcall_en_demo": {
-     "file_name": "glaive_toolcall_en_demo.json",
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "conversations",
-       "tools": "tools"
-     }
-   },
-   "glaive_toolcall_zh_demo": {
-     "file_name": "glaive_toolcall_zh_demo.json",
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "conversations",
-       "tools": "tools"
-     }
-   },
-   "mllm_demo": {
-     "file_name": "mllm_demo.json",
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "messages",
-       "images": "images"
-     },
-     "tags": {
-       "role_tag": "role",
-       "content_tag": "content",
-       "user_tag": "user",
-       "assistant_tag": "assistant"
-     }
-   },
-   "alpaca_en": {
-     "hf_hub_url": "llamafactory/alpaca_en",
-     "ms_hub_url": "llamafactory/alpaca_en"
-   },
-   "alpaca_zh": {
-     "hf_hub_url": "llamafactory/alpaca_zh",
-     "ms_hub_url": "llamafactory/alpaca_zh"
-   },
-   "alpaca_gpt4_en": {
-     "hf_hub_url": "llamafactory/alpaca_gpt4_en",
-     "ms_hub_url": "llamafactory/alpaca_gpt4_en"
-   },
-   "alpaca_gpt4_zh": {
-     "hf_hub_url": "llamafactory/alpaca_gpt4_zh",
-     "ms_hub_url": "llamafactory/alpaca_gpt4_zh"
-   },
-   "glaive_toolcall_en": {
-     "hf_hub_url": "llamafactory/glaive_toolcall_en",
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "conversations",
-       "tools": "tools"
-     }
-   },
-   "glaive_toolcall_zh": {
-     "hf_hub_url": "llamafactory/glaive_toolcall_zh",
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "conversations",
-       "tools": "tools"
-     }
-   },
-   "lima": {
-     "hf_hub_url": "llamafactory/lima",
-     "formatting": "sharegpt"
-   },
-   "guanaco": {
-     "hf_hub_url": "JosephusCheung/GuanacoDataset",
-     "ms_hub_url": "AI-ModelScope/GuanacoDataset"
-   },
-   "belle_2m": {
-     "hf_hub_url": "BelleGroup/train_2M_CN",
-     "ms_hub_url": "AI-ModelScope/train_2M_CN"
-   },
-   "belle_1m": {
-     "hf_hub_url": "BelleGroup/train_1M_CN",
-     "ms_hub_url": "AI-ModelScope/train_1M_CN"
-   },
-   "belle_0.5m": {
-     "hf_hub_url": "BelleGroup/train_0.5M_CN",
-     "ms_hub_url": "AI-ModelScope/train_0.5M_CN"
-   },
-   "belle_dialog": {
-     "hf_hub_url": "BelleGroup/generated_chat_0.4M",
-     "ms_hub_url": "AI-ModelScope/generated_chat_0.4M"
-   },
-   "belle_math": {
-     "hf_hub_url": "BelleGroup/school_math_0.25M",
-     "ms_hub_url": "AI-ModelScope/school_math_0.25M"
-   },
-   "belle_multiturn": {
-     "script_url": "belle_multiturn",
-     "formatting": "sharegpt"
-   },
-   "ultra_chat": {
-     "script_url": "ultra_chat",
-     "formatting": "sharegpt"
-   },
-   "open_platypus": {
-     "hf_hub_url": "garage-bAInd/Open-Platypus",
-     "ms_hub_url": "AI-ModelScope/Open-Platypus"
-   },
-   "codealpaca": {
-     "hf_hub_url": "sahil2801/CodeAlpaca-20k",
-     "ms_hub_url": "AI-ModelScope/CodeAlpaca-20k"
-   },
-   "alpaca_cot": {
-     "hf_hub_url": "QingyiSi/Alpaca-CoT",
-     "ms_hub_url": "AI-ModelScope/Alpaca-CoT"
-   },
-   "openorca": {
-     "hf_hub_url": "Open-Orca/OpenOrca",
-     "ms_hub_url": "AI-ModelScope/OpenOrca",
-     "columns": {
-       "prompt": "question",
-       "response": "response",
-       "system": "system_prompt"
-     }
-   },
-   "slimorca": {
-     "hf_hub_url": "Open-Orca/SlimOrca",
-     "formatting": "sharegpt"
-   },
-   "mathinstruct": {
-     "hf_hub_url": "TIGER-Lab/MathInstruct",
-     "ms_hub_url": "AI-ModelScope/MathInstruct",
-     "columns": {
-       "prompt": "instruction",
-       "response": "output"
-     }
-   },
-   "firefly": {
-     "hf_hub_url": "YeungNLP/firefly-train-1.1M",
-     "columns": {
-       "prompt": "input",
-       "response": "target"
-     }
-   },
-   "wikiqa": {
-     "hf_hub_url": "wiki_qa",
-     "columns": {
-       "prompt": "question",
-       "response": "answer"
-     }
-   },
-   "webqa": {
-     "hf_hub_url": "suolyer/webqa",
-     "ms_hub_url": "AI-ModelScope/webqa",
-     "columns": {
-       "prompt": "input",
-       "response": "output"
-     }
-   },
-   "webnovel": {
-     "hf_hub_url": "zxbsmk/webnovel_cn",
-     "ms_hub_url": "AI-ModelScope/webnovel_cn"
-   },
-   "nectar_sft": {
-     "hf_hub_url": "AstraMindAI/SFT-Nectar",
-     "ms_hub_url": "AI-ModelScope/SFT-Nectar"
-   },
-   "deepctrl": {
-     "ms_hub_url": "deepctrl/deepctrl-sft-data"
-   },
-   "adgen": {
-     "hf_hub_url": "HasturOfficial/adgen",
-     "ms_hub_url": "AI-ModelScope/adgen",
-     "columns": {
-       "prompt": "content",
-       "response": "summary"
-     }
-   },
-   "sharegpt_hyper": {
-     "hf_hub_url": "totally-not-an-llm/sharegpt-hyperfiltered-3k",
-     "formatting": "sharegpt"
-   },
-   "sharegpt4": {
-     "hf_hub_url": "shibing624/sharegpt_gpt4",
-     "ms_hub_url": "AI-ModelScope/sharegpt_gpt4",
-     "formatting": "sharegpt"
-   },
-   "ultrachat_200k": {
-     "hf_hub_url": "HuggingFaceH4/ultrachat_200k",
-     "ms_hub_url": "AI-ModelScope/ultrachat_200k",
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "messages"
-     },
-     "tags": {
-       "role_tag": "role",
-       "content_tag": "content",
-       "user_tag": "user",
-       "assistant_tag": "assistant"
-     }
-   },
-   "agent_instruct": {
-     "hf_hub_url": "THUDM/AgentInstruct",
-     "ms_hub_url": "ZhipuAI/AgentInstruct",
-     "formatting": "sharegpt"
-   },
-   "lmsys_chat": {
-     "hf_hub_url": "lmsys/lmsys-chat-1m",
-     "ms_hub_url": "AI-ModelScope/lmsys-chat-1m",
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "conversation"
-     },
-     "tags": {
-       "role_tag": "role",
-       "content_tag": "content",
-       "user_tag": "human",
-       "assistant_tag": "assistant"
-     }
-   },
-   "evol_instruct": {
-     "hf_hub_url": "WizardLM/WizardLM_evol_instruct_V2_196k",
-     "ms_hub_url": "AI-ModelScope/WizardLM_evol_instruct_V2_196k",
-     "formatting": "sharegpt"
-   },
-   "glaive_toolcall_100k": {
-     "hf_hub_url": "hiyouga/glaive-function-calling-v2-sharegpt",
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "conversations",
-       "tools": "tools"
-     }
-   },
-   "cosmopedia": {
-     "hf_hub_url": "HuggingFaceTB/cosmopedia",
-     "columns": {
-       "prompt": "prompt",
-       "response": "text"
-     }
-   },
-   "stem_zh": {
-     "hf_hub_url": "hfl/stem_zh_instruction"
-   },
-   "ruozhiba_gpt4": {
-     "hf_hub_url": "hfl/ruozhiba_gpt4_turbo"
-   },
-   "neo_sft": {
-     "hf_hub_url": "m-a-p/neo_sft_phase2",
-     "formatting": "sharegpt"
-   },
-   "magpie_pro_300k": {
-     "hf_hub_url": "Magpie-Align/Magpie-Pro-300K-Filtered",
-     "formatting": "sharegpt"
-   },
-   "web_instruct": {
-     "hf_hub_url": "TIGER-Lab/WebInstructSub",
-     "columns": {
-       "prompt": "question",
-       "response": "answer"
-     }
-   },
-   "llava_1k_en": {
-     "hf_hub_url": "BUAADreamer/llava-en-zh-2k",
-     "subset": "en",
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "messages",
-       "images": "images"
-     },
-     "tags": {
-       "role_tag": "role",
-       "content_tag": "content",
-       "user_tag": "user",
-       "assistant_tag": "assistant"
-     }
-   },
-   "llava_1k_zh": {
-     "hf_hub_url": "BUAADreamer/llava-en-zh-2k",
-     "subset": "zh",
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "messages",
-       "images": "images"
-     },
-     "tags": {
-       "role_tag": "role",
-       "content_tag": "content",
-       "user_tag": "user",
-       "assistant_tag": "assistant"
-     }
-   },
-   "llava_150k_en": {
-     "hf_hub_url": "BUAADreamer/llava-en-zh-300k",
-     "subset": "en",
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "messages",
-       "images": "images"
-     },
-     "tags": {
-       "role_tag": "role",
-       "content_tag": "content",
-       "user_tag": "user",
-       "assistant_tag": "assistant"
-     }
-   },
-   "llava_150k_zh": {
-     "hf_hub_url": "BUAADreamer/llava-en-zh-300k",
-     "subset": "zh",
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "messages",
-       "images": "images"
-     },
-     "tags": {
-       "role_tag": "role",
-       "content_tag": "content",
-       "user_tag": "user",
-       "assistant_tag": "assistant"
-     }
-   },
-   "mllm_pt_demo": {
-     "hf_hub_url": "BUAADreamer/mllm_pt_demo",
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "messages",
-       "images": "images"
-     },
-     "tags": {
-       "role_tag": "role",
-       "content_tag": "content",
-       "user_tag": "user",
-       "assistant_tag": "assistant"
-     }
-   },
-   "oasst_de": {
-     "hf_hub_url": "mayflowergmbh/oasst_de"
-   },
-   "dolly_15k_de": {
-     "hf_hub_url": "mayflowergmbh/dolly-15k_de"
-   },
-   "alpaca-gpt4_de": {
-     "hf_hub_url": "mayflowergmbh/alpaca-gpt4_de"
-   },
-   "openschnabeltier_de": {
-     "hf_hub_url": "mayflowergmbh/openschnabeltier_de"
-   },
-   "evol_instruct_de": {
-     "hf_hub_url": "mayflowergmbh/evol-instruct_de"
-   },
-   "dolphin_de": {
-     "hf_hub_url": "mayflowergmbh/dolphin_de"
-   },
-   "booksum_de": {
-     "hf_hub_url": "mayflowergmbh/booksum_de"
-   },
-   "airoboros_de": {
-     "hf_hub_url": "mayflowergmbh/airoboros-3.0_de"
-   },
-   "ultrachat_de": {
-     "hf_hub_url": "mayflowergmbh/ultra-chat_de"
-   },
-   "dpo_en_demo": {
-     "file_name": "dpo_en_demo.json",
-     "ranking": true,
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "conversations",
-       "chosen": "chosen",
-       "rejected": "rejected"
-     }
-   },
-   "dpo_zh_demo": {
-     "file_name": "dpo_zh_demo.json",
-     "ranking": true,
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "conversations",
-       "chosen": "chosen",
-       "rejected": "rejected"
-     }
-   },
-   "dpo_mix_en": {
-     "hf_hub_url": "hiyouga/DPO-En-Zh-20k",
-     "subset": "en",
-     "ranking": true,
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "conversations",
-       "chosen": "chosen",
-       "rejected": "rejected"
-     }
-   },
-   "dpo_mix_zh": {
-     "hf_hub_url": "hiyouga/DPO-En-Zh-20k",
-     "subset": "zh",
-     "ranking": true,
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "conversations",
-       "chosen": "chosen",
-       "rejected": "rejected"
-     }
-   },
-   "ultrafeedback": {
-     "hf_hub_url": "llamafactory/ultrafeedback_binarized",
-     "ms_hub_url": "llamafactory/ultrafeedback_binarized",
-     "ranking": true,
-     "columns": {
-       "prompt": "instruction",
-       "chosen": "chosen",
-       "rejected": "rejected"
-     }
-   },
-   "orca_pairs": {
-     "hf_hub_url": "Intel/orca_dpo_pairs",
-     "ranking": true,
-     "columns": {
-       "prompt": "question",
-       "chosen": "chosen",
-       "rejected": "rejected",
-       "system": "system"
-     }
-   },
-   "hh_rlhf_en": {
-     "script_url": "hh_rlhf_en",
-     "ranking": true,
-     "columns": {
-       "prompt": "instruction",
-       "chosen": "chosen",
-       "rejected": "rejected",
-       "history": "history"
-     }
-   },
-   "nectar_rm": {
-     "hf_hub_url": "AstraMindAI/RLAIF-Nectar",
-     "ms_hub_url": "AI-ModelScope/RLAIF-Nectar",
-     "ranking": true
-   },
-   "orca_dpo_de": {
-     "hf_hub_url": "mayflowergmbh/intel_orca_dpo_pairs_de",
-     "ranking": true
-   },
-   "kto_en_demo": {
-     "file_name": "kto_en_demo.json",
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "messages",
-       "kto_tag": "label"
-     },
-     "tags": {
-       "role_tag": "role",
-       "content_tag": "content",
-       "user_tag": "user",
-       "assistant_tag": "assistant"
-     }
-   },
-   "kto_mix_en": {
-     "hf_hub_url": "argilla/kto-mix-15k",
-     "formatting": "sharegpt",
-     "columns": {
-       "messages": "completion",
-       "kto_tag": "label"
-     },
-     "tags": {
-       "role_tag": "role",
-       "content_tag": "content",
-       "user_tag": "user",
-       "assistant_tag": "assistant"
-     }
-   },
-   "ultrafeedback_kto": {
-     "hf_hub_url": "argilla/ultrafeedback-binarized-preferences-cleaned-kto",
-     "ms_hub_url": "AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto",
-     "columns": {
-       "prompt": "prompt",
-       "response": "completion",
-       "kto_tag": "label"
-     }
-   },
-   "wiki_demo": {
-     "file_name": "wiki_demo.txt",
-     "columns": {
-       "prompt": "text"
-     }
-   },
-   "c4_demo": {
-     "file_name": "c4_demo.json",
-     "columns": {
-       "prompt": "text"
-     }
-   },
-   "refinedweb": {
-     "hf_hub_url": "tiiuae/falcon-refinedweb",
-     "columns": {
-       "prompt": "content"
-     }
-   },
-   "redpajama_v2": {
-     "hf_hub_url": "togethercomputer/RedPajama-Data-V2",
-     "columns": {
-       "prompt": "raw_content"
-     },
-     "subset": "default"
-   },
-   "wikipedia_en": {
-     "hf_hub_url": "olm/olm-wikipedia-20221220",
-     "ms_hub_url": "AI-ModelScope/olm-wikipedia-20221220",
-     "columns": {
-       "prompt": "text"
-     }
-   },
-   "wikipedia_zh": {
-     "hf_hub_url": "pleisto/wikipedia-cn-20230720-filtered",
-     "ms_hub_url": "AI-ModelScope/wikipedia-cn-20230720-filtered",
-     "columns": {
-       "prompt": "completion"
-     }
-   },
-   "pile": {
-     "hf_hub_url": "monology/pile-uncopyrighted",
-     "ms_hub_url": "AI-ModelScope/pile",
-     "columns": {
-       "prompt": "text"
-     }
-   },
-   "skypile": {
-     "hf_hub_url": "Skywork/SkyPile-150B",
-     "ms_hub_url": "AI-ModelScope/SkyPile-150B",
-     "columns": {
-       "prompt": "text"
-     }
-   },
-   "fineweb": {
-     "hf_hub_url": "HuggingFaceFW/fineweb",
-     "columns": {
-       "prompt": "text"
-     }
-   },
-   "fineweb_edu": {
-     "hf_hub_url": "HuggingFaceFW/fineweb-edu",
-     "columns": {
-       "prompt": "text"
-     }
-   },
-   "the_stack": {
-     "hf_hub_url": "bigcode/the-stack",
-     "ms_hub_url": "AI-ModelScope/the-stack",
-     "columns": {
-       "prompt": "content"
-     }
-   },
-   "starcoder_python": {
-     "hf_hub_url": "bigcode/starcoderdata",
-     "ms_hub_url": "AI-ModelScope/starcoderdata",
-     "columns": {
-       "prompt": "content"
-     },
-     "folder": "python"
-   }
- }
datasets/mac/mac-test.tsv CHANGED
The diff for this file is too large to render. See raw diff
 
llama-factory/config/llama3_8b_lora_sft.yaml CHANGED
@@ -8,7 +8,7 @@ finetuning_type: lora
  lora_target: all
  quantization_bit: 4 # use 4-bit QLoRA
  loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
- # use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training
+ use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training

  ### dataset
  dataset: alpaca_mac
@@ -20,7 +20,7 @@ preprocessing_num_workers: 16

  ### output
  # output_dir: saves/llama3-8b/lora/sft
- output_dir: /Workspace/Users/donghao.huang@mastercard.com/lf-saves/llama3-8b/lora/sft/
+ output_dir: /content/llama3-8b/
  logging_steps: 10
  save_steps: 100
  plot_loss: true
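This config now enables Unsloth's LoRA optimization and writes checkpoints to /content, the local working disk of a Colab VM, instead of the previous workspace path. A minimal launch sketch, assuming LLaMA-Factory is already installed in the runtime and the command is run from the llama-factory/ directory (the launch command itself is not part of this commit):

    cd llama-factory
    llamafactory-cli train config/llama3_8b_lora_sft.yaml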
llama-factory/config/qwen2_7b_lora_sft.yaml CHANGED
@@ -6,34 +6,41 @@ stage: sft
  do_train: true
  finetuning_type: lora
  lora_target: all
+ quantization_bit: 4 # use 4-bit QLoRA
+ loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
+ # use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training

  ### dataset
  dataset: alpaca_mac
  template: chatml
  cutoff_len: 1024
- max_samples: 4528
+ max_samples: 50
  overwrite_cache: true
  preprocessing_num_workers: 16

  ### output
- output_dir: saves/qwen2-7b/lora/sft
+ output_dir: /content/qwen2-7b/
  logging_steps: 10
- save_steps: 560
+ save_steps: 10
  plot_loss: true
  overwrite_output_dir: true
+ # resume_from_checkpoint: true

  ### train
  per_device_train_batch_size: 1
  gradient_accumulation_steps: 8
  learning_rate: 1.0e-4
- num_train_epochs: 10.0
+ num_train_epochs: 6.0
  lr_scheduler_type: cosine
  warmup_ratio: 0.1
  bf16: true
  ddp_timeout: 180000000

  ### eval
- val_size: 0.01
+ val_size: 0.02
  per_device_eval_batch_size: 1
  eval_strategy: steps
- eval_steps: 560
+ eval_steps: 10
+
+ report_to: wandb
+ run_name: qwen2_7b_mac_colab # optional
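Compared with the deleted top-level config, this one adds 4-bit QLoRA with LoRA+, shrinks the run to max_samples: 50 for quick Colab iterations, and logs to Weights & Biases under the run name qwen2_7b_mac_colab. Because of report_to: wandb, credentials must be available before training starts; a hedged sketch of a Colab cell, assuming the key is supplied through the standard WANDB_API_KEY environment variable:

    export WANDB_API_KEY=<your key>   # `wandb login` works as well
    cd llama-factory
    llamafactory-cli train config/qwen2_7b_lora_sft.yaml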
llm_toolkit/eval_mac.py CHANGED
@@ -2,6 +2,8 @@ import os
  import sys
  import torch
  from dotenv import find_dotenv, load_dotenv
+ from llamafactory.chat import ChatModel
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

  found_dotenv = find_dotenv(".env")

@@ -14,7 +16,6 @@ path = os.path.dirname(found_dotenv)
  print(f"Adding {path} to sys.path")
  sys.path.append(path)

- from llm_toolkit.translation_engine import *
  from llm_toolkit.translation_utils import *

  model_name = os.getenv("MODEL_NAME")
@@ -25,6 +26,48 @@ results_path = os.getenv("RESULTS_PATH")

  print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)

+
+ def load_model(
+     model_name,
+     max_seq_length=2048,
+     dtype=None,
+     load_in_4bit=False,
+     adapter_name_or_path=None,
+ ):
+     print(f"loading model: {model_name}")
+
+     if adapter_name_or_path:
+         template = "llama3" if "llama-3" in model_name.lower() else "chatml"
+
+         args = dict(
+             model_name_or_path=model_name,
+             adapter_name_or_path=adapter_name_or_path,  # load the saved LoRA adapters
+             template=template,  # same to the one in training
+             finetuning_type="lora",  # same to the one in training
+             quantization_bit=4,  # load 4-bit quantized model
+         )
+         chat_model = ChatModel(args)
+         return chat_model.engine.model, chat_model.engine.tokenizer
+
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     bnb_config = BitsAndBytesConfig(
+         load_in_4bit=True,
+         bnb_4bit_quant_type="nf4",
+         bnb_4bit_use_double_quant=False,
+         bnb_4bit_compute_dtype=torch.bfloat16,
+     )
+
+     model = AutoModelForCausalLM.from_pretrained(
+         model_name,
+         quantization_config=bnb_config,
+         # attn_implementation="flash_attention_2",
+         trust_remote_code=True,
+         device_map="auto",
+     )
+
+     return model, tokenizer
+
+
  gpu_stats = torch.cuda.get_device_properties(0)
  start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
  max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
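eval_mac.py now constructs the model itself: when an adapter path is set it loads the LoRA checkpoint through LLaMA-Factory's ChatModel, otherwise it falls back to a plain 4-bit BitsAndBytes load of the base model. The script is driven by environment variables; MODEL_NAME and RESULTS_PATH are visible in the diff above, while the remaining variable names and values below are assumptions for illustration only:

    export MODEL_NAME=Qwen/Qwen2-7B-Instruct
    export ADAPTER_NAME_OR_PATH=/content/qwen2-7b    # assumed env var name, not shown in this diff
    export DATA_PATH=datasets/mac/mac.tsv            # assumed env var name and path
    export RESULTS_PATH=results/mac-results_lf.csv
    python llm_toolkit/eval_mac.py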
llm_toolkit/translation_engine.py CHANGED
@@ -1,11 +1,9 @@
  import os
  import pandas as pd
- from datasets import load_dataset
  import torch
  from unsloth import FastLanguageModel, is_bfloat16_supported
  from trl import SFTTrainer
  from transformers import TrainingArguments, TextStreamer
- from tqdm import tqdm
  from llm_toolkit.translation_utils import *
  from llamafactory.chat import ChatModel

@@ -36,7 +34,7 @@ def load_model(
  ):
      print(f"loading model: {model_name}")

-     if adapter_name_or_path is not None:
+     if adapter_name_or_path:
          args = dict(
              model_name_or_path=model_name,
              adapter_name_or_path=adapter_name_or_path,  # load the saved LoRA adapters
@@ -130,156 +128,3 @@ def load_trainer(
      )

      return trainer
-
-
- def load_translation_dataset(data_path, tokenizer=None):
-     train_data_file = data_path.replace(".tsv", "-train.tsv")
-     test_data_file = data_path.replace(".tsv", "-test.tsv")
-
-     if not os.path.exists(train_data_file):
-         print("generating train/test data files")
-         dataset = load_dataset(
-             "csv", data_files=data_path, delimiter="\t", split="train"
-         )
-         print(len(dataset))
-         dataset = dataset.filter(lambda x: x["chinese"] and x["english"])
-
-         datasets = dataset.train_test_split(test_size=0.2)
-         print(len(dataset))
-
-         # Convert to pandas DataFrame
-         train_df = pd.DataFrame(datasets["train"])
-         test_df = pd.DataFrame(datasets["test"])
-
-         # Save to TSV
-         train_df.to_csv(train_data_file, sep="\t", index=False)
-         test_df.to_csv(test_data_file, sep="\t", index=False)
-
-     print("loading train/test data files")
-     datasets = load_dataset(
-         "csv",
-         data_files={"train": train_data_file, "test": test_data_file},
-         delimiter="\t",
-     )
-
-     if tokenizer:
-         translation_prompt = "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n{}"
-
-         def formatting_prompts_func(examples):
-             inputs = examples["chinese"]
-             outputs = examples["english"]
-
-             messages = [
-                 {
-                     "role": "system",
-                     "content": "You are an expert in translating Chinese to English.",
-                 },
-                 None,
-             ]
-
-             model_name = os.getenv("MODEL_NAME")
-
-             if "mistral" in model_name.lower():
-                 messages = messages[1:]
-
-             texts = []
-             prompts = []
-             for input, output in zip(inputs, outputs):
-                 prompt = translation_prompt.format(input)
-                 messages[-1] = {"role": "user", "content": prompt}
-
-                 prompt = tokenizer.apply_chat_template(
-                     messages, tokenize=False, add_generation_prompt=True
-                 )
-                 prompts.append(prompt)
-                 texts.append(prompt + output + tokenizer.eos_token)
-             return {"text": texts, "prompt": prompts}
-
-         datasets = datasets.map(
-             formatting_prompts_func,
-             batched=True,
-         )
-
-     print(datasets)
-     return datasets
-
-
- def eval_model(model, tokenizer, eval_dataset):
-     total = len(eval_dataset)
-     predictions = []
-     for i in tqdm(range(total)):
-         inputs = tokenizer(
-             eval_dataset["prompt"][i : i + 1],
-             return_tensors="pt",
-         ).to("cuda")
-
-         outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)
-         decoded_output = tokenizer.batch_decode(outputs)
-         debug = i == 0
-         decoded_output = [
-             extract_answer(output, debug=debug) for output in decoded_output
-         ]
-         predictions.extend(decoded_output)
-
-     return predictions
-
-
- def save_model(
-     model,
-     tokenizer,
-     include_gguf=True,
-     include_merged=True,
-     publish=True,
- ):
-     try:
-         token = os.getenv("HF_TOKEN") or None
-         model_name = os.getenv("MODEL_NAME")
-
-         save_method = "lora"
-         quantization_method = "q5_k_m"
-
-         model_names = get_model_names(
-             model_name, save_method=save_method, quantization_method=quantization_method
-         )
-
-         model.save_pretrained(model_names["local"])
-         tokenizer.save_pretrained(model_names["local"])
-
-         if publish:
-             model.push_to_hub(
-                 model_names["hub"],
-                 token=token,
-             )
-             tokenizer.push_to_hub(
-                 model_names["hub"],
-                 token=token,
-             )
-
-         if include_merged:
-             model.save_pretrained_merged(
-                 model_names["local"] + "-merged", tokenizer, save_method=save_method
-             )
-             if publish:
-                 model.push_to_hub_merged(
-                     model_names["hub"] + "-merged",
-                     tokenizer,
-                     save_method="lora",
-                     token="",
-                 )
-
-         if include_gguf:
-             model.save_pretrained_gguf(
-                 model_names["local-gguf"],
-                 tokenizer,
-                 quantization_method=quantization_method,
-             )
-
-             if publish:
-                 model.push_to_hub_gguf(
-                     model_names["hub-gguf"],
-                     tokenizer,
-                     quantization_method=quantization_method,
-                     token=token,
-                 )
-     except Exception as e:
-         print(e)
llm_toolkit/translation_utils.py CHANGED
@@ -4,10 +4,12 @@ import pandas as pd
  import evaluate
  import seaborn as sns
  import matplotlib.pyplot as plt
-
+ from datasets import load_dataset
  from langchain_openai import ChatOpenAI
  from langchain_core.prompts import ChatPromptTemplate
+ from tqdm import tqdm

+ print(f"loading {__file__}")

  bleu = evaluate.load("bleu")
  rouge = evaluate.load("rouge")
@@ -85,6 +87,159 @@ def save_results(model_name, results_path, dataset, predictions, debug=False):
  df.to_csv(results_path, index=False)


+ def load_translation_dataset(data_path, tokenizer=None):
+     train_data_file = data_path.replace(".tsv", "-train.tsv")
+     test_data_file = data_path.replace(".tsv", "-test.tsv")
+
+     if not os.path.exists(train_data_file):
+         print("generating train/test data files")
+         dataset = load_dataset(
+             "csv", data_files=data_path, delimiter="\t", split="train"
+         )
+         print(len(dataset))
+         dataset = dataset.filter(lambda x: x["chinese"] and x["english"])
+
+         datasets = dataset.train_test_split(test_size=0.2)
+         print(len(dataset))
+
+         # Convert to pandas DataFrame
+         train_df = pd.DataFrame(datasets["train"])
+         test_df = pd.DataFrame(datasets["test"])
+
+         # Save to TSV
+         train_df.to_csv(train_data_file, sep="\t", index=False)
+         test_df.to_csv(test_data_file, sep="\t", index=False)
+
+     print("loading train/test data files")
+     datasets = load_dataset(
+         "csv",
+         data_files={"train": train_data_file, "test": test_data_file},
+         delimiter="\t",
+     )
+
+     if tokenizer:
+         translation_prompt = "Please translate the following Chinese text into English and provide only the translated content, nothing else.\n{}"
+
+         def formatting_prompts_func(examples):
+             inputs = examples["chinese"]
+             outputs = examples["english"]
+
+             messages = [
+                 {
+                     "role": "system",
+                     "content": "You are an expert in translating Chinese to English.",
+                 },
+                 None,
+             ]
+
+             model_name = os.getenv("MODEL_NAME")
+
+             if "mistral" in model_name.lower():
+                 messages = messages[1:]
+
+             texts = []
+             prompts = []
+             for input, output in zip(inputs, outputs):
+                 prompt = translation_prompt.format(input)
+                 messages[-1] = {"role": "user", "content": prompt}
+
+                 prompt = tokenizer.apply_chat_template(
+                     messages, tokenize=False, add_generation_prompt=True
+                 )
+                 prompts.append(prompt)
+                 texts.append(prompt + output + tokenizer.eos_token)
+             return {"text": texts, "prompt": prompts}
+
+         datasets = datasets.map(
+             formatting_prompts_func,
+             batched=True,
+         )
+
+     print(datasets)
+     return datasets
+
+
+ def eval_model(model, tokenizer, eval_dataset):
+     total = len(eval_dataset)
+     predictions = []
+     for i in tqdm(range(total)):
+         inputs = tokenizer(
+             eval_dataset["prompt"][i : i + 1],
+             return_tensors="pt",
+         ).to("cuda")
+
+         outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)
+         decoded_output = tokenizer.batch_decode(outputs)
+         debug = i == 0
+         decoded_output = [
+             extract_answer(output, debug=debug) for output in decoded_output
+         ]
+         predictions.extend(decoded_output)
+
+     return predictions
+
+
+ def save_model(
+     model,
+     tokenizer,
+     include_gguf=True,
+     include_merged=True,
+     publish=True,
+ ):
+     try:
+         token = os.getenv("HF_TOKEN") or None
+         model_name = os.getenv("MODEL_NAME")
+
+         save_method = "lora"
+         quantization_method = "q5_k_m"
+
+         model_names = get_model_names(
+             model_name, save_method=save_method, quantization_method=quantization_method
+         )
+
+         model.save_pretrained(model_names["local"])
+         tokenizer.save_pretrained(model_names["local"])
+
+         if publish:
+             model.push_to_hub(
+                 model_names["hub"],
+                 token=token,
+             )
+             tokenizer.push_to_hub(
+                 model_names["hub"],
+                 token=token,
+             )
+
+         if include_merged:
+             model.save_pretrained_merged(
+                 model_names["local"] + "-merged", tokenizer, save_method=save_method
+             )
+             if publish:
+                 model.push_to_hub_merged(
+                     model_names["hub"] + "-merged",
+                     tokenizer,
+                     save_method="lora",
+                     token="",
+                 )
+
+         if include_gguf:
+             model.save_pretrained_gguf(
+                 model_names["local-gguf"],
+                 tokenizer,
+                 quantization_method=quantization_method,
+             )
+
+             if publish:
+                 model.push_to_hub_gguf(
+                     model_names["hub-gguf"],
+                     tokenizer,
+                     quantization_method=quantization_method,
+                     token=token,
+                 )
+     except Exception as e:
+         print(e)
+
+
  def get_metrics(df):
      metrics_df = pd.DataFrame(df.columns.T)[2:]
      metrics_df.rename(columns={0: "model"}, inplace=True)
notebooks/00_fine-tune-with-colab.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,4 +1,4 @@
- huggingface_hub==0.23.0
+ huggingface_hub==0.23.2
  nltk==3.8.1
  python-dotenv==1.0.1
  black==24.4.0
@@ -10,5 +10,7 @@ scikit-learn==1.5.0
  jupyter
  ipywidgets
  packaging
+ langchain_openai==0.1.13
+ wandb==0.17.4
  # triton
  # xformers
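huggingface_hub gets a patch bump, and langchain_openai and wandb are now pinned to match the new ChatOpenAI import in translation_utils.py and the wandb reporting in the training config. On a fresh Colab runtime the pinned dependencies can be installed in the usual way:

    pip install -r requirements.txt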
results/mac-results_lf.csv CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c5acc087808de5df6839cbf7b170094c6e63445aab4bea15e4be9564b905eb51
- size 3236072
+ oid sha256:20e21280e557b2e3292a686267318a757c1ed8f370da290df4f1825c98c51152
+ size 11580
scripts/tune-lf.sh CHANGED
@@ -5,4 +5,5 @@ cd $BASEDIR/../llama-factory
  echo Current Directory:
  pwd

+ YAML=$1 python -c 'import os, json, sys, yaml; filename=os.getenv("YAML"); y=yaml.safe_load(open(filename)) ; print(f"{filename}:\n", json.dumps(y, indent=2))'
  llamafactory-cli train $1
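The new line pretty-prints the YAML config as JSON before handing it to llamafactory-cli, so the Colab log records exactly which hyperparameters a run used. Usage is unchanged; for example, from the repository root (the config path argument is resolved relative to llama-factory/ after the script's cd):

    ./scripts/tune-lf.sh config/qwen2_7b_lora_sft.yaml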