inflaton committed on
Commit
31cb217
·
1 Parent(s): 2b7032e

prep for tuning with P2

Browse files
.gitattributes CHANGED
@@ -49,3 +49,6 @@ results/mgtv-results_nv4080_r2.csv filter=lfs diff=lfs merge=lfs -text
49
  results/mgtv-results_t4.csv filter=lfs diff=lfs merge=lfs -text
50
  results/mgtv-results_t4_r2.csv filter=lfs diff=lfs merge=lfs -text
51
  results/mgtv-results_t4_r3.csv filter=lfs diff=lfs merge=lfs -text
 
 
 
 
49
  results/mgtv-results_t4.csv filter=lfs diff=lfs merge=lfs -text
50
  results/mgtv-results_t4_r2.csv filter=lfs diff=lfs merge=lfs -text
51
  results/mgtv-results_t4_r3.csv filter=lfs diff=lfs merge=lfs -text
52
+ llama-factory/data/alpaca_mac.json filter=lfs diff=lfs merge=lfs -text
53
+ llama-factory/data/alpaca_mgtv_p2.json filter=lfs diff=lfs merge=lfs -text
54
+ llama-factory/data/dataset_info.json filter=lfs diff=lfs merge=lfs -text
competition/08c_InterLM_finetuning_NV4080_p2.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
llama-factory/config/internlm2_5_7b_lora_sft_4bit_p2.yaml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### model
2
+ model_name_or_path: internlm/internlm2_5-7b-chat-1m
3
+
4
+ ### method
5
+ stage: sft
6
+ do_train: true
7
+ finetuning_type: lora
8
+ lora_target: all
9
+ quantization_bit: 4 # use 4-bit QLoRA
10
+ loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
11
+ # use_unsloth: true # use UnslothAI's LoRA optimization for 2x faster training
12
+ upcast_layernorm: true
13
+
14
+ ### dataset
15
+ dataset: alpaca_mgtv_p2
16
+ template: chatml
17
+ cutoff_len: 1024
18
+ max_samples: 5000
19
+ overwrite_cache: true
20
+ preprocessing_num_workers: 16
21
+
22
+ ### output
23
+ output_dir: saves/internlm2_5_7b/lora/sft_p2
24
+ logging_steps: 100
25
+ save_steps: 562
26
+ plot_loss: true
27
+ overwrite_output_dir: true
28
+ # resume_from_checkpoint: true
29
+
30
+ ### train
31
+ per_device_train_batch_size: 1
32
+ gradient_accumulation_steps: 8
33
+ learning_rate: 1.0e-4
34
+ num_train_epochs: 6.0
35
+ lr_scheduler_type: cosine
36
+ warmup_ratio: 0.1
37
+ bf16: true
38
+ ddp_timeout: 180000000
39
+
40
+ ### eval
41
+ val_size: 0.1
42
+ per_device_eval_batch_size: 1
43
+ eval_strategy: steps
44
+ eval_steps: 562
45
+
46
+ report_to: none
47
+ run_name: internlm2_5_7b # optional
llama-factory/data/alpaca_mac.json CHANGED
The diff for this file is too large to render. See raw diff
 
llama-factory/data/alpaca_mgtv_p2.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12360f3fa7e5b62de84fd7b7bb8153abd35932a754e310414663c229499b57e6
3
+ size 94426969
llama-factory/data/dataset_info.json CHANGED
@@ -1,574 +1,3 @@
1
- {
2
- "mgtv_train": {
3
- "file_name": "mgtv_train.json"
4
- },
5
- "alpaca_mgtv_p1": {
6
- "file_name": "alpaca_mgtv_p1.json"
7
- },
8
- "alpaca_mac": {
9
- "file_name": "alpaca_mac.json"
10
- },
11
- "identity": {
12
- "file_name": "identity.json"
13
- },
14
- "alpaca_en_demo": {
15
- "file_name": "alpaca_en_demo.json"
16
- },
17
- "alpaca_zh_demo": {
18
- "file_name": "alpaca_zh_demo.json"
19
- },
20
- "glaive_toolcall_en_demo": {
21
- "file_name": "glaive_toolcall_en_demo.json",
22
- "formatting": "sharegpt",
23
- "columns": {
24
- "messages": "conversations",
25
- "tools": "tools"
26
- }
27
- },
28
- "glaive_toolcall_zh_demo": {
29
- "file_name": "glaive_toolcall_zh_demo.json",
30
- "formatting": "sharegpt",
31
- "columns": {
32
- "messages": "conversations",
33
- "tools": "tools"
34
- }
35
- },
36
- "mllm_demo": {
37
- "file_name": "mllm_demo.json",
38
- "formatting": "sharegpt",
39
- "columns": {
40
- "messages": "messages",
41
- "images": "images"
42
- },
43
- "tags": {
44
- "role_tag": "role",
45
- "content_tag": "content",
46
- "user_tag": "user",
47
- "assistant_tag": "assistant"
48
- }
49
- },
50
- "alpaca_en": {
51
- "hf_hub_url": "llamafactory/alpaca_en",
52
- "ms_hub_url": "llamafactory/alpaca_en"
53
- },
54
- "alpaca_zh": {
55
- "hf_hub_url": "llamafactory/alpaca_zh",
56
- "ms_hub_url": "llamafactory/alpaca_zh"
57
- },
58
- "alpaca_gpt4_en": {
59
- "hf_hub_url": "llamafactory/alpaca_gpt4_en",
60
- "ms_hub_url": "llamafactory/alpaca_gpt4_en"
61
- },
62
- "alpaca_gpt4_zh": {
63
- "hf_hub_url": "llamafactory/alpaca_gpt4_zh",
64
- "ms_hub_url": "llamafactory/alpaca_gpt4_zh"
65
- },
66
- "glaive_toolcall_en": {
67
- "hf_hub_url": "llamafactory/glaive_toolcall_en",
68
- "formatting": "sharegpt",
69
- "columns": {
70
- "messages": "conversations",
71
- "tools": "tools"
72
- }
73
- },
74
- "glaive_toolcall_zh": {
75
- "hf_hub_url": "llamafactory/glaive_toolcall_zh",
76
- "formatting": "sharegpt",
77
- "columns": {
78
- "messages": "conversations",
79
- "tools": "tools"
80
- }
81
- },
82
- "lima": {
83
- "hf_hub_url": "llamafactory/lima",
84
- "formatting": "sharegpt"
85
- },
86
- "guanaco": {
87
- "hf_hub_url": "JosephusCheung/GuanacoDataset",
88
- "ms_hub_url": "AI-ModelScope/GuanacoDataset"
89
- },
90
- "belle_2m": {
91
- "hf_hub_url": "BelleGroup/train_2M_CN",
92
- "ms_hub_url": "AI-ModelScope/train_2M_CN"
93
- },
94
- "belle_1m": {
95
- "hf_hub_url": "BelleGroup/train_1M_CN",
96
- "ms_hub_url": "AI-ModelScope/train_1M_CN"
97
- },
98
- "belle_0.5m": {
99
- "hf_hub_url": "BelleGroup/train_0.5M_CN",
100
- "ms_hub_url": "AI-ModelScope/train_0.5M_CN"
101
- },
102
- "belle_dialog": {
103
- "hf_hub_url": "BelleGroup/generated_chat_0.4M",
104
- "ms_hub_url": "AI-ModelScope/generated_chat_0.4M"
105
- },
106
- "belle_math": {
107
- "hf_hub_url": "BelleGroup/school_math_0.25M",
108
- "ms_hub_url": "AI-ModelScope/school_math_0.25M"
109
- },
110
- "belle_multiturn": {
111
- "script_url": "belle_multiturn",
112
- "formatting": "sharegpt"
113
- },
114
- "ultra_chat": {
115
- "script_url": "ultra_chat",
116
- "formatting": "sharegpt"
117
- },
118
- "open_platypus": {
119
- "hf_hub_url": "garage-bAInd/Open-Platypus",
120
- "ms_hub_url": "AI-ModelScope/Open-Platypus"
121
- },
122
- "codealpaca": {
123
- "hf_hub_url": "sahil2801/CodeAlpaca-20k",
124
- "ms_hub_url": "AI-ModelScope/CodeAlpaca-20k"
125
- },
126
- "alpaca_cot": {
127
- "hf_hub_url": "QingyiSi/Alpaca-CoT",
128
- "ms_hub_url": "AI-ModelScope/Alpaca-CoT"
129
- },
130
- "openorca": {
131
- "hf_hub_url": "Open-Orca/OpenOrca",
132
- "ms_hub_url": "AI-ModelScope/OpenOrca",
133
- "columns": {
134
- "prompt": "question",
135
- "response": "response",
136
- "system": "system_prompt"
137
- }
138
- },
139
- "slimorca": {
140
- "hf_hub_url": "Open-Orca/SlimOrca",
141
- "formatting": "sharegpt"
142
- },
143
- "mathinstruct": {
144
- "hf_hub_url": "TIGER-Lab/MathInstruct",
145
- "ms_hub_url": "AI-ModelScope/MathInstruct",
146
- "columns": {
147
- "prompt": "instruction",
148
- "response": "output"
149
- }
150
- },
151
- "firefly": {
152
- "hf_hub_url": "YeungNLP/firefly-train-1.1M",
153
- "columns": {
154
- "prompt": "input",
155
- "response": "target"
156
- }
157
- },
158
- "wikiqa": {
159
- "hf_hub_url": "wiki_qa",
160
- "columns": {
161
- "prompt": "question",
162
- "response": "answer"
163
- }
164
- },
165
- "webqa": {
166
- "hf_hub_url": "suolyer/webqa",
167
- "ms_hub_url": "AI-ModelScope/webqa",
168
- "columns": {
169
- "prompt": "input",
170
- "response": "output"
171
- }
172
- },
173
- "webnovel": {
174
- "hf_hub_url": "zxbsmk/webnovel_cn",
175
- "ms_hub_url": "AI-ModelScope/webnovel_cn"
176
- },
177
- "nectar_sft": {
178
- "hf_hub_url": "AstraMindAI/SFT-Nectar",
179
- "ms_hub_url": "AI-ModelScope/SFT-Nectar"
180
- },
181
- "deepctrl": {
182
- "ms_hub_url": "deepctrl/deepctrl-sft-data"
183
- },
184
- "adgen": {
185
- "hf_hub_url": "HasturOfficial/adgen",
186
- "ms_hub_url": "AI-ModelScope/adgen",
187
- "columns": {
188
- "prompt": "content",
189
- "response": "summary"
190
- }
191
- },
192
- "sharegpt_hyper": {
193
- "hf_hub_url": "totally-not-an-llm/sharegpt-hyperfiltered-3k",
194
- "formatting": "sharegpt"
195
- },
196
- "sharegpt4": {
197
- "hf_hub_url": "shibing624/sharegpt_gpt4",
198
- "ms_hub_url": "AI-ModelScope/sharegpt_gpt4",
199
- "formatting": "sharegpt"
200
- },
201
- "ultrachat_200k": {
202
- "hf_hub_url": "HuggingFaceH4/ultrachat_200k",
203
- "ms_hub_url": "AI-ModelScope/ultrachat_200k",
204
- "formatting": "sharegpt",
205
- "columns": {
206
- "messages": "messages"
207
- },
208
- "tags": {
209
- "role_tag": "role",
210
- "content_tag": "content",
211
- "user_tag": "user",
212
- "assistant_tag": "assistant"
213
- }
214
- },
215
- "agent_instruct": {
216
- "hf_hub_url": "THUDM/AgentInstruct",
217
- "ms_hub_url": "ZhipuAI/AgentInstruct",
218
- "formatting": "sharegpt"
219
- },
220
- "lmsys_chat": {
221
- "hf_hub_url": "lmsys/lmsys-chat-1m",
222
- "ms_hub_url": "AI-ModelScope/lmsys-chat-1m",
223
- "formatting": "sharegpt",
224
- "columns": {
225
- "messages": "conversation"
226
- },
227
- "tags": {
228
- "role_tag": "role",
229
- "content_tag": "content",
230
- "user_tag": "human",
231
- "assistant_tag": "assistant"
232
- }
233
- },
234
- "evol_instruct": {
235
- "hf_hub_url": "WizardLM/WizardLM_evol_instruct_V2_196k",
236
- "ms_hub_url": "AI-ModelScope/WizardLM_evol_instruct_V2_196k",
237
- "formatting": "sharegpt"
238
- },
239
- "glaive_toolcall_100k": {
240
- "hf_hub_url": "hiyouga/glaive-function-calling-v2-sharegpt",
241
- "formatting": "sharegpt",
242
- "columns": {
243
- "messages": "conversations",
244
- "tools": "tools"
245
- }
246
- },
247
- "cosmopedia": {
248
- "hf_hub_url": "HuggingFaceTB/cosmopedia",
249
- "columns": {
250
- "prompt": "prompt",
251
- "response": "text"
252
- }
253
- },
254
- "stem_zh": {
255
- "hf_hub_url": "hfl/stem_zh_instruction"
256
- },
257
- "ruozhiba_gpt4": {
258
- "hf_hub_url": "hfl/ruozhiba_gpt4_turbo"
259
- },
260
- "neo_sft": {
261
- "hf_hub_url": "m-a-p/neo_sft_phase2",
262
- "formatting": "sharegpt"
263
- },
264
- "magpie_pro_300k": {
265
- "hf_hub_url": "Magpie-Align/Magpie-Pro-300K-Filtered",
266
- "formatting": "sharegpt"
267
- },
268
- "web_instruct": {
269
- "hf_hub_url": "TIGER-Lab/WebInstructSub",
270
- "columns": {
271
- "prompt": "question",
272
- "response": "answer"
273
- }
274
- },
275
- "llava_1k_en": {
276
- "hf_hub_url": "BUAADreamer/llava-en-zh-2k",
277
- "subset": "en",
278
- "formatting": "sharegpt",
279
- "columns": {
280
- "messages": "messages",
281
- "images": "images"
282
- },
283
- "tags": {
284
- "role_tag": "role",
285
- "content_tag": "content",
286
- "user_tag": "user",
287
- "assistant_tag": "assistant"
288
- }
289
- },
290
- "llava_1k_zh": {
291
- "hf_hub_url": "BUAADreamer/llava-en-zh-2k",
292
- "subset": "zh",
293
- "formatting": "sharegpt",
294
- "columns": {
295
- "messages": "messages",
296
- "images": "images"
297
- },
298
- "tags": {
299
- "role_tag": "role",
300
- "content_tag": "content",
301
- "user_tag": "user",
302
- "assistant_tag": "assistant"
303
- }
304
- },
305
- "llava_150k_en": {
306
- "hf_hub_url": "BUAADreamer/llava-en-zh-300k",
307
- "subset": "en",
308
- "formatting": "sharegpt",
309
- "columns": {
310
- "messages": "messages",
311
- "images": "images"
312
- },
313
- "tags": {
314
- "role_tag": "role",
315
- "content_tag": "content",
316
- "user_tag": "user",
317
- "assistant_tag": "assistant"
318
- }
319
- },
320
- "llava_150k_zh": {
321
- "hf_hub_url": "BUAADreamer/llava-en-zh-300k",
322
- "subset": "zh",
323
- "formatting": "sharegpt",
324
- "columns": {
325
- "messages": "messages",
326
- "images": "images"
327
- },
328
- "tags": {
329
- "role_tag": "role",
330
- "content_tag": "content",
331
- "user_tag": "user",
332
- "assistant_tag": "assistant"
333
- }
334
- },
335
- "mllm_pt_demo": {
336
- "hf_hub_url": "BUAADreamer/mllm_pt_demo",
337
- "formatting": "sharegpt",
338
- "columns": {
339
- "messages": "messages",
340
- "images": "images"
341
- },
342
- "tags": {
343
- "role_tag": "role",
344
- "content_tag": "content",
345
- "user_tag": "user",
346
- "assistant_tag": "assistant"
347
- }
348
- },
349
- "oasst_de": {
350
- "hf_hub_url": "mayflowergmbh/oasst_de"
351
- },
352
- "dolly_15k_de": {
353
- "hf_hub_url": "mayflowergmbh/dolly-15k_de"
354
- },
355
- "alpaca-gpt4_de": {
356
- "hf_hub_url": "mayflowergmbh/alpaca-gpt4_de"
357
- },
358
- "openschnabeltier_de": {
359
- "hf_hub_url": "mayflowergmbh/openschnabeltier_de"
360
- },
361
- "evol_instruct_de": {
362
- "hf_hub_url": "mayflowergmbh/evol-instruct_de"
363
- },
364
- "dolphin_de": {
365
- "hf_hub_url": "mayflowergmbh/dolphin_de"
366
- },
367
- "booksum_de": {
368
- "hf_hub_url": "mayflowergmbh/booksum_de"
369
- },
370
- "airoboros_de": {
371
- "hf_hub_url": "mayflowergmbh/airoboros-3.0_de"
372
- },
373
- "ultrachat_de": {
374
- "hf_hub_url": "mayflowergmbh/ultra-chat_de"
375
- },
376
- "dpo_en_demo": {
377
- "file_name": "dpo_en_demo.json",
378
- "ranking": true,
379
- "formatting": "sharegpt",
380
- "columns": {
381
- "messages": "conversations",
382
- "chosen": "chosen",
383
- "rejected": "rejected"
384
- }
385
- },
386
- "dpo_zh_demo": {
387
- "file_name": "dpo_zh_demo.json",
388
- "ranking": true,
389
- "formatting": "sharegpt",
390
- "columns": {
391
- "messages": "conversations",
392
- "chosen": "chosen",
393
- "rejected": "rejected"
394
- }
395
- },
396
- "dpo_mix_en": {
397
- "hf_hub_url": "hiyouga/DPO-En-Zh-20k",
398
- "subset": "en",
399
- "ranking": true,
400
- "formatting": "sharegpt",
401
- "columns": {
402
- "messages": "conversations",
403
- "chosen": "chosen",
404
- "rejected": "rejected"
405
- }
406
- },
407
- "dpo_mix_zh": {
408
- "hf_hub_url": "hiyouga/DPO-En-Zh-20k",
409
- "subset": "zh",
410
- "ranking": true,
411
- "formatting": "sharegpt",
412
- "columns": {
413
- "messages": "conversations",
414
- "chosen": "chosen",
415
- "rejected": "rejected"
416
- }
417
- },
418
- "ultrafeedback": {
419
- "hf_hub_url": "llamafactory/ultrafeedback_binarized",
420
- "ms_hub_url": "llamafactory/ultrafeedback_binarized",
421
- "ranking": true,
422
- "columns": {
423
- "prompt": "instruction",
424
- "chosen": "chosen",
425
- "rejected": "rejected"
426
- }
427
- },
428
- "orca_pairs": {
429
- "hf_hub_url": "Intel/orca_dpo_pairs",
430
- "ranking": true,
431
- "columns": {
432
- "prompt": "question",
433
- "chosen": "chosen",
434
- "rejected": "rejected",
435
- "system": "system"
436
- }
437
- },
438
- "hh_rlhf_en": {
439
- "script_url": "hh_rlhf_en",
440
- "ranking": true,
441
- "columns": {
442
- "prompt": "instruction",
443
- "chosen": "chosen",
444
- "rejected": "rejected",
445
- "history": "history"
446
- }
447
- },
448
- "nectar_rm": {
449
- "hf_hub_url": "AstraMindAI/RLAIF-Nectar",
450
- "ms_hub_url": "AI-ModelScope/RLAIF-Nectar",
451
- "ranking": true
452
- },
453
- "orca_dpo_de": {
454
- "hf_hub_url": "mayflowergmbh/intel_orca_dpo_pairs_de",
455
- "ranking": true
456
- },
457
- "kto_en_demo": {
458
- "file_name": "kto_en_demo.json",
459
- "formatting": "sharegpt",
460
- "columns": {
461
- "messages": "messages",
462
- "kto_tag": "label"
463
- },
464
- "tags": {
465
- "role_tag": "role",
466
- "content_tag": "content",
467
- "user_tag": "user",
468
- "assistant_tag": "assistant"
469
- }
470
- },
471
- "kto_mix_en": {
472
- "hf_hub_url": "argilla/kto-mix-15k",
473
- "formatting": "sharegpt",
474
- "columns": {
475
- "messages": "completion",
476
- "kto_tag": "label"
477
- },
478
- "tags": {
479
- "role_tag": "role",
480
- "content_tag": "content",
481
- "user_tag": "user",
482
- "assistant_tag": "assistant"
483
- }
484
- },
485
- "ultrafeedback_kto": {
486
- "hf_hub_url": "argilla/ultrafeedback-binarized-preferences-cleaned-kto",
487
- "ms_hub_url": "AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto",
488
- "columns": {
489
- "prompt": "prompt",
490
- "response": "completion",
491
- "kto_tag": "label"
492
- }
493
- },
494
- "wiki_demo": {
495
- "file_name": "wiki_demo.txt",
496
- "columns": {
497
- "prompt": "text"
498
- }
499
- },
500
- "c4_demo": {
501
- "file_name": "c4_demo.json",
502
- "columns": {
503
- "prompt": "text"
504
- }
505
- },
506
- "refinedweb": {
507
- "hf_hub_url": "tiiuae/falcon-refinedweb",
508
- "columns": {
509
- "prompt": "content"
510
- }
511
- },
512
- "redpajama_v2": {
513
- "hf_hub_url": "togethercomputer/RedPajama-Data-V2",
514
- "columns": {
515
- "prompt": "raw_content"
516
- },
517
- "subset": "default"
518
- },
519
- "wikipedia_en": {
520
- "hf_hub_url": "olm/olm-wikipedia-20221220",
521
- "ms_hub_url": "AI-ModelScope/olm-wikipedia-20221220",
522
- "columns": {
523
- "prompt": "text"
524
- }
525
- },
526
- "wikipedia_zh": {
527
- "hf_hub_url": "pleisto/wikipedia-cn-20230720-filtered",
528
- "ms_hub_url": "AI-ModelScope/wikipedia-cn-20230720-filtered",
529
- "columns": {
530
- "prompt": "completion"
531
- }
532
- },
533
- "pile": {
534
- "hf_hub_url": "monology/pile-uncopyrighted",
535
- "ms_hub_url": "AI-ModelScope/pile",
536
- "columns": {
537
- "prompt": "text"
538
- }
539
- },
540
- "skypile": {
541
- "hf_hub_url": "Skywork/SkyPile-150B",
542
- "ms_hub_url": "AI-ModelScope/SkyPile-150B",
543
- "columns": {
544
- "prompt": "text"
545
- }
546
- },
547
- "fineweb": {
548
- "hf_hub_url": "HuggingFaceFW/fineweb",
549
- "columns": {
550
- "prompt": "text"
551
- }
552
- },
553
- "fineweb_edu": {
554
- "hf_hub_url": "HuggingFaceFW/fineweb-edu",
555
- "columns": {
556
- "prompt": "text"
557
- }
558
- },
559
- "the_stack": {
560
- "hf_hub_url": "bigcode/the-stack",
561
- "ms_hub_url": "AI-ModelScope/the-stack",
562
- "columns": {
563
- "prompt": "content"
564
- }
565
- },
566
- "starcoder_python": {
567
- "hf_hub_url": "bigcode/starcoderdata",
568
- "ms_hub_url": "AI-ModelScope/starcoderdata",
569
- "columns": {
570
- "prompt": "content"
571
- },
572
- "folder": "python"
573
- }
574
- }
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a491ee69f583486d9649f5fb4ef2f06c72d5f4397d98afec654b1e917901e66a
3
+ size 13750