Spaces:
Build error
Build error
ref code from novel-translation
Browse files- .gitattributes +7 -0
- competition/01_EDA.ipynb +0 -0
- config/qwen2_0.5b_lora_sft.yaml +39 -0
- config/qwen2_1.5b_lora_sft.yaml +39 -0
- config/qwen2_7b_lora_sft.yaml +39 -0
- data/alpaca_mac.json +0 -0
- data/dataset_info.json +568 -0
- datasets/mgtv/dev.csv +3 -0
- datasets/mgtv/test_a.csv +3 -0
- datasets/mgtv/train.csv +3 -0
- llama-factory/config/qwen2_0.5b_lora_sft.yaml +39 -0
- llama-factory/config/qwen2_1.5b_lora_sft.yaml +39 -0
- llama-factory/config/qwen2_7b_lora_sft.yaml +39 -0
- llama-factory/data/alpaca_mac.json +0 -0
- llama-factory/data/dataset_info.json +568 -0
- notebooks/01_Finetune-Llama3-with-LLaMA-Factory.ipynb +1 -0
- novel-translation/00_Data_Analysis.ipynb +0 -0
- novel-translation/01_Qwen2-0.5B_Unsloth.ipynb +0 -0
- novel-translation/02_Qwen2-1.5B_Unsloth.ipynb +0 -0
- novel-translation/03_Qwen2-0.5B_1.5B-4bit.ipynb +0 -0
- novel-translation/04_tune-small-no-flash-attn.ipynb +0 -0
- novel-translation/05_tune-small-with-flash-attn.ipynb +0 -0
- novel-translation/06_tune-small-py3.11.ipynb +0 -0
- novel-translation/07_tune-lf-py3.11.ipynb +0 -0
- novel-translation/08_eval-lf-py3.11.ipynb +0 -0
- requirements.txt +2 -2
- results/mac-results-colab.csv +0 -0
- results/mac-results-colab.gsheet +3 -0
- results/mac-results_lf.csv +3 -0
- scripts/tune-lf.sh +8 -0
.gitattributes
CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
datasets/mgtv/ filter=lfs diff=lfs merge=lfs -text
|
37 |
+
datasets/mgtv/dev.csv filter=lfs diff=lfs merge=lfs -text
|
38 |
+
datasets/mgtv/test_a.csv filter=lfs diff=lfs merge=lfs -text
|
39 |
+
datasets/mgtv/train.csv filter=lfs diff=lfs merge=lfs -text
|
40 |
+
results/mac-results-colab.csv filter=lfs diff=lfs merge=lfs -text
|
41 |
+
results/mac-results-colab.gsheet filter=lfs diff=lfs merge=lfs -text
|
42 |
+
results/mac-results_lf.csv filter=lfs diff=lfs merge=lfs -text
|
competition/01_EDA.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
config/qwen2_0.5b_lora_sft.yaml
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### model
|
2 |
+
model_name_or_path: Qwen/Qwen2-0.5B-Instruct
|
3 |
+
|
4 |
+
### method
|
5 |
+
stage: sft
|
6 |
+
do_train: true
|
7 |
+
finetuning_type: lora
|
8 |
+
lora_target: all
|
9 |
+
|
10 |
+
### dataset
|
11 |
+
dataset: alpaca_mac
|
12 |
+
template: chatml
|
13 |
+
cutoff_len: 1024
|
14 |
+
max_samples: 4528
|
15 |
+
overwrite_cache: true
|
16 |
+
preprocessing_num_workers: 16
|
17 |
+
|
18 |
+
### output
|
19 |
+
output_dir: saves/qwen2-0.5b/lora/sft
|
20 |
+
logging_steps: 10
|
21 |
+
save_steps: 560
|
22 |
+
plot_loss: true
|
23 |
+
overwrite_output_dir: true
|
24 |
+
|
25 |
+
### train
|
26 |
+
per_device_train_batch_size: 1
|
27 |
+
gradient_accumulation_steps: 8
|
28 |
+
learning_rate: 1.0e-4
|
29 |
+
num_train_epochs: 10.0
|
30 |
+
lr_scheduler_type: cosine
|
31 |
+
warmup_ratio: 0.1
|
32 |
+
bf16: true
|
33 |
+
ddp_timeout: 180000000
|
34 |
+
|
35 |
+
### eval
|
36 |
+
val_size: 0.01
|
37 |
+
per_device_eval_batch_size: 1
|
38 |
+
eval_strategy: steps
|
39 |
+
eval_steps: 560
|
config/qwen2_1.5b_lora_sft.yaml
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### model
|
2 |
+
model_name_or_path: Qwen/Qwen2-1.5B-Instruct
|
3 |
+
|
4 |
+
### method
|
5 |
+
stage: sft
|
6 |
+
do_train: true
|
7 |
+
finetuning_type: lora
|
8 |
+
lora_target: all
|
9 |
+
|
10 |
+
### dataset
|
11 |
+
dataset: alpaca_mac
|
12 |
+
template: chatml
|
13 |
+
cutoff_len: 1024
|
14 |
+
max_samples: 4528
|
15 |
+
overwrite_cache: true
|
16 |
+
preprocessing_num_workers: 16
|
17 |
+
|
18 |
+
### output
|
19 |
+
output_dir: saves/qwen2-1.5b/lora/sft
|
20 |
+
logging_steps: 10
|
21 |
+
save_steps: 560
|
22 |
+
plot_loss: true
|
23 |
+
overwrite_output_dir: true
|
24 |
+
|
25 |
+
### train
|
26 |
+
per_device_train_batch_size: 1
|
27 |
+
gradient_accumulation_steps: 8
|
28 |
+
learning_rate: 1.0e-4
|
29 |
+
num_train_epochs: 10.0
|
30 |
+
lr_scheduler_type: cosine
|
31 |
+
warmup_ratio: 0.1
|
32 |
+
bf16: true
|
33 |
+
ddp_timeout: 180000000
|
34 |
+
|
35 |
+
### eval
|
36 |
+
val_size: 0.01
|
37 |
+
per_device_eval_batch_size: 1
|
38 |
+
eval_strategy: steps
|
39 |
+
eval_steps: 560
|
config/qwen2_7b_lora_sft.yaml
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### model
|
2 |
+
model_name_or_path: Qwen/Qwen2-7B-Instruct
|
3 |
+
|
4 |
+
### method
|
5 |
+
stage: sft
|
6 |
+
do_train: true
|
7 |
+
finetuning_type: lora
|
8 |
+
lora_target: all
|
9 |
+
|
10 |
+
### dataset
|
11 |
+
dataset: alpaca_mac
|
12 |
+
template: chatml
|
13 |
+
cutoff_len: 1024
|
14 |
+
max_samples: 4528
|
15 |
+
overwrite_cache: true
|
16 |
+
preprocessing_num_workers: 16
|
17 |
+
|
18 |
+
### output
|
19 |
+
output_dir: saves/qwen2-7b/lora/sft
|
20 |
+
logging_steps: 10
|
21 |
+
save_steps: 560
|
22 |
+
plot_loss: true
|
23 |
+
overwrite_output_dir: true
|
24 |
+
|
25 |
+
### train
|
26 |
+
per_device_train_batch_size: 1
|
27 |
+
gradient_accumulation_steps: 8
|
28 |
+
learning_rate: 1.0e-4
|
29 |
+
num_train_epochs: 10.0
|
30 |
+
lr_scheduler_type: cosine
|
31 |
+
warmup_ratio: 0.1
|
32 |
+
bf16: true
|
33 |
+
ddp_timeout: 180000000
|
34 |
+
|
35 |
+
### eval
|
36 |
+
val_size: 0.01
|
37 |
+
per_device_eval_batch_size: 1
|
38 |
+
eval_strategy: steps
|
39 |
+
eval_steps: 560
|
data/alpaca_mac.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/dataset_info.json
ADDED
@@ -0,0 +1,568 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpaca_mac": {
|
3 |
+
"file_name": "alpaca_mac.json"
|
4 |
+
},
|
5 |
+
"identity": {
|
6 |
+
"file_name": "identity.json"
|
7 |
+
},
|
8 |
+
"alpaca_en_demo": {
|
9 |
+
"file_name": "alpaca_en_demo.json"
|
10 |
+
},
|
11 |
+
"alpaca_zh_demo": {
|
12 |
+
"file_name": "alpaca_zh_demo.json"
|
13 |
+
},
|
14 |
+
"glaive_toolcall_en_demo": {
|
15 |
+
"file_name": "glaive_toolcall_en_demo.json",
|
16 |
+
"formatting": "sharegpt",
|
17 |
+
"columns": {
|
18 |
+
"messages": "conversations",
|
19 |
+
"tools": "tools"
|
20 |
+
}
|
21 |
+
},
|
22 |
+
"glaive_toolcall_zh_demo": {
|
23 |
+
"file_name": "glaive_toolcall_zh_demo.json",
|
24 |
+
"formatting": "sharegpt",
|
25 |
+
"columns": {
|
26 |
+
"messages": "conversations",
|
27 |
+
"tools": "tools"
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"mllm_demo": {
|
31 |
+
"file_name": "mllm_demo.json",
|
32 |
+
"formatting": "sharegpt",
|
33 |
+
"columns": {
|
34 |
+
"messages": "messages",
|
35 |
+
"images": "images"
|
36 |
+
},
|
37 |
+
"tags": {
|
38 |
+
"role_tag": "role",
|
39 |
+
"content_tag": "content",
|
40 |
+
"user_tag": "user",
|
41 |
+
"assistant_tag": "assistant"
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"alpaca_en": {
|
45 |
+
"hf_hub_url": "llamafactory/alpaca_en",
|
46 |
+
"ms_hub_url": "llamafactory/alpaca_en"
|
47 |
+
},
|
48 |
+
"alpaca_zh": {
|
49 |
+
"hf_hub_url": "llamafactory/alpaca_zh",
|
50 |
+
"ms_hub_url": "llamafactory/alpaca_zh"
|
51 |
+
},
|
52 |
+
"alpaca_gpt4_en": {
|
53 |
+
"hf_hub_url": "llamafactory/alpaca_gpt4_en",
|
54 |
+
"ms_hub_url": "llamafactory/alpaca_gpt4_en"
|
55 |
+
},
|
56 |
+
"alpaca_gpt4_zh": {
|
57 |
+
"hf_hub_url": "llamafactory/alpaca_gpt4_zh",
|
58 |
+
"ms_hub_url": "llamafactory/alpaca_gpt4_zh"
|
59 |
+
},
|
60 |
+
"glaive_toolcall_en": {
|
61 |
+
"hf_hub_url": "llamafactory/glaive_toolcall_en",
|
62 |
+
"formatting": "sharegpt",
|
63 |
+
"columns": {
|
64 |
+
"messages": "conversations",
|
65 |
+
"tools": "tools"
|
66 |
+
}
|
67 |
+
},
|
68 |
+
"glaive_toolcall_zh": {
|
69 |
+
"hf_hub_url": "llamafactory/glaive_toolcall_zh",
|
70 |
+
"formatting": "sharegpt",
|
71 |
+
"columns": {
|
72 |
+
"messages": "conversations",
|
73 |
+
"tools": "tools"
|
74 |
+
}
|
75 |
+
},
|
76 |
+
"lima": {
|
77 |
+
"hf_hub_url": "llamafactory/lima",
|
78 |
+
"formatting": "sharegpt"
|
79 |
+
},
|
80 |
+
"guanaco": {
|
81 |
+
"hf_hub_url": "JosephusCheung/GuanacoDataset",
|
82 |
+
"ms_hub_url": "AI-ModelScope/GuanacoDataset"
|
83 |
+
},
|
84 |
+
"belle_2m": {
|
85 |
+
"hf_hub_url": "BelleGroup/train_2M_CN",
|
86 |
+
"ms_hub_url": "AI-ModelScope/train_2M_CN"
|
87 |
+
},
|
88 |
+
"belle_1m": {
|
89 |
+
"hf_hub_url": "BelleGroup/train_1M_CN",
|
90 |
+
"ms_hub_url": "AI-ModelScope/train_1M_CN"
|
91 |
+
},
|
92 |
+
"belle_0.5m": {
|
93 |
+
"hf_hub_url": "BelleGroup/train_0.5M_CN",
|
94 |
+
"ms_hub_url": "AI-ModelScope/train_0.5M_CN"
|
95 |
+
},
|
96 |
+
"belle_dialog": {
|
97 |
+
"hf_hub_url": "BelleGroup/generated_chat_0.4M",
|
98 |
+
"ms_hub_url": "AI-ModelScope/generated_chat_0.4M"
|
99 |
+
},
|
100 |
+
"belle_math": {
|
101 |
+
"hf_hub_url": "BelleGroup/school_math_0.25M",
|
102 |
+
"ms_hub_url": "AI-ModelScope/school_math_0.25M"
|
103 |
+
},
|
104 |
+
"belle_multiturn": {
|
105 |
+
"script_url": "belle_multiturn",
|
106 |
+
"formatting": "sharegpt"
|
107 |
+
},
|
108 |
+
"ultra_chat": {
|
109 |
+
"script_url": "ultra_chat",
|
110 |
+
"formatting": "sharegpt"
|
111 |
+
},
|
112 |
+
"open_platypus": {
|
113 |
+
"hf_hub_url": "garage-bAInd/Open-Platypus",
|
114 |
+
"ms_hub_url": "AI-ModelScope/Open-Platypus"
|
115 |
+
},
|
116 |
+
"codealpaca": {
|
117 |
+
"hf_hub_url": "sahil2801/CodeAlpaca-20k",
|
118 |
+
"ms_hub_url": "AI-ModelScope/CodeAlpaca-20k"
|
119 |
+
},
|
120 |
+
"alpaca_cot": {
|
121 |
+
"hf_hub_url": "QingyiSi/Alpaca-CoT",
|
122 |
+
"ms_hub_url": "AI-ModelScope/Alpaca-CoT"
|
123 |
+
},
|
124 |
+
"openorca": {
|
125 |
+
"hf_hub_url": "Open-Orca/OpenOrca",
|
126 |
+
"ms_hub_url": "AI-ModelScope/OpenOrca",
|
127 |
+
"columns": {
|
128 |
+
"prompt": "question",
|
129 |
+
"response": "response",
|
130 |
+
"system": "system_prompt"
|
131 |
+
}
|
132 |
+
},
|
133 |
+
"slimorca": {
|
134 |
+
"hf_hub_url": "Open-Orca/SlimOrca",
|
135 |
+
"formatting": "sharegpt"
|
136 |
+
},
|
137 |
+
"mathinstruct": {
|
138 |
+
"hf_hub_url": "TIGER-Lab/MathInstruct",
|
139 |
+
"ms_hub_url": "AI-ModelScope/MathInstruct",
|
140 |
+
"columns": {
|
141 |
+
"prompt": "instruction",
|
142 |
+
"response": "output"
|
143 |
+
}
|
144 |
+
},
|
145 |
+
"firefly": {
|
146 |
+
"hf_hub_url": "YeungNLP/firefly-train-1.1M",
|
147 |
+
"columns": {
|
148 |
+
"prompt": "input",
|
149 |
+
"response": "target"
|
150 |
+
}
|
151 |
+
},
|
152 |
+
"wikiqa": {
|
153 |
+
"hf_hub_url": "wiki_qa",
|
154 |
+
"columns": {
|
155 |
+
"prompt": "question",
|
156 |
+
"response": "answer"
|
157 |
+
}
|
158 |
+
},
|
159 |
+
"webqa": {
|
160 |
+
"hf_hub_url": "suolyer/webqa",
|
161 |
+
"ms_hub_url": "AI-ModelScope/webqa",
|
162 |
+
"columns": {
|
163 |
+
"prompt": "input",
|
164 |
+
"response": "output"
|
165 |
+
}
|
166 |
+
},
|
167 |
+
"webnovel": {
|
168 |
+
"hf_hub_url": "zxbsmk/webnovel_cn",
|
169 |
+
"ms_hub_url": "AI-ModelScope/webnovel_cn"
|
170 |
+
},
|
171 |
+
"nectar_sft": {
|
172 |
+
"hf_hub_url": "AstraMindAI/SFT-Nectar",
|
173 |
+
"ms_hub_url": "AI-ModelScope/SFT-Nectar"
|
174 |
+
},
|
175 |
+
"deepctrl": {
|
176 |
+
"ms_hub_url": "deepctrl/deepctrl-sft-data"
|
177 |
+
},
|
178 |
+
"adgen": {
|
179 |
+
"hf_hub_url": "HasturOfficial/adgen",
|
180 |
+
"ms_hub_url": "AI-ModelScope/adgen",
|
181 |
+
"columns": {
|
182 |
+
"prompt": "content",
|
183 |
+
"response": "summary"
|
184 |
+
}
|
185 |
+
},
|
186 |
+
"sharegpt_hyper": {
|
187 |
+
"hf_hub_url": "totally-not-an-llm/sharegpt-hyperfiltered-3k",
|
188 |
+
"formatting": "sharegpt"
|
189 |
+
},
|
190 |
+
"sharegpt4": {
|
191 |
+
"hf_hub_url": "shibing624/sharegpt_gpt4",
|
192 |
+
"ms_hub_url": "AI-ModelScope/sharegpt_gpt4",
|
193 |
+
"formatting": "sharegpt"
|
194 |
+
},
|
195 |
+
"ultrachat_200k": {
|
196 |
+
"hf_hub_url": "HuggingFaceH4/ultrachat_200k",
|
197 |
+
"ms_hub_url": "AI-ModelScope/ultrachat_200k",
|
198 |
+
"formatting": "sharegpt",
|
199 |
+
"columns": {
|
200 |
+
"messages": "messages"
|
201 |
+
},
|
202 |
+
"tags": {
|
203 |
+
"role_tag": "role",
|
204 |
+
"content_tag": "content",
|
205 |
+
"user_tag": "user",
|
206 |
+
"assistant_tag": "assistant"
|
207 |
+
}
|
208 |
+
},
|
209 |
+
"agent_instruct": {
|
210 |
+
"hf_hub_url": "THUDM/AgentInstruct",
|
211 |
+
"ms_hub_url": "ZhipuAI/AgentInstruct",
|
212 |
+
"formatting": "sharegpt"
|
213 |
+
},
|
214 |
+
"lmsys_chat": {
|
215 |
+
"hf_hub_url": "lmsys/lmsys-chat-1m",
|
216 |
+
"ms_hub_url": "AI-ModelScope/lmsys-chat-1m",
|
217 |
+
"formatting": "sharegpt",
|
218 |
+
"columns": {
|
219 |
+
"messages": "conversation"
|
220 |
+
},
|
221 |
+
"tags": {
|
222 |
+
"role_tag": "role",
|
223 |
+
"content_tag": "content",
|
224 |
+
"user_tag": "human",
|
225 |
+
"assistant_tag": "assistant"
|
226 |
+
}
|
227 |
+
},
|
228 |
+
"evol_instruct": {
|
229 |
+
"hf_hub_url": "WizardLM/WizardLM_evol_instruct_V2_196k",
|
230 |
+
"ms_hub_url": "AI-ModelScope/WizardLM_evol_instruct_V2_196k",
|
231 |
+
"formatting": "sharegpt"
|
232 |
+
},
|
233 |
+
"glaive_toolcall_100k": {
|
234 |
+
"hf_hub_url": "hiyouga/glaive-function-calling-v2-sharegpt",
|
235 |
+
"formatting": "sharegpt",
|
236 |
+
"columns": {
|
237 |
+
"messages": "conversations",
|
238 |
+
"tools": "tools"
|
239 |
+
}
|
240 |
+
},
|
241 |
+
"cosmopedia": {
|
242 |
+
"hf_hub_url": "HuggingFaceTB/cosmopedia",
|
243 |
+
"columns": {
|
244 |
+
"prompt": "prompt",
|
245 |
+
"response": "text"
|
246 |
+
}
|
247 |
+
},
|
248 |
+
"stem_zh": {
|
249 |
+
"hf_hub_url": "hfl/stem_zh_instruction"
|
250 |
+
},
|
251 |
+
"ruozhiba_gpt4": {
|
252 |
+
"hf_hub_url": "hfl/ruozhiba_gpt4_turbo"
|
253 |
+
},
|
254 |
+
"neo_sft": {
|
255 |
+
"hf_hub_url": "m-a-p/neo_sft_phase2",
|
256 |
+
"formatting": "sharegpt"
|
257 |
+
},
|
258 |
+
"magpie_pro_300k": {
|
259 |
+
"hf_hub_url": "Magpie-Align/Magpie-Pro-300K-Filtered",
|
260 |
+
"formatting": "sharegpt"
|
261 |
+
},
|
262 |
+
"web_instruct": {
|
263 |
+
"hf_hub_url": "TIGER-Lab/WebInstructSub",
|
264 |
+
"columns": {
|
265 |
+
"prompt": "question",
|
266 |
+
"response": "answer"
|
267 |
+
}
|
268 |
+
},
|
269 |
+
"llava_1k_en": {
|
270 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-2k",
|
271 |
+
"subset": "en",
|
272 |
+
"formatting": "sharegpt",
|
273 |
+
"columns": {
|
274 |
+
"messages": "messages",
|
275 |
+
"images": "images"
|
276 |
+
},
|
277 |
+
"tags": {
|
278 |
+
"role_tag": "role",
|
279 |
+
"content_tag": "content",
|
280 |
+
"user_tag": "user",
|
281 |
+
"assistant_tag": "assistant"
|
282 |
+
}
|
283 |
+
},
|
284 |
+
"llava_1k_zh": {
|
285 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-2k",
|
286 |
+
"subset": "zh",
|
287 |
+
"formatting": "sharegpt",
|
288 |
+
"columns": {
|
289 |
+
"messages": "messages",
|
290 |
+
"images": "images"
|
291 |
+
},
|
292 |
+
"tags": {
|
293 |
+
"role_tag": "role",
|
294 |
+
"content_tag": "content",
|
295 |
+
"user_tag": "user",
|
296 |
+
"assistant_tag": "assistant"
|
297 |
+
}
|
298 |
+
},
|
299 |
+
"llava_150k_en": {
|
300 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-300k",
|
301 |
+
"subset": "en",
|
302 |
+
"formatting": "sharegpt",
|
303 |
+
"columns": {
|
304 |
+
"messages": "messages",
|
305 |
+
"images": "images"
|
306 |
+
},
|
307 |
+
"tags": {
|
308 |
+
"role_tag": "role",
|
309 |
+
"content_tag": "content",
|
310 |
+
"user_tag": "user",
|
311 |
+
"assistant_tag": "assistant"
|
312 |
+
}
|
313 |
+
},
|
314 |
+
"llava_150k_zh": {
|
315 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-300k",
|
316 |
+
"subset": "zh",
|
317 |
+
"formatting": "sharegpt",
|
318 |
+
"columns": {
|
319 |
+
"messages": "messages",
|
320 |
+
"images": "images"
|
321 |
+
},
|
322 |
+
"tags": {
|
323 |
+
"role_tag": "role",
|
324 |
+
"content_tag": "content",
|
325 |
+
"user_tag": "user",
|
326 |
+
"assistant_tag": "assistant"
|
327 |
+
}
|
328 |
+
},
|
329 |
+
"mllm_pt_demo": {
|
330 |
+
"hf_hub_url": "BUAADreamer/mllm_pt_demo",
|
331 |
+
"formatting": "sharegpt",
|
332 |
+
"columns": {
|
333 |
+
"messages": "messages",
|
334 |
+
"images": "images"
|
335 |
+
},
|
336 |
+
"tags": {
|
337 |
+
"role_tag": "role",
|
338 |
+
"content_tag": "content",
|
339 |
+
"user_tag": "user",
|
340 |
+
"assistant_tag": "assistant"
|
341 |
+
}
|
342 |
+
},
|
343 |
+
"oasst_de": {
|
344 |
+
"hf_hub_url": "mayflowergmbh/oasst_de"
|
345 |
+
},
|
346 |
+
"dolly_15k_de": {
|
347 |
+
"hf_hub_url": "mayflowergmbh/dolly-15k_de"
|
348 |
+
},
|
349 |
+
"alpaca-gpt4_de": {
|
350 |
+
"hf_hub_url": "mayflowergmbh/alpaca-gpt4_de"
|
351 |
+
},
|
352 |
+
"openschnabeltier_de": {
|
353 |
+
"hf_hub_url": "mayflowergmbh/openschnabeltier_de"
|
354 |
+
},
|
355 |
+
"evol_instruct_de": {
|
356 |
+
"hf_hub_url": "mayflowergmbh/evol-instruct_de"
|
357 |
+
},
|
358 |
+
"dolphin_de": {
|
359 |
+
"hf_hub_url": "mayflowergmbh/dolphin_de"
|
360 |
+
},
|
361 |
+
"booksum_de": {
|
362 |
+
"hf_hub_url": "mayflowergmbh/booksum_de"
|
363 |
+
},
|
364 |
+
"airoboros_de": {
|
365 |
+
"hf_hub_url": "mayflowergmbh/airoboros-3.0_de"
|
366 |
+
},
|
367 |
+
"ultrachat_de": {
|
368 |
+
"hf_hub_url": "mayflowergmbh/ultra-chat_de"
|
369 |
+
},
|
370 |
+
"dpo_en_demo": {
|
371 |
+
"file_name": "dpo_en_demo.json",
|
372 |
+
"ranking": true,
|
373 |
+
"formatting": "sharegpt",
|
374 |
+
"columns": {
|
375 |
+
"messages": "conversations",
|
376 |
+
"chosen": "chosen",
|
377 |
+
"rejected": "rejected"
|
378 |
+
}
|
379 |
+
},
|
380 |
+
"dpo_zh_demo": {
|
381 |
+
"file_name": "dpo_zh_demo.json",
|
382 |
+
"ranking": true,
|
383 |
+
"formatting": "sharegpt",
|
384 |
+
"columns": {
|
385 |
+
"messages": "conversations",
|
386 |
+
"chosen": "chosen",
|
387 |
+
"rejected": "rejected"
|
388 |
+
}
|
389 |
+
},
|
390 |
+
"dpo_mix_en": {
|
391 |
+
"hf_hub_url": "hiyouga/DPO-En-Zh-20k",
|
392 |
+
"subset": "en",
|
393 |
+
"ranking": true,
|
394 |
+
"formatting": "sharegpt",
|
395 |
+
"columns": {
|
396 |
+
"messages": "conversations",
|
397 |
+
"chosen": "chosen",
|
398 |
+
"rejected": "rejected"
|
399 |
+
}
|
400 |
+
},
|
401 |
+
"dpo_mix_zh": {
|
402 |
+
"hf_hub_url": "hiyouga/DPO-En-Zh-20k",
|
403 |
+
"subset": "zh",
|
404 |
+
"ranking": true,
|
405 |
+
"formatting": "sharegpt",
|
406 |
+
"columns": {
|
407 |
+
"messages": "conversations",
|
408 |
+
"chosen": "chosen",
|
409 |
+
"rejected": "rejected"
|
410 |
+
}
|
411 |
+
},
|
412 |
+
"ultrafeedback": {
|
413 |
+
"hf_hub_url": "llamafactory/ultrafeedback_binarized",
|
414 |
+
"ms_hub_url": "llamafactory/ultrafeedback_binarized",
|
415 |
+
"ranking": true,
|
416 |
+
"columns": {
|
417 |
+
"prompt": "instruction",
|
418 |
+
"chosen": "chosen",
|
419 |
+
"rejected": "rejected"
|
420 |
+
}
|
421 |
+
},
|
422 |
+
"orca_pairs": {
|
423 |
+
"hf_hub_url": "Intel/orca_dpo_pairs",
|
424 |
+
"ranking": true,
|
425 |
+
"columns": {
|
426 |
+
"prompt": "question",
|
427 |
+
"chosen": "chosen",
|
428 |
+
"rejected": "rejected",
|
429 |
+
"system": "system"
|
430 |
+
}
|
431 |
+
},
|
432 |
+
"hh_rlhf_en": {
|
433 |
+
"script_url": "hh_rlhf_en",
|
434 |
+
"ranking": true,
|
435 |
+
"columns": {
|
436 |
+
"prompt": "instruction",
|
437 |
+
"chosen": "chosen",
|
438 |
+
"rejected": "rejected",
|
439 |
+
"history": "history"
|
440 |
+
}
|
441 |
+
},
|
442 |
+
"nectar_rm": {
|
443 |
+
"hf_hub_url": "AstraMindAI/RLAIF-Nectar",
|
444 |
+
"ms_hub_url": "AI-ModelScope/RLAIF-Nectar",
|
445 |
+
"ranking": true
|
446 |
+
},
|
447 |
+
"orca_dpo_de": {
|
448 |
+
"hf_hub_url": "mayflowergmbh/intel_orca_dpo_pairs_de",
|
449 |
+
"ranking": true
|
450 |
+
},
|
451 |
+
"kto_en_demo": {
|
452 |
+
"file_name": "kto_en_demo.json",
|
453 |
+
"formatting": "sharegpt",
|
454 |
+
"columns": {
|
455 |
+
"messages": "messages",
|
456 |
+
"kto_tag": "label"
|
457 |
+
},
|
458 |
+
"tags": {
|
459 |
+
"role_tag": "role",
|
460 |
+
"content_tag": "content",
|
461 |
+
"user_tag": "user",
|
462 |
+
"assistant_tag": "assistant"
|
463 |
+
}
|
464 |
+
},
|
465 |
+
"kto_mix_en": {
|
466 |
+
"hf_hub_url": "argilla/kto-mix-15k",
|
467 |
+
"formatting": "sharegpt",
|
468 |
+
"columns": {
|
469 |
+
"messages": "completion",
|
470 |
+
"kto_tag": "label"
|
471 |
+
},
|
472 |
+
"tags": {
|
473 |
+
"role_tag": "role",
|
474 |
+
"content_tag": "content",
|
475 |
+
"user_tag": "user",
|
476 |
+
"assistant_tag": "assistant"
|
477 |
+
}
|
478 |
+
},
|
479 |
+
"ultrafeedback_kto": {
|
480 |
+
"hf_hub_url": "argilla/ultrafeedback-binarized-preferences-cleaned-kto",
|
481 |
+
"ms_hub_url": "AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto",
|
482 |
+
"columns": {
|
483 |
+
"prompt": "prompt",
|
484 |
+
"response": "completion",
|
485 |
+
"kto_tag": "label"
|
486 |
+
}
|
487 |
+
},
|
488 |
+
"wiki_demo": {
|
489 |
+
"file_name": "wiki_demo.txt",
|
490 |
+
"columns": {
|
491 |
+
"prompt": "text"
|
492 |
+
}
|
493 |
+
},
|
494 |
+
"c4_demo": {
|
495 |
+
"file_name": "c4_demo.json",
|
496 |
+
"columns": {
|
497 |
+
"prompt": "text"
|
498 |
+
}
|
499 |
+
},
|
500 |
+
"refinedweb": {
|
501 |
+
"hf_hub_url": "tiiuae/falcon-refinedweb",
|
502 |
+
"columns": {
|
503 |
+
"prompt": "content"
|
504 |
+
}
|
505 |
+
},
|
506 |
+
"redpajama_v2": {
|
507 |
+
"hf_hub_url": "togethercomputer/RedPajama-Data-V2",
|
508 |
+
"columns": {
|
509 |
+
"prompt": "raw_content"
|
510 |
+
},
|
511 |
+
"subset": "default"
|
512 |
+
},
|
513 |
+
"wikipedia_en": {
|
514 |
+
"hf_hub_url": "olm/olm-wikipedia-20221220",
|
515 |
+
"ms_hub_url": "AI-ModelScope/olm-wikipedia-20221220",
|
516 |
+
"columns": {
|
517 |
+
"prompt": "text"
|
518 |
+
}
|
519 |
+
},
|
520 |
+
"wikipedia_zh": {
|
521 |
+
"hf_hub_url": "pleisto/wikipedia-cn-20230720-filtered",
|
522 |
+
"ms_hub_url": "AI-ModelScope/wikipedia-cn-20230720-filtered",
|
523 |
+
"columns": {
|
524 |
+
"prompt": "completion"
|
525 |
+
}
|
526 |
+
},
|
527 |
+
"pile": {
|
528 |
+
"hf_hub_url": "monology/pile-uncopyrighted",
|
529 |
+
"ms_hub_url": "AI-ModelScope/pile",
|
530 |
+
"columns": {
|
531 |
+
"prompt": "text"
|
532 |
+
}
|
533 |
+
},
|
534 |
+
"skypile": {
|
535 |
+
"hf_hub_url": "Skywork/SkyPile-150B",
|
536 |
+
"ms_hub_url": "AI-ModelScope/SkyPile-150B",
|
537 |
+
"columns": {
|
538 |
+
"prompt": "text"
|
539 |
+
}
|
540 |
+
},
|
541 |
+
"fineweb": {
|
542 |
+
"hf_hub_url": "HuggingFaceFW/fineweb",
|
543 |
+
"columns": {
|
544 |
+
"prompt": "text"
|
545 |
+
}
|
546 |
+
},
|
547 |
+
"fineweb_edu": {
|
548 |
+
"hf_hub_url": "HuggingFaceFW/fineweb-edu",
|
549 |
+
"columns": {
|
550 |
+
"prompt": "text"
|
551 |
+
}
|
552 |
+
},
|
553 |
+
"the_stack": {
|
554 |
+
"hf_hub_url": "bigcode/the-stack",
|
555 |
+
"ms_hub_url": "AI-ModelScope/the-stack",
|
556 |
+
"columns": {
|
557 |
+
"prompt": "content"
|
558 |
+
}
|
559 |
+
},
|
560 |
+
"starcoder_python": {
|
561 |
+
"hf_hub_url": "bigcode/starcoderdata",
|
562 |
+
"ms_hub_url": "AI-ModelScope/starcoderdata",
|
563 |
+
"columns": {
|
564 |
+
"prompt": "content"
|
565 |
+
},
|
566 |
+
"folder": "python"
|
567 |
+
}
|
568 |
+
}
|
datasets/mgtv/dev.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:449f236786e2105cd1dd0ba5f4a037c3608a03d73a24597e880cc5009e8c53b6
|
3 |
+
size 2741482
|
datasets/mgtv/test_a.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7c29598e27c726bef8a9f2672b83dfc4f7edb6fb6a7ff19bf63cadbdc6e9a62
|
3 |
+
size 1816769
|
datasets/mgtv/train.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06570ba22afc612ea7033d2fda6acf67774f662e5c60f57e4ce8e28ca2dd9b22
|
3 |
+
size 20747995
|
llama-factory/config/qwen2_0.5b_lora_sft.yaml
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### model
|
2 |
+
model_name_or_path: Qwen/Qwen2-0.5B-Instruct
|
3 |
+
|
4 |
+
### method
|
5 |
+
stage: sft
|
6 |
+
do_train: true
|
7 |
+
finetuning_type: lora
|
8 |
+
lora_target: all
|
9 |
+
|
10 |
+
### dataset
|
11 |
+
dataset: alpaca_mac
|
12 |
+
template: chatml
|
13 |
+
cutoff_len: 1024
|
14 |
+
max_samples: 4528
|
15 |
+
overwrite_cache: true
|
16 |
+
preprocessing_num_workers: 16
|
17 |
+
|
18 |
+
### output
|
19 |
+
output_dir: saves/qwen2-0.5b/lora/sft
|
20 |
+
logging_steps: 10
|
21 |
+
save_steps: 560
|
22 |
+
plot_loss: true
|
23 |
+
overwrite_output_dir: true
|
24 |
+
|
25 |
+
### train
|
26 |
+
per_device_train_batch_size: 1
|
27 |
+
gradient_accumulation_steps: 8
|
28 |
+
learning_rate: 1.0e-4
|
29 |
+
num_train_epochs: 10.0
|
30 |
+
lr_scheduler_type: cosine
|
31 |
+
warmup_ratio: 0.1
|
32 |
+
bf16: true
|
33 |
+
ddp_timeout: 180000000
|
34 |
+
|
35 |
+
### eval
|
36 |
+
val_size: 0.01
|
37 |
+
per_device_eval_batch_size: 1
|
38 |
+
eval_strategy: steps
|
39 |
+
eval_steps: 560
|
llama-factory/config/qwen2_1.5b_lora_sft.yaml
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### model
|
2 |
+
model_name_or_path: Qwen/Qwen2-1.5B-Instruct
|
3 |
+
|
4 |
+
### method
|
5 |
+
stage: sft
|
6 |
+
do_train: true
|
7 |
+
finetuning_type: lora
|
8 |
+
lora_target: all
|
9 |
+
|
10 |
+
### dataset
|
11 |
+
dataset: alpaca_mac
|
12 |
+
template: chatml
|
13 |
+
cutoff_len: 1024
|
14 |
+
max_samples: 4528
|
15 |
+
overwrite_cache: true
|
16 |
+
preprocessing_num_workers: 16
|
17 |
+
|
18 |
+
### output
|
19 |
+
output_dir: saves/qwen2-1.5b/lora/sft
|
20 |
+
logging_steps: 10
|
21 |
+
save_steps: 560
|
22 |
+
plot_loss: true
|
23 |
+
overwrite_output_dir: true
|
24 |
+
|
25 |
+
### train
|
26 |
+
per_device_train_batch_size: 1
|
27 |
+
gradient_accumulation_steps: 8
|
28 |
+
learning_rate: 1.0e-4
|
29 |
+
num_train_epochs: 10.0
|
30 |
+
lr_scheduler_type: cosine
|
31 |
+
warmup_ratio: 0.1
|
32 |
+
bf16: true
|
33 |
+
ddp_timeout: 180000000
|
34 |
+
|
35 |
+
### eval
|
36 |
+
val_size: 0.01
|
37 |
+
per_device_eval_batch_size: 1
|
38 |
+
eval_strategy: steps
|
39 |
+
eval_steps: 560
|
llama-factory/config/qwen2_7b_lora_sft.yaml
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### model
|
2 |
+
model_name_or_path: Qwen/Qwen2-7B-Instruct
|
3 |
+
|
4 |
+
### method
|
5 |
+
stage: sft
|
6 |
+
do_train: true
|
7 |
+
finetuning_type: lora
|
8 |
+
lora_target: all
|
9 |
+
|
10 |
+
### dataset
|
11 |
+
dataset: alpaca_mac
|
12 |
+
template: chatml
|
13 |
+
cutoff_len: 1024
|
14 |
+
max_samples: 4528
|
15 |
+
overwrite_cache: true
|
16 |
+
preprocessing_num_workers: 16
|
17 |
+
|
18 |
+
### output
|
19 |
+
output_dir: saves/qwen2-7b/lora/sft
|
20 |
+
logging_steps: 10
|
21 |
+
save_steps: 560
|
22 |
+
plot_loss: true
|
23 |
+
overwrite_output_dir: true
|
24 |
+
|
25 |
+
### train
|
26 |
+
per_device_train_batch_size: 1
|
27 |
+
gradient_accumulation_steps: 8
|
28 |
+
learning_rate: 1.0e-4
|
29 |
+
num_train_epochs: 10.0
|
30 |
+
lr_scheduler_type: cosine
|
31 |
+
warmup_ratio: 0.1
|
32 |
+
bf16: true
|
33 |
+
ddp_timeout: 180000000
|
34 |
+
|
35 |
+
### eval
|
36 |
+
val_size: 0.01
|
37 |
+
per_device_eval_batch_size: 1
|
38 |
+
eval_strategy: steps
|
39 |
+
eval_steps: 560
|
llama-factory/data/alpaca_mac.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
llama-factory/data/dataset_info.json
ADDED
@@ -0,0 +1,568 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpaca_mac": {
|
3 |
+
"file_name": "alpaca_mac.json"
|
4 |
+
},
|
5 |
+
"identity": {
|
6 |
+
"file_name": "identity.json"
|
7 |
+
},
|
8 |
+
"alpaca_en_demo": {
|
9 |
+
"file_name": "alpaca_en_demo.json"
|
10 |
+
},
|
11 |
+
"alpaca_zh_demo": {
|
12 |
+
"file_name": "alpaca_zh_demo.json"
|
13 |
+
},
|
14 |
+
"glaive_toolcall_en_demo": {
|
15 |
+
"file_name": "glaive_toolcall_en_demo.json",
|
16 |
+
"formatting": "sharegpt",
|
17 |
+
"columns": {
|
18 |
+
"messages": "conversations",
|
19 |
+
"tools": "tools"
|
20 |
+
}
|
21 |
+
},
|
22 |
+
"glaive_toolcall_zh_demo": {
|
23 |
+
"file_name": "glaive_toolcall_zh_demo.json",
|
24 |
+
"formatting": "sharegpt",
|
25 |
+
"columns": {
|
26 |
+
"messages": "conversations",
|
27 |
+
"tools": "tools"
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"mllm_demo": {
|
31 |
+
"file_name": "mllm_demo.json",
|
32 |
+
"formatting": "sharegpt",
|
33 |
+
"columns": {
|
34 |
+
"messages": "messages",
|
35 |
+
"images": "images"
|
36 |
+
},
|
37 |
+
"tags": {
|
38 |
+
"role_tag": "role",
|
39 |
+
"content_tag": "content",
|
40 |
+
"user_tag": "user",
|
41 |
+
"assistant_tag": "assistant"
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"alpaca_en": {
|
45 |
+
"hf_hub_url": "llamafactory/alpaca_en",
|
46 |
+
"ms_hub_url": "llamafactory/alpaca_en"
|
47 |
+
},
|
48 |
+
"alpaca_zh": {
|
49 |
+
"hf_hub_url": "llamafactory/alpaca_zh",
|
50 |
+
"ms_hub_url": "llamafactory/alpaca_zh"
|
51 |
+
},
|
52 |
+
"alpaca_gpt4_en": {
|
53 |
+
"hf_hub_url": "llamafactory/alpaca_gpt4_en",
|
54 |
+
"ms_hub_url": "llamafactory/alpaca_gpt4_en"
|
55 |
+
},
|
56 |
+
"alpaca_gpt4_zh": {
|
57 |
+
"hf_hub_url": "llamafactory/alpaca_gpt4_zh",
|
58 |
+
"ms_hub_url": "llamafactory/alpaca_gpt4_zh"
|
59 |
+
},
|
60 |
+
"glaive_toolcall_en": {
|
61 |
+
"hf_hub_url": "llamafactory/glaive_toolcall_en",
|
62 |
+
"formatting": "sharegpt",
|
63 |
+
"columns": {
|
64 |
+
"messages": "conversations",
|
65 |
+
"tools": "tools"
|
66 |
+
}
|
67 |
+
},
|
68 |
+
"glaive_toolcall_zh": {
|
69 |
+
"hf_hub_url": "llamafactory/glaive_toolcall_zh",
|
70 |
+
"formatting": "sharegpt",
|
71 |
+
"columns": {
|
72 |
+
"messages": "conversations",
|
73 |
+
"tools": "tools"
|
74 |
+
}
|
75 |
+
},
|
76 |
+
"lima": {
|
77 |
+
"hf_hub_url": "llamafactory/lima",
|
78 |
+
"formatting": "sharegpt"
|
79 |
+
},
|
80 |
+
"guanaco": {
|
81 |
+
"hf_hub_url": "JosephusCheung/GuanacoDataset",
|
82 |
+
"ms_hub_url": "AI-ModelScope/GuanacoDataset"
|
83 |
+
},
|
84 |
+
"belle_2m": {
|
85 |
+
"hf_hub_url": "BelleGroup/train_2M_CN",
|
86 |
+
"ms_hub_url": "AI-ModelScope/train_2M_CN"
|
87 |
+
},
|
88 |
+
"belle_1m": {
|
89 |
+
"hf_hub_url": "BelleGroup/train_1M_CN",
|
90 |
+
"ms_hub_url": "AI-ModelScope/train_1M_CN"
|
91 |
+
},
|
92 |
+
"belle_0.5m": {
|
93 |
+
"hf_hub_url": "BelleGroup/train_0.5M_CN",
|
94 |
+
"ms_hub_url": "AI-ModelScope/train_0.5M_CN"
|
95 |
+
},
|
96 |
+
"belle_dialog": {
|
97 |
+
"hf_hub_url": "BelleGroup/generated_chat_0.4M",
|
98 |
+
"ms_hub_url": "AI-ModelScope/generated_chat_0.4M"
|
99 |
+
},
|
100 |
+
"belle_math": {
|
101 |
+
"hf_hub_url": "BelleGroup/school_math_0.25M",
|
102 |
+
"ms_hub_url": "AI-ModelScope/school_math_0.25M"
|
103 |
+
},
|
104 |
+
"belle_multiturn": {
|
105 |
+
"script_url": "belle_multiturn",
|
106 |
+
"formatting": "sharegpt"
|
107 |
+
},
|
108 |
+
"ultra_chat": {
|
109 |
+
"script_url": "ultra_chat",
|
110 |
+
"formatting": "sharegpt"
|
111 |
+
},
|
112 |
+
"open_platypus": {
|
113 |
+
"hf_hub_url": "garage-bAInd/Open-Platypus",
|
114 |
+
"ms_hub_url": "AI-ModelScope/Open-Platypus"
|
115 |
+
},
|
116 |
+
"codealpaca": {
|
117 |
+
"hf_hub_url": "sahil2801/CodeAlpaca-20k",
|
118 |
+
"ms_hub_url": "AI-ModelScope/CodeAlpaca-20k"
|
119 |
+
},
|
120 |
+
"alpaca_cot": {
|
121 |
+
"hf_hub_url": "QingyiSi/Alpaca-CoT",
|
122 |
+
"ms_hub_url": "AI-ModelScope/Alpaca-CoT"
|
123 |
+
},
|
124 |
+
"openorca": {
|
125 |
+
"hf_hub_url": "Open-Orca/OpenOrca",
|
126 |
+
"ms_hub_url": "AI-ModelScope/OpenOrca",
|
127 |
+
"columns": {
|
128 |
+
"prompt": "question",
|
129 |
+
"response": "response",
|
130 |
+
"system": "system_prompt"
|
131 |
+
}
|
132 |
+
},
|
133 |
+
"slimorca": {
|
134 |
+
"hf_hub_url": "Open-Orca/SlimOrca",
|
135 |
+
"formatting": "sharegpt"
|
136 |
+
},
|
137 |
+
"mathinstruct": {
|
138 |
+
"hf_hub_url": "TIGER-Lab/MathInstruct",
|
139 |
+
"ms_hub_url": "AI-ModelScope/MathInstruct",
|
140 |
+
"columns": {
|
141 |
+
"prompt": "instruction",
|
142 |
+
"response": "output"
|
143 |
+
}
|
144 |
+
},
|
145 |
+
"firefly": {
|
146 |
+
"hf_hub_url": "YeungNLP/firefly-train-1.1M",
|
147 |
+
"columns": {
|
148 |
+
"prompt": "input",
|
149 |
+
"response": "target"
|
150 |
+
}
|
151 |
+
},
|
152 |
+
"wikiqa": {
|
153 |
+
"hf_hub_url": "wiki_qa",
|
154 |
+
"columns": {
|
155 |
+
"prompt": "question",
|
156 |
+
"response": "answer"
|
157 |
+
}
|
158 |
+
},
|
159 |
+
"webqa": {
|
160 |
+
"hf_hub_url": "suolyer/webqa",
|
161 |
+
"ms_hub_url": "AI-ModelScope/webqa",
|
162 |
+
"columns": {
|
163 |
+
"prompt": "input",
|
164 |
+
"response": "output"
|
165 |
+
}
|
166 |
+
},
|
167 |
+
"webnovel": {
|
168 |
+
"hf_hub_url": "zxbsmk/webnovel_cn",
|
169 |
+
"ms_hub_url": "AI-ModelScope/webnovel_cn"
|
170 |
+
},
|
171 |
+
"nectar_sft": {
|
172 |
+
"hf_hub_url": "AstraMindAI/SFT-Nectar",
|
173 |
+
"ms_hub_url": "AI-ModelScope/SFT-Nectar"
|
174 |
+
},
|
175 |
+
"deepctrl": {
|
176 |
+
"ms_hub_url": "deepctrl/deepctrl-sft-data"
|
177 |
+
},
|
178 |
+
"adgen": {
|
179 |
+
"hf_hub_url": "HasturOfficial/adgen",
|
180 |
+
"ms_hub_url": "AI-ModelScope/adgen",
|
181 |
+
"columns": {
|
182 |
+
"prompt": "content",
|
183 |
+
"response": "summary"
|
184 |
+
}
|
185 |
+
},
|
186 |
+
"sharegpt_hyper": {
|
187 |
+
"hf_hub_url": "totally-not-an-llm/sharegpt-hyperfiltered-3k",
|
188 |
+
"formatting": "sharegpt"
|
189 |
+
},
|
190 |
+
"sharegpt4": {
|
191 |
+
"hf_hub_url": "shibing624/sharegpt_gpt4",
|
192 |
+
"ms_hub_url": "AI-ModelScope/sharegpt_gpt4",
|
193 |
+
"formatting": "sharegpt"
|
194 |
+
},
|
195 |
+
"ultrachat_200k": {
|
196 |
+
"hf_hub_url": "HuggingFaceH4/ultrachat_200k",
|
197 |
+
"ms_hub_url": "AI-ModelScope/ultrachat_200k",
|
198 |
+
"formatting": "sharegpt",
|
199 |
+
"columns": {
|
200 |
+
"messages": "messages"
|
201 |
+
},
|
202 |
+
"tags": {
|
203 |
+
"role_tag": "role",
|
204 |
+
"content_tag": "content",
|
205 |
+
"user_tag": "user",
|
206 |
+
"assistant_tag": "assistant"
|
207 |
+
}
|
208 |
+
},
|
209 |
+
"agent_instruct": {
|
210 |
+
"hf_hub_url": "THUDM/AgentInstruct",
|
211 |
+
"ms_hub_url": "ZhipuAI/AgentInstruct",
|
212 |
+
"formatting": "sharegpt"
|
213 |
+
},
|
214 |
+
"lmsys_chat": {
|
215 |
+
"hf_hub_url": "lmsys/lmsys-chat-1m",
|
216 |
+
"ms_hub_url": "AI-ModelScope/lmsys-chat-1m",
|
217 |
+
"formatting": "sharegpt",
|
218 |
+
"columns": {
|
219 |
+
"messages": "conversation"
|
220 |
+
},
|
221 |
+
"tags": {
|
222 |
+
"role_tag": "role",
|
223 |
+
"content_tag": "content",
|
224 |
+
"user_tag": "human",
|
225 |
+
"assistant_tag": "assistant"
|
226 |
+
}
|
227 |
+
},
|
228 |
+
"evol_instruct": {
|
229 |
+
"hf_hub_url": "WizardLM/WizardLM_evol_instruct_V2_196k",
|
230 |
+
"ms_hub_url": "AI-ModelScope/WizardLM_evol_instruct_V2_196k",
|
231 |
+
"formatting": "sharegpt"
|
232 |
+
},
|
233 |
+
"glaive_toolcall_100k": {
|
234 |
+
"hf_hub_url": "hiyouga/glaive-function-calling-v2-sharegpt",
|
235 |
+
"formatting": "sharegpt",
|
236 |
+
"columns": {
|
237 |
+
"messages": "conversations",
|
238 |
+
"tools": "tools"
|
239 |
+
}
|
240 |
+
},
|
241 |
+
"cosmopedia": {
|
242 |
+
"hf_hub_url": "HuggingFaceTB/cosmopedia",
|
243 |
+
"columns": {
|
244 |
+
"prompt": "prompt",
|
245 |
+
"response": "text"
|
246 |
+
}
|
247 |
+
},
|
248 |
+
"stem_zh": {
|
249 |
+
"hf_hub_url": "hfl/stem_zh_instruction"
|
250 |
+
},
|
251 |
+
"ruozhiba_gpt4": {
|
252 |
+
"hf_hub_url": "hfl/ruozhiba_gpt4_turbo"
|
253 |
+
},
|
254 |
+
"neo_sft": {
|
255 |
+
"hf_hub_url": "m-a-p/neo_sft_phase2",
|
256 |
+
"formatting": "sharegpt"
|
257 |
+
},
|
258 |
+
"magpie_pro_300k": {
|
259 |
+
"hf_hub_url": "Magpie-Align/Magpie-Pro-300K-Filtered",
|
260 |
+
"formatting": "sharegpt"
|
261 |
+
},
|
262 |
+
"web_instruct": {
|
263 |
+
"hf_hub_url": "TIGER-Lab/WebInstructSub",
|
264 |
+
"columns": {
|
265 |
+
"prompt": "question",
|
266 |
+
"response": "answer"
|
267 |
+
}
|
268 |
+
},
|
269 |
+
"llava_1k_en": {
|
270 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-2k",
|
271 |
+
"subset": "en",
|
272 |
+
"formatting": "sharegpt",
|
273 |
+
"columns": {
|
274 |
+
"messages": "messages",
|
275 |
+
"images": "images"
|
276 |
+
},
|
277 |
+
"tags": {
|
278 |
+
"role_tag": "role",
|
279 |
+
"content_tag": "content",
|
280 |
+
"user_tag": "user",
|
281 |
+
"assistant_tag": "assistant"
|
282 |
+
}
|
283 |
+
},
|
284 |
+
"llava_1k_zh": {
|
285 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-2k",
|
286 |
+
"subset": "zh",
|
287 |
+
"formatting": "sharegpt",
|
288 |
+
"columns": {
|
289 |
+
"messages": "messages",
|
290 |
+
"images": "images"
|
291 |
+
},
|
292 |
+
"tags": {
|
293 |
+
"role_tag": "role",
|
294 |
+
"content_tag": "content",
|
295 |
+
"user_tag": "user",
|
296 |
+
"assistant_tag": "assistant"
|
297 |
+
}
|
298 |
+
},
|
299 |
+
"llava_150k_en": {
|
300 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-300k",
|
301 |
+
"subset": "en",
|
302 |
+
"formatting": "sharegpt",
|
303 |
+
"columns": {
|
304 |
+
"messages": "messages",
|
305 |
+
"images": "images"
|
306 |
+
},
|
307 |
+
"tags": {
|
308 |
+
"role_tag": "role",
|
309 |
+
"content_tag": "content",
|
310 |
+
"user_tag": "user",
|
311 |
+
"assistant_tag": "assistant"
|
312 |
+
}
|
313 |
+
},
|
314 |
+
"llava_150k_zh": {
|
315 |
+
"hf_hub_url": "BUAADreamer/llava-en-zh-300k",
|
316 |
+
"subset": "zh",
|
317 |
+
"formatting": "sharegpt",
|
318 |
+
"columns": {
|
319 |
+
"messages": "messages",
|
320 |
+
"images": "images"
|
321 |
+
},
|
322 |
+
"tags": {
|
323 |
+
"role_tag": "role",
|
324 |
+
"content_tag": "content",
|
325 |
+
"user_tag": "user",
|
326 |
+
"assistant_tag": "assistant"
|
327 |
+
}
|
328 |
+
},
|
329 |
+
"mllm_pt_demo": {
|
330 |
+
"hf_hub_url": "BUAADreamer/mllm_pt_demo",
|
331 |
+
"formatting": "sharegpt",
|
332 |
+
"columns": {
|
333 |
+
"messages": "messages",
|
334 |
+
"images": "images"
|
335 |
+
},
|
336 |
+
"tags": {
|
337 |
+
"role_tag": "role",
|
338 |
+
"content_tag": "content",
|
339 |
+
"user_tag": "user",
|
340 |
+
"assistant_tag": "assistant"
|
341 |
+
}
|
342 |
+
},
|
343 |
+
"oasst_de": {
|
344 |
+
"hf_hub_url": "mayflowergmbh/oasst_de"
|
345 |
+
},
|
346 |
+
"dolly_15k_de": {
|
347 |
+
"hf_hub_url": "mayflowergmbh/dolly-15k_de"
|
348 |
+
},
|
349 |
+
"alpaca-gpt4_de": {
|
350 |
+
"hf_hub_url": "mayflowergmbh/alpaca-gpt4_de"
|
351 |
+
},
|
352 |
+
"openschnabeltier_de": {
|
353 |
+
"hf_hub_url": "mayflowergmbh/openschnabeltier_de"
|
354 |
+
},
|
355 |
+
"evol_instruct_de": {
|
356 |
+
"hf_hub_url": "mayflowergmbh/evol-instruct_de"
|
357 |
+
},
|
358 |
+
"dolphin_de": {
|
359 |
+
"hf_hub_url": "mayflowergmbh/dolphin_de"
|
360 |
+
},
|
361 |
+
"booksum_de": {
|
362 |
+
"hf_hub_url": "mayflowergmbh/booksum_de"
|
363 |
+
},
|
364 |
+
"airoboros_de": {
|
365 |
+
"hf_hub_url": "mayflowergmbh/airoboros-3.0_de"
|
366 |
+
},
|
367 |
+
"ultrachat_de": {
|
368 |
+
"hf_hub_url": "mayflowergmbh/ultra-chat_de"
|
369 |
+
},
|
370 |
+
"dpo_en_demo": {
|
371 |
+
"file_name": "dpo_en_demo.json",
|
372 |
+
"ranking": true,
|
373 |
+
"formatting": "sharegpt",
|
374 |
+
"columns": {
|
375 |
+
"messages": "conversations",
|
376 |
+
"chosen": "chosen",
|
377 |
+
"rejected": "rejected"
|
378 |
+
}
|
379 |
+
},
|
380 |
+
"dpo_zh_demo": {
|
381 |
+
"file_name": "dpo_zh_demo.json",
|
382 |
+
"ranking": true,
|
383 |
+
"formatting": "sharegpt",
|
384 |
+
"columns": {
|
385 |
+
"messages": "conversations",
|
386 |
+
"chosen": "chosen",
|
387 |
+
"rejected": "rejected"
|
388 |
+
}
|
389 |
+
},
|
390 |
+
"dpo_mix_en": {
|
391 |
+
"hf_hub_url": "hiyouga/DPO-En-Zh-20k",
|
392 |
+
"subset": "en",
|
393 |
+
"ranking": true,
|
394 |
+
"formatting": "sharegpt",
|
395 |
+
"columns": {
|
396 |
+
"messages": "conversations",
|
397 |
+
"chosen": "chosen",
|
398 |
+
"rejected": "rejected"
|
399 |
+
}
|
400 |
+
},
|
401 |
+
"dpo_mix_zh": {
|
402 |
+
"hf_hub_url": "hiyouga/DPO-En-Zh-20k",
|
403 |
+
"subset": "zh",
|
404 |
+
"ranking": true,
|
405 |
+
"formatting": "sharegpt",
|
406 |
+
"columns": {
|
407 |
+
"messages": "conversations",
|
408 |
+
"chosen": "chosen",
|
409 |
+
"rejected": "rejected"
|
410 |
+
}
|
411 |
+
},
|
412 |
+
"ultrafeedback": {
|
413 |
+
"hf_hub_url": "llamafactory/ultrafeedback_binarized",
|
414 |
+
"ms_hub_url": "llamafactory/ultrafeedback_binarized",
|
415 |
+
"ranking": true,
|
416 |
+
"columns": {
|
417 |
+
"prompt": "instruction",
|
418 |
+
"chosen": "chosen",
|
419 |
+
"rejected": "rejected"
|
420 |
+
}
|
421 |
+
},
|
422 |
+
"orca_pairs": {
|
423 |
+
"hf_hub_url": "Intel/orca_dpo_pairs",
|
424 |
+
"ranking": true,
|
425 |
+
"columns": {
|
426 |
+
"prompt": "question",
|
427 |
+
"chosen": "chosen",
|
428 |
+
"rejected": "rejected",
|
429 |
+
"system": "system"
|
430 |
+
}
|
431 |
+
},
|
432 |
+
"hh_rlhf_en": {
|
433 |
+
"script_url": "hh_rlhf_en",
|
434 |
+
"ranking": true,
|
435 |
+
"columns": {
|
436 |
+
"prompt": "instruction",
|
437 |
+
"chosen": "chosen",
|
438 |
+
"rejected": "rejected",
|
439 |
+
"history": "history"
|
440 |
+
}
|
441 |
+
},
|
442 |
+
"nectar_rm": {
|
443 |
+
"hf_hub_url": "AstraMindAI/RLAIF-Nectar",
|
444 |
+
"ms_hub_url": "AI-ModelScope/RLAIF-Nectar",
|
445 |
+
"ranking": true
|
446 |
+
},
|
447 |
+
"orca_dpo_de": {
|
448 |
+
"hf_hub_url": "mayflowergmbh/intel_orca_dpo_pairs_de",
|
449 |
+
"ranking": true
|
450 |
+
},
|
451 |
+
"kto_en_demo": {
|
452 |
+
"file_name": "kto_en_demo.json",
|
453 |
+
"formatting": "sharegpt",
|
454 |
+
"columns": {
|
455 |
+
"messages": "messages",
|
456 |
+
"kto_tag": "label"
|
457 |
+
},
|
458 |
+
"tags": {
|
459 |
+
"role_tag": "role",
|
460 |
+
"content_tag": "content",
|
461 |
+
"user_tag": "user",
|
462 |
+
"assistant_tag": "assistant"
|
463 |
+
}
|
464 |
+
},
|
465 |
+
"kto_mix_en": {
|
466 |
+
"hf_hub_url": "argilla/kto-mix-15k",
|
467 |
+
"formatting": "sharegpt",
|
468 |
+
"columns": {
|
469 |
+
"messages": "completion",
|
470 |
+
"kto_tag": "label"
|
471 |
+
},
|
472 |
+
"tags": {
|
473 |
+
"role_tag": "role",
|
474 |
+
"content_tag": "content",
|
475 |
+
"user_tag": "user",
|
476 |
+
"assistant_tag": "assistant"
|
477 |
+
}
|
478 |
+
},
|
479 |
+
"ultrafeedback_kto": {
|
480 |
+
"hf_hub_url": "argilla/ultrafeedback-binarized-preferences-cleaned-kto",
|
481 |
+
"ms_hub_url": "AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto",
|
482 |
+
"columns": {
|
483 |
+
"prompt": "prompt",
|
484 |
+
"response": "completion",
|
485 |
+
"kto_tag": "label"
|
486 |
+
}
|
487 |
+
},
|
488 |
+
"wiki_demo": {
|
489 |
+
"file_name": "wiki_demo.txt",
|
490 |
+
"columns": {
|
491 |
+
"prompt": "text"
|
492 |
+
}
|
493 |
+
},
|
494 |
+
"c4_demo": {
|
495 |
+
"file_name": "c4_demo.json",
|
496 |
+
"columns": {
|
497 |
+
"prompt": "text"
|
498 |
+
}
|
499 |
+
},
|
500 |
+
"refinedweb": {
|
501 |
+
"hf_hub_url": "tiiuae/falcon-refinedweb",
|
502 |
+
"columns": {
|
503 |
+
"prompt": "content"
|
504 |
+
}
|
505 |
+
},
|
506 |
+
"redpajama_v2": {
|
507 |
+
"hf_hub_url": "togethercomputer/RedPajama-Data-V2",
|
508 |
+
"columns": {
|
509 |
+
"prompt": "raw_content"
|
510 |
+
},
|
511 |
+
"subset": "default"
|
512 |
+
},
|
513 |
+
"wikipedia_en": {
|
514 |
+
"hf_hub_url": "olm/olm-wikipedia-20221220",
|
515 |
+
"ms_hub_url": "AI-ModelScope/olm-wikipedia-20221220",
|
516 |
+
"columns": {
|
517 |
+
"prompt": "text"
|
518 |
+
}
|
519 |
+
},
|
520 |
+
"wikipedia_zh": {
|
521 |
+
"hf_hub_url": "pleisto/wikipedia-cn-20230720-filtered",
|
522 |
+
"ms_hub_url": "AI-ModelScope/wikipedia-cn-20230720-filtered",
|
523 |
+
"columns": {
|
524 |
+
"prompt": "completion"
|
525 |
+
}
|
526 |
+
},
|
527 |
+
"pile": {
|
528 |
+
"hf_hub_url": "monology/pile-uncopyrighted",
|
529 |
+
"ms_hub_url": "AI-ModelScope/pile",
|
530 |
+
"columns": {
|
531 |
+
"prompt": "text"
|
532 |
+
}
|
533 |
+
},
|
534 |
+
"skypile": {
|
535 |
+
"hf_hub_url": "Skywork/SkyPile-150B",
|
536 |
+
"ms_hub_url": "AI-ModelScope/SkyPile-150B",
|
537 |
+
"columns": {
|
538 |
+
"prompt": "text"
|
539 |
+
}
|
540 |
+
},
|
541 |
+
"fineweb": {
|
542 |
+
"hf_hub_url": "HuggingFaceFW/fineweb",
|
543 |
+
"columns": {
|
544 |
+
"prompt": "text"
|
545 |
+
}
|
546 |
+
},
|
547 |
+
"fineweb_edu": {
|
548 |
+
"hf_hub_url": "HuggingFaceFW/fineweb-edu",
|
549 |
+
"columns": {
|
550 |
+
"prompt": "text"
|
551 |
+
}
|
552 |
+
},
|
553 |
+
"the_stack": {
|
554 |
+
"hf_hub_url": "bigcode/the-stack",
|
555 |
+
"ms_hub_url": "AI-ModelScope/the-stack",
|
556 |
+
"columns": {
|
557 |
+
"prompt": "content"
|
558 |
+
}
|
559 |
+
},
|
560 |
+
"starcoder_python": {
|
561 |
+
"hf_hub_url": "bigcode/starcoderdata",
|
562 |
+
"ms_hub_url": "AI-ModelScope/starcoderdata",
|
563 |
+
"columns": {
|
564 |
+
"prompt": "content"
|
565 |
+
},
|
566 |
+
"folder": "python"
|
567 |
+
}
|
568 |
+
}
|
notebooks/01_Finetune-Llama3-with-LLaMA-Factory.ipynb
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[{"file_id":"1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9","timestamp":1719737717483}],"gpuType":"T4"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","source":["# Finetune Llama-3 with LLaMA Factory\n","\n","Please use a **free** Tesla T4 Colab GPU to run this!\n","\n","Project homepage: https://github.com/hiyouga/LLaMA-Factory"],"metadata":{"id":"1oHFCsV0z-Jw"}},{"cell_type":"markdown","source":["## Install Dependencies"],"metadata":{"id":"lr7rB3szzhtx"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"giM74oK1rRIH"},"outputs":[],"source":["%cd /content/\n","%rm -rf LLaMA-Factory\n","!git clone https://github.com/hiyouga/LLaMA-Factory.git\n","%cd LLaMA-Factory\n","%ls\n","!pip install -e .[torch,bitsandbytes]"]},{"cell_type":"markdown","source":["### Check GPU environment"],"metadata":{"id":"H9RXn_YQnn9f"}},{"cell_type":"code","source":["import torch\n","try:\n"," assert torch.cuda.is_available() is True\n","except AssertionError:\n"," print(\"Please set up a GPU before using LLaMA Factory: https://medium.com/mlearning-ai/training-yolov4-on-google-colab-316f8fff99c6\")"],"metadata":{"id":"ZkN-ktlsnrdU"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Update Identity Dataset"],"metadata":{"id":"TeYs5Lz-QJYk"}},{"cell_type":"code","source":["import json\n","\n","%cd /content/LLaMA-Factory/\n","\n","NAME = \"Llama-3\"\n","AUTHOR = \"LLaMA Factory\"\n","\n","with open(\"data/identity.json\", \"r\", encoding=\"utf-8\") as f:\n"," dataset = json.load(f)\n","\n","for sample in dataset:\n"," sample[\"output\"] = sample[\"output\"].replace(\"{{\"+ \"name\" + \"}}\", NAME).replace(\"{{\"+ \"author\" + \"}}\", AUTHOR)\n","\n","with open(\"data/identity.json\", \"w\", encoding=\"utf-8\") as f:\n"," json.dump(dataset, f, indent=2, 
ensure_ascii=False)"],"metadata":{"id":"ap_fvMBsQHJc"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Fine-tune model via LLaMA Board"],"metadata":{"id":"2QiXcvdzzW3Y"}},{"cell_type":"code","source":["%cd /content/LLaMA-Factory/\n","!GRADIO_SHARE=1 llamafactory-cli webui"],"metadata":{"id":"YLsdS6V5yUMy"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Fine-tune model via Command Line\n","\n","It takes ~30min for training."],"metadata":{"id":"rgR3UFhB0Ifq"}},{"cell_type":"code","source":["import json\n","\n","args = dict(\n"," stage=\"sft\", # do supervised fine-tuning\n"," do_train=True,\n"," model_name_or_path=\"unsloth/llama-3-8b-Instruct-bnb-4bit\", # use bnb-4bit-quantized Llama-3-8B-Instruct model\n"," dataset=\"identity,alpaca_en_demo\", # use alpaca and identity datasets\n"," template=\"llama3\", # use llama3 prompt template\n"," finetuning_type=\"lora\", # use LoRA adapters to save memory\n"," lora_target=\"all\", # attach LoRA adapters to all linear layers\n"," output_dir=\"llama3_lora\", # the path to save LoRA adapters\n"," per_device_train_batch_size=2, # the batch size\n"," gradient_accumulation_steps=4, # the gradient accumulation steps\n"," lr_scheduler_type=\"cosine\", # use cosine learning rate scheduler\n"," logging_steps=10, # log every 10 steps\n"," warmup_ratio=0.1, # use warmup scheduler\n"," save_steps=1000, # save checkpoint every 1000 steps\n"," learning_rate=5e-5, # the learning rate\n"," num_train_epochs=3.0, # the epochs of training\n"," max_samples=500, # use 500 examples in each dataset\n"," max_grad_norm=1.0, # clip gradient norm to 1.0\n"," quantization_bit=4, # use 4-bit QLoRA\n"," loraplus_lr_ratio=16.0, # use LoRA+ algorithm with lambda=16.0\n"," fp16=True, # use float16 mixed precision training\n",")\n","\n","json.dump(args, open(\"train_llama3.json\", \"w\", encoding=\"utf-8\"), indent=2)\n","\n","%cd /content/LLaMA-Factory/\n","\n","!llamafactory-cli train 
train_llama3.json"],"metadata":{"id":"CS0Qk5OR0i4Q"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Infer the fine-tuned model"],"metadata":{"id":"PVNaC-xS5N40"}},{"cell_type":"code","source":["from llamafactory.chat import ChatModel\n","from llamafactory.extras.misc import torch_gc\n","\n","%cd /content/LLaMA-Factory/\n","\n","args = dict(\n"," model_name_or_path=\"unsloth/llama-3-8b-Instruct-bnb-4bit\", # use bnb-4bit-quantized Llama-3-8B-Instruct model\n"," adapter_name_or_path=\"llama3_lora\", # load the saved LoRA adapters\n"," template=\"llama3\", # same to the one in training\n"," finetuning_type=\"lora\", # same to the one in training\n"," quantization_bit=4, # load 4-bit quantized model\n",")\n","chat_model = ChatModel(args)\n","\n","messages = []\n","print(\"Welcome to the CLI application, use `clear` to remove the history, use `exit` to exit the application.\")\n","while True:\n"," query = input(\"\\nUser: \")\n"," if query.strip() == \"exit\":\n"," break\n"," if query.strip() == \"clear\":\n"," messages = []\n"," torch_gc()\n"," print(\"History has been removed.\")\n"," continue\n","\n"," messages.append({\"role\": \"user\", \"content\": query})\n"," print(\"Assistant: \", end=\"\", flush=True)\n","\n"," response = \"\"\n"," for new_text in chat_model.stream_chat(messages):\n"," print(new_text, end=\"\", flush=True)\n"," response += new_text\n"," print()\n"," messages.append({\"role\": \"assistant\", \"content\": response})\n","\n","torch_gc()"],"metadata":{"id":"oh8H9A_25SF9"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Merge the LoRA adapter and optionally upload model\n","\n","NOTE: the Colab free version has merely 12GB RAM, where merging LoRA of a 8B model needs at least 18GB RAM, thus you **cannot** perform it in the free version."],"metadata":{"id":"kTESHaFvbNTr"}},{"cell_type":"code","source":["!huggingface-cli 
login"],"metadata":{"id":"mcNcHcA4bf4Z"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["import json\n","\n","args = dict(\n"," model_name_or_path=\"meta-llama/Meta-Llama-3-8B-Instruct\", # use official non-quantized Llama-3-8B-Instruct model\n"," adapter_name_or_path=\"llama3_lora\", # load the saved LoRA adapters\n"," template=\"llama3\", # same to the one in training\n"," finetuning_type=\"lora\", # same to the one in training\n"," export_dir=\"llama3_lora_merged\", # the path to save the merged model\n"," export_size=2, # the file shard size (in GB) of the merged model\n"," export_device=\"cpu\", # the device used in export, can be chosen from `cpu` and `cuda`\n"," #export_hub_model_id=\"your_id/your_model\", # the Hugging Face hub ID to upload model\n",")\n","\n","json.dump(args, open(\"merge_llama3.json\", \"w\", encoding=\"utf-8\"), indent=2)\n","\n","%cd /content/LLaMA-Factory/\n","\n","!llamafactory-cli export merge_llama3.json"],"metadata":{"id":"IMojogHbaOZF"},"execution_count":null,"outputs":[]}]}
|
novel-translation/00_Data_Analysis.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
novel-translation/01_Qwen2-0.5B_Unsloth.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
novel-translation/02_Qwen2-1.5B_Unsloth.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
novel-translation/03_Qwen2-0.5B_1.5B-4bit.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
novel-translation/04_tune-small-no-flash-attn.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
novel-translation/05_tune-small-with-flash-attn.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
novel-translation/06_tune-small-py3.11.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
novel-translation/07_tune-lf-py3.11.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
novel-translation/08_eval-lf-py3.11.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
CHANGED
@@ -10,5 +10,5 @@ scikit-learn==1.5.0
|
|
10 |
jupyter
|
11 |
ipywidgets
|
12 |
packaging
|
13 |
-
triton
|
14 |
-
xformers
|
|
|
10 |
jupyter
|
11 |
ipywidgets
|
12 |
packaging
|
13 |
+
# triton
|
14 |
+
# xformers
|
results/mac-results-colab.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
results/mac-results-colab.gsheet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fd488430f65a2b959d746b81e485a0b596f8e32537979904416dfc021b1181d
|
3 |
+
size 179
|
results/mac-results_lf.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5acc087808de5df6839cbf7b170094c6e63445aab4bea15e4be9564b905eb51
|
3 |
+
size 3236072
|
scripts/tune-lf.sh
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/sh
# Run `llamafactory-cli train` from the repository's llama-factory directory.
#
# Usage: tune-lf.sh <config>
#   <config> is handed to `llamafactory-cli train` unchanged. The cd below
#   happens first, so a relative path is presumably resolved by the CLI
#   against llama-factory/ (assumes it uses the working directory -- confirm).

# Directory this script was invoked from, derived from $0.
BASEDIR=$(dirname "$0")

# Quoted so a checkout path containing spaces or glob characters is not
# word-split; abort if the directory is missing instead of launching the
# trainer from whatever directory the caller happened to be in.
cd "$BASEDIR/../llama-factory" || exit 1
echo Current Directory:
pwd

# ${1+"$1"} expands to the first argument as a single word (even if it
# contains spaces) and to nothing at all when no argument was given, so the
# no-argument behaviour of the CLI is left exactly as before.
llamafactory-cli train ${1+"$1"}
|