# Settings grouped under the `sft` key.
# The stray table-pipe characters (`|`) that trailed every line were removed
# and 2-space nesting was restored so this section parses as a mapping.
sft:
  one_dataset_every_time: true

  # Optimisation settings (meanings inferred from key names; the consuming
  # script is not visible here — confirm).
  lr: 0.00002
  batch_size: 32
  grad_accumulation_steps: 1
  epochs: 5
  weight_decay: 0.1

  # NOTE(review): `pad_token_id` and `max_length` carry the same values as
  # `data_process.pad_token_id` / `data_process.MAX_LENGTH` — presumably they
  # must stay in sync; confirm.
  pad_token_id: 0
  max_length: 1024

  # NOTE(review): the unit of `interval` (steps? samples?) is not shown here.
  interval: 2000

  # Explicit null: no dtype is set in this file.
  torch_dtype: null

  # Paths are relative; which directory they resolve against depends on the
  # consumer — TODO confirm.
  model_path: "../ckpt/MiniLLM-0.2B-WithWudao/final/model.pt"
  config_path: "../config/MiniLLM-0.2B-WithWudao-SFT/bert4torch_config.json"
  save_dir: "../ckpt/MiniLLM-0.2B-WithWudao-SFT"

  # Same path as `data_process.dataset_save_dir`.
  dataset_save_dir: "../sft_data"
# Settings grouped under the `data_process` key.
# The stray table-pipe characters (`|`) that trailed every line were removed
# and 2-space nesting was restored so this section parses as a mapping.
data_process:
  # NOTE(review): `MAX_LENGTH` and `pad_token_id` carry the same values as
  # `sft.max_length` / `sft.pad_token_id` — presumably they must stay in
  # sync; confirm.
  MAX_LENGTH: 1024
  pad_token_id: 0
  eos_token_id: 2

  # Absolute, machine-specific directory; the entries in `file_names` look
  # like paths relative to it — verify against the loader.
  dataset_src_dir: "/home/hfai/h01305/data/corpus/sft/common/"

  # Same entries, same order as the original flow-style list; rewritten as a
  # block sequence so each file is one diffable line.
  file_names:
    - "Tongjilibo/self_cognition.json"
    - "alpaca-zh/alpaca_gpt4_data_zh.json"
    - "BelleGroup/Belle_open_source_0.5M.json"
    - "BelleGroup/Belle_open_source_1M.json"
    - "BelleGroup/school_math_0.25M.json"
    - "deepctrl-sft-data/sft_data_zh.jsonl"
    - "moss-002-sft-data/zh_helpfulness.json"
    - "moss-002-sft-data/zh_honesty.json"
    - "moss-003-sft-data/moss-003-sft-no-tools.jsonl"
    - "CodeChat/continue_zh.jsonl"
    - "CodeChat/continue_zh_2.jsonl"
    - "ShareGPT-Chinese-English-90k/common_zh_70k.jsonl"
    - "ShareGPT-Chinese-English-90k/computer_cn_26k_continue.jsonl"
    - "ShareGPT-Chinese-English-90k/computer_zh_26k.jsonl"
    - "ShareGPT-Chinese-English-90k/unknow_zh_38k.jsonl"
    - "ShareGPT-Chinese-English-90k/unknow_zh_38k_continue.jsonl"
    - "firefly-train-1.1M/firefly-train-1.1M.jsonl"

  # Same path as `sft.dataset_save_dir`.
  dataset_save_dir: "../sft_data"

  # Explicit null: no overall cap is set in this file; the per-file cap is
  # 100000.
  max_samples: null
  max_samples_per_file: 100000

  # NOTE(review): the original indentation was lost, so the parent of these
  # three keys cannot be read from the file; they are assumed to belong to
  # `data_process` (presumably identity values used when preparing the
  # self-cognition data) — confirm against the consumer.
  name: MiniLLM
  author: Tongjilibo
  # Quoted so the value is unambiguously a string.
  date: "2024年"