# mist-zh / train_config.yml
# Erin's picture
# Upload 7 files
# 42df499
# raw
# history blame
# 2.49 kB
# Run identity and the base checkpoint to start from.
task_name: general
model_name: bge
model_dir: /iyunwen/nlpdata/PublicPretrainedModel/bge-base-zh/
use_deepspeed: true
desc: 'piccolo'
# Training method and per-loss ratios.
# NOTE(review): how each *_ratio is applied is defined by the training
# script, not by this file — confirm there before changing them.
train_method: 'ewc'
ewc_ratio: 10.0
cosent_ratio: 20.0
in_batch_ratio: 30.0
save_steps: 50
hard_neg_ratio: 0.2
# Training corpora for the in-batch objective, grouped by dataset kind.
# Each group is a list of JSONL files.
in_batch_train_paths:
  # The qp entries in synthetic_qp still use bge vectors.
  synthetic_qp:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/wudao_synthetic_alpaca2_hfl_0_100000_vec_neg.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/m3e_synthetic_alpaca2_hfl_0_100000_vec_neg.jsonl
  # Hard negatives in the `normal` group default to BM25.
  normal:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/m3e_long_length_hard_neg.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/wudao_long_length_hard_neg.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/stella/mrc_data.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/stella/guowang_data.jsonl
# Training corpora for the pair objective, grouped by task.
pair_train_paths:
  binclf:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/binclf_data.jsonl
  nli:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/nli_data.jsonl
# Data-loader settings: optional loader selection, batch sizes for the
# in-batch and pair loaders, and the maximum sequence length.
loader_idxs: null
in_batch_bsz: 128
pair_bsz: 128
max_length: 512
# NOTE(review): the key is spelled "ouput" (sic). It is kept as-is because
# the consuming script presumably looks it up by this exact name — confirm
# before renaming.
auto_ouput_dir: false
# Trainer arguments. The key names match Hugging Face `TrainingArguments`
# fields — presumably forwarded to it unchanged; confirm in the launcher.
train_args:
  seed: 666
  output_dir: /iyunwen/nlpdata/work/LP/model_path/vec_embedding/stella/s4/
  # Quoted so YAML 1.1 loaders keep the string "no" rather than boolean false.
  evaluation_strategy: 'no'
  num_train_epochs: 4
  # Both intervals are set to the same very large step count.
  logging_steps: 9999999
  eval_steps: 9999999
  per_device_train_batch_size: 128
  gradient_accumulation_steps: 1
  per_device_eval_batch_size: 32
  learning_rate: 5.0e-06
  weight_decay: 0.00001
  warmup_ratio: 0.05
  lr_scheduler_type: 'linear'
  dataloader_drop_last: false
  fp16: true
  gradient_checkpointing: true
  # DeepSpeed configuration.
  # NOTE(review): nested under train_args on the assumption that it is passed
  # as the `deepspeed` trainer argument — confirm against the launcher. It must
  # be nested somewhere: at top level its `fp16`, `gradient_accumulation_steps`,
  # `params` and `type` keys would collide with other keys in this file.
  deepspeed:
    fp16:
      enabled: true
      hysteresis: 2
      initial_scale_power: 16
      loss_scale: 0
      loss_scale_window: 1000
      min_loss_scale: 1
    # Same value as per_device_train_batch_size above.
    train_micro_batch_size_per_gpu: 128
    train_batch_size: 'auto'
    gradient_accumulation_steps: 1
    gradient_clipping: 'auto'
    optimizer:
      params:
        adam_w_mode: true
        # Written with a decimal point: YAML 1.1 loaders (e.g. PyYAML) read
        # a bare `1e-6` as the string "1e-6", not as a float.
        # NOTE(review): this differs from learning_rate (5.0e-06) above. If
        # this mapping is handed to the Hugging Face Trainer, its DeepSpeed
        # integration expects the two to agree, or this to be 'auto' —
        # confirm which value is intended.
        lr: 1.0e-6
        torch_adam: true
        weight_decay: 'auto'
      type: AdamW
    scheduler:
      params:
        total_num_steps: 'auto'
        warmup_max_lr: 'auto'
        warmup_min_lr: 'auto'
        warmup_num_steps: 'auto'
      type: WarmupDecayLR
    steps_per_print: 4
    wall_clock_breakdown: false
    zero_optimization:
      allgather_bucket_size: 200000000.0
      allgather_partitions: true
      contiguous_gradients: true
      overlap_comm: true
      reduce_bucket_size: 'auto'
      reduce_scatter: true
      stage: 0