---
# Training configuration for an embedding-model fine-tuning run.
# Top-level keys select the base model, loss weighting and data sources;
# `train_args` holds trainer arguments plus the DeepSpeed settings.

task_name: general
model_name: bge
model_dir: /iyunwen/nlpdata/PublicPretrainedModel/bge-base-zh/
use_deepspeed: true
desc: "piccolo"

# Training method and the ratios that accompany it.
train_method: "ewc"
ewc_ratio: 10.0
cosent_ratio: 20.0
in_batch_ratio: 30.0
save_steps: 50
hard_neg_ratio: 0.2

# Datasets listed under in_batch_train_paths, grouped by dataset kind.
in_batch_train_paths:
  # In synthetic_qp, the qp entries still use bge vectors.
  synthetic_qp:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/wudao_synthetic_alpaca2_hfl_0_100000_vec_neg.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/m3e_synthetic_alpaca2_hfl_0_100000_vec_neg.jsonl
  # In normal, the hard negatives default to BM25.
  normal:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/m3e_long_length_hard_neg.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/wudao_long_length_hard_neg.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/stella/mrc_data.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/stella/guowang_data.jsonl

# Datasets listed under pair_train_paths, grouped by dataset kind.
pair_train_paths:
  binclf:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/binclf_data.jsonl
  nli:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/nli_data.jsonl

loader_idxs: null
in_batch_bsz: 128
pair_bsz: 128
max_length: 512
# NOTE(review): the key is spelled "ouput"; left unchanged because the
# consuming code presumably looks it up by this exact name - verify before
# renaming.
auto_ouput_dir: false

train_args:
  seed: 666
  output_dir: /iyunwen/nlpdata/work/LP/model_path/vec_embedding/stella/s4/
  # Quoted so the value stays the string "no" instead of a YAML boolean.
  evaluation_strategy: "no"
  num_train_epochs: 4
  logging_steps: 9999999
  eval_steps: 9999999
  per_device_train_batch_size: 128
  gradient_accumulation_steps: 1
  per_device_eval_batch_size: 32
  learning_rate: 5.0e-06
  weight_decay: 0.00001
  warmup_ratio: 0.05
  lr_scheduler_type: "linear"
  dataloader_drop_last: false
  fp16: true
  gradient_checkpointing: true

  # NOTE(review): the nesting of `deepspeed` under `train_args` was
  # reconstructed from a whitespace-collapsed source; confirm that the loader
  # expects it here rather than at the top level.
  deepspeed:
    fp16:
      enabled: true
      hysteresis: 2
      initial_scale_power: 16
      loss_scale: 0
      loss_scale_window: 1000
      min_loss_scale: 1
    train_micro_batch_size_per_gpu: 128
    train_batch_size: "auto"
    gradient_accumulation_steps: 1
    gradient_clipping: auto
    optimizer:
      params:
        adam_w_mode: true
        # Written with a mantissa dot and a signed exponent so that YAML 1.1
        # loaders (e.g. PyYAML) resolve it as a float; a bare `1e-6` is
        # loaded as the string "1e-6" by those loaders.
        # NOTE(review): this differs from train_args.learning_rate
        # (5.0e-06); confirm which value the trainer actually applies.
        lr: 1.0e-06
        torch_adam: true
        weight_decay: auto
      type: AdamW
    scheduler:
      params:
        total_num_steps: auto
        warmup_max_lr: auto
        warmup_min_lr: auto
        warmup_num_steps: auto
      type: WarmupDecayLR
    steps_per_print: 4
    wall_clock_breakdown: false

    zero_optimization:
      allgather_bucket_size: 200000000.0
      allgather_partitions: true
      contiguous_gradients: true
      overlap_comm: true
      reduce_bucket_size: auto
      reduce_scatter: true
      stage: 0