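# Training config for a BGE-base-zh embedding run using the piccolo-style
# recipe with EWC-regularized continual training. The annotations below are
# inferred from key names and the standard HF Trainer / DeepSpeed
# integration, not from the training code itself.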
task_name: general
model_name: bge
model_dir: /iyunwen/nlpdata/PublicPretrainedModel/bge-base-zh/
use_deepspeed: true
desc: "piccolo"
train_method: "ewc"
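
# Relative weights of the loss terms, presumably combined as something like
#   total_loss = ewc_ratio * L_ewc + cosent_ratio * L_cosent + in_batch_ratio * L_in_batch
# where L_ewc is the elastic weight consolidation penalty anchoring the model
# to the original bge weights, L_cosent is the pairwise CoSENT ranking loss,
# and L_in_batch is an in-batch-negatives contrastive (InfoNCE-style) loss.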
ewc_ratio: 10.0
cosent_ratio: 20.0
in_batch_ratio: 30.0
save_steps: 50
hard_neg_ratio: 0.2
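
# Datasets trained with in-batch negatives, grouped by source. Each .jsonl
# line presumably holds a query, its positive passage(s), and mined hard
# negatives; a hypothetical record (schema not verified against the loader):
#   {"query": "...", "pos": ["..."], "neg": ["...", "..."]}
# hard_neg_ratio above likely controls the share of mined hard negatives
# mixed into each batch.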
in_batch_train_paths:
  synthetic_qp:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/wudao_synthetic_alpaca2_hfl_0_100000_vec_neg.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/m3e_synthetic_alpaca2_hfl_0_100000_vec_neg.jsonl
  normal:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/m3e_long_length_hard_neg.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/wudao_long_length_hard_neg.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/stella/mrc_data.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/stella/guowang_data.jsonl
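
# Pair-scored datasets (binary classification and NLI labels), presumably
# consumed by the CoSENT loss rather than the in-batch contrastive loss.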
pair_train_paths:
  binclf:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/binclf_data.jsonl
  nli:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/nli_data.jsonl
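
# Dataloader settings: loader_idxs: null presumably selects all loaders;
# in_batch_bsz and pair_bsz are the batch sizes of the two data streams,
# and max_length is the tokenizer truncation length.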
loader_idxs: null
in_batch_bsz: 128
pair_bsz: 128
max_length: 512
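
# "auto_ouput_dir" (sic): spelling presumably matches the exact key the
# training code reads.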
auto_ouput_dir: false
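
# Forwarded to HuggingFace TrainingArguments. logging_steps / eval_steps are
# set to a huge sentinel to effectively disable mid-run logging and eval;
# checkpoint frequency is presumably governed by the top-level save_steps.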
train_args:
  seed: 666
  output_dir: /iyunwen/nlpdata/work/LP/model_path/vec_embedding/stella/s4/
  evaluation_strategy: "no"
  num_train_epochs: 4
  logging_steps: 9999999
  eval_steps: 9999999
  per_device_train_batch_size: 128
  gradient_accumulation_steps: 1
  per_device_eval_batch_size: 32
  learning_rate: 5.0e-06
  weight_decay: 0.00001
  warmup_ratio: 0.05
  lr_scheduler_type: "linear"
  dataloader_drop_last: false
  fp16: true
  gradient_checkpointing: true
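
  # DeepSpeed config passed as a dict through the HF Trainer integration;
  # "auto" values are resolved from TrainingArguments at launch. The fp16
  # block mirrors fp16: true above and configures dynamic loss scaling.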
  deepspeed:
    fp16:
      enabled: true
      hysteresis: 2
      initial_scale_power: 16
      loss_scale: 0
      loss_scale_window: 1000
      min_loss_scale: 1
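
    # Must equal per_device_train_batch_size (128); with train_batch_size
    # "auto", the global batch is micro_batch * grad_accum * world_size.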
    train_micro_batch_size_per_gpu: 128
    train_batch_size: "auto"
    gradient_accumulation_steps: 1
    gradient_clipping: "auto"
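
    # Caution: lr is pinned to 1.0e-6 while train_args.learning_rate is
    # 5.0e-06; depending on the transformers version the integration may
    # warn or error on the mismatch. "auto" would inherit the
    # TrainingArguments value.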
    optimizer:
      type: AdamW
      params:
        adam_w_mode: true
        lr: 1.0e-6
        torch_adam: true
        weight_decay: "auto"
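
    # WarmupDecayLR is linear warmup followed by linear decay, matching
    # lr_scheduler_type: "linear" above; the "auto" step counts are derived
    # from warmup_ratio and the dataset length at launch.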
    scheduler:
      type: WarmupDecayLR
      params:
        total_num_steps: "auto"
        warmup_max_lr: "auto"
        warmup_min_lr: "auto"
        warmup_num_steps: "auto"
    steps_per_print: 4
    wall_clock_breakdown: false
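
    # stage: 0 disables ZeRO partitioning (plain data parallelism under
    # DeepSpeed); the allgather/reduce tuning knobs below are presumably
    # inert until stage 1+.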
    zero_optimization:
      stage: 0
      allgather_bucket_size: 200000000.0
      allgather_partitions: true
      contiguous_gradients: true
      overlap_comm: true
      reduce_bucket_size: "auto"
      reduce_scatter: true