Erin
/

mist-zh

Feature Extraction

text-embeddings-inference

Inference Endpoints

Model card Files Files and versions Community

mist-zh / train_config.yml

Erin's picture

Upload 7 files

42df499 about 1 year ago

2.49 kB

	task_name: general
	model_name: bge
	model_dir: /iyunwen/nlpdata/PublicPretrainedModel/bge-base-zh/
	use_deepspeed: true
	desc: "piccolo"
	train_method: "ewc"
	ewc_ratio: 10.0
	cosent_ratio: 20.0
	in_batch_ratio: 30.0
	save_steps: 50
	hard_neg_ratio: 0.2
	in_batch_train_paths:
	# synthetic_qp里的qp还是bge的向量
	synthetic_qp:
	- /iyunwen/nlpdata/work/LP/Data/VecData/v2/wudao_synthetic_alpaca2_hfl_0_100000_vec_neg.jsonl
	- /iyunwen/nlpdata/work/LP/Data/VecData/v2/m3e_synthetic_alpaca2_hfl_0_100000_vec_neg.jsonl
	# normal里的hard neg默认是bm25
	normal:
	- /iyunwen/nlpdata/work/LP/Data/VecData/v2/m3e_long_length_hard_neg.jsonl
	- /iyunwen/nlpdata/work/LP/Data/VecData/v2/wudao_long_length_hard_neg.jsonl
	- /iyunwen/nlpdata/work/LP/Data/VecData/stella/mrc_data.jsonl
	- /iyunwen/nlpdata/work/LP/Data/VecData/stella/guowang_data.jsonl


	pair_train_paths:
	binclf:
	- /iyunwen/nlpdata/work/LP/Data/VecData/v2/binclf_data.jsonl
	nli:
	- /iyunwen/nlpdata/work/LP/Data/VecData/v2/nli_data.jsonl

	loader_idxs: null
	in_batch_bsz: 128
	pair_bsz: 128
	max_length: 512

	auto_ouput_dir: false
	train_args:
	seed: 666
	output_dir: /iyunwen/nlpdata/work/LP/model_path/vec_embedding/stella/s4/
	evaluation_strategy: "no"
	num_train_epochs: 4
	logging_steps: 9999999
	eval_steps: 9999999
	per_device_train_batch_size: 128
	gradient_accumulation_steps: 1
	per_device_eval_batch_size: 32
	learning_rate: 5.0e-06
	weight_decay: 0.00001
	warmup_ratio: 0.05
	lr_scheduler_type: "linear"
	dataloader_drop_last: false

	fp16: true
	gradient_checkpointing: true
	deepspeed:
	fp16:
	enabled: true
	hysteresis: 2
	initial_scale_power: 16
	loss_scale: 0
	loss_scale_window: 1000
	min_loss_scale: 1
	train_micro_batch_size_per_gpu: 128
	train_batch_size: "auto"
	gradient_accumulation_steps: 1
	gradient_clipping: auto
	optimizer:
	params:
	adam_w_mode: true
	lr: 1e-6
	torch_adam: true
	weight_decay: auto
	type: AdamW
	scheduler:
	params:
	total_num_steps: auto
	warmup_max_lr: auto
	warmup_min_lr: auto
	warmup_num_steps: auto
	type: WarmupDecayLR
	steps_per_print: 4
	wall_clock_breakdown: false
	zero_optimization:
	allgather_bucket_size: 200000000.0
	allgather_partitions: true
	contiguous_gradients: true
	overlap_comm: true
	reduce_bucket_size: auto
	reduce_scatter: true
	stage: 0