# tests / config.yaml

# Upload 10 files

# c5aa5a6 verified about 1 year ago

# 5.47 kB

	model:
	names:
	- timm_image
	categorical_transformer:
	out_features: 192
	d_token: 192
	ffn_d_hidden: 192
	num_trans_blocks: 0
	num_attn_heads: 8
	residual_dropout: 0.0
	attention_dropout: 0.2
	ffn_dropout: 0.1
	normalization: layer_norm
	ffn_activation: reglu
	head_activation: relu
	data_types:
	- categorical
	additive_attention: false
	share_qv_weights: false
	numerical_transformer:
	out_features: 192
	d_token: 192
	ffn_d_hidden: 192
	num_trans_blocks: 0
	num_attn_heads: 8
	residual_dropout: 0.0
	attention_dropout: 0.2
	ffn_dropout: 0.1
	normalization: layer_norm
	ffn_activation: reglu
	head_activation: relu
	data_types:
	- numerical
	embedding_arch:
	- linear
	- relu
	merge: concat
	additive_attention: false
	share_qv_weights: false
	ner_text:
	checkpoint_name: bert-base-cased
	max_text_len: 512
	gradient_checkpointing: false
	low_cpu_mem_usage: false
	data_types:
	- text
	tokenizer_name: hf_auto
	insert_sep: false
	text_segment_num: 2
	stochastic_chunk: false
	special_tags:
	- X
	- O
	t_few:
	checkpoint_name: t5-small
	gradient_checkpointing: false
	data_types:
	- text
	tokenizer_name: hf_auto
	length_norm: 1.0
	unlikely_loss: 1.0
	mc_loss: 1.0
	max_text_len: 512
	text_segment_num: 2
	insert_sep: true
	low_cpu_mem_usage: false
	stochastic_chunk: false
	text_aug_detect_length: 10
	text_trivial_aug_maxscale: 0.0
	timm_image:
	checkpoint_name: swin_base_patch4_window7_224
	mix_choice: all_logits
	data_types:
	- image
	train_transform_types:
	- resize_shorter_side
	- center_crop
	- trivial_augment
	val_transform_types:
	- resize_shorter_side
	- center_crop
	image_norm: imagenet
	image_size: 224
	max_img_num_per_col: 2
	mmdet_image:
	checkpoint_name: yolov3_mobilenetv2_320_300e_coco
	data_types:
	- image
	train_transform_types:
	- resize_shorter_side
	- center_crop
	- trivial_augment
	val_transform_types:
	- resize_shorter_side
	- center_crop
	image_norm: imagenet
	image_size: 224
	max_img_num_per_col: 2
	mmocr_text_detection:
	checkpoint_name: TextSnake
	data_types:
	- image
	train_transform_types:
	- resize_shorter_side
	- center_crop
	- trivial_augment
	val_transform_types:
	- resize_shorter_side
	- center_crop
	image_norm: imagenet
	image_size: 224
	max_img_num_per_col: 2
	mmocr_text_recognition:
	checkpoint_name: ABINet
	data_types:
	- image
	train_transform_types:
	- resize_shorter_side
	- center_crop
	- trivial_augment
	val_transform_types:
	- resize_shorter_side
	- center_crop
	image_norm: imagenet
	image_size: 224
	max_img_num_per_col: 2
	clip:
	checkpoint_name: openai/clip-vit-base-patch32
	data_types:
	- image
	- text
	train_transform_types:
	- resize_shorter_side
	- center_crop
	- trivial_augment
	val_transform_types:
	- resize_shorter_side
	- center_crop
	image_norm: clip
	image_size: 224
	max_img_num_per_col: 2
	tokenizer_name: clip
	max_text_len: 77
	insert_sep: false
	text_segment_num: 1
	stochastic_chunk: false
	text_aug_detect_length: 10
	text_trivial_aug_maxscale: 0.0
	text_train_augment_types: null
	fusion_transformer:
	hidden_size: 192
	n_blocks: 3
	attention_n_heads: 8
	adapt_in_features: max
	attention_dropout: 0.2
	residual_dropout: 0.0
	ffn_dropout: 0.1
	ffn_d_hidden: 192
	normalization: layer_norm
	ffn_activation: geglu
	head_activation: relu
	data_types: null
	additive_attention: false
	share_qv_weights: false
	data:
	image:
	missing_value_strategy: skip
	text:
	normalize_text: false
	categorical:
	minimum_cat_count: 100
	maximum_num_cat: 20
	convert_to_text: true
	numerical:
	convert_to_text: false
	scaler_with_mean: true
	scaler_with_std: true
	label:
	numerical_label_preprocessing: standardscaler
	pos_label: null
	mixup:
	turn_on: false
	mixup_alpha: 0.8
	cutmix_alpha: 1.0
	cutmix_minmax: null
	prob: 1.0
	switch_prob: 0.5
	mode: batch
	turn_off_epoch: 5
	label_smoothing: 0.1
	templates:
	turn_on: false
	num_templates: 30
	template_length: 2048
	preset_templates:
	- super_glue
	- rte
	custom_templates: null
	optimization:
	optim_type: adamw
	learning_rate: 0.001
	weight_decay: 0.001
	lr_choice: layerwise_decay
	lr_decay: 0.9
	lr_schedule: cosine_decay
	max_epochs: 10
	max_steps: -1
	warmup_steps: 0.1
	end_lr: 0
	lr_mult: 1
	patience: 10
	val_check_interval: 0.5
	check_val_every_n_epoch: 1
	gradient_clip_val: 1
	gradient_clip_algorithm: norm
	track_grad_norm: -1
	log_every_n_steps: 10
	val_metric: null
	top_k: 3
	top_k_average_method: best
	efficient_finetune: null
	lora:
	module_filter: null
	filter:
	- query
	- value
	- ^q$
	- ^v$
	- ^k$
	- ^o$
	r: 8
	alpha: 8
	loss_function: auto
	env:
	num_gpus: 4
	num_nodes: 1
	batch_size: 128
	per_gpu_batch_size: 32
	eval_batch_size_ratio: 4
	per_gpu_batch_size_evaluation: null
	precision: 16
	num_workers: 2
	num_workers_evaluation: 2
	fast_dev_run: false
	deterministic: false
	auto_select_gpus: true
	strategy: ddp
	deepspeed_allgather_size: 1000000000.0
	deepspeed_allreduce_size: 1000000000.0