# tests / config.yaml
# chriskasparaws's picture
# Upload 10 files
# c5aa5a6 verified
# Model section: `names` is a list, followed by one settings mapping per
# model key.
# NOTE(review): this copy had lost all indentation, which left `model` empty
# and produced duplicate keys (out_features, data_types, checkpoint_name, ...).
# The nesting below is reconstructed from where keys repeat and from what
# appears to be the AutoGluon MultiModal config layout -- confirm against the
# original file.
model:
  names:
    - timm_image

  categorical_transformer:
    out_features: 192
    d_token: 192
    ffn_d_hidden: 192
    num_trans_blocks: 0
    num_attn_heads: 8
    residual_dropout: 0.0
    attention_dropout: 0.2
    ffn_dropout: 0.1
    normalization: layer_norm
    ffn_activation: reglu
    head_activation: relu
    data_types:
      - categorical
    additive_attention: false
    share_qv_weights: false

  numerical_transformer:
    out_features: 192
    d_token: 192
    ffn_d_hidden: 192
    num_trans_blocks: 0
    num_attn_heads: 8
    residual_dropout: 0.0
    attention_dropout: 0.2
    ffn_dropout: 0.1
    normalization: layer_norm
    ffn_activation: reglu
    head_activation: relu
    data_types:
      - numerical
    embedding_arch:
      - linear
      - relu
    merge: concat
    additive_attention: false
    share_qv_weights: false

  ner_text:
    checkpoint_name: bert-base-cased
    max_text_len: 512
    gradient_checkpointing: false
    low_cpu_mem_usage: false
    data_types:
      - text
    tokenizer_name: hf_auto
    insert_sep: false
    text_segment_num: 2
    stochastic_chunk: false
    special_tags:
      - X
      - O

  t_few:
    checkpoint_name: t5-small
    gradient_checkpointing: false
    data_types:
      - text
    tokenizer_name: hf_auto
    length_norm: 1.0
    unlikely_loss: 1.0
    mc_loss: 1.0
    max_text_len: 512
    text_segment_num: 2
    insert_sep: true
    low_cpu_mem_usage: false
    stochastic_chunk: false
    text_aug_detect_length: 10
    text_trivial_aug_maxscale: 0.0

  # The only entry listed under `names` above.
  timm_image:
    checkpoint_name: swin_base_patch4_window7_224
    mix_choice: all_logits
    data_types:
      - image
    train_transform_types:
      - resize_shorter_side
      - center_crop
      - trivial_augment
    val_transform_types:
      - resize_shorter_side
      - center_crop
    image_norm: imagenet
    image_size: 224
    max_img_num_per_col: 2

  mmdet_image:
    checkpoint_name: yolov3_mobilenetv2_320_300e_coco
    data_types:
      - image
    train_transform_types:
      - resize_shorter_side
      - center_crop
      - trivial_augment
    val_transform_types:
      - resize_shorter_side
      - center_crop
    image_norm: imagenet
    image_size: 224
    max_img_num_per_col: 2

  mmocr_text_detection:
    checkpoint_name: TextSnake
    data_types:
      - image
    train_transform_types:
      - resize_shorter_side
      - center_crop
      - trivial_augment
    val_transform_types:
      - resize_shorter_side
      - center_crop
    image_norm: imagenet
    image_size: 224
    max_img_num_per_col: 2

  mmocr_text_recognition:
    checkpoint_name: ABINet
    data_types:
      - image
    train_transform_types:
      - resize_shorter_side
      - center_crop
      - trivial_augment
    val_transform_types:
      - resize_shorter_side
      - center_crop
    image_norm: imagenet
    image_size: 224
    max_img_num_per_col: 2

  clip:
    checkpoint_name: openai/clip-vit-base-patch32
    data_types:
      - image
      - text
    train_transform_types:
      - resize_shorter_side
      - center_crop
      - trivial_augment
    val_transform_types:
      - resize_shorter_side
      - center_crop
    image_norm: clip
    image_size: 224
    max_img_num_per_col: 2
    tokenizer_name: clip
    max_text_len: 77
    insert_sep: false
    text_segment_num: 1
    stochastic_chunk: false
    text_aug_detect_length: 10
    text_trivial_aug_maxscale: 0.0
    text_train_augment_types: null

  fusion_transformer:
    hidden_size: 192
    n_blocks: 3
    attention_n_heads: 8
    adapt_in_features: max
    attention_dropout: 0.2
    residual_dropout: 0.0
    ffn_dropout: 0.1
    ffn_d_hidden: 192
    normalization: layer_norm
    ffn_activation: geglu
    head_activation: relu
    data_types: null
    additive_attention: false
    share_qv_weights: false
# Data section: one sub-mapping per input kind (image, text, categorical,
# numerical, label), followed by the mixup and templates settings.
# NOTE(review): this copy had lost all indentation, which left `data` empty
# and duplicated `convert_to_text` and `turn_on` at the top level. The nesting
# below is reconstructed -- confirm against the original file.
data:
  image:
    missing_value_strategy: skip

  text:
    normalize_text: false

  categorical:
    minimum_cat_count: 100
    maximum_num_cat: 20
    convert_to_text: true

  numerical:
    convert_to_text: false
    scaler_with_mean: true
    scaler_with_std: true

  label:
    numerical_label_preprocessing: standardscaler

  # NOTE(review): placed directly under `data` rather than under `label`;
  # the flattened source cannot distinguish the two -- confirm.
  pos_label: null

  mixup:
    turn_on: false
    mixup_alpha: 0.8
    cutmix_alpha: 1.0
    cutmix_minmax: null
    prob: 1.0
    switch_prob: 0.5
    mode: batch
    turn_off_epoch: 5
    label_smoothing: 0.1

  templates:
    turn_on: false
    num_templates: 30
    template_length: 2048
    preset_templates:
      - super_glue
      - rte
    custom_templates: null
# Optimization section: optimizer, learning-rate schedule, validation and
# checkpoint-averaging settings, plus a nested `lora` mapping.
# NOTE(review): this copy had lost all indentation, which left `optimization`
# and `lora` empty and their keys at the top level. The nesting below is
# reconstructed -- confirm against the original file.
optimization:
  optim_type: adamw
  learning_rate: 0.001
  weight_decay: 0.001
  lr_choice: layerwise_decay
  lr_decay: 0.9
  lr_schedule: cosine_decay
  max_epochs: 10
  max_steps: -1
  warmup_steps: 0.1
  end_lr: 0
  lr_mult: 1
  patience: 10
  val_check_interval: 0.5
  check_val_every_n_epoch: 1
  gradient_clip_val: 1
  gradient_clip_algorithm: norm
  track_grad_norm: -1
  log_every_n_steps: 10
  val_metric: null
  top_k: 3
  top_k_average_method: best
  efficient_finetune: null

  lora:
    module_filter: null
    # Quoted so the regex metacharacters are unambiguously plain strings;
    # the parsed values are unchanged.
    filter:
      - query
      - value
      - '^q$'
      - '^v$'
      - '^k$'
      - '^o$'
    r: 8
    alpha: 8

  # NOTE(review): placed under `optimization`, not `lora` -- confirm.
  loss_function: auto
# Environment section: device counts, batch sizes, precision, worker counts
# and distributed-strategy settings.
# NOTE(review): this copy had lost all indentation, which left `env` empty
# and its settings as unrelated top-level keys. They are re-nested under
# `env` here -- confirm against the original file.
env:
  num_gpus: 4
  num_nodes: 1
  batch_size: 128
  per_gpu_batch_size: 32
  eval_batch_size_ratio: 4
  per_gpu_batch_size_evaluation: null
  precision: 16
  num_workers: 2
  num_workers_evaluation: 2
  fast_dev_run: false
  deterministic: false
  auto_select_gpus: true
  strategy: ddp
  deepspeed_allgather_size: 1000000000.0
  deepspeed_allreduce_size: 1000000000.0