|
model: |
|
names: |
|
- fusion_mlp |
|
- hf_text |
|
- numerical_mlp |
|
- timm_image |
|
numerical_mlp: |
|
hidden_size: 128 |
|
activation: leaky_relu |
|
num_layers: 1 |
|
drop_rate: 0.1 |
|
normalization: layer_norm |
|
d_token: 8 |
|
embedding_arch: null |
|
data_types: |
|
- numerical |
|
merge: concat |
|
hf_text: |
|
checkpoint_name: local://hf_text |
|
gradient_checkpointing: false |
|
pooling_mode: cls |
|
data_types: |
|
- text |
|
tokenizer_name: hf_auto |
|
max_text_len: 512 |
|
insert_sep: true |
|
low_cpu_mem_usage: false |
|
text_segment_num: 2 |
|
stochastic_chunk: false |
|
text_aug_detect_length: 10 |
|
text_trivial_aug_maxscale: 0.0 |
|
text_train_augment_types: null |
|
timm_image: |
|
checkpoint_name: swin_base_patch4_window7_224 |
|
mix_choice: all_logits |
|
data_types: |
|
- image |
|
train_transforms: |
|
- resize_shorter_side |
|
- center_crop |
|
- trivial_augment |
|
val_transforms: |
|
- resize_shorter_side |
|
- center_crop |
|
image_norm: imagenet |
|
image_size: null |
|
max_img_num_per_col: 2 |
|
fusion_mlp: |
|
weight: 0.1 |
|
adapt_in_features: max |
|
hidden_sizes: |
|
- 128 |
|
activation: leaky_relu |
|
drop_rate: 0.1 |
|
normalization: layer_norm |
|
data_types: null |
|
data: |
|
image: |
|
missing_value_strategy: zero |
|
text: |
|
normalize_text: false |
|
categorical: |
|
minimum_cat_count: 100 |
|
maximum_num_cat: 20 |
|
convert_to_text: true |
|
numerical: |
|
convert_to_text: false |
|
scaler_with_mean: true |
|
scaler_with_std: true |
|
document: |
|
missing_value_strategy: zero |
|
label: |
|
numerical_label_preprocessing: standardscaler |
|
pos_label: null |
|
mixup: |
|
turn_on: false |
|
mixup_alpha: 0.8 |
|
cutmix_alpha: 1.0 |
|
cutmix_minmax: null |
|
prob: 1.0 |
|
switch_prob: 0.5 |
|
mode: batch |
|
turn_off_epoch: 5 |
|
label_smoothing: 0.1 |
|
templates: |
|
turn_on: false |
|
num_templates: 30 |
|
template_length: 2048 |
|
preset_templates: |
|
- super_glue |
|
- rte |
|
custom_templates: null |
|
optimization: |
|
optim_type: adamw |
|
learning_rate: 0.0001 |
|
weight_decay: 0.001 |
|
lr_choice: layerwise_decay |
|
lr_decay: 0.9 |
|
lr_schedule: cosine_decay |
|
max_epochs: 10 |
|
max_steps: -1 |
|
warmup_steps: 0.1 |
|
end_lr: 0 |
|
lr_mult: 1 |
|
patience: 10 |
|
val_check_interval: 0.5 |
|
check_val_every_n_epoch: 1 |
|
skip_final_val: false |
|
gradient_clip_val: 1 |
|
gradient_clip_algorithm: norm |
|
track_grad_norm: -1 |
|
log_every_n_steps: 10 |
|
top_k: 3 |
|
top_k_average_method: greedy_soup |
|
efficient_finetune: null |
|
lora: |
|
module_filter: null |
|
filter: |
|
- query |
|
- value |
|
- ^q$ |
|
- ^v$ |
|
- ^k$ |
|
- ^o$ |
|
r: 8 |
|
alpha: 8 |
|
loss_function: auto |
|
focal_loss: |
|
alpha: null |
|
gamma: 2.0 |
|
reduction: mean |
|
env: |
|
num_gpus: 1 |
|
num_nodes: 1 |
|
batch_size: 128 |
|
per_gpu_batch_size: 8 |
|
eval_batch_size_ratio: 4 |
|
per_gpu_batch_size_evaluation: null |
|
precision: 16 |
|
num_workers: 2 |
|
num_workers_evaluation: 2 |
|
fast_dev_run: false |
|
deterministic: false |
|
auto_select_gpus: true |
|
strategy: null |
|
deepspeed_allgather_size: 1000000000.0 |
|
deepspeed_allreduce_size: 1000000000.0 |
|
|