# tests
#
# File size: 5,465 Bytes
#
# c5aa5a6

# Model section. `names` lists a single entry (timm_image); presumably it
# selects which of the per-model stanzas below is active — TODO confirm
# against the consumer of this file.
model:
  names:
    - timm_image

  # Stanza limited to data_types: categorical. num_trans_blocks is 0.
  categorical_transformer:
    out_features: 192
    d_token: 192
    ffn_d_hidden: 192
    num_trans_blocks: 0
    num_attn_heads: 8
    residual_dropout: 0.0
    attention_dropout: 0.2
    ffn_dropout: 0.1
    normalization: layer_norm
    ffn_activation: reglu
    head_activation: relu
    data_types:
      - categorical
    additive_attention: false
    share_qv_weights: false

  # Same values as categorical_transformer, but for data_types: numerical,
  # with two extra keys: embedding_arch and merge.
  numerical_transformer:
    out_features: 192
    d_token: 192
    ffn_d_hidden: 192
    num_trans_blocks: 0
    num_attn_heads: 8
    residual_dropout: 0.0
    attention_dropout: 0.2
    ffn_dropout: 0.1
    normalization: layer_norm
    ffn_activation: reglu
    head_activation: relu
    data_types:
      - numerical
    embedding_arch:
      - linear
      - relu
    merge: concat
    additive_attention: false
    share_qv_weights: false

  # Text stanza using the bert-base-cased checkpoint.
  ner_text:
    checkpoint_name: bert-base-cased
    max_text_len: 512
    gradient_checkpointing: false
    low_cpu_mem_usage: false
    data_types:
      - text
    tokenizer_name: hf_auto
    insert_sep: false
    text_segment_num: 2
    stochastic_chunk: false
    # Quoted so the single letters are unambiguously strings.
    special_tags:
      - 'X'
      - 'O'

  # Text stanza using the t5-small checkpoint.
  t_few:
    checkpoint_name: t5-small
    gradient_checkpointing: false
    data_types:
      - text
    tokenizer_name: hf_auto
    length_norm: 1.0
    unlikely_loss: 1.0
    mc_loss: 1.0
    max_text_len: 512
    text_segment_num: 2
    insert_sep: true
    low_cpu_mem_usage: false
    stochastic_chunk: false
    text_aug_detect_length: 10
    text_trivial_aug_maxscale: 0.0

  # Image stanza; this is the entry named under `names` above.
  timm_image:
    checkpoint_name: swin_base_patch4_window7_224
    mix_choice: all_logits
    data_types:
      - image
    train_transform_types:
      - resize_shorter_side
      - center_crop
      - trivial_augment
    val_transform_types:
      - resize_shorter_side
      - center_crop
    image_norm: imagenet
    image_size: 224
    max_img_num_per_col: 2

  # The next three image stanzas share identical transform/normalisation
  # values and differ only in checkpoint_name.
  mmdet_image:
    checkpoint_name: yolov3_mobilenetv2_320_300e_coco
    data_types:
      - image
    train_transform_types:
      - resize_shorter_side
      - center_crop
      - trivial_augment
    val_transform_types:
      - resize_shorter_side
      - center_crop
    image_norm: imagenet
    image_size: 224
    max_img_num_per_col: 2

  mmocr_text_detection:
    checkpoint_name: TextSnake
    data_types:
      - image
    train_transform_types:
      - resize_shorter_side
      - center_crop
      - trivial_augment
    val_transform_types:
      - resize_shorter_side
      - center_crop
    image_norm: imagenet
    image_size: 224
    max_img_num_per_col: 2

  mmocr_text_recognition:
    checkpoint_name: ABINet
    data_types:
      - image
    train_transform_types:
      - resize_shorter_side
      - center_crop
      - trivial_augment
    val_transform_types:
      - resize_shorter_side
      - center_crop
    image_norm: imagenet
    image_size: 224
    max_img_num_per_col: 2

  # Stanza covering both image and text data types; it carries the image
  # keys used above plus text/tokenizer keys.
  clip:
    checkpoint_name: openai/clip-vit-base-patch32
    data_types:
      - image
      - text
    train_transform_types:
      - resize_shorter_side
      - center_crop
      - trivial_augment
    val_transform_types:
      - resize_shorter_side
      - center_crop
    image_norm: clip
    image_size: 224
    max_img_num_per_col: 2
    tokenizer_name: clip
    max_text_len: 77
    insert_sep: false
    text_segment_num: 1
    stochastic_chunk: false
    text_aug_detect_length: 10
    text_trivial_aug_maxscale: 0.0
    text_train_augment_types: null

  # Fusion stanza: data_types is null here, and ffn_activation is geglu
  # (the two column transformers above use reglu).
  fusion_transformer:
    hidden_size: 192
    n_blocks: 3
    attention_n_heads: 8
    adapt_in_features: max
    attention_dropout: 0.2
    residual_dropout: 0.0
    ffn_dropout: 0.1
    ffn_d_hidden: 192
    normalization: layer_norm
    ffn_activation: geglu
    head_activation: relu
    data_types: null
    additive_attention: false
    share_qv_weights: false
# Data section: one stanza per data kind (image, text, categorical,
# numerical, label), followed by the mixup and templates option groups.
data:
  image:
    missing_value_strategy: skip

  text:
    normalize_text: false

  categorical:
    minimum_cat_count: 100
    maximum_num_cat: 20
    convert_to_text: true

  numerical:
    convert_to_text: false
    scaler_with_mean: true
    scaler_with_std: true

  label:
    numerical_label_preprocessing: standardscaler

  pos_label: null

  # Switched off via turn_on: false; the remaining values are kept as-is.
  mixup:
    turn_on: false
    mixup_alpha: 0.8
    cutmix_alpha: 1.0
    cutmix_minmax: null
    prob: 1.0
    switch_prob: 0.5
    mode: batch
    turn_off_epoch: 5
    label_smoothing: 0.1

  # Switched off via turn_on: false; the remaining values are kept as-is.
  templates:
    turn_on: false
    num_templates: 30
    template_length: 2048
    preset_templates:
      - super_glue
      - rte
    custom_templates: null
# Optimization section: optimizer, learning-rate schedule, validation
# cadence, gradient clipping, checkpoint averaging and LoRA options.
optimization:
  optim_type: adamw
  learning_rate: 0.001
  weight_decay: 0.001

  lr_choice: layerwise_decay
  lr_decay: 0.9
  lr_schedule: cosine_decay

  max_epochs: 10
  # NOTE(review): -1 presumably means "no step limit" — confirm with consumer.
  max_steps: -1
  # NOTE(review): fractional value; presumably a ratio of total steps rather
  # than an absolute count — confirm with consumer.
  warmup_steps: 0.1
  end_lr: 0
  lr_mult: 1

  patience: 10
  val_check_interval: 0.5
  check_val_every_n_epoch: 1

  gradient_clip_val: 1
  gradient_clip_algorithm: norm
  track_grad_norm: -1
  log_every_n_steps: 10

  val_metric: null
  top_k: 3
  top_k_average_method: best

  # efficient_finetune is null, while the lora option group below is
  # still populated.
  efficient_finetune: null
  lora:
    module_filter: null
    # Entries with ^…$ look like anchored regex patterns (TODO confirm);
    # they are quoted so the punctuation is clearly part of the string.
    filter:
      - query
      - value
      - '^q$'
      - '^v$'
      - '^k$'
      - '^o$'
    r: 8
    alpha: 8

  loss_function: auto
# Environment section: device counts, batch sizes, precision, data-loader
# workers and distributed-strategy options.
env:
  num_gpus: 4
  num_nodes: 1

  # NOTE(review): 4 GPUs x 32 per GPU x 1 node = 128, which equals
  # batch_size; whether the consumer relies on that is not visible here.
  batch_size: 128
  per_gpu_batch_size: 32
  eval_batch_size_ratio: 4
  per_gpu_batch_size_evaluation: null

  precision: 16

  num_workers: 2
  num_workers_evaluation: 2

  fast_dev_run: false
  deterministic: false
  auto_select_gpus: true

  strategy: ddp
  # Both sizes are written as floats (1000000000.0), not integers.
  deepspeed_allgather_size: 1000000000.0
  deepspeed_allreduce_size: 1000000000.0