|
AMP: true |
|
CUDNN_BENCHMARK: true |
|
CUDNN_DETERMINISTIC: false |
|
DATA: |
|
EOS_INDEX: 2 |
|
IMAGE_CROP_SIZE: 224 |
|
IMAGE_TRANSFORM_TRAIN: |
|
- random_resized_crop |
|
- horizontal_flip |
|
- color_jitter |
|
- normalize |
|
IMAGE_TRANSFORM_VAL: |
|
- smallest_resize |
|
- center_crop |
|
- normalize |
|
MASKED_LM: |
|
MASK_PROBABILITY: 0.85 |
|
MASK_PROPORTION: 0.15 |
|
REPLACE_PROBABILITY: 0.1 |
|
MASK_INDEX: 3 |
|
MAX_CAPTION_LENGTH: 50 |
|
ROOT: datasets/redcaps/tarfiles/*.tar |
|
SOS_INDEX: 1 |
|
TOKENIZER_MODEL: datasets/common_30k.model |
|
UNK_INDEX: 0 |
|
USE_PERCENTAGE: 100.0 |
|
USE_SINGLE_CAPTION: false |
|
VOCAB_SIZE: 30000 |
|
MODEL: |
|
DECODER: |
|
BEAM_SIZE: 5 |
|
MAX_DECODING_STEPS: 30 |
|
NAME: nucleus_sampling |
|
NUCLEUS_SIZE: 0.9 |
|
LABEL_SMOOTHING: 0.1 |
|
NAME: virtex_web |
|
TEXTUAL: |
|
DROPOUT: 0.1 |
|
NAME: transdec_prenorm::L6_H512_A8_F2048 |
|
VISUAL: |
|
FEATURE_SIZE: 2048 |
|
FROZEN: false |
|
NAME: torchvision::resnet50 |
|
PRETRAINED: false |
|
OPTIM: |
|
BATCH_SIZE: 256 |
|
CLIP_GRAD_NORM: 10.0 |
|
CNN_LR: 0.0005 |
|
LOOKAHEAD: |
|
ALPHA: 0.5 |
|
STEPS: 5 |
|
USE: false |
|
LR: 0.0005 |
|
LR_DECAY_NAME: cosine |
|
LR_GAMMA: 0.1 |
|
LR_STEPS: [] |
|
NO_DECAY: .*textual.(embedding|transformer).*(norm.*|bias) |
|
NUM_ITERATIONS: 1500000 |
|
OPTIMIZER_NAME: adamw |
|
SGD_MOMENTUM: 0.9 |
|
WARMUP_STEPS: 10000 |
|
WEIGHT_DECAY: 0.01 |
|
RANDOM_SEED: 0 |
|
|