# albef-vqa / configs / retrieval.yaml
# Provenance: uploaded by ryanramos, commit d1b8c9b ("Add source code").
# Shared scalar anchors; aliased (*name) by the encoder/projection sections below
# so a single edit here propagates to every consumer.
hidden_size: &hidden_size 768
vocab_size: &vocab_size 30522
type_vocab_size: &type_vocab_size 2
max_position_embeddings: &max_position_embeddings 512
pad_token_id: &pad_token_id 0
# Dimension of the shared retrieval embedding space (see projection_args / similarity_args).
embed_size: &embed_size 256
# Run-level settings.
seed: 42
world_size: 1
device: "cuda"
# NOTE(review): "env://" looks like a torch.distributed init_method read from
# environment variables (MASTER_ADDR/MASTER_PORT) — confirm against the trainer.
dist_url: "env://"
# Where the final model/output artifact is written.
output_path: "./examples/albef/outputs/retrieval_output.pt"
# Data loading: JSON annotation files plus the root directory holding the images.
datamodule_args:
  train_files: ["./examples/albef/data_files/coco_train.json"]
  test_files: ["./examples/albef/data_files/coco_test.json"]
  image_root: "./examples/albef/data_files/coco"
  batch_size: 32
  # Dataloader worker processes — presumably torch DataLoader workers; verify consumer.
  num_workers: 8
# Vision encoder. image_size/patch_size suggest a ViT-style patch encoder
# (384/16 -> 24x24 = 576 patches) — confirm against the model code.
vision_encoder_args:
  hidden_size: *hidden_size
  image_size: 384
  patch_size: 16
  num_hidden_layers: 12
  num_attention_heads: 12
  mlp_dim: 3072
  dropout: 0.0
  attention_dropout: 0.0
  layer_norm_eps: 1e-6
# Text encoder. Vocab/type-vocab/position/pad values come from the shared
# anchors at the top of the file.
text_encoder_args:
  vocab_size: *vocab_size
  hidden_size: *hidden_size
  type_vocab_size: *type_vocab_size
  max_position_embeddings: *max_position_embeddings
  pad_token_id: *pad_token_id
  # Half the depth of the vision encoder (6 vs 12 layers).
  num_hidden_layers: 6
  num_attention_heads: 12
  intermediate_size: 3072
  # NOTE(review): eps differs from the vision encoder's 1e-6 — looks intentional
  # (BERT-style default) but worth confirming.
  layer_norm_eps: 1e-12
  dropout: 0.0
# Multimodal (fusion) encoder; same width/head/eps settings as the text encoder.
multimodal_encoder_args:
  hidden_size: *hidden_size
  num_hidden_layers: 6
  num_attention_heads: 12
  intermediate_size: 3072
  layer_norm_eps: 1e-12
# Linear projection from encoder hidden size (768) down to the shared
# retrieval embedding size (256).
projection_args:
  in_features: *hidden_size
  out_features: *embed_size
# Image-text similarity settings.
similarity_args:
  embed_size: *embed_size
  # Size of the negative-sample queue — presumably a MoCo-style memory queue; confirm.
  queue_size: 65536
  # Temperature applied to similarity scores — typical contrastive-learning value; confirm.
  temp: 0.07
# Optimization / schedule hyperparameters.
training_args:
  log_every_n_steps: 100
  # Mixing/momentum coefficient — presumably the ALBEF soft-label alpha; confirm.
  alpha: 0.4
  weight_decay: 0.02
  # NOTE(review): PyYAML (YAML 1.1 resolver) loads "1e-5"/"1e-6" as STRINGS,
  # not floats (no decimal point) — the consumer must coerce with float(); confirm loader.
  lr: 1e-5
  min_lr: 1e-6
  max_epochs: 5
  step_size: 100
  warmup_steps: 1
  checkpoint_root: "./examples/albef/checkpoints"
# Retrieval evaluation settings.
eval_args:
  log_every_n_steps: 100
  # Number of top candidates kept/re-ranked per query during retrieval eval — confirm against eval code.
  k_test: 256