{
"_name_or_path": "/data/ckpt/converted/vilt_pretrain",
"architectures": [
"ViltForQuestionAnswering"
],
"bos_token_id": 0,
"contrast_method": "none",
"distill": false,
"draw_false_text": 0,
"drop_rate": 0.1,
"eos_token_id": 2,
"freeze_patterns": null,
"hidden_size": 768,
"ib_kl": false,
"initializer_range": 0.02,
"itm_loss": 1,
"keep_ratio": 1,
"max_text_len": 40,
"merge_r": 0,
"merge_ratio": 0,
"merge_style": "tip",
"merge_text": 0,
"mlm_loss": 1,
"mlm_prob": 0.15,
"mlp_ratio": 4,
"model_type": "vilt",
"mpp_loss": 0,
"num_heads": 12,
"num_layers": 12,
"pad_token_id": 1,
"prune_layers": null,
"prune_method": "mlp_states",
"prune_r": 0,
"reduce_layers": null,
"sim_method": "first_head",
"token_types": 2,
"tokenizer": "bert-base-uncased",
"torch_dtype": "float32",
"transformers_version": "4.25.1",
"vit": "vit_base_patch32_384",
"vocab_size": 30522,
"vqa_label_size": 3129,
"whole_word_masking": false
}