{
  "model_cfg": {
    "embed_dim": 768,
    "vision_cfg": {
      "image_size": 224,
      "layers": 24,
      "width": 1024,
      "patch_size": 14,
      "no_ln_pre": true,
      "pool_type": "avg",
      "final_ln_after_pool": true
    },
    "text_cfg": {
      "context_length": 32,
      "vocab_size": 32000,
      "hf_tokenizer_name": "bert-base-uncased",
      "tokenizer_kwargs": {
        "strip_sep_token": true
      },
      "width": 768,
      "heads": 12,
      "layers": 12,
      "pool_type": "last",
      "no_causal_mask": true
    }
  },
  "preprocess_cfg": {
    "mean": [
      0.485,
      0.456,
      0.406
    ],
    "std": [
      0.229,
      0.224,
      0.225
    ],
    "interpolation": "bilinear",
    "resize_mode": "squash"
  }
}