{ "model_cfg": { "embed_dim": 1152, "init_logit_bias": -10, "custom_text": true, "vision_cfg": { "image_size": 384, "timm_model_name": "vit_so400m_patch14_siglip_384", "timm_model_pretrained": false, "timm_pool": "map", "timm_proj": "none" }, "text_cfg": { "context_length": 64, "vocab_size": 32000, "hf_tokenizer_name": "timm/ViT-B-16-SigLIP", "tokenizer_kwargs": { "clean": "canonicalize" }, "width": 1152, "heads": 16, "layers": 27, "mlp_ratio": 3.7362, "no_causal_mask": true, "proj_bias": true, "pool_type": "last", "norm_kwargs": { "eps": 1e-06 } } }, "preprocess_cfg": { "mean": [ 0.48145466, 0.4578275, 0.40821073 ], "std": [ 0.26862954, 0.26130258, 0.27577711 ], "interpolation": "bicubic", "resize_mode": "shortest" } }