{
  "model_cfg": {
    "model_cfg": {
      "embed_dim": 1024,
      "vision_cfg": {
        "image_size": 336,
        "layers": 1,
        "width": 336,
        "patch_size": 14
      },
      "text_cfg": {
        "context_length": 512,
        "vocab_size": 501153,
        "layers": 1
      }
    }
  },
  "preprocess_cfg": {
    "mean": [
      0.48145466,
      0.4578275,
      0.40821073
    ],
    "std": [
      0.26862954,
      0.26130258,
      0.27577711
    ]
  }
}