{ "_name_or_path": "CIDAS/clipseg-rd16", "architectures": [ "CLIPSegForImageSegmentation" ], "conditional_layer": 0, "decoder_attention_dropout": 0.0, "decoder_hidden_act": "quick_gelu", "decoder_intermediate_size": 2048, "decoder_num_attention_heads": 4, "extract_layers": [ 3, 6, 9 ], "initializer_factor": 1.0, "logit_scale_init_value": 2.6592, "model_type": "clipseg", "projection_dim": 512, "reduce_dim": 16, "text_config": { "bos_token_id": 0, "dropout": 0.0, "eos_token_id": 2, "model_type": "clipseg_text_model" }, "transformers_version": "4.37.0.dev0", "use_complex_transposed_convolution": false, "vision_config": { "dropout": 0.0, "model_type": "clipseg_vision_model", "patch_size": 16 } }