|
{ |
|
"module": "keras_hub.src.models.clip.clip_backbone", |
|
"class_name": "CLIPBackbone", |
|
"config": { |
|
"name": "clip_backbone", |
|
"trainable": true, |
|
"vision_encoder": { |
|
"module": "keras_hub.src.models.clip.clip_vision_encoder", |
|
"class_name": "CLIPVisionEncoder", |
|
"config": { |
|
"name": "clip_vision_encoder", |
|
"trainable": true, |
|
"patch_size": 14, |
|
"hidden_dim": 1664, |
|
"num_layers": 48, |
|
"num_heads": 16, |
|
"intermediate_dim": 8192, |
|
"intermediate_activation": "gelu", |
|
"intermediate_output_index": null, |
|
"image_shape": [ |
|
224, |
|
224, |
|
3 |
|
] |
|
}, |
|
"registered_name": "keras_hub>CLIPVisionEncoder" |
|
}, |
|
"text_encoder": { |
|
"module": "keras_hub.src.models.clip.clip_text_encoder", |
|
"class_name": "CLIPTextEncoder", |
|
"config": { |
|
"name": "clip_text_encoder", |
|
"trainable": true, |
|
"vocabulary_size": 49408, |
|
"embedding_dim": 1280, |
|
"hidden_dim": 1280, |
|
"num_layers": 32, |
|
"num_heads": 20, |
|
"intermediate_dim": 5120, |
|
"intermediate_activation": "gelu", |
|
"intermediate_output_index": null, |
|
"max_sequence_length": 77 |
|
}, |
|
"registered_name": "keras_hub>CLIPTextEncoder" |
|
}, |
|
"projection_dim": 1280 |
|
}, |
|
"registered_name": "keras_hub>CLIPBackbone" |
|
} |