File size: 2,076 Bytes
0154197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
{
    "aligner_config": {
        "cls": "MlpProjector",
        "model_type": "aligner",
        "params": {
            "depth": 2,
            "input_dim": 1024,
            "n_embed": 4096,
            "projector_type": "low_high_hybrid_split_mlp_gelu"
        }
    },
    "architectures": [
        "MultiModalityCausalLM"
    ],
    "language_config": {
        "max_position_embeddings": 16384,
        "model_type": "llama",
        "num_hidden_layers": 30,
        "torch_dtype": "float16",
        "vocab_size": 102400
    },
    "model_type": "multi_modality",
    "quantization": {
        "group_size": 64,
        "bits": 4
    },
    "torch_dtype": "float16",
    "transformers_version": "4.38.2",
    "vision_config": {
        "cls": "HybridVisionTower",
        "model_type": "vision",
        "params": {
            "concat_type": "tuple",
            "freeze_high": true,
            "freeze_low": true,
            "high_res_cfg": {
                "ckpt_path": "",
                "image_size": 1024,
                "model_name": "sam_b_downsample",
                "output_dim": 1024,
                "pixel_mean": [
                    0.48145466,
                    0.4578275,
                    0.40821073
                ],
                "pixel_std": [
                    0.26862954,
                    0.26130258,
                    0.27577711
                ],
                "select_feature": "same",
                "select_layer": -1
            },
            "low_res_cfg": {
                "ckpt_path": "",
                "image_size": 384,
                "model_name": "siglip_large_patch16_384",
                "output_dim": 1024,
                "pixel_mean": [
                    0.5,
                    0.5,
                    0.5
                ],
                "pixel_std": [
                    0.5,
                    0.5,
                    0.5
                ],
                "select_feature": "same",
                "select_layer": -1
            }
        },
        "intermediate_size": 4096
    }
}