Zorro123444 committed on
Commit
ba823a3
·
verified ·
1 Parent(s): 6db91a7

Upload 3 files

Browse files

adding config file

Files changed (3) hide show
  1. config.json +55 -0
  2. configuration.json +1 -0
  3. configuration_minicpm.py +113 -0
config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openbmb/MiniCPM-Llama3-V-2_5",
3
+ "version": "2.5",
4
+ "architectures": [
5
+ "MiniCPMV"
6
+ ],
7
+ "attention_bias": false,
8
+ "attention_dropout": 0.0,
9
+ "auto_map": {
10
+ "AutoConfig": "configuration_minicpm.MiniCPMVConfig",
11
+ "AutoModel": "modeling_minicpmv.MiniCPMV",
12
+ "AutoModelForCausalLM": "modeling_minicpmv.MiniCPMV"
13
+ },
14
+ "batch_vision_input": true,
15
+ "bos_token_id": 128000,
16
+ "drop_vision_last_layer": false,
17
+ "eos_token_id": 128001,
18
+ "hidden_act": "silu",
19
+ "hidden_size": 4096,
20
+ "image_size": 448,
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 14336,
23
+ "max_position_embeddings": 8192,
24
+ "mm_use_im_start_end": true,
25
+ "model_type": "minicpmv",
26
+ "num_attention_heads": 32,
27
+ "num_hidden_layers": 32,
28
+ "num_key_value_heads": 8,
29
+ "patch_size": 14,
30
+ "pretraining_tp": 1,
31
+ "query_num": 96,
32
+ "rms_norm_eps": 1e-05,
33
+ "rope_scaling": null,
34
+ "rope_theta": 500000.0,
35
+ "slice_config": {
36
+ "max_slice_nums": 9,
37
+ "patch_size": 14,
38
+ "model_type": "minicpmv"
39
+ },
40
+ "slice_mode": true,
41
+ "tie_word_embeddings": false,
42
+ "torch_dtype": "float16",
43
+ "transformers_version": "4.40.0",
44
+ "use_cache": false,
45
+ "vision_config": {
46
+ "hidden_size": 1152,
47
+ "image_size": 980,
48
+ "intermediate_size": 4304,
49
+ "model_type": "idefics2",
50
+ "num_attention_heads": 16,
51
+ "num_hidden_layers": 27,
52
+ "patch_size": 14
53
+ },
54
+ "vocab_size": 128256
55
+ }
configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework":"Pytorch","task":"multimodal-dialogue"}
configuration_minicpm.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
3
+ #
4
+ # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
5
+ # and OPT implementations in this library. It has been modified from its
6
+ # original forms to accommodate minor architectural differences compared
7
+ # to GPT-NeoX and OPT used by the Meta AI team that trained the model.
8
+ #
9
+ # Licensed under the Apache License, Version 2.0 (the "License");
10
+ # you may not use this file except in compliance with the License.
11
+ # You may obtain a copy of the License at
12
+ #
13
+ # http://www.apache.org/licenses/LICENSE-2.0
14
+ #
15
+ # Unless required by applicable law or agreed to in writing, software
16
+ # distributed under the License is distributed on an "AS IS" BASIS,
17
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18
+ # See the License for the specific language governing permissions and
19
+ # limitations under the License.
20
+ """ MiniCPM model configuration"""
21
+ import os
22
+ from typing import Union
23
+
24
+ from transformers.utils import logging
25
+ from transformers import LlamaConfig, PretrainedConfig
26
+ from transformers.models.idefics2.modeling_idefics2 import Idefics2VisionConfig
27
+
28
+ logger = logging.get_logger(__name__)
29
+
30
+
31
class MiniCPMVSliceConfig(PretrainedConfig):
    """Configuration object for the ``slice_config`` section of a MiniCPM-V config.

    Args:
        patch_size: Stored unchanged as ``self.patch_size`` (default ``14``).
        max_slice_nums: Stored unchanged as ``self.max_slice_nums`` (default ``9``).
        scale_resolution: Stored unchanged as ``self.scale_resolution``
            (default ``448``).
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "minicpmv"

    def __init__(
        self,
        patch_size=14,
        max_slice_nums=9,
        scale_resolution=448,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.patch_size = patch_size
        self.max_slice_nums = max_slice_nums
        self.scale_resolution = scale_resolution

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
        """Build a slice config from a stored configuration.

        Args:
            pretrained_model_name_or_path: Location handed to
                ``cls.get_config_dict`` to load the raw configuration dict.
            **kwargs: Extra options passed to ``cls.get_config_dict``; whatever
                it returns as remaining kwargs is passed on to ``cls.from_dict``.

        Returns:
            The result of ``cls.from_dict`` applied to the slice-config dict.
        """
        cls._set_token_in_kwargs(kwargs)

        config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)

        # A full MiniCPM-V config nests the slice settings under "slice_config".
        # This class declares the same model_type, so a dict that is already a
        # flat slice config also matches the model_type test; only descend when
        # the nested key actually exists instead of raising KeyError.
        if config_dict.get("model_type") == "minicpmv" and "slice_config" in config_dict:
            config_dict = config_dict["slice_config"]

        if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
            logger.warning(
                f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
                f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
            )

        return cls.from_dict(config_dict, **kwargs)
62
+
63
+
64
+
65
class MiniCPMVConfig(LlamaConfig):
    """Top-level MiniCPM-V configuration built on ``LlamaConfig``.

    On top of the ``LlamaConfig`` fields (passed through ``**kwargs``) it holds
    a nested ``MiniCPMVSliceConfig`` and a nested ``Idefics2VisionConfig``.

    Args:
        use_cache: Stored as ``self.use_cache`` (default ``True``).
        query_num: Stored as ``self.query_num`` (default ``64``).
        image_size: Stored as ``self.image_size`` (default ``448``).
        drop_vision_last_layer: Stored as ``self.drop_vision_last_layer``
            (default ``True``).
        batch_vision_input: Stored as ``self.batch_vision_input``
            (default ``True``).
        slice_config: ``None`` (a ``MiniCPMVSliceConfig(max_slice_nums=1)`` is
            created), a mapping of ``MiniCPMVSliceConfig`` keyword arguments,
            or an existing ``MiniCPMVSliceConfig`` instance.
        vision_config: ``None`` (``default_vision_config`` is used), a dict of
            ``Idefics2VisionConfig`` keyword arguments, or an existing
            ``Idefics2VisionConfig`` instance.
        **kwargs: Forwarded to ``LlamaConfig.__init__``.

    Raises:
        TypeError: If ``vision_config`` is none of the supported types.
    """

    model_type = "minicpmv"
    keys_to_ignore_at_inference = ["past_key_values"]

    # Fallback keyword arguments for Idefics2VisionConfig when no vision_config
    # is supplied.
    default_vision_config = {
        "hidden_size": 1152,
        "image_size": 980,
        "intermediate_size": 4304,
        "model_type": "idefics2",
        "num_attention_heads": 16,
        "num_hidden_layers": 27,
        "patch_size": 14,
    }

    def __init__(
        self,
        use_cache=True,
        query_num=64,
        image_size=448,
        drop_vision_last_layer=True,
        batch_vision_input=True,
        slice_config=None,
        vision_config=None,
        **kwargs,
    ):
        # NOTE(review): use_cache is consumed by this signature and is not
        # forwarded to LlamaConfig.__init__ below, which presumably assigns its
        # own default and so may override the value stored here -- confirm
        # whether `use_cache=use_cache` should be passed to super().__init__.
        self.use_cache = use_cache
        self.query_num = query_num
        self.image_size = image_size
        self.drop_vision_last_layer = drop_vision_last_layer
        self.batch_vision_input = batch_vision_input

        if slice_config is None:
            self.slice_config = MiniCPMVSliceConfig(max_slice_nums=1)
        elif isinstance(slice_config, MiniCPMVSliceConfig):
            # Accept a ready-made instance, mirroring the vision_config handling.
            self.slice_config = slice_config
        else:
            self.slice_config = MiniCPMVSliceConfig(**slice_config)
        self.slice_mode = True

        # same as HuggingFaceM4/siglip-so400m-14-980-flash-attn2-navit
        if vision_config is None:
            self.vision_config = Idefics2VisionConfig(**self.default_vision_config)
            logger.info("vision_config is None, using default vision config")
        elif isinstance(vision_config, dict):
            self.vision_config = Idefics2VisionConfig(**vision_config)
        elif isinstance(vision_config, Idefics2VisionConfig):
            self.vision_config = vision_config
        else:
            # Without this branch self.vision_config would stay unset and the
            # patch_size lookup below would fail with an opaque AttributeError.
            raise TypeError(
                "vision_config must be None, a dict, or an Idefics2VisionConfig, "
                f"got {type(vision_config).__name__}"
            )

        # Expose the vision tower's patch size on the top-level config.
        self.patch_size = self.vision_config.patch_size

        super().__init__(**kwargs)