# coding=utf-8
""" mPLUGOwl3 model configuration"""

import os
from typing import Union

from transformers.models.siglip.configuration_siglip import SiglipVisionConfig
from transformers.utils import logging

from .configuration_hyper_qwen2 import HyperQwen2Config

logger = logging.get_logger(__name__)
class mPLUGOwl3Config(HyperQwen2Config):
    model_type = "mplugowl3"
    keys_to_ignore_at_inference = ["past_key_values"]

    # Default vision tower: SigLIP-so400m at 384px with patch size 14,
    # used when no vision_config is supplied to __init__ below.
    default_vision_config = {
        "hidden_size": 1152,
        "image_size": 384,
        "intermediate_size": 4304,
        "model_type": "siglip_vision_model",
        "num_attention_heads": 16,
        "num_hidden_layers": 27,
        "patch_size": 14,
    }
    def __init__(
        self,
        use_cache=True,
        vision_config=None,
        **kwargs,
    ):
        self.use_cache = use_cache

        # Same as HuggingFaceM4/siglip-so400m-14-980-flash-attn2-navit, with
        # tgt_sizes added. Accept either a dict or a ready-made
        # SiglipVisionConfig; otherwise fall back to the class default.
        if vision_config is None:
            self.vision_config = SiglipVisionConfig(**self.default_vision_config)
            logger.info("vision_config is None, using default vision config")
        elif isinstance(vision_config, dict):
            self.vision_config = SiglipVisionConfig(**vision_config)
        elif isinstance(vision_config, SiglipVisionConfig):
            self.vision_config = vision_config
        else:
            raise TypeError(
                "vision_config must be None, a dict, or a SiglipVisionConfig, "
                f"got {type(vision_config)}"
            )

        self.image_size = self.vision_config.image_size
        self.patch_size = self.vision_config.patch_size

        # Forward use_cache explicitly; otherwise the parent __init__ would
        # reset self.use_cache to its own default.
        super().__init__(use_cache=use_cache, **kwargs)
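

# Usage sketch (illustrative; assumes this module is imported from within its
# package so the relative HyperQwen2Config import resolves, and the module
# name below is assumed):
#
#     from .configuration_mplugowl3 import mPLUGOwl3Config
#
#     config = mPLUGOwl3Config()  # falls back to default_vision_config
#     assert config.image_size == 384 and config.patch_size == 14
#
#     # A dict is coerced into a SiglipVisionConfig; omitted fields keep
#     # SiglipVisionConfig's own defaults.
#     config = mPLUGOwl3Config(vision_config={"image_size": 448, "patch_size": 14})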