|
|
|
""" mPLUGOwl3 model configuration""" |
|
|
|
import os |
|
from typing import Union |
|
|
|
from transformers.utils import logging |
|
from .configuration_hyper_qwen2 import HyperQwen2Config |
|
from transformers.models.siglip.configuration_siglip import SiglipVisionConfig |
|
logger = logging.get_logger(__name__) |
|
|
|
|
|
class mPLUGOwl3Config(HyperQwen2Config):
    """Configuration for mPLUGOwl3: a HyperQwen2 config plus a SigLIP vision config.

    Args:
        use_cache: Stored on the config as ``use_cache``.
        vision_config: One of ``None`` (build a ``SiglipVisionConfig`` from
            ``default_vision_config``), a ``dict`` of ``SiglipVisionConfig``
            keyword arguments, or a ready-made ``SiglipVisionConfig`` instance
            (stored as-is, not copied).
        **kwargs: Forwarded unchanged to ``HyperQwen2Config.__init__``.

    Raises:
        TypeError: If ``vision_config`` is not ``None``, a ``dict`` or a
            ``SiglipVisionConfig``.
    """

    model_type = "mplugowl3"
    keys_to_ignore_at_inference = ["past_key_values"]

    # Vision settings used when the caller does not supply a vision_config.
    default_vision_config = {
        "hidden_size": 1152,
        "image_size": 378,
        "intermediate_size": 4304,
        "model_type": "siglip_vision_model",
        "num_attention_heads": 16,
        "num_hidden_layers": 27,
        "patch_size": 14,
    }

    def __init__(
        self,
        use_cache=True,
        vision_config=None,
        **kwargs,
    ):
        self.use_cache = use_cache

        if vision_config is None:
            self.vision_config = SiglipVisionConfig(**self.default_vision_config)
            logger.info("vision_config is None, using default vision config")
        elif isinstance(vision_config, dict):
            self.vision_config = SiglipVisionConfig(**vision_config)
        elif isinstance(vision_config, SiglipVisionConfig):
            self.vision_config = vision_config
        else:
            # Without this branch self.vision_config would stay unset and the
            # patch_size lookup below would fail with an unrelated-looking
            # AttributeError.
            raise TypeError(
                "vision_config must be None, a dict, or a SiglipVisionConfig, "
                f"got {type(vision_config).__name__}"
            )

        # Fixed value equal to default_vision_config["image_size"]; it is not
        # read from a caller-supplied vision_config.
        self.image_size = 378
        self.patch_size = self.vision_config.patch_size

        super().__init__(**kwargs)

        # NOTE(review): use_cache is consumed by this signature and therefore
        # never reaches the parent through **kwargs. The parent initializer is
        # not visible here; if it assigns its own use_cache default, the value
        # set above would be lost, so re-apply the caller's choice.
        self.use_cache = use_cache
|
|