File size: 3,080 Bytes
caecb8c dbe6e99 caecb8c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import json
from typing import Any, Dict, Optional
from dacite import Config as DaciteConfig
from dacite import from_dict
from omegaconf import OmegaConf
from transformers.configuration_utils import PretrainedConfig
from xlstm import xLSTMLMModelConfig
# from .config_presets import xlstm_cfg_map
class xLSTMConfig(PretrainedConfig):
"""XLSTM configuration class.
We seperate the specific xLSTM model configuration
from the rest due to the heavy nesting of the configuration.
"""
model_type = "xlstm"
def __init__(
self, vocab_size: int = 32000, config: Optional[Dict[str, Any]] = None, **kwargs
):
super().__init__(**kwargs)
cfg = OmegaConf.create(config)
cfg["vocab_size"] = vocab_size
for key, value in kwargs.items():
cfg[key] = value
self._xlstm_config = cfg
self.vocab_size = vocab_size
self.embedding_dim = cfg.get("embedding_dim")
self.context_length = cfg.get("context_length")
self.hidden_size = cfg.get("embedding_dim")
def to_xlstm_config(self):
return from_dict(
data_class=xLSTMLMModelConfig,
data=OmegaConf.to_container(self._xlstm_config),
config=DaciteConfig(strict=True),
)
def to_dict(self) -> Dict[str, Any]:
"""
Converts the configuration to a dictionary for serialization.
"""
output = super().to_dict()
output["_xlstm_config"] = OmegaConf.to_container(
self._xlstm_config, resolve=True
)
relevant_keys = [
"vocab_size",
"embedding_dim",
"context_length",
"torch_dtype",
"_xlstm_config",
"transformers_version",
"architectures",
"model_type",
]
output_ = output.copy()
for key in output.keys():
if key not in relevant_keys:
output_.pop(key)
return output_
@classmethod
def from_dict(cls, config_dict: Dict[str, Any], **kwargs):
"""
Creates a configuration instance from a dictionary.
"""
xlstm_config = config_dict.pop("_xlstm_config")
vocab_size = config_dict.pop("vocab_size")
config = cls(vocab_size=vocab_size, config=xlstm_config)
if "auto_map" in config_dict and config_dict["auto_map"]:
setattr(config, "auto_map", config_dict.pop("auto_map"))
# breakpoint()
# config.xlstm_config = xlstm_config
if "return_unused_kwargs" in kwargs and kwargs["return_unused_kwargs"]:
return config, {}
return config
def to_json_string(self, *args, **kwargs) -> str:
"""
Serializes the instance to a JSON string.
"""
return json.dumps(self.to_dict(), indent=2)
@classmethod
def from_json_string(cls, json_string: str):
"""
Deserializes the instance from a JSON string.
"""
config_dict = json.loads(json_string)
return cls.from_dict(config_dict)
|