farzadab
/

test-uv-pipeline

Feature Extraction

Transformers

Safetensors

ultravox

custom_code

Model card Files Files and versions Community

farzadab committed on Jul 10, 2024

Commit

3c2070f

verified ·

1 Parent(s): d66aec1

Update ultravox_model.py

Browse files

Files changed (1) hide show

ultravox_model.py +15 -17

ultravox_model.py CHANGED Viewed

@@ -11,9 +11,9 @@ import transformers.modeling_outputs
 import transformers.models
 # We must use relative import in this directory to allow uploading to HF Hub
-from . import ultravox_config
-from . import ultravox_processing
-from . import whisper_model_modified
 class UltravoxModel(
@@ -33,11 +33,11 @@ class UltravoxModel(
         config: Model configuration class with all the parameters of the model.
     """
-    config_class = ultravox_config.UltravoxConfig
-    config: ultravox_config.UltravoxConfig  # for type hinting
     _no_split_modules = ["Wav2Vec2Model", "WhisperEncoder", "LlamaDecoderLayer"]
-    def __init__(self, config: ultravox_config.UltravoxConfig):
         super().__init__(config)
         self.keep_params: Set[str] = set()
@@ -188,13 +188,13 @@ class UltravoxModel(
         return model_input
     @classmethod
-    def _create_audio_tower(cls, config: ultravox_config.UltravoxConfig) -> Union[
         transformers.Wav2Vec2Model,
         transformers.models.whisper.modeling_whisper.WhisperEncoder,
     ]:
         if config.audio_model_id is not None:
             if "whisper" in config.audio_model_id is not None:
-                audio_tower = whisper_model_modified.WhisperEncoder.from_pretrained(
                     config.audio_model_id
                 )
             else:
@@ -203,7 +203,7 @@ class UltravoxModel(
                 )
         else:
             if "whisper" in config.audio_config._name_or_path:
-                audio_tower = whisper_model_modified.WhisperEncoder(config.audio_config)
             else:
                 audio_tower = transformers.AutoModel.from_config(config.audio_config)
@@ -221,7 +221,7 @@ class UltravoxModel(
     @classmethod
     def _create_language_model(
-        cls, config: ultravox_config.UltravoxConfig
     ) -> transformers.LlamaForCausalLM:
         if config.text_model_id is not None:
             language_model = transformers.AutoModelForCausalLM.from_pretrained(
@@ -375,7 +375,7 @@ class SwiGLU(nn.Module):
 class UltravoxProjector(nn.Sequential):
-    def __init__(self, config: ultravox_config.UltravoxConfig):
         super().__init__()
         self.hidden_dim = config.hidden_size
         self._pad_and_stack = StackAudioFrames(config.stack_factor)
@@ -398,15 +398,13 @@ class UltravoxProjector(nn.Sequential):
         return hidden_states
-transformers.AutoConfig.register("ultravox", ultravox_config.UltravoxConfig)
-transformers.AutoModel.register(ultravox_config.UltravoxConfig, UltravoxModel)
 # transformers.AutoModelForCausalLM.register(
-#     ultravox_config.UltravoxConfig, UltravoxModel
 # )
 UltravoxModel.register_for_auto_class()
-transformers.AutoProcessor.register(
-    ultravox_config.UltravoxConfig, ultravox_processing.UltravoxProcessor
-)
 # UltravoxModel.register_for_auto_class("AutoModelForCausalLM")

 import transformers.models
 # We must use relative import in this directory to allow uploading to HF Hub
+from .ultravox_config import UltravoxConfig
+from .ultravox_processing import UltravoxProcessor
+from .whisper_model_modified import WhisperEncoder as ModifiedWhisperEncoder
 class UltravoxModel(
         config: Model configuration class with all the parameters of the model.
     """
+    config_class = UltravoxConfig
+    config: UltravoxConfig  # for type hinting
     _no_split_modules = ["Wav2Vec2Model", "WhisperEncoder", "LlamaDecoderLayer"]
+    def __init__(self, config: UltravoxConfig):
         super().__init__(config)
         self.keep_params: Set[str] = set()
         return model_input
     @classmethod
+    def _create_audio_tower(cls, config: UltravoxConfig) -> Union[
         transformers.Wav2Vec2Model,
         transformers.models.whisper.modeling_whisper.WhisperEncoder,
     ]:
         if config.audio_model_id is not None:
             if "whisper" in config.audio_model_id is not None:
+                audio_tower = ModifiedWhisperEncoder.from_pretrained(
                     config.audio_model_id
                 )
             else:
                 )
         else:
             if "whisper" in config.audio_config._name_or_path:
+                audio_tower = ModifiedWhisperEncoder(config.audio_config)
             else:
                 audio_tower = transformers.AutoModel.from_config(config.audio_config)
     @classmethod
     def _create_language_model(
+        cls, config: UltravoxConfig
     ) -> transformers.LlamaForCausalLM:
         if config.text_model_id is not None:
             language_model = transformers.AutoModelForCausalLM.from_pretrained(
 class UltravoxProjector(nn.Sequential):
+    def __init__(self, config: UltravoxConfig):
         super().__init__()
         self.hidden_dim = config.hidden_size
         self._pad_and_stack = StackAudioFrames(config.stack_factor)
         return hidden_states
+transformers.AutoConfig.register("ultravox", UltravoxConfig)
+transformers.AutoModel.register(UltravoxConfig, UltravoxModel)
 # transformers.AutoModelForCausalLM.register(
+#     UltravoxConfig, UltravoxModel
 # )
 UltravoxModel.register_for_auto_class()
+transformers.AutoProcessor.register(UltravoxConfig, UltravoxProcessor)
 # UltravoxModel.register_for_auto_class("AutoModelForCausalLM")