Feature Extraction
Transformers
Safetensors
English
custom_model
multi-modal
speech-language
custom_code
Eval Results
File size: 526 Bytes
6d4443f
 
 
 
 
676b75c
6d4443f
676b75c
 
 
 
 
 
98465ab
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
from transformers import PretrainedConfig

class SpeechLLMModelConfig(PretrainedConfig):
    """Configuration holding the dimensions and component names of the speech-LLM model.

    Every field is a keyword-only argument whose default is the value this
    config previously hard-coded, so ``SpeechLLMModelConfig()`` is unchanged.
    Values passed explicitly (or read from a serialized config) are now kept
    instead of being overwritten after ``PretrainedConfig.__init__`` runs.

    Args:
        audio_enc_dim: Stored as ``self.audio_enc_dim``.
            NOTE(review): presumably the audio encoder's output feature
            size -- confirm against the model code.
        llm_dim: Stored as ``self.llm_dim``.
            NOTE(review): presumably the language model's hidden size --
            confirm against the model code.
        audio_processor_name: Name of the audio processor to use.
        audio_encoder_name: Name of the audio encoder to use.
        llm_model_name: Name of the language model to use.
        llm_model_checkpoint: Identifier of the language-model checkpoint.
            NOTE(review): the default looks like a placeholder -- confirm.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = "custom_model"

    def __init__(
        self,
        *,
        audio_enc_dim: int = 1280,
        llm_dim: int = 2048,
        audio_processor_name: str = "facebook/hubert-large-ls960-ft",
        audio_encoder_name: str = "facebook/hubert-xlarge-ll60k",
        llm_model_name: str = "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        llm_model_checkpoint: str = "hf_repo/llm_model_checkpoint",
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Assign from the arguments rather than from literals so that
        # overrides and deserialized values are preserved.
        self.audio_enc_dim = audio_enc_dim
        self.llm_dim = llm_dim

        self.audio_processor_name = audio_processor_name
        self.audio_encoder_name = audio_encoder_name
        self.llm_model_name = llm_model_name
        self.llm_model_checkpoint = llm_model_checkpoint