skit-ai
/

speechllm-2B

Feature Extraction

speech-language

Model card Files Files and versions Community

shangeth committed on Jun 19, 2024

Commit

676b75c

·

verified ·

1 Parent(s): cced7c3

Upload model

Files changed (3) hide show

README.md +0 -10
config.json +5 -6
config.py +8 -3

README.md CHANGED Viewed

@@ -66,16 +66,6 @@ model-index:
     - type: accuracy
       value: 60.61
       name: Test Age Accuracy
-  - task:
-      type: audio-classification
-      name: Audio Classification
-    dataset:
-      name: Common Voice 16.1
-      type: common_voice_16_1
-      split: test
-      args:
-        language: en
-    metrics:
     - type: accuracy
       value: 61.56
       name: Test Accent Accuracy

     - type: accuracy
       value: 60.61
       name: Test Age Accuracy
     - type: accuracy
       value: 61.56
       name: Test Accent Accuracy

config.json CHANGED Viewed

@@ -1,14 +1,13 @@
 {
-  "architectures": [
-    "SpeechLLMModel"
-  ],
   "audio_enc_dim": 1280,
   "auto_map": {
-    "AutoConfig": "config.SpeechLLMModelConfig",
-    "AutoModel": "model.SpeechLLMModel"
   },
   "llm_dim": 2048,
   "model_type": "custom_model",
-  "torch_dtype": "float32",
   "transformers_version": "4.38.2"
 }

 {
   "audio_enc_dim": 1280,
+  "audio_encoder_name": "facebook/hubert-xlarge-ll60k",
+  "audio_processor_name": "facebook/hubert-large-ls960-ft",
   "auto_map": {
+    "AutoConfig": "config.SpeechLLMModelConfig"
   },
   "llm_dim": 2048,
+  "llm_model_checkpoint": "hf_repo/llm_model_checkpoint",
+  "llm_model_name": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
   "model_type": "custom_model",
   "transformers_version": "4.38.2"
 }

config.py CHANGED Viewed

@@ -3,7 +3,12 @@ from transformers import PretrainedConfig
 class SpeechLLMModelConfig(PretrainedConfig):
     model_type = "custom_model"
-    def __init__(self, audio_enc_dim=1280, llm_dim=2048, **kwargs):
         super().__init__(**kwargs)
-        self.audio_enc_dim = audio_enc_dim
-        self.llm_dim = llm_dim

 class SpeechLLMModelConfig(PretrainedConfig):
     model_type = "custom_model"
+    def __init__(self, **kwargs):
         super().__init__(**kwargs)
+        self.audio_enc_dim = 1280
+        self.llm_dim = 2048
+        self.audio_processor_name = "facebook/hubert-large-ls960-ft"
+        self.audio_encoder_name = 'facebook/hubert-xlarge-ll60k'
+        self.llm_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+        self.llm_model_checkpoint = "hf_repo/llm_model_checkpoint"