Upload model
Browse files
- README.md +0 -10
- config.json +5 -6
- config.py +8 -3
README.md
CHANGED
@@ -66,16 +66,6 @@ model-index:
|
|
66 |
- type: accuracy
|
67 |
value: 60.61
|
68 |
name: Test Age Accuracy
|
69 |
-
- task:
|
70 |
-
type: audio-classification
|
71 |
-
name: Audio Classification
|
72 |
-
dataset:
|
73 |
-
name: Common Voice 16.1
|
74 |
-
type: common_voice_16_1
|
75 |
-
split: test
|
76 |
-
args:
|
77 |
-
language: en
|
78 |
-
metrics:
|
79 |
- type: accuracy
|
80 |
value: 61.56
|
81 |
name: Test Accent Accuracy
|
|
|
66 |
- type: accuracy
|
67 |
value: 60.61
|
68 |
name: Test Age Accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
- type: accuracy
|
70 |
value: 61.56
|
71 |
name: Test Accent Accuracy
|
config.json
CHANGED
@@ -1,14 +1,13 @@
|
|
1 |
{
|
2 |
-
"architectures": [
|
3 |
-
"SpeechLLMModel"
|
4 |
-
],
|
5 |
"audio_enc_dim": 1280,
|
|
|
|
|
6 |
"auto_map": {
|
7 |
-
"AutoConfig": "config.SpeechLLMModelConfig"
|
8 |
-
"AutoModel": "model.SpeechLLMModel"
|
9 |
},
|
10 |
"llm_dim": 2048,
|
|
|
|
|
11 |
"model_type": "custom_model",
|
12 |
-
"torch_dtype": "float32",
|
13 |
"transformers_version": "4.38.2"
|
14 |
}
|
|
|
1 |
{
|
|
|
|
|
|
|
2 |
"audio_enc_dim": 1280,
|
3 |
+
"audio_encoder_name": "facebook/hubert-xlarge-ll60k",
|
4 |
+
"audio_processor_name": "facebook/hubert-large-ls960-ft",
|
5 |
"auto_map": {
|
6 |
+
"AutoConfig": "config.SpeechLLMModelConfig"
|
|
|
7 |
},
|
8 |
"llm_dim": 2048,
|
9 |
+
"llm_model_checkpoint": "hf_repo/llm_model_checkpoint",
|
10 |
+
"llm_model_name": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
11 |
"model_type": "custom_model",
|
|
|
12 |
"transformers_version": "4.38.2"
|
13 |
}
|
config.py
CHANGED
@@ -3,7 +3,12 @@ from transformers import PretrainedConfig
|
|
3 |
class SpeechLLMModelConfig(PretrainedConfig):
|
4 |
model_type = "custom_model"
|
5 |
|
6 |
-
def __init__(self,
|
7 |
super().__init__(**kwargs)
|
8 |
-
self.audio_enc_dim =
|
9 |
-
self.llm_dim =
|
|
|
|
|
|
|
|
|
|
|
|
3 |
class SpeechLLMModelConfig(PretrainedConfig):
|
4 |
model_type = "custom_model"
|
5 |
|
6 |
+
def __init__(self, **kwargs):
|
7 |
super().__init__(**kwargs)
|
8 |
+
self.audio_enc_dim = 1280
|
9 |
+
self.llm_dim = 2048
|
10 |
+
|
11 |
+
self.audio_processor_name = "facebook/hubert-large-ls960-ft"
|
12 |
+
self.audio_encoder_name = 'facebook/hubert-xlarge-ll60k'
|
13 |
+
self.llm_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
|
14 |
+
self.llm_model_checkpoint = "hf_repo/llm_model_checkpoint"
|