Yingxu He
committed on
Commit
•
c73efa1
1
Parent(s):
ee6fe1d
Upload MERaLiONForConditionalGeneration
Browse files- config.json +6 -2
- generation_config.json +1 -1
- model-00001-of-00004.safetensors +2 -2
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- model.safetensors.index.json +1 -1
- modeling_meralion.py +0 -17
config.json
CHANGED
@@ -1,7 +1,10 @@
|
|
1 |
{
|
2 |
-
"
|
|
|
|
|
3 |
"auto_map": {
|
4 |
-
"AutoConfig": "configuration_meralion.MERaLiONConfig"
|
|
|
5 |
},
|
6 |
"head_dim": 256,
|
7 |
"hidden_size": 3584,
|
@@ -163,5 +166,6 @@
|
|
163 |
"sliding_window_size": 4096,
|
164 |
"torch_dtype": "bfloat16"
|
165 |
},
|
|
|
166 |
"transformers_version": "4.46.3"
|
167 |
}
|
|
|
1 |
{
|
2 |
+
"architectures": [
|
3 |
+
"MERaLiONForConditionalGeneration"
|
4 |
+
],
|
5 |
"auto_map": {
|
6 |
+
"AutoConfig": "configuration_meralion.MERaLiONConfig",
|
7 |
+
"AutoModelForSpeechSeq2Seq": "modeling_meralion.MERaLiONForConditionalGeneration"
|
8 |
},
|
9 |
"head_dim": 256,
|
10 |
"hidden_size": 3584,
|
|
|
166 |
"sliding_window_size": 4096,
|
167 |
"torch_dtype": "bfloat16"
|
168 |
},
|
169 |
+
"torch_dtype": "bfloat16",
|
170 |
"transformers_version": "4.46.3"
|
171 |
}
|
generation_config.json
CHANGED
@@ -4,5 +4,5 @@
|
|
4 |
"cache_implementation": "hybrid",
|
5 |
"eos_token_id": 107,
|
6 |
"pad_token_id": 0,
|
7 |
-
"transformers_version": "4.
|
8 |
}
|
|
|
4 |
"cache_implementation": "hybrid",
|
5 |
"eos_token_id": 107,
|
6 |
"pad_token_id": 0,
|
7 |
+
"transformers_version": "4.46.3"
|
8 |
}
|
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:109c417e21fe736abd753bb25e7be7400fcd607e557b7809fe2fada6cce16c24
|
3 |
+
size 4984029208
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4991612592
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2c2d7f48181275b438981c4c760e1df3622f79d935b48d16a2e7cf43c52dea1
|
3 |
size 4991612592
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4918183272
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e6ef9acd74f3c0d229be95dc8dfdd3bc59af97547412070c5ca5f722fcd4ebd
|
3 |
size 4918183272
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4962259664
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18981d354c807e0cde98e0521acfc3422faec53fcf9a95ce72ac63d5dbd1f7d0
|
3 |
size 4962259664
|
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
-
"total_size":
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"ln_speech.bias": "model-00001-of-00004.safetensors",
|
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
+
"total_size": 19855965696
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"ln_speech.bias": "model-00001-of-00004.safetensors",
|
modeling_meralion.py
CHANGED
@@ -1031,12 +1031,6 @@ MERALION_INPUTS_DOCSTRING = r"""
|
|
1031 |
[`PreTrainedTokenizer.__call__`] for details.
|
1032 |
|
1033 |
[What are input IDs?](../glossary#input-ids)
|
1034 |
-
input_ids_left (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
|
1035 |
-
Indices of left-padded input sequences tokens in the vocabulary. Padding will be ignored by default should you provide
|
1036 |
-
it.
|
1037 |
-
input_ids_right (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
|
1038 |
-
Indices of right-padded input sequences tokens in the vocabulary. Padding will be ignored by default should you provide
|
1039 |
-
it.
|
1040 |
input_features (`torch.FloatTensor` of shape `(batch_size, feature_size, feature_sequence_length)`, *optional*):
|
1041 |
Float values mel features extracted from the raw speech waveform. Raw speech waveform can be obtained by
|
1042 |
loading a `.flac` or `.wav` audio file into an array of type `List[float]` or a `numpy.ndarray`, *e.g.* via
|
@@ -1063,17 +1057,6 @@ MERALION_INPUTS_DOCSTRING = r"""
|
|
1063 |
|
1064 |
- 1 indicates the head is **not masked**,
|
1065 |
- 0 indicates the head is **masked**.
|
1066 |
-
|
1067 |
-
attention_mask_left (`torch.Tensor` of shape `(batch_size, feature_sequence_length)`, *optional*):
|
1068 |
-
Mask to avoid performing attention on padding feature indices. Mask values selected in `[0, 1]`:
|
1069 |
-
|
1070 |
-
- 1 for tokens that are **not masked**,
|
1071 |
-
- 0 for tokens that are **masked**.
|
1072 |
-
attention_mask_right (`torch.Tensor` of shape `(batch_size, feature_sequence_length)`, *optional*):
|
1073 |
-
Mask to avoid performing attention on padding feature indices. Mask values selected in `[0, 1]`:
|
1074 |
-
|
1075 |
-
- 1 for tokens that are **not masked**,
|
1076 |
-
- 0 for tokens that are **masked**.
|
1077 |
feature_attention_mask (`torch.Tensor` of shape `(batch_size, feature_sequence_length)`, *optional*):
|
1078 |
Mask to avoid performing attention on padding feature indices. Mask values selected in `[0, 1]`:
|
1079 |
|
|
|
1031 |
[`PreTrainedTokenizer.__call__`] for details.
|
1032 |
|
1033 |
[What are input IDs?](../glossary#input-ids)
|
|
|
|
|
|
|
|
|
|
|
|
|
1034 |
input_features (`torch.FloatTensor` of shape `(batch_size, feature_size, feature_sequence_length)`, *optional*):
|
1035 |
Float values mel features extracted from the raw speech waveform. Raw speech waveform can be obtained by
|
1036 |
loading a `.flac` or `.wav` audio file into an array of type `List[float]` or a `numpy.ndarray`, *e.g.* via
|
|
|
1057 |
|
1058 |
- 1 indicates the head is **not masked**,
|
1059 |
- 0 indicates the head is **masked**.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1060 |
feature_attention_mask (`torch.Tensor` of shape `(batch_size, feature_sequence_length)`, *optional*):
|
1061 |
Mask to avoid performing attention on padding feature indices. Mask values selected in `[0, 1]`:
|
1062 |
|