Upload MERaLiONForConditionalGeneration

Browse files

Files changed (8) hide show

config.json +6 -2
generation_config.json +1 -1
model-00001-of-00004.safetensors +2 -2
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1
model-00004-of-00004.safetensors +1 -1
model.safetensors.index.json +1 -1
modeling_meralion.py +0 -17

config.json CHANGED Viewed

@@ -1,7 +1,10 @@
 {
-  "_attn_implementation_autoset": true,
   "auto_map": {
-    "AutoConfig": "configuration_meralion.MERaLiONConfig"
   },
   "head_dim": 256,
   "hidden_size": 3584,
@@ -163,5 +166,6 @@
     "sliding_window_size": 4096,
     "torch_dtype": "bfloat16"
   },
   "transformers_version": "4.46.3"
 }

 {
+  "architectures": [
+    "MERaLiONForConditionalGeneration"
+  ],
   "auto_map": {
+    "AutoConfig": "configuration_meralion.MERaLiONConfig",
+    "AutoModelForSpeechSeq2Seq": "modeling_meralion.MERaLiONForConditionalGeneration"
   },
   "head_dim": 256,
   "hidden_size": 3584,
     "sliding_window_size": 4096,
     "torch_dtype": "bfloat16"
   },
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.46.3"
 }

generation_config.json CHANGED Viewed

@@ -4,5 +4,5 @@
   "cache_implementation": "hybrid",
   "eos_token_id": 107,
   "pad_token_id": 0,
-  "transformers_version": "4.44.2"
 }

   "cache_implementation": "hybrid",
   "eos_token_id": 107,
   "pad_token_id": 0,
+  "transformers_version": "4.46.3"
 }

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:270fdb3d09f24b4a2cf476b1efb2392f1ec2effc77505b591d68666bebeaa230
-size 4984397848

 version https://git-lfs.github.com/spec/v1
+oid sha256:109c417e21fe736abd753bb25e7be7400fcd607e557b7809fe2fada6cce16c24
+size 4984029208

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f29e844bfed074de0b3d36255695552b9583abfbc7f76c2bd6d3ff78a541b95
 size 4991612592

 version https://git-lfs.github.com/spec/v1
+oid sha256:a2c2d7f48181275b438981c4c760e1df3622f79d935b48d16a2e7cf43c52dea1
 size 4991612592

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72660974f5e6a798f8e269c979c4abf2f74f45a0f1f8bbde9d204aca360e69f6
 size 4918183272

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e6ef9acd74f3c0d229be95dc8dfdd3bc59af97547412070c5ca5f722fcd4ebd
 size 4918183272

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a978abdeae43c46a9cb3687aa9ccf1c3e421d01a16fb27f7fa14e64ad4cb9fa
 size 4962259664

 version https://git-lfs.github.com/spec/v1
+oid sha256:18981d354c807e0cde98e0521acfc3422faec53fcf9a95ce72ac63d5dbd1f7d0
 size 4962259664

model.safetensors.index.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 19856334336
   },
   "weight_map": {
     "ln_speech.bias": "model-00001-of-00004.safetensors",

 {
   "metadata": {
+    "total_size": 19855965696
   },
   "weight_map": {
     "ln_speech.bias": "model-00001-of-00004.safetensors",

modeling_meralion.py CHANGED Viewed

@@ -1031,12 +1031,6 @@ MERALION_INPUTS_DOCSTRING = r"""
             [`PreTrainedTokenizer.__call__`] for details.
             [What are input IDs?](../glossary#input-ids)
-        input_ids_left (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
-            Indices of left-padded input sequences tokens in the vocabulary. Padding will be ignored by default should you provide
-            it.
-        input_ids_right (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
-            Indices of right-padded input sequences tokens in the vocabulary. Padding will be ignored by default should you provide
-            it.
         input_features (`torch.FloatTensor` of shape `(batch_size, feature_size, feature_sequence_length)`, *optional*):
             Float values mel features extracted from the raw speech waveform. Raw speech waveform can be obtained by
             loading a `.flac` or `.wav` audio file into an array of type `List[float]` or a `numpy.ndarray`, *e.g.* via
@@ -1063,17 +1057,6 @@ MERALION_INPUTS_DOCSTRING = r"""
             - 1 indicates the head is **not masked**,
             - 0 indicates the head is **masked**.
-        attention_mask_left (`torch.Tensor` of shape `(batch_size, feature_sequence_length)`, *optional*):
-            Mask to avoid performing attention on padding feature indices. Mask values selected in `[0, 1]`:
-            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **masked**.
-        attention_mask_right (`torch.Tensor` of shape `(batch_size, feature_sequence_length)`, *optional*):
-            Mask to avoid performing attention on padding feature indices. Mask values selected in `[0, 1]`:
-            - 1 for tokens that are **not masked**,
-            - 0 for tokens that are **masked**.
         feature_attention_mask (`torch.Tensor` of shape `(batch_size, feature_sequence_length)`, *optional*):
             Mask to avoid performing attention on padding feature indices. Mask values selected in `[0, 1]`:

             [`PreTrainedTokenizer.__call__`] for details.
             [What are input IDs?](../glossary#input-ids)
         input_features (`torch.FloatTensor` of shape `(batch_size, feature_size, feature_sequence_length)`, *optional*):
             Float values mel features extracted from the raw speech waveform. Raw speech waveform can be obtained by
             loading a `.flac` or `.wav` audio file into an array of type `List[float]` or a `numpy.ndarray`, *e.g.* via
             - 1 indicates the head is **not masked**,
             - 0 indicates the head is **masked**.
         feature_attention_mask (`torch.Tensor` of shape `(batch_size, feature_sequence_length)`, *optional*):
             Mask to avoid performing attention on padding feature indices. Mask values selected in `[0, 1]`: