Yingxu He
committed on
Upload modeling_meralion.py with huggingface_hub
Browse files- modeling_meralion.py +1 -46
modeling_meralion.py
CHANGED
@@ -1,18 +1,4 @@
|
|
1 |
-
|
2 |
-
# Copyright 2024 the HuggingFace Inc. team. All rights reserved.
|
3 |
-
#
|
4 |
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
-
# you may not use this file except in compliance with the License.
|
6 |
-
# You may obtain a copy of the License at
|
7 |
-
#
|
8 |
-
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
-
#
|
10 |
-
# Unless required by applicable law or agreed to in writing, software
|
11 |
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
-
# See the License for the specific language governing permissions and
|
14 |
-
# limitations under the License.
|
15 |
-
"""PyTorch MERaLiON model."""
|
16 |
|
17 |
import math
|
18 |
from dataclasses import dataclass
|
@@ -1161,37 +1147,6 @@ class MERaLiONForConditionalGeneration(MERaLiONPreTrainedModel, GenerationMixin)
|
|
1161 |
self.vocab_size = model_embeds.num_embeddings
|
1162 |
return model_embeds
|
1163 |
|
1164 |
-
def _get_multimodal_input_embeds(
|
1165 |
-
self,
|
1166 |
-
input_ids_left,
|
1167 |
-
input_ids_right,
|
1168 |
-
attention_mask_left,
|
1169 |
-
attention_mask_right,
|
1170 |
-
speech_audio_contexts_embeds,
|
1171 |
-
speech_audio_contexts_atts,
|
1172 |
-
):
|
1173 |
-
input_embeds_left = self.text_decoder.base_model.embed_tokens(input_ids_left)
|
1174 |
-
input_embeds_right = self.text_decoder.base_model.embed_tokens(input_ids_right)
|
1175 |
-
|
1176 |
-
multimodal_embeds = torch.cat(
|
1177 |
-
[
|
1178 |
-
input_embeds_left,
|
1179 |
-
speech_audio_contexts_embeds,
|
1180 |
-
input_embeds_right,
|
1181 |
-
],
|
1182 |
-
dim=1,
|
1183 |
-
)
|
1184 |
-
|
1185 |
-
multimodal_attention_mask = torch.cat(
|
1186 |
-
[
|
1187 |
-
attention_mask_left,
|
1188 |
-
speech_audio_contexts_atts,
|
1189 |
-
attention_mask_right,
|
1190 |
-
],
|
1191 |
-
dim=1,
|
1192 |
-
)
|
1193 |
-
return multimodal_embeds, multimodal_attention_mask
|
1194 |
-
|
1195 |
@add_start_docstrings_to_model_forward(MERALION_INPUTS_DOCSTRING)
|
1196 |
@replace_return_docstrings(output_type=MERaLiONOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
1197 |
def forward(
|
|
|
1 |
+
"""PyTorch MERaLiON AudioLLM model."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
import math
|
4 |
from dataclasses import dataclass
|
|
|
1147 |
self.vocab_size = model_embeds.num_embeddings
|
1148 |
return model_embeds
|
1149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1150 |
@add_start_docstrings_to_model_forward(MERALION_INPUTS_DOCSTRING)
|
1151 |
@replace_return_docstrings(output_type=MERaLiONOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
1152 |
def forward(
|