Crystalcareai
committed on
Update modeling_gemmoe.py
Browse files
- modeling_gemmoe.py +4 -4
modeling_gemmoe.py
CHANGED
@@ -705,8 +705,10 @@ class GemmoeDecoderLayer(nn.Module):
|
|
705 |
output_attentions: Optional[bool] = False,
|
706 |
output_router_logits: Optional[bool] = False,
|
707 |
use_cache: Optional[bool] = False,
|
|
|
708 |
**kwargs,
|
709 |
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
|
|
|
710 |
if "padding_mask" in kwargs:
|
711 |
warnings.warn(
|
712 |
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
|
@@ -944,7 +946,6 @@ class GemmoeModel(GemmoePreTrainedModel):
|
|
944 |
self.embed_tokens = value
|
945 |
|
946 |
@add_start_docstrings_to_model_forward(GEMMOE_INPUTS_DOCSTRING)
|
947 |
-
# Ignore copy
|
948 |
def forward(
|
949 |
self,
|
950 |
input_ids: torch.LongTensor = None,
|
@@ -1215,9 +1216,8 @@ class GemmoeForCausalLM(GemmoePreTrainedModel):
|
|
1215 |
```python
|
1216 |
>>> from transformers import AutoTokenizer, GemmoeForCausalLM
|
1217 |
|
1218 |
-
>>> model
|
1219 |
>>> tokenizer = AutoTokenizer.from_pretrained("google/GEMMA-7b")
|
1220 |
-
|
1221 |
>>> prompt = "What is your favorite condiment?"
|
1222 |
>>> inputs = tokenizer(prompt, return_tensors="pt")
|
1223 |
|
@@ -1382,7 +1382,6 @@ class GemmoeForCausalLM(GemmoePreTrainedModel):
|
|
1382 |
)
|
1383 |
return reordered_past
|
1384 |
|
1385 |
-
|
1386 |
@add_start_docstrings(
|
1387 |
"""
|
1388 |
The Gemmoe Model transformer with a sequence classification head on top (linear layer).
|
@@ -1447,6 +1446,7 @@ class GemmoeForSequenceClassification(GemmoePreTrainedModel):
|
|
1447 |
output_attentions=output_attentions,
|
1448 |
output_hidden_states=output_hidden_states,
|
1449 |
return_dict=return_dict,
|
|
|
1450 |
)
|
1451 |
hidden_states = transformer_outputs[0]
|
1452 |
logits = self.score(hidden_states)
|
|
|
705 |
output_attentions: Optional[bool] = False,
|
706 |
output_router_logits: Optional[bool] = False,
|
707 |
use_cache: Optional[bool] = False,
|
708 |
+
cache_position: Optional[torch.LongTensor] = None,
|
709 |
**kwargs,
|
710 |
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
|
711 |
+
# ... (rest of the code remains the same)
|
712 |
if "padding_mask" in kwargs:
|
713 |
warnings.warn(
|
714 |
"Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
|
|
|
946 |
self.embed_tokens = value
|
947 |
|
948 |
@add_start_docstrings_to_model_forward(GEMMOE_INPUTS_DOCSTRING)
|
|
|
949 |
def forward(
|
950 |
self,
|
951 |
input_ids: torch.LongTensor = None,
|
|
|
1216 |
```python
|
1217 |
>>> from transformers import AutoTokenizer, GemmoeForCausalLM
|
1218 |
|
1219 |
+
>>> model= GemmoeForCausalLM.from_pretrained("google/GEMMA-7b")
|
1220 |
>>> tokenizer = AutoTokenizer.from_pretrained("google/GEMMA-7b")
|
|
|
1221 |
>>> prompt = "What is your favorite condiment?"
|
1222 |
>>> inputs = tokenizer(prompt, return_tensors="pt")
|
1223 |
|
|
|
1382 |
)
|
1383 |
return reordered_past
|
1384 |
|
|
|
1385 |
@add_start_docstrings(
|
1386 |
"""
|
1387 |
The Gemmoe Model transformer with a sequence classification head on top (linear layer).
|
|
|
1446 |
output_attentions=output_attentions,
|
1447 |
output_hidden_states=output_hidden_states,
|
1448 |
return_dict=return_dict,
|
1449 |
+
cache_position=None,
|
1450 |
)
|
1451 |
hidden_states = transformer_outputs[0]
|
1452 |
logits = self.score(hidden_states)
|