Update modeling_opt.py
modeling_opt.py  +4 -4  CHANGED
@@ -1086,8 +1086,8 @@ class OPTDecoder(OPTPreTrainedModel):
                 batch_size, mask_seq_length, device=inputs_embeds.device)
         elif attention_mask.shape[1] != mask_seq_length:
             raise ValueError(
-                f"The provided attention mask has length {
-                    attention_mask.shape[1]}, but its length should be "
+                f'''The provided attention mask has length {
+                    attention_mask.shape[1]}, but its length should be '''
                 f"{mask_seq_length} (sum of the lengths of current and past inputs)"
             )
         causal_attention_mask = _prepare_4d_causal_attention_mask(
@@ -1119,8 +1119,8 @@ class OPTDecoder(OPTPreTrainedModel):
             if attn_mask is not None:
                 if attn_mask.size()[0] != (len(self.layers)):
                     raise ValueError(
-                        f"The `{mask_name}` should be specified for {
-                            len(self.layers)} layers, but it is for"
+                        f'''The `{mask_name}` should be specified for {
+                            len(self.layers)} layers, but it is for'''
                         f" {head_mask.size()[0]}."
                     )

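Both hunks make the same change: each error message was an f-string whose replacement field had been wrapped onto a second physical line, and the commit switches those fragments from double quotes to triple quotes. A plain-quoted f-string cannot span physical lines, so the pre-change form is only accepted by Python 3.12+ (PEP 701); the triple-quoted form is also accepted by earlier interpreters, and because the line break falls inside the replacement field it never shows up in the rendered message. Below is a minimal sketch of the pattern for the attention-mask check; provided_length and expected_length are illustrative stand-ins, not variables from modeling_opt.py.

# Illustrative sketch only: `provided_length` stands in for attention_mask.shape[1]
# and `expected_length` for mask_seq_length (sum of current and past input lengths).
provided_length = 12
expected_length = 16

# Pre-change form (double quotes): the string literal ends at the line break,
# so this is a SyntaxError before Python 3.12:
#     f"The provided attention mask has length {
#         provided_length}, but its length should be "

# Post-change form (triple quotes): the literal may span lines, and the break
# sits inside the replacement field, so the output stays on one line.
message = (
    f'''The provided attention mask has length {
        provided_length}, but its length should be '''
    f"{expected_length} (sum of the lengths of current and past inputs)"
)
print(message)
# The provided attention mask has length 12, but its length should be 16 (sum of the lengths of current and past inputs)

The adjacent f-string fragments are still joined by implicit string concatenation, so the rendered messages are unchanged; the second hunk applies the identical quoting fix to the head_mask layer-count check.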