jeffra committed on
Commit
5d68b84
1 Parent(s): 125e72e

Upload modeling_arctic.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. modeling_arctic.py +2 -2
modeling_arctic.py CHANGED
@@ -56,7 +56,7 @@ from transformers.utils import (
56
  )
57
  from transformers.utils.import_utils import is_torch_fx_available
58
  from .configuration_arctic import ArcticConfig
59
- from transformers.integrations.deepspeed import is_deepspeed_available
60
  from transformers.utils.versions import require_version
61
 
62
  if is_deepspeed_available():
@@ -354,7 +354,7 @@ class ArcticAttention(nn.Module):
354
  ds_optimized_quantization_config=quantization_config,
355
  ds_optimized_base_weight_sharding=True,
356
  dtype=torch.bfloat16)
357
- self.o_proj = get_arctic_linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=False,
358
  use_deepspeed_implementation=self.use_deepspeed_implementation,
359
  ds_optimized_lora_config=deepspeed_lora_config,
360
  ds_optimized_quantization_config=quantization_config,
 
56
  )
57
  from transformers.utils.import_utils import is_torch_fx_available
58
  from .configuration_arctic import ArcticConfig
59
+ from transformers.integrations.deepspeed import is_deepspeed_available
60
  from transformers.utils.versions import require_version
61
 
62
  if is_deepspeed_available():
 
354
  ds_optimized_quantization_config=quantization_config,
355
  ds_optimized_base_weight_sharding=True,
356
  dtype=torch.bfloat16)
357
+ self.o_proj = get_arctic_linear(self.hidden_size, self.hidden_size, bias=False,
358
  use_deepspeed_implementation=self.use_deepspeed_implementation,
359
  ds_optimized_lora_config=deepspeed_lora_config,
360
  ds_optimized_quantization_config=quantization_config,