ZZichen committed on
Commit
dbeed0f
1 Parent(s): a3dcf52

Update modeling_deepseek.py

Browse files
Files changed (1) hide show
  1. modeling_deepseek.py +2 -2
modeling_deepseek.py CHANGED
@@ -422,10 +422,10 @@ class MoEGate(nn.Module):
422
  ### compute gating score
423
  hidden_states = hidden_states.view(-1, h)
424
  logits = F.linear(
425
- hidden_states.type(torch.float32), self.weight.type(torch.float32), None
426
  )
427
  if self.scoring_func == "softmax":
428
- scores = logits.softmax(dim=-1, dtype=torch.float32)
429
  else:
430
  raise NotImplementedError(
431
  f"insupportable scoring function for MoE gating: {self.scoring_func}"
 
422
  ### compute gating score
423
  hidden_states = hidden_states.view(-1, h)
424
  logits = F.linear(
425
+ hidden_states, self.weight, None
426
  )
427
  if self.scoring_func == "softmax":
428
+ scores = logits.softmax(dim=-1)
429
  else:
430
  raise NotImplementedError(
431
  f"insupportable scoring function for MoE gating: {self.scoring_func}"