OpenNLPLab committed on
Commit
a5b83ec
1 Parent(s): 1da34bf

Update modeling_transnormer.py

Files changed (1)
  1. modeling_transnormer.py +0 -40
modeling_transnormer.py CHANGED
@@ -734,43 +734,6 @@ class TransnormerModel(TransnormerPreTrainedModel):
             slope_rate = slope_rates[idx]
             slope_rate = slope_rate * (1 - idx / (self.num_layers - 1) + 1e-5)
             mask = linear_attn_mask
-
-            # if self.gradient_checkpointing and self.training:
-
-            #     def create_custom_forward(module):
-            #         def custom_forward(*inputs):
-            #             # None for past_key_value
-            #             return module(*inputs, output_attentions, None)
-
-            #         return custom_forward
-
-            #     # layer_outputs = torch.utils.checkpoint.checkpoint(
-            #     #     create_custom_forward(layer),
-            #     #     hidden_states,
-            #     #     mask,
-            #     #     linear_attn_padding_mask,
-            #     #     None,
-            #     # )
-            #     layer_outputs = torch.utils.checkpoint.checkpoint(
-            #         create_custom_forward(layer),
-            #         hidden_states,
-            #         mask,
-            #         linear_attn_padding_mask,
-            #         None,
-            #         output_attentions,
-            #         use_cache,
-            #         slope_rate,
-            #     )
-            # else:
-            #     layer_outputs = layer(
-            #         hidden_states,
-            #         attn_mask=mask,
-            #         attn_padding_mask=linear_attn_padding_mask,
-            #         past_key_value=past_key_value,
-            #         output_attentions=output_attentions,
-            #         use_cache=use_cache,
-            #         slope_rate=slope_rate,
-            #     )
 
             layer_outputs = layer(
                 hidden_states,
@@ -789,9 +752,6 @@ class TransnormerModel(TransnormerPreTrainedModel):
 
             if output_attentions:
                 all_self_attns += (layer_outputs[1],)
-
-            # if idx == 0:
-            #     break
 
         hidden_states = self.final_norm(hidden_states)
 
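For reference, the lines deleted above were a commented-out sketch of PyTorch activation (gradient) checkpointing around each decoder layer. The minimal example below illustrates that general idiom under stated assumptions: TinyBlock and TinyModel are hypothetical stand-ins, not TransNormer classes, and only the torch.utils.checkpoint.checkpoint call pattern mirrors the removed comments.

import torch
import torch.nn as nn
import torch.utils.checkpoint


class TinyBlock(nn.Module):
    # Hypothetical stand-in for a decoder layer (not a TransNormer class).
    def __init__(self, dim):
        super().__init__()
        self.proj = nn.Linear(dim, dim)

    def forward(self, hidden_states, attn_mask=None):
        # attn_mask is accepted only to mirror the call shape in the diff.
        return torch.relu(self.proj(hidden_states))


class TinyModel(nn.Module):
    def __init__(self, dim=16, num_layers=2):
        super().__init__()
        self.layers = nn.ModuleList([TinyBlock(dim) for _ in range(num_layers)])
        self.gradient_checkpointing = True

    def forward(self, hidden_states, attn_mask=None):
        for layer in self.layers:
            if self.gradient_checkpointing and self.training:
                # Recompute this layer's activations during backward instead
                # of storing them, trading extra compute for lower memory.
                hidden_states = torch.utils.checkpoint.checkpoint(
                    layer, hidden_states, attn_mask, use_reentrant=False)
            else:
                hidden_states = layer(hidden_states, attn_mask=attn_mask)
        return hidden_states


model = TinyModel().train()
x = torch.randn(2, 8, 16, requires_grad=True)
model(x).sum().backward()  # gradients flow through the checkpointed layers

Note that the PyTorch documentation recommends use_reentrant=False for new code; the reentrant variant sketched in the removed comments requires at least one input with requires_grad and supports fewer autograd features.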
 