taka-yamakoshi committed
Commit c1ed878
1 Parent(s): 9149baa

impl interv

Files changed (1)
  1. custom_modeling_albert_flax.py +22 -0
custom_modeling_albert_flax.py CHANGED
@@ -1,4 +1,5 @@
 from typing import Callable, Optional, Tuple
+from copy import deepcopy
 
 import numpy as np
 
@@ -88,6 +89,27 @@ class CustomFlaxAlbertSelfAttention(nn.Module):
             hidden_states.shape[:2] + (self.config.num_attention_heads, head_dim)
         )
 
+        reps = {
+            'lay': hidden_states,
+            'qry': query_states,
+            'key': key_states,
+            'val': value_states,
+            }
+        if layer_id in interv_dict:
+            interv = interv_dict[layer_id]
+            for rep_name in ['lay','qry','key','val']:
+                if rep_name in interv:
+                    new_state = deepcopy(reps[rep_name])
+                    for head_id, pos, swap_ids in interv[rep_name]:
+                        new_state[swap_ids[0],pos,head_id] = reps[rep_name][swap_ids[1],pos,head_id]
+                        new_state[swap_ids[1],pos,head_id] = reps[rep_name][swap_ids[0],pos,head_id]
+                    reps[rep_name] = deepcopy(new_state)
+
+        hidden_states = deepcopy(reps['lay'])
+        query_states = deepcopy(reps['qry'])
+        key_states = deepcopy(reps['key'])
+        value_states = deepcopy(reps['val'])
+
         # Convert the boolean attention mask to an attention bias.
         if attention_mask is not None:
             # attention mask in the form of attention bias
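
The added hunk implements the intervention named in the commit message: for each layer listed in interv_dict, and for each of the layer input ('lay'), query ('qry'), key ('key'), and value ('val') states present in that layer's entry, it exchanges the slice at (batch index, position, head) between the two batch items in swap_ids. Below is a minimal, self-contained sketch of the same swap, assuming the states are jax.numpy arrays of shape (batch, seq_len, num_heads, head_dim) as produced by the reshape above. Note that jax.numpy arrays are immutable, so the in-place item assignments in the diff only work on plain NumPy arrays; the functional .at[...].set() idiom shown here is the JAX equivalent. The helper name apply_interventions and the example values are hypothetical, not part of the commit.

import jax.numpy as jnp

def apply_interventions(reps, interv):
    """Swap per-head slices between two batch items, functionally.

    `reps` maps rep names ('lay', 'qry', 'key', 'val') to arrays; `interv`
    maps rep names to lists of (head_id, pos, (i, j)) triples, mirroring
    one layer's entry in the commit's interv_dict.
    """
    reps = dict(reps)  # copy the dict; the arrays themselves are never mutated
    for rep_name, swaps in interv.items():
        state = reps[rep_name]
        for head_id, pos, (i, j) in swaps:
            a = state[i, pos, head_id]  # read both slices before writing,
            b = state[j, pos, head_id]  # as the diff does via new_state/reps
            state = state.at[i, pos, head_id].set(b)
            state = state.at[j, pos, head_id].set(a)
        reps[rep_name] = state
    return reps

# Example (hypothetical values): swap head 3's query vectors at
# position 5 between batch items 0 and 1.
states = {name: jnp.zeros((2, 10, 12, 64)) for name in ('lay', 'qry', 'key', 'val')}
states = apply_interventions(states, {'qry': [(3, 5, (0, 1))]})

Because .at[...].set() returns a new array rather than mutating, no deepcopy is needed in this variant; reading both slices before the first write preserves the swap semantics that the diff obtains by writing into new_state while reading from the untouched reps[rep_name].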