zwt123home123 committed
Commit 647d21a · Parent(s): bc5c1e8
Update modeling_llama_LUT_prerope_prefill.py
modeling_llama_LUT_prerope_prefill.py CHANGED
@@ -319,7 +319,7 @@ class LlamaAttention(nn.Module):
         self._init_rope()
         self.lut_layer = 40
         if self.layer_idx<self.lut_layer:
-            self.key_states_centroid = torch.tensor(np.load("
+            self.key_states_centroid = torch.tensor(np.load("centroids_faiss_K_c10k_bs1m_iter_20_nonorm_all_layers_prerope/"+str(self.layer_idx)+".npy")).half().cuda()

     def _init_rope(self):
         if self.config.rope_scaling is None:
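For context, the new line loads a per-layer centroid table into a half-precision CUDA tensor for each of the first 40 attention layers. Below is a minimal sketch of how such per-layer .npy centroid files could be produced with FAISS k-means; this is not the repo's actual training script. The collect_prerope_keys helper is hypothetical, and the parameters are only inferred from the directory name (c10k ~ 10,000 centroids, iter_20 ~ 20 k-means iterations, nonorm ~ no L2 normalization, prerope ~ keys captured before rotary embedding).

# Sketch: building per-layer key-centroid lookup tables with FAISS k-means.
# Assumptions are inferred from the directory name in the diff, not from the repo.
import os

import faiss
import numpy as np


def train_centroids(key_samples: np.ndarray, k: int = 10_000, niter: int = 20) -> np.ndarray:
    # key_samples: (num_samples, head_dim) pre-RoPE key vectors; FAISS expects float32.
    x = np.ascontiguousarray(key_samples, dtype=np.float32)
    kmeans = faiss.Kmeans(x.shape[1], k, niter=niter, verbose=True)
    kmeans.train(x)
    return kmeans.centroids  # (k, head_dim) cluster centers


out_dir = "centroids_faiss_K_c10k_bs1m_iter_20_nonorm_all_layers_prerope"
os.makedirs(out_dir, exist_ok=True)
for layer_idx in range(40):  # matches self.lut_layer = 40 in the diff
    samples = collect_prerope_keys(layer_idx)  # hypothetical: keys dumped during a calibration run
    np.save(os.path.join(out_dir, f"{layer_idx}.npy"), train_centroids(samples))

Once the files exist, the loading line in the diff turns each one into a half-precision CUDA tensor per attention layer, presumably so that pre-RoPE keys can be snapped to their nearest centroid during prefill.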