zwt123home123 committed on
Commit
647d21a
1 Parent(s): bc5c1e8

Update modeling_llama_LUT_prerope_prefill.py

modeling_llama_LUT_prerope_prefill.py CHANGED
@@ -319,7 +319,7 @@ class LlamaAttention(nn.Module):
         self._init_rope()
         self.lut_layer = 40
         if self.layer_idx<self.lut_layer:
-            self.key_states_centroid = torch.tensor(np.load("/sensei-fs-3/users/wezhao/projects/KVcache/cluster/centroids_faiss_K_c10k_bs1m_iter_20_nonorm_all_layers_prerope/"+str(self.layer_idx)+".npy")).half().cuda()
+            self.key_states_centroid = torch.tensor(np.load("centroids_faiss_K_c10k_bs1m_iter_20_nonorm_all_layers_prerope/"+str(self.layer_idx)+".npy")).half().cuda()
 
     def _init_rope(self):
         if self.config.rope_scaling is None:
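
The change replaces a hardcoded absolute path on an internal filesystem ("/sensei-fs-3/...") with a path relative to the working directory, so the per-layer centroid files resolve inside the repository checkout. Below is a minimal sketch of an equivalent loader with the base directory made configurable; the function name load_centroids and the centroid_dir parameter are illustrative assumptions, not part of the commit, which only edits the path string:

import os

import numpy as np
import torch


def load_centroids(layer_idx,
                   centroid_dir="centroids_faiss_K_c10k_bs1m_iter_20_nonorm_all_layers_prerope",
                   device="cuda"):
    # Load the per-layer centroid array (saved as "<layer_idx>.npy") and
    # move it to the GPU in fp16, matching the .half().cuda() in the commit.
    # centroid_dir defaults to the relative path the commit switches to.
    path = os.path.join(centroid_dir, f"{layer_idx}.npy")
    centroids = np.load(path)
    return torch.from_numpy(centroids).half().to(device)


# Usage in LlamaAttention.__init__ would then reduce to:
#     if self.layer_idx < self.lut_layer:
#         self.key_states_centroid = load_centroids(self.layer_idx)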