zwt123home123 committed
Commit 647d21a · Parent(s): bc5c1e8
Update modeling_llama_LUT_prerope_prefill.py
modeling_llama_LUT_prerope_prefill.py CHANGED
@@ -319,7 +319,7 @@ class LlamaAttention(nn.Module):
         self._init_rope()
         self.lut_layer = 40
         if self.layer_idx<self.lut_layer:
-            self.key_states_centroid = torch.tensor(np.load("
+            self.key_states_centroid = torch.tensor(np.load("centroids_faiss_K_c10k_bs1m_iter_20_nonorm_all_layers_prerope/"+str(self.layer_idx)+".npy")).half().cuda()

     def _init_rope(self):
         if self.config.rope_scaling is None:
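For context, the new line loads a per-layer centroid table into a half-precision CUDA tensor for each of the first 40 attention layers. Below is a minimal sketch of how such per-layer .npy centroid files could be produced with FAISS k-means; this is not the repo's actual training script. The collect_prerope_keys helper is hypothetical, and the parameters are only inferred from the directory name (c10k ~ 10,000 centroids, iter_20 ~ 20 k-means iterations, nonorm ~ no L2 normalization, prerope ~ keys captured before rotary embedding).

# Sketch: building per-layer key-centroid lookup tables with FAISS k-means.
# Assumptions are inferred from the directory name in the diff, not from the repo.
import os

import faiss
import numpy as np


def train_centroids(key_samples: np.ndarray, k: int = 10_000, niter: int = 20) -> np.ndarray:
    # key_samples: (num_samples, head_dim) pre-RoPE key vectors; FAISS expects float32.
    x = np.ascontiguousarray(key_samples, dtype=np.float32)
    kmeans = faiss.Kmeans(x.shape[1], k, niter=niter, verbose=True)
    kmeans.train(x)
    return kmeans.centroids  # (k, head_dim) cluster centers


out_dir = "centroids_faiss_K_c10k_bs1m_iter_20_nonorm_all_layers_prerope"
os.makedirs(out_dir, exist_ok=True)
for layer_idx in range(40):  # matches self.lut_layer = 40 in the diff
    samples = collect_prerope_keys(layer_idx)  # hypothetical: keys dumped during a calibration run
    np.save(os.path.join(out_dir, f"{layer_idx}.npy"), train_centroids(samples))

Once the files exist, the loading line in the diff turns each one into a half-precision CUDA tensor per attention layer, presumably so that pre-RoPE keys can be snapped to their nearest centroid during prefill.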