"""Extract per-layer control vectors from llama.cpp hidden-state dumps.

Reads hidden states written by the llama.cpp ./repeng tool, fits one PCA
direction per layer from paired positive/negative prompts, and exports the
directions as a llama.cpp-compatible control-vector .gguf file.
"""

import gguf
import numpy as np
from sklearn.decomposition import PCA
import tqdm


def load_hidden_states(path):
    """Load hidden states produced by the llama.cpp ./repeng tool."""
    gguf_file = gguf.GGUFReader(path)

    hidden_states = {}
    for t in gguf_file.tensors:
        # Only the per-layer output tensors matter; their names encode the
        # layer index, e.g. 'l_out-12'.
        if not t.name.startswith('l_out-'):
            continue
        layer = int(t.name[len('l_out-'):])
        assert layer not in hidden_states, 'duplicate hidden states for layer %d' % layer
        # GGUF lists dimensions in the opposite order from numpy, hence the
        # swapped reshape.
        data = t.data.reshape((t.shape[1], t.shape[0]))
        hidden_states[layer] = data

    return hidden_states
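

# The dump is expected to hold one 'l_out-<layer>' tensor per transformer
# layer; after the reshape, each is presumably (n_prompts, n_embd), one row
# per prompt in the contrast dataset.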


def project_onto_direction(H, direction):
    """Project each row of matrix H (n, d) onto direction vector (d,)."""
    mag = np.linalg.norm(direction)
    assert not np.isinf(mag)
    return (H @ direction) / mag
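

# Quick sanity check of the projection helper (illustrative values only):
#   >>> project_onto_direction(np.eye(2), np.array([3.0, 4.0]))
#   array([0.6, 0.8])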


def read_representations(
    layer_hiddens: dict[int, np.ndarray],
) -> dict[int, np.ndarray]:
    """Extract a steering direction per layer from the contrast dataset."""
    hidden_layers = sorted(layer_hiddens.keys())
    # Rows alternate positive/negative prompts: two rows per input pair.
    num_inputs = next(iter(layer_hiddens.values())).shape[0] // 2
    print('%d inputs' % num_inputs)

    # Difference between each positive hidden state and its paired negative.
    relative_layer_hiddens = {}
    for layer in hidden_layers:
        relative_layer_hiddens[layer] = (
            layer_hiddens[layer][::2] - layer_hiddens[layer][1::2]
        )

    directions: dict[int, np.ndarray] = {}
    for layer in tqdm.tqdm(hidden_layers):
        assert layer_hiddens[layer].shape[0] == num_inputs * 2

        # Center the pairwise differences and take the first principal
        # component as this layer's steering direction.
        train = (
            relative_layer_hiddens[layer]
            - relative_layer_hiddens[layer].mean(axis=0, keepdims=True)
        )
        pca_model = PCA(n_components=1, whiten=False).fit(train)
        directions[layer] = pca_model.components_.astype(np.float32).squeeze(axis=0)

        # PCA's sign is arbitrary: flip the direction if positive prompts
        # project lower than their paired negatives on average.
        projected_hiddens = project_onto_direction(
            layer_hiddens[layer], directions[layer]
        )
        positive_smaller_mean = np.mean(
            [
                projected_hiddens[i] < projected_hiddens[i + 1]
                for i in range(0, num_inputs * 2, 2)
            ]
        )
        positive_larger_mean = np.mean(
            [
                projected_hiddens[i] > projected_hiddens[i + 1]
                for i in range(0, num_inputs * 2, 2)
            ]
        )
        if positive_smaller_mean > positive_larger_mean:
            directions[layer] *= -1

    return directions
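

# Conceptually, each direction is applied at inference time by adding it to
# the corresponding layer's hidden state, h' = h + strength * direction; the
# sign flip above ensures that positive strength pushes generations toward
# the positive half of the contrast dataset.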


def export_gguf(directions, path: str):
    """Export a trained ControlVector to a llama.cpp .gguf file."""
    arch = "controlvector"
    writer = gguf.GGUFWriter(path, arch)

    for layer in directions.keys():
        # llama.cpp expects direction tensors to be 1-indexed, so skip the
        # layer-0 direction.
        if layer == 0:
            continue
        writer.add_tensor(f"direction.{layer}", directions[layer])
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_tensors_to_file()
    writer.close()
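

# The exported file can be applied at generation time with llama.cpp, e.g.
# (flag name as of recent llama.cpp builds; check ./main --help for yours):
#   ./main -m model.gguf --control-vector control_vector.gguf -p "..."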


def test_model(model_name, directions):
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from repeng import ControlVector, ControlModel

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token_id = 0

    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
    model = model.to(
        "cuda:0" if torch.cuda.is_available()
        else "mps:0" if torch.backends.mps.is_available()
        else "cpu"
    )
    # Wrap the model so the control vector can be injected into these layers.
    model = ControlModel(model, list(range(-5, -18, -1)))

    control_vector = ControlVector(model.config.model_type, directions)

    # Mistral-style instruction tags.
    user_tag, asst_tag = "[INST]", "[/INST]"
    prompt = f"{user_tag} What are human beings like? {asst_tag}"

    input_ids = tokenizer(prompt, return_tensors="pt").to(model.device)
    settings = {
        "pad_token_id": tokenizer.eos_token_id,  # silence warning
        "do_sample": False,  # deterministic, greedy decoding
        "max_new_tokens": 128,
        "repetition_penalty": 1.1,
    }

    print("==baseline")
    model.reset()
    print(tokenizer.decode(model.generate(**input_ids, **settings).squeeze()))

    print("\n++control")
    model.set_control(control_vector, 1.0)
    print(tokenizer.decode(model.generate(**input_ids, **settings).squeeze()))

    print("\n--control")
    model.set_control(control_vector, -1.0)
    print(tokenizer.decode(model.generate(**input_ids, **settings).squeeze()))
    model.reset()


print('loading hidden states')
hidden_states = load_hidden_states('control_vector_data.gguf')
print('hidden states loaded')
directions = read_representations(hidden_states)
print('exporting control vector')
export_gguf(directions, 'control_vector.gguf')

TEST_MODEL_NAME = 'mistralai/Mistral-7B-Instruct-v0.1'
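
# Optional end-to-end check of the exported directions via transformers and
# repeng (assumes torch, transformers, and the repeng package are installed):
test_model(TEST_MODEL_NAME, directions)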