Spaces:

zjunlp
/

EasyEdit

Running

App Files Files Community

EasyEdit / easyeditor /models /grace /GRACE.py

ZJUPeng

initial commit

3494c6b 5 months ago

raw

history blame

22.4 kB

	# import torch
	# from .utils import parent_module, brackets_to_periods
	# import transformers
	# import os
	# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

	# def euc(query, key):
	# # Euclidean distance
	# if len(key.shape) < 2:
	# key = key.view(1, -1)
	# return torch.cdist(key, query, p=2)

	# def perturb_values(chosen_value, num_pert, device):
	# # Create a bunch of noised versions of the value, then create batch, then train value
	# chosen_value = chosen_value
	# noise = torch.normal(0, 1, chosen_value.shape, device=device)
	# noise[0] = noise[0]*0
	# noise.requires_grad = True
	# chosen_value = chosen_value + noise
	# return chosen_value

	# class GRACE(torch.nn.Module):
	# def __init__(self, config, model, device):
	# super(GRACE, self).__init__()
	# self.config = config
	# self.log_dict = {}
	# self.model = model
	# # self.tokenizer = model.tokenizer
	# layer = config.inner_params[0]
	# self.device = device

	# # --- ensure proper formatting (GRACE edits ~layers~ not weights matrices) ---
	# suffixes = [".weight", ".bias"]
	# self.layer = layer.rsplit(".", 1)[0] if any(layer.endswith(x) for x in suffixes) else layer

	# for n, p in self.model.named_parameters():
	# p.requires_grad = False

	# if isinstance(self.model, transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel):
	# transpose = False
	# else:
	# transpose = True

	# # --- Add GRACE to chosen layers ---
	# edit_module = parent_module(self.model, brackets_to_periods(self.layer))
	# layer_name = self.layer.rsplit(".", 1)[-1]
	# original_layer = getattr(edit_module, layer_name)

	# if type(original_layer) is not GRACEAdapter:
	# setattr(edit_module, layer_name, GRACEAdapter(config, original_layer, transpose=transpose).to(self.device))

	# def __call__(self, **kwargs):
	# # if self.config.task == "hallucination":
	# # print(kwargs)
	# # key_id = (kwargs["labels"] == -100).sum() - 1
	# # setattr(eval(f"self.model.{self.layer}"), "key_id", key_id) # Tell GRACE which token to use for its query (default is the last token)
	# return self.model(**kwargs)

	# def generate(self, *args, **kwargs):
	# setattr(eval(f"self.model.{self.layer}"), "key_id", -1)
	# return self.model.generate(*args, **kwargs)

	# def edit(self, config, tokens):
	# key_id = (tokens["labels"] == -100).sum() - 1
	# setattr(eval(f"self.model.{self.layer}"), "key_id", key_id)

	# # --- pass edit label, training mode, and key_id into GRACE ---
	# setattr(eval(f"self.model.{self.layer}"), "training", True)
	# setattr(eval(f"self.model.{self.layer}"), "edit_label", tokens["labels"])

	# self.losses = []
	# # --- train GRACE value ---
	# for i in range(config.n_iter):
	# # --- insert iteration into each layer (only initiate keys on iteration 1) ---
	# setattr(eval(f"self.model.{self.layer}"), "iter", i)

	# # --- pass tokens through model (including through the GRACE layer) ---
	# outputs = self.model(**tokens)
	# if i == 0:
	# # --- we only need to create an optimizer for the first iteration (but forward pass instantiates the key, so optimzer is passed after first inference) ---
	# optimizer = torch.optim.Adam(self.model.parameters(), config.edit_lr)
	# loss = outputs.loss
	# loss.backward()
	# optimizer.step()
	# optimizer.zero_grad()
	# self.losses.append(loss.detach().cpu().numpy())

	# self.loss = loss # Log final loss

	# # --- pull out info we want to log from the GRACE layer ---
	# setattr(eval(f"self.model.{self.layer}"), "training", False)
	# chosen_key = getattr(eval(f"self.model.{self.layer}"), "chosen_key")
	# nkeys = len(getattr(eval(f"self.model.{self.layer}"), "keys"))

	# self.log_dict["chosen_key"] = chosen_key
	# self.log_dict["nkeys"] = nkeys

	# class GRACEAdapter(torch.nn.Module):
	# def __init__(self, config, layer, transpose):
	# super(GRACEAdapter, self).__init__()

	# self.layer = layer
	# self.weight = self.layer.weight
	# self.init_epsilon = config.eps
	# self.dist_fn = config.dist_fn
	# self.replacement = config.replacement
	# self.device = layer.weight.device
	# self.config = config
	# self.num_pert = config.num_pert
	# self.key_id = -1
	# self.ensure_replace_token_loc = False

	# if transpose:
	# self.key_shape = layer.weight.shape[1]
	# self.value_shape = layer.weight.shape[0]
	# else:
	# self.key_shape = layer.weight.shape[0]
	# self.value_shape = layer.weight.shape[1]
	# self.training = False

	# def add_key(self, new_key, new_value):
	# keys = torch.vstack([self.keys, new_key.detach()]) # Add new key to list of keys

	# values = torch.nn.Parameter(torch.vstack([self.values, new_value]), requires_grad=True) # Add new value to list of values

	# new_epsilon = torch.tensor(self.init_epsilon, device=self.device).view(1)
	# epsilons = torch.vstack([self.epsilons, new_epsilon]) # Add new epsilon to list of epsilons

	# key_labels = self.key_labels + [self.edit_label] # Add new key_label to list of key_labels

	# return keys, values, epsilons, key_labels

	# def init_key_value(self, query, value):
	# key = query.detach()
	# epsilon = torch.tensor(self.init_epsilon, device=self.device, requires_grad=False).view(1)
	# key_label = [self.edit_label]
	# return key, value, epsilon, key_label

	# def label_match(self, edit_label, key_label):
	# return edit_label.float().mean() == key_label.float().mean()

	# def split_epsilons_in_half(self, nearest_key, smallest_distance):
	# self.epsilons[nearest_key] = (smallest_distance / 2) - 1e-5 # Cut nearest epsilon in half
	# self.epsilons[-1] = smallest_distance / 2 # Cut new epsilon in half

	# def forward(self, *args):
	# # Run layer forward and save what it would have returned for this instance
	# layer_out = self.layer(*args)

	# ### If training, we need to modify the codebook
	# if (not self.training) & ('keys' not in self.__dict__):
	# # If it's not training time and we haven't added any keys yet (this is before doing any editing)
	# # print(self.__dict__)
	# return layer_out
	# else:
	# if not self.training and not self.ensure_replace_token_loc and self.key_id == -1:
	# token_to_edit = args[0].shape[1]-1
	# self.key_id = args[0].shape[1]-1
	# self.ensure_replace_token_loc = True
	# else:
	# token_to_edit = min(self.key_id, args[0].shape[1]-1) # args[0].shape[1] - 1 is sequence length
	# query = args[0][:, token_to_edit, :] # Just use activation for last token
	# if self.config.val_init == "cold":
	# new_value = torch.nn.Parameter(torch.rand(1, self.value_shape, requires_grad=True, device=self.device))
	# elif self.config.val_init == "warm":
	# new_value = torch.nn.Parameter(layer_out[:, token_to_edit, :].detach(), requires_grad=True)

	# if 'keys' not in self.__dict__:
	# # If no keys exist, initialize keys, values, epsilons, and key labels
	# self.keys, self.values, self.epsilons, self.key_labels = self.init_key_value(query, new_value)
	# elif self.iter == 0:
	# # Keys exist, so we have decide whether or not to update them (the fact that we've made it to this point means there was an error!)

	# # --- search through keys for a match for query ---
	# dists = torch.cdist(self.keys, query, p=2).view(-1, len(query))
	# smallest_distance, nearest_key = dists.min(0)

	# if smallest_distance > (self.init_epsilon + self.epsilons[nearest_key]):
	# # If there's no close key, make a new key
	# self.keys, self.values, self.epsilons, self.key_labels = self.add_key(query, new_value)
	# else:
	# # If there is a close key, we need to handle conflicts
	# if not self.label_match(self.edit_label, self.key_labels[nearest_key]):
	# self.keys, self.values, self.epsilons, self.key_labels = self.add_key(query, new_value)
	# self.split_epsilons_in_half(nearest_key, smallest_distance)
	# else:
	# # If the current label is the SAME as the nearest label, just make the nearest epsilon bigger
	# if smallest_distance > self.epsilons[nearest_key]:
	# if self.config.eps_expand== "coverage":
	# self.epsilons[nearest_key] = smallest_distance # Replace nearest epsilon with dist between old key and new key
	# elif self.config.eps_expand == "moving_average":
	# a = 0.5
	# self.keys[nearest_key] = a*self.keys[nearest_key] + (1-a)*query # Move old key to be halfway between
	# self.epsilons[nearest_key] = smallest_distance
	# # self.epsilons[nearest_key] = smallest_distance + self.init_epsilon
	# else:
	# # If not iter 0, we don't need to change keys, we just need to learn the value
	# pass
	# # print(token_to_edit)
	# # compute distance from query to all keys and find the closest keys
	# dists = torch.cdist(self.keys, query, p=2).view(-1, len(query))
	# smallest_dist, self.chosen_key = dists.min(0)
	# smallest_dist = smallest_dist.view(-1, 1)
	# chosen_value = self.values[self.chosen_key]
	# eps = self.epsilons[self.chosen_key].view(-1, 1)

	# if (self.config.val_train == "adv") and (self.training):
	# chosen_value = perturb_values(chosen_value, self.num_pert, self.device)

	# if self.replacement == "replace_all":
	# layer_out = torch.where((smallest_dist <= eps).view(-1, 1, 1), chosen_value.unsqueeze(1).repeat_interleave(layer_out.shape[1], 1), layer_out)
	# elif self.replacement == "replace_last":
	# layer_out[:, token_to_edit] = torch.where((smallest_dist <= eps), chosen_value, layer_out[:, token_to_edit])
	# elif self.replacement == "replace_prompt":
	# layer_out[:, :token_to_edit] = torch.where((smallest_dist <= eps), chosen_value, layer_out[:, :token_to_edit])
	# else:
	# print("token replacement choice not found")
	# return layer_out
	import copy

	import torch
	from .utils import parent_module, brackets_to_periods
	import transformers
	import os
	# Set at import time, so it applies to the whole process that imports this module.
	# NOTE(review): CUDA_LAUNCH_BLOCKING=1 is documented by CUDA as forcing synchronous
	# kernel launches (a debugging aid that slows GPU execution) — confirm it is still wanted.
	os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

	def euc(query, key):
	    """Return the pairwise Euclidean (p=2) distances between ``key`` and ``query``.

	    A ``key`` with fewer than two dimensions is first viewed as a single
	    row, so the result always has one row per key and one column per query.
	    """
	    key_rows = key.view(1, -1) if key.dim() < 2 else key
	    return torch.cdist(key_rows, query, p=2)

	def perturb_values(chosen_value, num_pert, device):
	    """Return ``chosen_value`` plus standard-normal noise, leaving index 0 un-noised.

	    The noise tensor has the same shape as ``chosen_value``, is created on
	    ``device`` and is flagged ``requires_grad`` before being added.
	    ``num_pert`` is accepted but not used by this function.
	    """
	    jitter = torch.normal(0, 1, chosen_value.shape, device=device)
	    # Multiply (rather than overwrite) so the first entry gets zero noise.
	    jitter[0] *= 0
	    jitter.requires_grad = True
	    return chosen_value + jitter

	class GRACE(torch.nn.Module):
	    """Wrap ``model`` so that one of its layers is served through a ``GRACEAdapter``.

	    On construction every parameter of ``model`` has ``requires_grad`` set to
	    False and the layer named by ``config.inner_params[0]`` is replaced by a
	    ``GRACEAdapter`` that wraps it. A deep copy of the layer that was replaced
	    is kept so ``reset_layer`` can put it back.
	    """

	    def __init__(self, config, model, device):
	        """Install the adapter on ``model``.

	        Args:
	            config: object providing ``inner_params`` here, plus whatever
	                ``GRACEAdapter`` reads from it.
	            model: the model to edit; it is modified in place.
	            device: device the adapter (and a restored layer) is moved to.
	        """
	        super().__init__()
	        self.config = config
	        self.log_dict = {}
	        self.model = model
	        # self.tokenizer = model.tokenizer
	        layer = config.inner_params[0]
	        self.device = device
	        # Stays None when the target layer was already a GRACEAdapter.
	        self.original_layer = None

	        # --- ensure proper formatting (GRACE edits ~layers~ not weights matrices) ---
	        suffixes = (".weight", ".bias")
	        self.layer = layer.rsplit(".", 1)[0] if layer.endswith(suffixes) else layer

	        # Freeze the whole model.
	        for _, p in self.model.named_parameters():
	            p.requires_grad = False

	        # The adapter reads key/value sizes from ``weight.shape`` in the opposite
	        # order for GPT-2 LM-head models than for everything else.
	        transpose = not isinstance(self.model, transformers.models.gpt2.modeling_gpt2.GPT2LMHeadModel)

	        # --- Add GRACE to chosen layers ---
	        edit_module = parent_module(self.model, brackets_to_periods(self.layer))
	        layer_name = self.layer.rsplit(".", 1)[-1]
	        original_layer = getattr(edit_module, layer_name)
	        if not isinstance(original_layer, GRACEAdapter):
	            setattr(edit_module, layer_name, GRACEAdapter(config, original_layer, transpose=transpose).to(self.device))
	            self.original_layer = copy.deepcopy(original_layer)

	    def _grace_layer(self):
	        """Return the module currently installed at ``self.layer`` inside ``self.model``."""
	        # NOTE(review): ``eval`` executes whatever expression the config-supplied
	        # layer path contains. It is kept to preserve the existing lookup
	        # semantics (presumably so bracketed paths resolve — confirm); make sure
	        # the path never comes from untrusted input.
	        return eval(f"self.model.{self.layer}")

	    def __call__(self, **kwargs):
	        """Forward keyword arguments straight to the wrapped model."""
	        return self.model(**kwargs)

	    def reset_layer(self):
	        """Put the saved copy of the original layer back in place of the adapter."""
	        layer_name = self.layer.rsplit(".", 1)[-1]
	        edit_module = parent_module(self.model, brackets_to_periods(self.layer))
	        setattr(edit_module, layer_name, self.original_layer.to(self.device))

	    def generate(self, *args, **kwargs):
	        """Reset the adapter's ``key_id`` to -1, then delegate to ``self.model.generate``.

	        All positional and keyword arguments are passed through unchanged.
	        """
	        self._grace_layer().key_id = -1
	        return self.model.generate(*args, **kwargs)

	    def edit(self, config, tokens):
	        """Run ``config.n_iter`` optimisation steps on ``tokens`` through the adapter.

	        Args:
	            config: provides ``n_iter`` and ``edit_lr``.
	            tokens: mapping passed to the model as keyword arguments; must
	                contain ``"labels"``.

	        Side effects: sets ``self.losses`` (one entry per iteration),
	        ``self.loss`` (loss of the last iteration) and the ``"chosen_key"`` /
	        ``"nkeys"`` entries of ``self.log_dict``.
	        """
	        adapter = self._grace_layer()

	        # Number of -100 labels minus one.
	        # NOTE(review): presumably the index of the last prompt token, assuming
	        # the -100 entries form a prefix of the labels — confirm.
	        key_id = (tokens["labels"] == -100).sum() - 1
	        adapter.key_id = key_id

	        # --- pass edit label, training mode, and key_id into GRACE ---
	        adapter.training = True
	        adapter.edit_label = tokens["labels"]

	        self.losses = []
	        # --- train GRACE value ---
	        for i in range(config.n_iter):
	            # --- tell the adapter which iteration this is (it only touches keys when iter == 0) ---
	            adapter.iter = i

	            # --- pass tokens through model (including through the GRACE layer) ---
	            outputs = self.model(**tokens)
	            if i == 0:
	                # The optimizer is built after the first forward pass because that
	                # pass is what creates the adapter's trainable value.
	                optimizer = torch.optim.Adam(self.model.parameters(), config.edit_lr)
	            loss = outputs.loss
	            loss.backward()
	            optimizer.step()
	            optimizer.zero_grad()
	            self.losses.append(loss.detach().cpu().numpy())

	        self.loss = loss  # Log final loss

	        # --- pull out info we want to log from the GRACE layer ---
	        adapter.training = False
	        self.log_dict["chosen_key"] = adapter.chosen_key
	        self.log_dict["nkeys"] = len(adapter.keys)

	class GRACEAdapter(torch.nn.Module):
	    """Layer wrapper that keeps a codebook of keys, values, epsilons and labels.

	    ``forward`` always runs the wrapped layer first. Once the codebook holds at
	    least one key (or while ``training`` is set), the activation at one token
	    position is used as a query; if it lies within the epsilon of its nearest
	    key, the stored value for that key replaces part of the layer output
	    according to ``config.replacement``.

	    ``edit_label`` and ``iter`` are not set here; they must be assigned from
	    outside before a training-mode forward pass.
	    """

	    def __init__(self, config, layer, transpose):
	        """Wrap ``layer``.

	        Args:
	            config: provides ``eps``, ``dist_fn``, ``replacement`` and
	                ``num_pert`` here; ``val_init``, ``val_train`` and
	                ``eps_expand`` are read later in ``forward``.
	            layer: the module being wrapped; must expose ``weight``.
	            transpose: selects which ``weight`` dimension is the key size
	                and which is the value size.
	        """
	        super().__init__()

	        self.layer = layer
	        self.weight = self.layer.weight
	        self.init_epsilon = config.eps
	        self.dist_fn = config.dist_fn
	        self.replacement = config.replacement
	        self.device = layer.weight.device
	        self.config = config
	        self.num_pert = config.num_pert
	        # -1 means "not chosen yet"; forward() then pins it to the last position.
	        self.key_id = -1
	        self.ensure_replace_token_loc = False

	        if transpose:
	            self.key_shape = layer.weight.shape[1]
	            self.value_shape = layer.weight.shape[0]
	        else:
	            self.key_shape = layer.weight.shape[0]
	            self.value_shape = layer.weight.shape[1]
	        # Start outside edit mode.
	        self.training = False

	    def add_key(self, new_key, new_value):
	        """Return the codebook extended by one entry, without assigning it.

	        Returns:
	            ``(keys, values, epsilons, key_labels)`` where the new key is
	            detached, the stacked values are wrapped in a fresh trainable
	            ``Parameter``, the new epsilon is ``init_epsilon`` and the new
	            label is the current ``edit_label``.
	        """
	        keys = torch.vstack([self.keys, new_key.detach()])  # Add new key to list of keys

	        values = torch.nn.Parameter(torch.vstack([self.values, new_value]), requires_grad=True)  # Add new value to list of values

	        new_epsilon = torch.tensor(self.init_epsilon, device=self.device).view(1)
	        epsilons = torch.vstack([self.epsilons, new_epsilon])  # Add new epsilon to list of epsilons

	        key_labels = self.key_labels + [self.edit_label]  # Add new key_label to list of key_labels

	        return keys, values, epsilons, key_labels

	    def init_key_value(self, query, value):
	        """Return the first codebook entry: detached ``query`` as key, ``value`` as-is."""
	        key = query.detach()
	        epsilon = torch.tensor(self.init_epsilon, device=self.device, requires_grad=False).view(1)
	        key_label = [self.edit_label]
	        return key, value, epsilon, key_label

	    def label_match(self, edit_label, key_label):
	        """Return whether the two labels have the same mean when cast to float.

	        NOTE(review): different labels with equal means also compare as a match.
	        """
	        return edit_label.float().mean() == key_label.float().mean()

	    def split_epsilons_in_half(self, nearest_key, smallest_distance):
	        """Shrink the nearest key's epsilon and the newest key's epsilon to half the distance.

	        The nearest key's epsilon is set slightly smaller (by 1e-5) than the
	        newest one's.
	        """
	        self.epsilons[nearest_key] = (smallest_distance / 2) - 1e-5  # Cut nearest epsilon in half
	        self.epsilons[-1] = smallest_distance / 2  # Cut new epsilon in half

	    def _update_codebook(self, query, new_value):
	        """Create the codebook, or on iteration 0 add a key / adjust the nearest key for ``query``."""
	        if 'keys' not in self.__dict__:
	            # If no keys exist, initialize keys, values, epsilons, and key labels
	            self.keys, self.values, self.epsilons, self.key_labels = self.init_key_value(query, new_value)
	            return

	        # NOTE(review): this check does not look at ``self.training``; ``iter``
	        # keeps its last assigned value between calls — confirm that is intended.
	        if self.iter != 0:
	            # If not iter 0, we don't need to change keys, we just need to learn the value
	            return

	        # --- search through keys for a match for query ---
	        dists = torch.cdist(self.keys, query, p=2).view(-1, len(query))
	        smallest_distance, nearest_key = dists.min(0)

	        if smallest_distance > (self.init_epsilon + self.epsilons[nearest_key]):
	            # If there's no close key, make a new key
	            self.keys, self.values, self.epsilons, self.key_labels = self.add_key(query, new_value)
	            return

	        # There is a close key, so conflicts have to be handled.
	        if not self.label_match(self.edit_label, self.key_labels[nearest_key]):
	            # Different label: add a key and shrink both epsilons so they do not overlap.
	            self.keys, self.values, self.epsilons, self.key_labels = self.add_key(query, new_value)
	            self.split_epsilons_in_half(nearest_key, smallest_distance)
	            return

	        # Same label as the nearest key: grow that key's coverage if the query is outside it.
	        if smallest_distance > self.epsilons[nearest_key]:
	            if self.config.eps_expand == "coverage":
	                self.epsilons[nearest_key] = smallest_distance  # Replace nearest epsilon with dist between old key and new key
	            elif self.config.eps_expand == "moving_average":
	                a = 0.5
	                self.keys[nearest_key] = a * self.keys[nearest_key] + (1 - a) * query  # Move old key to be halfway between
	                self.epsilons[nearest_key] = smallest_distance
	                # self.epsilons[nearest_key] = smallest_distance + self.init_epsilon

	    def forward(self, *args):
	        """Run the wrapped layer and, where a key matches, substitute the stored value.

	        Args:
	            *args: forwarded to the wrapped layer. ``args[0]`` is indexed as
	                ``[:, token, :]`` and its second dimension is treated as the
	                sequence length.

	        Returns:
	            The layer output, possibly with positions replaced as selected by
	            ``self.replacement`` (``"replace_all"``, ``"replace_last"`` or
	            ``"replace_prompt"``). Any other setting prints a message and
	            returns the output unchanged.
	        """
	        # Run layer forward and save what it would have returned for this instance
	        layer_out = self.layer(*args)

	        # Before any edit has been made (and outside training) behave exactly like the wrapped layer.
	        if not self.training and 'keys' not in self.__dict__:
	            return layer_out

	        last_position = args[0].shape[1] - 1
	        if not self.training and not self.ensure_replace_token_loc and self.key_id == -1:
	            # First inference call after key_id was reset: pin the query position
	            # to the last token of this input.
	            token_to_edit = last_position
	            self.key_id = last_position
	            self.ensure_replace_token_loc = True
	        else:
	            token_to_edit = min(self.key_id, last_position)  # clamp to the last valid position
	        query = args[0][:, token_to_edit, :]  # activation at the chosen token position

	        # Candidate value for a key that may be created below.
	        if self.config.val_init == "cold":
	            new_value = torch.nn.Parameter(torch.rand(1, self.value_shape, requires_grad=True, device=self.device))
	        elif self.config.val_init == "warm":
	            new_value = torch.nn.Parameter(layer_out[:, token_to_edit, :].detach(), requires_grad=True)

	        self._update_codebook(query, new_value)

	        # compute distance from query to all keys and find the closest keys
	        dists = torch.cdist(self.keys, query, p=2).view(-1, len(query))
	        smallest_dist, self.chosen_key = dists.min(0)
	        smallest_dist = smallest_dist.view(-1, 1)
	        chosen_value = self.values[self.chosen_key]
	        eps = self.epsilons[self.chosen_key].view(-1, 1)

	        if (self.config.val_train == "adv") and (self.training):
	            chosen_value = perturb_values(chosen_value, self.num_pert, self.device)

	        # Only queries within the chosen key's epsilon get their output replaced.
	        if self.replacement == "replace_all":
	            layer_out = torch.where((smallest_dist <= eps).view(-1, 1, 1), chosen_value.unsqueeze(1).repeat_interleave(layer_out.shape[1], 1), layer_out)
	        elif self.replacement == "replace_last":
	            layer_out[:, token_to_edit] = torch.where((smallest_dist <= eps), chosen_value, layer_out[:, token_to_edit])
	        elif self.replacement == "replace_prompt":
	            layer_out[:, :token_to_edit] = torch.where((smallest_dist <= eps), chosen_value, layer_out[:, :token_to_edit])
	        else:
	            print("token replacement choice not found")
	        return layer_out