#!/usr/bin/env python3
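"""Migrate a legacy tuned lens checkpoint to the current TunedLens format.

Note: this module docstring is an added summary of what the script below does.
It loads a lens saved by the old `TunedLensOld` class, copies its translator
weights into a new `TunedLens`, fuzz-tests the two against each other on random
hidden states, and saves the converted lens.
"""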
from huggingface_hub import model_info
import argparse
from copy import deepcopy
import inspect
from warnings import warn
from pathlib import Path
from tqdm import tqdm
import json
from tuned_lens.model_surgery import get_final_norm, get_transformer_layers
from tuned_lens.load_artifacts import load_lens_artifacts
from tuned_lens.nn import TunedLens
from transformers.models.bloom.modeling_bloom import BloomBlock
from transformers import PreTrainedModel, AutoModelForCausalLM
from typing import Optional, Generator, Union
import torch as th
from tuned_lens.stats.distance import js_divergence

def instantiate_layer(model_config, layer_idx: int, model_type: str) -> th.nn.Module:
    if model_type == "bloom":
        from transformers.models.bloom.modeling_bloom import BloomBlock

        return _BloomBlockWrapper(BloomBlock(model_config))  # type: ignore[arg-type]
    if model_type == "gpt_neo":
        from transformers.models.gpt_neo.modeling_gpt_neo import GPTNeoBlock

        return GPTNeoBlock(model_config, layer_idx)
    if model_type == "gpt_neox":
        from transformers.models.gpt_neox.modeling_gpt_neox import (
            GPTNeoXLayer,
        )

        return GPTNeoXLayer(model_config)  # type: ignore[arg-type]
    if model_type == "gpt2":
        from transformers.models.gpt2.modeling_gpt2 import GPT2Block

        return GPT2Block(model_config, layer_idx)  # type: ignore[arg-type]
    if model_type == "opt":
        from transformers.models.opt.modeling_opt import OPTDecoderLayer

        return OPTDecoderLayer(model_config)  # type: ignore[arg-type]
    else:
        raise ValueError(f"Unknown model type '{model_type}'")


def maybe_wrap(layer: th.nn.Module) -> th.nn.Module:
    return _BloomBlockWrapper(layer) if isinstance(layer, BloomBlock) else layer

# Very annoying that we have to do this. See https://bit.ly/3XSQ7W6 for context on
# what we're doing here.
class _BloomBlockWrapper(th.nn.Module):
    def __init__(self, block: BloomBlock):
        super().__init__()
        self.block = block

    def forward(self, x: th.Tensor) -> th.Tensor:
        from transformers.models.bloom.modeling_bloom import (
            BloomModel,
            build_alibi_tensor,
        )

        batch_size, seq_len, _ = x.shape
        dummy_mask = x.new_ones([batch_size, seq_len])

        # Causal mask isn't created inside the block itself, so we have to do it here.
        # Weirdly _prepare_attn_mask doesn't depend on `self` at all but is still an
        # instance method for some reason, so we pass `None` as the first argument.
        causal_mask = BloomModel._prepare_attn_mask(
            None, dummy_mask, (batch_size, seq_len), 0  # type: ignore[arg-type]
        )
        alibi = build_alibi_tensor(dummy_mask, self.block.num_heads, x.dtype)
        h, *_ = self.block(x, alibi, causal_mask)
        return h

class TunedLensOld(th.nn.Module):
    """A tuned lens for decoding hidden states into logits."""

    layer_norm: th.nn.LayerNorm
    unembedding: th.nn.Linear
    extra_layers: th.nn.Sequential
    layer_translators: th.nn.ModuleList

    def __init__(
        self,
        model: Optional[PreTrainedModel] = None,
        *,
        bias: bool = True,
        extra_layers: int = 0,
        include_input: bool = True,
        reuse_unembedding: bool = True,
        # Used when saving and loading the lens
        model_config: Optional[dict] = None,
        d_model: Optional[int] = None,
        num_layers: Optional[int] = None,
        vocab_size: Optional[int] = None,
    ):
        """Create a TunedLensOld.

        Args:
            model : A pretrained model from the transformers library you wish to inspect.
            bias : Whether to include a bias term in the translator layers.
            extra_layers : The number of extra layers to apply to the hidden states
                before decoding into logits.
            include_input : Whether to include a lens that decodes the word embeddings.
            reuse_unembedding : Whether to reuse the unembedding matrix from the model.
            model_config : The config of the model. Used for saving and loading.
            d_model : The model's hidden size. Used for saving and loading.
            num_layers : The number of layers in the model. Used for saving and loading.
            vocab_size : The size of the vocabulary. Used for saving and loading.

        Raises:
            ValueError: if neither a model nor all of d_model, num_layers, and
                vocab_size are provided.
        """
        super().__init__()
        self.extra_layers = th.nn.Sequential()

        # The parentheses matter: without them Python chains `is` and `==`.
        if (model is None) == (
            d_model is None or num_layers is None or vocab_size is None
        ):
            raise ValueError(
                "Must provide either a model or d_model, num_layers, and vocab_size"
            )

        # Initializing from scratch without a model
        if not model:
            assert d_model and num_layers and vocab_size
            self.layer_norm = th.nn.LayerNorm(d_model)
            self.unembedding = th.nn.Linear(d_model, vocab_size, bias=False)

        # Use HuggingFace methods to get decoder layers
        else:
            assert not (d_model or num_layers or vocab_size)
            d_model = model.config.hidden_size
            num_layers = model.config.num_hidden_layers
            vocab_size = model.config.vocab_size
            assert isinstance(d_model, int) and isinstance(vocab_size, int)

            model_config = model.config.to_dict()  # type: ignore[F841]

            # Currently we convert the decoder to full precision
            self.unembedding = deepcopy(model.get_output_embeddings()).float()
            if ln := get_final_norm(model):
                self.layer_norm = deepcopy(ln).float()
            else:
                self.layer_norm = th.nn.Identity()

            if extra_layers:
                _, layers = get_transformer_layers(model)
                self.extra_layers.extend(
                    [maybe_wrap(layer) for layer in layers[-extra_layers:]]
                )

        # Save config for later
        config_keys = set(inspect.getfullargspec(TunedLensOld).kwonlyargs)
        self.config = {k: v for k, v in locals().items() if k in config_keys}
        del model_config

        # Try to prevent finetuning the decoder
        assert d_model and num_layers
        self.layer_norm.requires_grad_(False)
        self.unembedding.requires_grad_(False)

        out_features = d_model if reuse_unembedding else vocab_size
        translator = th.nn.Linear(d_model, out_features, bias=bias)
        if not reuse_unembedding:
            translator.weight.data = self.unembedding.weight.data.clone()
            translator.bias.data.zero_()
        else:
            translator.weight.data.zero_()
            translator.bias.data.zero_()

        self.add_module("input_translator", translator if include_input else None)
        # Don't include the final layer
        num_layers -= 1

        self.layer_translators = th.nn.ModuleList(
            [deepcopy(translator) for _ in range(num_layers)]
        )
    def __getitem__(self, item: int) -> th.nn.Module:
        """Get the probe module at the given index."""
        if isinstance(self.input_translator, th.nn.Module):
            if item == 0:
                return self.input_translator
            else:
                item -= 1

        return self.layer_translators[item]

    def __iter__(self) -> Generator[th.nn.Module, None, None]:
        """Get iterator over the translators within the lens."""
        if isinstance(self.input_translator, th.nn.Module):
            yield self.input_translator

        yield from self.layer_translators
    @classmethod
    def load(cls, resource_id: str, **kwargs) -> "TunedLensOld":
        """Load a tuned lens from the file system or the Hugging Face Hub.

        Args:
            resource_id : The path to the directory containing the config and
                checkpoint, or the name of the model on the Hugging Face Hub.
            **kwargs : Additional arguments to pass to torch.load.

        Returns:
            A TunedLensOld instance.
        """
        config_path, ckpt_path = load_lens_artifacts(resource_id)

        # Load config
        with open(config_path, "r") as f:
            config = json.load(f)

        # Load parameters
        state = th.load(ckpt_path, **kwargs)

        # Backwards compatibility; we really need to stop renaming things
        keys = list(state.keys())
        for key in keys:
            for old_key in ["probe", "adapter"]:
                if old_key in key:
                    warn(
                        f"Loading a checkpoint with a '{old_key}' key. "
                        "This is deprecated and may be removed in a future version."
                    )
                    new_key = key.replace(old_key, "translator")
                    state[new_key] = state.pop(key)

        # Drop unrecognized config keys
        unrecognized = set(config) - set(inspect.getfullargspec(cls).kwonlyargs)
        for key in unrecognized:
            warn(f"Ignoring config key '{key}'")
            del config[key]

        lens = cls(**config)

        if num_extras := config.get("extra_layers"):
            # This is sort of a hack but AutoConfig doesn't appear to have a from_dict
            # method for some reason.
            from transformers.models.auto import CONFIG_MAPPING

            model_conf_dict = config.get("model_config")
            assert model_conf_dict, "Need a 'model_config' entry to load extra layers"
            model_conf_dict.pop("torch_dtype", None)

            model_type = model_conf_dict["model_type"]
            config_cls = CONFIG_MAPPING[model_type]
            model_config = config_cls.from_dict(model_conf_dict)

            lens.extra_layers = th.nn.Sequential(
                *[
                    instantiate_layer(
                        model_config, model_config.num_hidden_layers - i - 1, model_type
                    )
                    for i in range(num_extras)
                ]
            )

        lens.load_state_dict(state)
        return lens
    def save(
        self,
        path: Union[Path, str],
        ckpt: str = "params.pt",
        config: str = "config.json",
    ) -> None:
        """Save the lens to a directory.

        Args:
            path : The path to the directory to save the lens to.
            ckpt : The name of the checkpoint file to save the parameters to.
            config : The name of the config file to save the config to.
        """
        path = Path(path)
        path.mkdir(exist_ok=True, parents=True)
        th.save(self.state_dict(), path / ckpt)

        with open(path / config, "w") as f:
            json.dump(self.config, f)

    def normalize_(self):
        """Canonicalize the transforms by centering their weights and biases."""
        for linear in self:
            assert isinstance(linear, th.nn.Linear)

            A, b = linear.weight.data, linear.bias.data
            A -= A.mean(dim=0, keepdim=True)
            b -= b.mean()
    def transform_hidden(self, h: th.Tensor, idx: int) -> th.Tensor:
        """Transform hidden state from layer `idx`."""
        if not self.config["reuse_unembedding"]:
            raise RuntimeError(
                "TunedLensOld.transform_hidden requires reuse_unembedding"
            )

        # Note that we add the translator output residually, in contrast to the formula
        # in the paper. By parametrizing it this way we ensure that weight decay
        # regularizes the transform toward the identity, not the zero transformation.
        return h + self[idx](h)

    def to_logits(self, h: th.Tensor) -> th.Tensor:
        """Decode a hidden state into logits."""
        h = self.extra_layers(h)
        while isinstance(h, tuple):
            h, *_ = h

        return self.unembedding(self.layer_norm(h))

    def forward(self, h: th.Tensor, idx: int) -> th.Tensor:
        """Transform and then decode the hidden states into logits."""
        # Sanity check to make sure we don't finetune the decoder
        # if any(p.requires_grad for p in self.parameters(recurse=False)):
        #     raise RuntimeError("Make sure to freeze the decoder")

        # We're learning a separate unembedding for each layer
        if not self.config["reuse_unembedding"]:
            h_ = self.layer_norm(h)
            return self[idx](h_)

        h = self.transform_hidden(h, idx)
        return self.to_logits(h)

    def __len__(self) -> int:
        """Return the number of layer translators in the lens."""
        N = len(self.layer_translators)
        if self.input_translator:
            N += 1

        return N

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default="gpt2")
    parser.add_argument("--resource-id", type=str, default="gpt2")
    parser.add_argument("--output-dir", type=str, default="lens/gpt2")
    args = parser.parse_args()

    model = AutoModelForCausalLM.from_pretrained(args.model)
    revision = model_info(args.model).sha
    model.eval()
    model.requires_grad_(False)

    device = th.device("cuda:0" if th.cuda.is_available() else "cpu")

    print("Loading old lens")
    tuned_lens_old = TunedLensOld.load(args.resource_id, map_location=device)

    print("Initializing new lens")
    tuned_lens = TunedLens.from_model(
        model, bias=tuned_lens_old.config["bias"], revision=revision
    )

    for i in tqdm(range(len(tuned_lens_old)), desc="Copying parameters"):
        tuned_lens[i].load_state_dict(tuned_lens_old[i].state_dict())

    tuned_lens = tuned_lens.to(device)
    tuned_lens_old = tuned_lens_old.to(device)
    model = model.to(device)

    # Fuzz the new lens against the old one
    with th.no_grad():
        for i in tqdm(range(len(tuned_lens)), desc="Fuzzing layers"):
            for _ in range(10):
                a = th.randn(1, 1, tuned_lens.config.d_model, device=device)
                logits_new = tuned_lens(a, i)
                logits_old = tuned_lens_old(a, i)
                log_ps_new = logits_new.log_softmax(-1)
                log_ps_old = logits_old.log_softmax(-1)
                print("js div", js_divergence(log_ps_new, log_ps_old))
                assert th.allclose(log_ps_new, log_ps_old, atol=1e-4), (
                    log_ps_new - log_ps_old
                ).abs().max()

    print("Saving new lens to", args.output_dir)
    tuned_lens.to(th.device("cpu")).save(args.output_dir)
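
# Example invocation (added for illustration; the script filename is an
# assumption, adjust it to wherever this file lives in your checkout):
#
#   python lens_migration.py --model gpt2 --resource-id gpt2 --output-dir lens/gpt2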