tinystories-1M-SAES / gelu-2l_1_16384_z_2_cfg.json
3v324v23's picture
add hook_z concat
70891a6
raw
history blame
475 Bytes
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 200000000, "l1_coeff": 0.215, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": false, "concat_heads": true, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}