tinystories-1M-SAES / gelu-2l-L1-z-cat-sweep-anthropic-resample /gelu-2l_L0_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v12_cfg.json
Connor
add L0 runs
d801037
raw
history blame contribute delete
827 Bytes
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 0, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "resample_scheme": "anthropic", "anthropic_neuron_resample_scale": 0.2, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "gelu-2l-L1-z-cat-sweep-anthropic-resample", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_0_16384_z"}