{ "trainer": { "trainer_class": "BatchTopKCrossCoderTrainer", "dict_class": "BatchTopKCrossCoder", "lr": 0.0001, "steps": 97656, "auxk_alpha": 0.03125, "warmup_steps": 1000, "decay_start": null, "threshold_beta": 0.999, "threshold_start_step": 1000, "top_k_aux": 1152, "seed": null, "activation_dim": 2304, "dict_size": 73728, "k": 100, "sparsity_loss_type": "LossType.CROSSCODER", "sparsity_loss_alpha_sae": 1.0, "sparsity_loss_alpha_cc": 0.1, "device": "cuda", "layer": 13, "lm_name": "google/gemma-2-2b-it-google/gemma-2-2b", "wandb_name": "gemma-2-2b-L13-k100-lr1e-04-local-shuffling-CCLoss", "submodule_name": null } }