Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- Forward_TL/33zj1f4a/1351680/cfg.json +1 -0
- Forward_TL/33zj1f4a/1351680/sae_weights.safetensors +3 -0
- Forward_TL/33zj1f4a/1351680/sparsity.safetensors +3 -0
- Forward_TL/6s2vc5va/100003840/cfg.json +1 -0
- Forward_TL/6s2vc5va/100003840/sae_weights.safetensors +3 -0
- Forward_TL/6s2vc5va/100003840/sparsity.safetensors +3 -0
- Forward_TL/6s2vc5va/200003584/cfg.json +1 -0
- Forward_TL/6s2vc5va/200003584/sae_weights.safetensors +3 -0
- Forward_TL/6s2vc5va/200003584/sparsity.safetensors +3 -0
- Forward_TL/6s2vc5va/300003328/cfg.json +1 -0
- Forward_TL/6s2vc5va/300003328/sae_weights.safetensors +3 -0
- Forward_TL/6s2vc5va/300003328/sparsity.safetensors +3 -0
- Forward_TL/6s2vc5va/400003072/cfg.json +1 -0
- Forward_TL/6s2vc5va/400003072/sae_weights.safetensors +3 -0
- Forward_TL/6s2vc5va/400003072/sparsity.safetensors +3 -0
- Forward_TL/6s2vc5va/final_500002816/cfg.json +1 -0
- Forward_TL/6s2vc5va/final_500002816/sae_weights.safetensors +3 -0
- Forward_TL/6s2vc5va/final_500002816/sparsity.safetensors +3 -0
- Forward_TL/b6n3tsnf/100003840/cfg.json +1 -0
- Forward_TL/b6n3tsnf/100003840/sae_weights.safetensors +3 -0
- Forward_TL/b6n3tsnf/100003840/sparsity.safetensors +3 -0
- Forward_TL/b6n3tsnf/200003584/cfg.json +1 -0
- Forward_TL/b6n3tsnf/200003584/sae_weights.safetensors +3 -0
- Forward_TL/b6n3tsnf/200003584/sparsity.safetensors +3 -0
- Forward_TL/b6n3tsnf/300003328/cfg.json +1 -0
- Forward_TL/b6n3tsnf/300003328/sae_weights.safetensors +3 -0
- Forward_TL/b6n3tsnf/300003328/sparsity.safetensors +3 -0
- Forward_TL/b6n3tsnf/400003072/cfg.json +1 -0
- Forward_TL/b6n3tsnf/400003072/sae_weights.safetensors +3 -0
- Forward_TL/b6n3tsnf/400003072/sparsity.safetensors +3 -0
- Forward_TL/b6n3tsnf/final_500002816/cfg.json +1 -0
- Forward_TL/b6n3tsnf/final_500002816/sae_weights.safetensors +3 -0
- Forward_TL/b6n3tsnf/final_500002816/sparsity.safetensors +3 -0
- Forward_TL/blne5ifx/100003840/cfg.json +1 -0
- Forward_TL/blne5ifx/100003840/sae_weights.safetensors +3 -0
- Forward_TL/blne5ifx/100003840/sparsity.safetensors +3 -0
- Forward_TL/blne5ifx/200003584/cfg.json +1 -0
- Forward_TL/blne5ifx/200003584/sae_weights.safetensors +3 -0
- Forward_TL/blne5ifx/200003584/sparsity.safetensors +3 -0
- Forward_TL/blne5ifx/300003328/cfg.json +1 -0
- Forward_TL/blne5ifx/300003328/sae_weights.safetensors +3 -0
- Forward_TL/blne5ifx/300003328/sparsity.safetensors +3 -0
- Forward_TL/blne5ifx/400003072/cfg.json +1 -0
- Forward_TL/blne5ifx/400003072/sae_weights.safetensors +3 -0
- Forward_TL/blne5ifx/400003072/sparsity.safetensors +3 -0
- Forward_TL/blne5ifx/final_500002816/cfg.json +1 -0
- Forward_TL/blne5ifx/final_500002816/sae_weights.safetensors +3 -0
- Forward_TL/blne5ifx/final_500002816/sparsity.safetensors +3 -0
- Forward_TL/hqc3zt9g/100003840/cfg.json +1 -0
- Forward_TL/hqc3zt9g/100003840/sae_weights.safetensors +3 -0
Forward_TL/33zj1f4a/1351680/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.10.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 10, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L9", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L10_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/33zj1f4a", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/33zj1f4a/1351680/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e94a5e833e3060f33c561558b262392a2da1b6705da4d27a5270933388a51dd
|
3 |
+
size 37801344
|
Forward_TL/33zj1f4a/1351680/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7753bb93b0250a4961d76e5895d6b03e43a9f8663a514cd1a7c8d97894637ad2
|
3 |
+
size 24656
|
Forward_TL/6s2vc5va/100003840/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.4.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 4, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L3", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L4_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/6s2vc5va", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/6s2vc5va/100003840/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1c6fe2415252cf190af2cfa66def158aa016efe048d9118a6a9e01cce07bd19
|
3 |
+
size 37801344
|
Forward_TL/6s2vc5va/100003840/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:154ec55449800692839c1e332a94beaf8c9260b5a0e6d7757ead91d159431116
|
3 |
+
size 24656
|
Forward_TL/6s2vc5va/200003584/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.4.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 4, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L3", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L4_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/6s2vc5va", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/6s2vc5va/200003584/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:479e8d53a4de3697a9e1aa23ac60d8d5e9fdd2352c8ba9bdc39844e3c735a75f
|
3 |
+
size 37801344
|
Forward_TL/6s2vc5va/200003584/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f77c97270ae3582c849a2c209918bcb47d3d3f05d07d4433ec0a04c8278815d1
|
3 |
+
size 24656
|
Forward_TL/6s2vc5va/300003328/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.4.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 4, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L3", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L4_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/6s2vc5va", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/6s2vc5va/300003328/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ffa323947044fbc7d9eb21a8b4e7f6213db0e07f46fd0e1c85aa52405ad03b1
|
3 |
+
size 37801344
|
Forward_TL/6s2vc5va/300003328/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f452a250d1c1a864cee7ad8289c33de4b95626ae6d416c9bf32c631f0db37ba
|
3 |
+
size 24656
|
Forward_TL/6s2vc5va/400003072/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.4.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 4, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L3", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L4_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/6s2vc5va", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/6s2vc5va/400003072/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2b4bc50fb1aff654e724e2a9f0bfa38009837ebe9a6e09ae739475fc3702431
|
3 |
+
size 37801344
|
Forward_TL/6s2vc5va/400003072/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04e1d336928e92cc0c050e1bc74928cdcecf13704c6585320043e7e505cd9512
|
3 |
+
size 24656
|
Forward_TL/6s2vc5va/final_500002816/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.4.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 4, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L3", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L4_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/6s2vc5va", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/6s2vc5va/final_500002816/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a34ff9264370aaba2935d6d2943e4e94e460babb7385b60d9765ad6e5a13545
|
3 |
+
size 37801344
|
Forward_TL/6s2vc5va/final_500002816/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90daeec0a1a7e4dca8267f35171f84c61d99a08d356411ee37248e75f1f4eade
|
3 |
+
size 24656
|
Forward_TL/b6n3tsnf/100003840/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.11.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 11, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L10", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L11_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/b6n3tsnf", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/b6n3tsnf/100003840/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6adea5bd9a8645fe0cd881e55de4a9ce6ca8b2d09829e67f6a1e952ba3d36b83
|
3 |
+
size 37801344
|
Forward_TL/b6n3tsnf/100003840/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abe802a8188fe084c69aea93c99812d0d6b7a4406e4cf5f860bd484b5a073449
|
3 |
+
size 24656
|
Forward_TL/b6n3tsnf/200003584/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.11.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 11, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L10", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L11_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/b6n3tsnf", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/b6n3tsnf/200003584/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0eea23cd1997e30d5c8b86fddbf7405cc54d5a5fc3eebc29236fade4c3c725bb
|
3 |
+
size 37801344
|
Forward_TL/b6n3tsnf/200003584/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74ae2659c67e2408ccdc44b30da31289656fde82f8e9329d0d0d879ed9495622
|
3 |
+
size 24656
|
Forward_TL/b6n3tsnf/300003328/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.11.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 11, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L10", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L11_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/b6n3tsnf", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/b6n3tsnf/300003328/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ddf2aaea67f7a301ade94f89fefd0ddb6f77967cef932fc01c4f9127944a05b1
|
3 |
+
size 37801344
|
Forward_TL/b6n3tsnf/300003328/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:054bd93c3f13b2cfaca585ec810d85f983057fa9567a94dfa70e7ba19d0676ea
|
3 |
+
size 24656
|
Forward_TL/b6n3tsnf/400003072/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.11.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 11, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L10", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L11_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/b6n3tsnf", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/b6n3tsnf/400003072/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aca7570aec8770477d6b8a086778b8abea6f765046a106d029aab7891a8888fa
|
3 |
+
size 37801344
|
Forward_TL/b6n3tsnf/400003072/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42d7f8dac03916b330f2a5521296f4f744589d82a19a2ce90ccbae8ae07e9cd9
|
3 |
+
size 24656
|
Forward_TL/b6n3tsnf/final_500002816/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.11.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 11, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L10", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L11_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/b6n3tsnf", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/b6n3tsnf/final_500002816/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be193dee14169955d78669513b5a2370a7e27bedda1fdcd18fb57cfcad09450b
|
3 |
+
size 37801344
|
Forward_TL/b6n3tsnf/final_500002816/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11cf36b0a5546fa71450bb30ee9fa0007b84b9a30b4f813c700a81abce61a3b0
|
3 |
+
size 24656
|
Forward_TL/blne5ifx/100003840/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.7.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 7, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L6", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L7_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/blne5ifx", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/blne5ifx/100003840/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60af76fe6802592609864cd968e2bbd8dfcd115cef194b8e721526f4278969ac
|
3 |
+
size 37801344
|
Forward_TL/blne5ifx/100003840/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1815b7b64de697f3b7fdb537d7d9a48657deaf14f2fd2e8c1cb2b1b9fb3bc87c
|
3 |
+
size 24656
|
Forward_TL/blne5ifx/200003584/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.7.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 7, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L6", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L7_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/blne5ifx", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/blne5ifx/200003584/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6631f2a17894881bef7119cffd9988721f8c2b75685554fe717d0f46c612074
|
3 |
+
size 37801344
|
Forward_TL/blne5ifx/200003584/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb5d6f7be6e09f1469e0a3f6b9804c70de1f0b26ff3486e9ea9f59efbbec98a0
|
3 |
+
size 24656
|
Forward_TL/blne5ifx/300003328/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.7.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 7, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L6", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L7_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/blne5ifx", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/blne5ifx/300003328/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9a396a5d5f2669a2d86b5d2fb977eddf5182d0b1717df2a32aa14758deddf09
|
3 |
+
size 37801344
|
Forward_TL/blne5ifx/300003328/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5d9a841bf278ad3c54978256aaba031a4df5c2365006f9ba91bfd2c360a1036
|
3 |
+
size 24656
|
Forward_TL/blne5ifx/400003072/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.7.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 7, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L6", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L7_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/blne5ifx", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/blne5ifx/400003072/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b5a6162aa25f3ed51226eb899e0e9c31cbf11d35be4666b4a8acc9d01fb93fa
|
3 |
+
size 37801344
|
Forward_TL/blne5ifx/400003072/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72991c09f28ed2449ab395392fa6ba504f7571b28a3fc974752458ab90d5e585
|
3 |
+
size 24656
|
Forward_TL/blne5ifx/final_500002816/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.7.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 7, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L6", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L7_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/blne5ifx", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/blne5ifx/final_500002816/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e9566556ec0e978dcdbc8f65402473c2b6b94f87d53ccdc1b1991509a106670
|
3 |
+
size 37801344
|
Forward_TL/blne5ifx/final_500002816/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:328caa01ae7c0ef81f7e6fca48e310e23a7aa05902bbf7853f28242f91e0b00a
|
3 |
+
size 24656
|
Forward_TL/hqc3zt9g/100003840/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.9.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 9, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L8", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L9_hook_resid_post_L1_1_FW", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/hqc3zt9g", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912}
|
Forward_TL/hqc3zt9g/100003840/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5b08b2c356bec6aa20ab492c5028287992be95d64d81fa8ada457564297a532
|
3 |
+
size 37801344
|