Upload folder using huggingface_hub
This view is limited to 50 files because the commit contains too many changes.
- k128-sae-mlp-32k-seed2/config.json +1 -0
- k128-sae-mlp-32k-seed2/layers.6.mlp/cfg.json +1 -0
- k128-sae-mlp-32k-seed2/layers.6.mlp/sae.safetensors +3 -0
- k128-sae-mlp-32k-seed2/lr_scheduler.pt +3 -0
- k128-sae-mlp-32k-seed2/optimizer.pt +3 -0
- k128-sae-mlp-32k-seed2/state.pt +3 -0
- k128-sae-mlp-32k/config.json +1 -0
- k128-sae-mlp-32k/layers.0/cfg.json +1 -0
- k128-sae-mlp-32k/layers.0/sae.safetensors +3 -0
- k128-sae-mlp-32k/layers.3/cfg.json +1 -0
- k128-sae-mlp-32k/layers.3/sae.safetensors +3 -0
- k128-sae-mlp-32k/layers.6.mlp/cfg.json +1 -0
- k128-sae-mlp-32k/layers.6.mlp/sae.safetensors +3 -0
- k128-sae-mlp-32k/layers.6/cfg.json +1 -0
- k128-sae-mlp-32k/layers.6/sae.safetensors +3 -0
- k128-sae-mlp-32k/layers.9/cfg.json +1 -0
- k128-sae-mlp-32k/layers.9/sae.safetensors +3 -0
- k128-sae-mlp-32k/lr_scheduler.pt +3 -0
- k128-sae-mlp-32k/optimizer.pt +3 -0
- k128-sae-mlp-32k/state.pt +3 -0
- k256-gpt2-exp36-seed2/h.6/cfg.json +1 -0
- k256-gpt2-exp36-seed2/h.6/sae.safetensors +3 -0
- k256-gpt2-exp36/config.json +1 -0
- k256-gpt2-exp36/h.6/cfg.json +1 -0
- k256-gpt2-exp36/h.6/sae.safetensors +3 -0
- k256-gpt2-exp36/lr_scheduler.pt +3 -0
- k256-gpt2-exp36/optimizer.pt +3 -0
- k256-gpt2-exp36/state.pt +3 -0
- k256-sae-mlp-32k-seed2/config.json +1 -0
- k256-sae-mlp-32k-seed2/layers.0/cfg.json +1 -0
- k256-sae-mlp-32k-seed2/layers.0/sae.safetensors +3 -0
- k256-sae-mlp-32k-seed2/layers.3/cfg.json +1 -0
- k256-sae-mlp-32k-seed2/layers.3/sae.safetensors +3 -0
- k256-sae-mlp-32k-seed2/layers.6.mlp/cfg.json +1 -0
- k256-sae-mlp-32k-seed2/layers.6.mlp/sae.safetensors +3 -0
- k256-sae-mlp-32k-seed2/layers.6/cfg.json +1 -0
- k256-sae-mlp-32k-seed2/layers.6/sae.safetensors +3 -0
- k256-sae-mlp-32k-seed2/layers.9/cfg.json +1 -0
- k256-sae-mlp-32k-seed2/layers.9/sae.safetensors +3 -0
- k256-sae-mlp-32k-seed2/lr_scheduler.pt +3 -0
- k256-sae-mlp-32k-seed2/optimizer.pt +3 -0
- k256-sae-mlp-32k-seed2/state.pt +3 -0
- k256-sae-mlp-32k/config.json +1 -0
- k256-sae-mlp-32k/layers.6.mlp/cfg.json +1 -0
- k256-sae-mlp-32k/layers.6.mlp/sae.safetensors +3 -0
- k256-sae-mlp-32k/lr_scheduler.pt +3 -0
- k256-sae-mlp-32k/optimizer.pt +3 -0
- k256-sae-mlp-32k/state.pt +3 -0
- k256-sae-mlp-4k-seed2/config.json +1 -0
- k256-sae-mlp-4k-seed2/layers.6.mlp/cfg.json +1 -0
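Since the folder was pushed with huggingface_hub, the same library can fetch any of these runs back. A minimal sketch, where `repo_id` is a hypothetical placeholder for this repository's actual id:

```python
from huggingface_hub import snapshot_download

# Fetch a single run directory from the Hub. The repo_id below is a
# placeholder; substitute the repository this commit belongs to.
local_dir = snapshot_download(
    repo_id="your-org/your-sae-checkpoints",
    allow_patterns=["k128-sae-mlp-32k/*"],
)
print(local_dir)
```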
k128-sae-mlp-32k-seed2/config.json
ADDED
@@ -0,0 +1 @@
+{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "init_seeds": [12], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k128-sae-mlp-32k-seed2", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}
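This run config appears to come from EleutherAI's SAE training setup: a TopK SAE (k = 128) with 32,768 latents trained on the layers.6.mlp hookpoint of pythia-160m. A minimal sketch of reading it and deriving the latent width, assuming the convention (suggested by the GPT-2 configs below, where num_latents is 0) that a nonzero num_latents overrides expansion_factor * d_in:

```python
import json

# Sketch: derive the effective latent count from a run config plus the
# per-hookpoint d_in (768 for pythia-160m). Assumption: a nonzero
# num_latents overrides expansion_factor * d_in.
with open("k128-sae-mlp-32k-seed2/config.json") as f:
    cfg = json.load(f)["sae"]

d_in = 768
num_latents = cfg["num_latents"] or cfg["expansion_factor"] * d_in
print(num_latents)  # 32768 here, since num_latents is set explicitly
```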
k128-sae-mlp-32k-seed2/layers.6.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 768}
k128-sae-mlp-32k-seed2/layers.6.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:982e007f252a2a25ec7c710d4aa0f91c885105a4b1f306a29014fcae08791f34
+size 201461072
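Each .safetensors entry in this diff is a git-lfs pointer (version/oid/size), not the tensor data itself. The declared 201,461,072 bytes are consistent with an fp32 SAE of 32,768 latents over d_in = 768; a sketch of that arithmetic, assuming the usual encoder/decoder weight matrices plus two bias vectors:

```python
# Sanity-check the LFS-declared size against the expected parameter count,
# assuming fp32 and tensors (W_enc, W_dec, b_enc, b_dec).
num_latents, d_in = 32768, 768
params = 2 * num_latents * d_in + num_latents + d_in
print(params)      # 50_365_184 parameters
print(params * 4)  # 201_460_736 bytes; the remaining ~336 bytes of the
                   # declared 201_461_072 are the safetensors header.
```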
k128-sae-mlp-32k-seed2/lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a645210450394a692a612289cb5fe097161dfa420f3c634f6516bc67841ac2b4
+size 1012
k128-sae-mlp-32k-seed2/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d2b3073f5967404890d8a79cbd64d43bb0d7b879689950b689c218dcd77656a
+size 102316366
k128-sae-mlp-32k-seed2/state.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:737bd1d625a5ef15ed9dfef1d3972156ee7a9eeb778f2e3314a0eb79957c340e
+size 263314
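optimizer.pt, lr_scheduler.pt, and state.pt are evidently the trainer's resume checkpoint (the config exposes a resume flag). Their internal schema is not visible in this diff, so a sketch that only inspects them:

```python
import torch

# Inspect the training-state files without assuming their schema.
# Recent torch versions may need weights_only=False for arbitrary pickles.
for name in ("state.pt", "optimizer.pt", "lr_scheduler.pt"):
    obj = torch.load(f"k128-sae-mlp-32k-seed2/{name}", map_location="cpu")
    keys = list(obj.keys()) if isinstance(obj, dict) else type(obj).__name__
    print(name, keys)
```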
k128-sae-mlp-32k/config.json
ADDED
@@ -0,0 +1 @@
+{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k128-sae-mlp-32k", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 42, "number_seeds": 1, "data_preprocessing_num_proc": 48}
k128-sae-mlp-32k/layers.0/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 768}
k128-sae-mlp-32k/layers.0/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:381e53945c4e729d56340c87186304271066abfbaaec1daea36570cb7305ba72
+size 201461072
k128-sae-mlp-32k/layers.3/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 768}
k128-sae-mlp-32k/layers.3/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e196f4c0d8dcecbcec820ecaf99740a7d57953e9e4ffeda6ab6706ef527e161
+size 201461072
k128-sae-mlp-32k/layers.6.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 768}
k128-sae-mlp-32k/layers.6.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be708e5c0f44bb82718ec6131451a2261ce932632a46891e752f0e5ca5ccd54c
+size 201461072
k128-sae-mlp-32k/layers.6/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 768}
k128-sae-mlp-32k/layers.6/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:841debc7b22d1c18f30034cfd317c17ad5335cbeea90963138e8d6672455a9f6
+size 201461072
k128-sae-mlp-32k/layers.9/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 128, "multi_topk": false, "skip_connection": false, "d_in": 768}
k128-sae-mlp-32k/layers.9/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfbe2c11cc22575ff61cdfe7217224065696228ac8dbe4e4b0bbb7b32b3ce072
+size 201461072
k128-sae-mlp-32k/lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a645210450394a692a612289cb5fe097161dfa420f3c634f6516bc67841ac2b4
+size 1012
k128-sae-mlp-32k/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c57f542b71d5e3f8ac02a440577311bee95125ed3a6fe25cb1c426e743f436e1
+size 102316366
k128-sae-mlp-32k/state.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0320a194ff05c45b557bd1f8239ddf3513695a8b17d1a50512bac5eb72e2f6b0
+size 263314
k256-gpt2-exp36-seed2/h.6/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768}
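Unlike the Pythia runs, this cfg sets num_latents to 0, so the width presumably falls back to expansion_factor * d_in = 36 * 768 = 27,648 latents; the pointer size below bears that out:

```python
# Assumption: with num_latents == 0, the effective width is
# expansion_factor * d_in.
d_in, expansion_factor = 768, 36
num_latents = expansion_factor * d_in                 # 27_648
params = 2 * num_latents * d_in + num_latents + d_in  # 42_495_744
print(params * 4)  # 169_982_976 bytes, matching the declared 169_983_304
                   # up to the safetensors header.
```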
k256-gpt2-exp36-seed2/h.6/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32d9338237034971b45396431c0631a8e3d2e50e78b9a4541e5dc16d4ce25a56
+size 169983304
k256-gpt2-exp36/config.json
ADDED
@@ -0,0 +1 @@
+{"sae": {"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["h.6"], "init_seeds": [23, 2], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k256-gpt2-exp36", "wandb_log_frequency": 1, "model": "openai-community/gpt2", "dataset": "Skylion007/openwebtext", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}
k256-gpt2-exp36/h.6/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 36, "normalize_decoder": true, "num_latents": 0, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768}
k256-gpt2-exp36/h.6/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b703f22d92c8f998a44a756e843cfe57e3d822e72b2509b178547e006c7feab
+size 169983304
k256-gpt2-exp36/lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b6c10047d444b83edad95e97d140ba3caa158f05ea2fcc70e7ead72b318db6c
+size 1076
k256-gpt2-exp36/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8f701e3797f8a65ec9593c3c4cb571badb5656e01dc4cd1e883638731b82a1f
+size 172659328
k256-gpt2-exp36/state.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9c839c952a20382a40df09ee7e54964e9325753e0981fea02fb600abc2dab03
+size 443724
k256-sae-mlp-32k-seed2/config.json
ADDED
@@ -0,0 +1 @@
+{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k256-sae-mlp-32k-seed2", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 5, "number_seeds": 1, "data_preprocessing_num_proc": 48}
k256-sae-mlp-32k-seed2/layers.0/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768}
k256-sae-mlp-32k-seed2/layers.0/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8131653080e96b66b9d30ce0ee7677f72cbf8819060d73b2d2f2a37b25340357
+size 201461072
k256-sae-mlp-32k-seed2/layers.3/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768}
k256-sae-mlp-32k-seed2/layers.3/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4879c86e864cedc4443017bd665e3e15bfa580bc9a82028beeb24408a32f3e9d
+size 201461072
k256-sae-mlp-32k-seed2/layers.6.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768}
k256-sae-mlp-32k-seed2/layers.6.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8101a6175d09bc3df7a1e2a279e48420a4420b6353b864b543a104875a26938f
+size 201461072
k256-sae-mlp-32k-seed2/layers.6/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768}
k256-sae-mlp-32k-seed2/layers.6/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2dfc911a48ea63fdc6447df969fe85fa7271190d56ddff72859b13250de9644a
+size 201461072
k256-sae-mlp-32k-seed2/layers.9/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768}
k256-sae-mlp-32k-seed2/layers.9/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:874273198e718a5742f0b4356c43e4448e23e329a43fb32e81fe94fe1b4407f6
+size 201461072
k256-sae-mlp-32k-seed2/lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a645210450394a692a612289cb5fe097161dfa420f3c634f6516bc67841ac2b4
+size 1012
k256-sae-mlp-32k-seed2/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:193cb2b91a48d8aacbf04f4676a1bd79a0a0e6b768e59e883c904e6e8dbf566f
+size 102316366
k256-sae-mlp-32k-seed2/state.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4cc77cffb9fe1350dafac9e95c6aee317cac10f58094321e9955bc6ed8f5a2db
+size 263314
k256-sae-mlp-32k/config.json
ADDED
@@ -0,0 +1 @@
+{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k256-sae-mlp-32k", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 5, "number_seeds": 1, "data_preprocessing_num_proc": 48}
k256-sae-mlp-32k/layers.6.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 32768, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768}
k256-sae-mlp-32k/layers.6.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4344706585162c50e35c2ba9e6124db8a62be22c981b2d145e6f2779aa1fb9fd
+size 201461072
k256-sae-mlp-32k/lr_scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a645210450394a692a612289cb5fe097161dfa420f3c634f6516bc67841ac2b4
+size 1012
k256-sae-mlp-32k/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd3163d8e255e7f61e19f4e8289374c76e105631da8aea840d5017dd5b6444b9
+size 102316366
k256-sae-mlp-32k/state.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50c5737c16bf78e4e78cbc9284778cdf6e04acd915501c1957827310ddd71b98
+size 263314
k256-sae-mlp-4k-seed2/config.json
ADDED
@@ -0,0 +1 @@
+{"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 4084, "k": 256, "multi_topk": false, "skip_connection": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.6.mlp"], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "k256-sae-mlp-4k-seed2", "wandb_log_frequency": 1, "model": "EleutherAI/pythia-160m", "dataset": "/mnt/ssd-1/pile_preshuffled/standard/document.bin", "split": "train", "ctx_len": 2049, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 4000000, "resume": false, "finetune": null, "seed": 42, "number_seeds": 1, "data_preprocessing_num_proc": 48}
k256-sae-mlp-4k-seed2/layers.6.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 32, "normalize_decoder": true, "num_latents": 4084, "k": 256, "multi_topk": false, "skip_connection": false, "d_in": 768}
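To use any of these checkpoints, pair a hookpoint's cfg.json with its sae.safetensors. The tensor names inside the weight files are not shown in this diff, so a sketch that loads and inspects rather than assumes:

```python
import json
from safetensors.torch import load_file

run = "k256-sae-mlp-4k-seed2/layers.6.mlp"  # any hookpoint directory works
cfg = json.load(open(f"{run}/cfg.json"))
tensors = load_file(f"{run}/sae.safetensors")

print(cfg["num_latents"], cfg["k"], cfg["d_in"])
print({name: tuple(t.shape) for name, t in tensors.items()})
```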