belerico committed
Commit 4f939b2 (verified) · 1 Parent(s): 5bc7f13

Upload folder using huggingface_hub

baseline-10/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "jumprelu": false, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "jumprelu_per_layer_l0": false, "init_enc_as_dec_transpose": true, "init_b_dec_as_zeros": false, "d_in": 2304}
baseline-10/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67380d850002887d194d9a3633961d3a1c3086d3c1c5f10cebd97aaf04c34bf4
+size 679634256
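This is a Git LFS pointer, so the sha256 above identifies the real weight file. A sketch for verifying a downloaded copy, plus a size sanity check under the assumption that the file holds fp32 encoder/decoder weights and biases:

```python
import hashlib

def sha256_of(path: str) -> str:
    # Stream the file in 1 MiB chunks to avoid loading it all into memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# sha256_of("baseline-10/sae.safetensors") should match the oid above.

# Size check (assumption: fp32 W_enc, W_dec, b_enc, b_dec):
d_in, n_lat = 2304, 16 * 2304
params = 2 * d_in * n_lat + n_lat + d_in  # 169,908,480 parameters
print(params * 4)  # 679,633,920 bytes; + ~336-byte safetensors header = 679,634,256
```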
baseline-6/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "jumprelu": false, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "jumprelu_per_layer_l0": false, "init_enc_as_dec_transpose": true, "init_b_dec_as_zeros": false, "d_in": 2304}
baseline-6/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fe25020847299da53dce0b3bd3754948b5a701201a0b9c283a3e60cecb06afd
+size 679634256
baseline-7/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "jumprelu": false, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "jumprelu_per_layer_l0": false, "init_enc_as_dec_transpose": true, "init_b_dec_as_zeros": false, "d_in": 2304}
baseline-7/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8432c2115b8061c9e73ea507d37b2c5d15f3e00233ae7886f837ddc6f31eda6d
+size 679634256
baseline-8/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "jumprelu": false, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "jumprelu_per_layer_l0": false, "init_enc_as_dec_transpose": true, "init_b_dec_as_zeros": false, "d_in": 2304}
baseline-8/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba1e1aeb88ce21dc5a2c72ad34e16ba0111b011d1a91a623e9e3ebc1f5206f68
+size 679634256
baseline-9/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "jumprelu": false, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "jumprelu_per_layer_l0": false, "init_enc_as_dec_transpose": true, "init_b_dec_as_zeros": false, "d_in": 2304}
baseline-9/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7940564d4b31b4683cf77eb1dacd692460f2f66c3385d5a79c14d2e02ed0e2c8
+size 679634256
config.json ADDED
@@ -0,0 +1 @@
+{"sae": {"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "jumprelu": false, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "jumprelu_per_layer_l0": false, "init_enc_as_dec_transpose": true, "init_b_dec_as_zeros": false}, "batch_size": 4, "max_seq_len": 1024, "num_training_tokens": 1000000000, "cycle_iterator": true, "grad_acc_steps": 1, "micro_acc_steps": 1, "adam_8bit": false, "adam_epsilon": 1e-08, "adam_betas": [0.9, 0.999], "lr": 0.0012, "lr_scheduler_name": "constant", "lr_warmup_steps": 0.0, "l1_coefficient": 0.0, "l1_warmup_steps": 0.0, "use_l2_loss": true, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.0", "layers.1", "layers.2", "layers.3", "layers.4", "layers.5", "layers.6", "layers.7", "layers.8", "layers.9", "layers.10"], "layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "layer_stride": 1, "distribute_modules": false, "save_every": 10000, "normalize_activations": 1, "num_norm_estimation_tokens": 2000000, "clusters": {"k6-c2": [6, 7, 8, 9, 10], "baseline-6": [6], "baseline-7": [7], "baseline-8": [8], "baseline-9": [9], "baseline-10": [10]}, "cluster_hookpoints": {"k6-c2": ["layers.6", "layers.7", "layers.8", "layers.9", "layers.10"], "baseline-6": ["layers.6"], "baseline-7": ["layers.7"], "baseline-8": ["layers.8"], "baseline-9": ["layers.9"], "baseline-10": ["layers.10"]}, "hook": null, "resume_from": null, "keep_last_n": 1, "log_to_wandb": true, "run_name": "checkpoints-clusters/google/gemma-2-2b-1024-topk-128-lambda-0.0-target-L0-None-lr-0.0012", "wandb_log_frequency": 1}
k6-c2/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "jumprelu": false, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "jumprelu_per_layer_l0": false, "init_enc_as_dec_transpose": true, "init_b_dec_as_zeros": false, "d_in": 2304}
k6-c2/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:409961fc4784c6a2d4615febe6982fc9fb63a0000c57491dd4221e188d85128a
+size 679634256
scaling_factors.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05e6e5029edbc7705f386f17d553413a15d16742bbdb27ce42d8adc635953c81
+size 1152
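Given "normalize_activations": 1 and "num_norm_estimation_tokens": 2000000 in config.json, this tiny file (1152 bytes) plausibly holds the estimated per-hookpoint activation scales; that is an assumption. A minimal inspection sketch:

```python
import torch

# Assumption: scaling_factors.pt is a torch-saved object holding the
# activation-normalization scales estimated before training.
scales = torch.load("scaling_factors.pt", map_location="cpu")
print(type(scales), scales)
```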
state.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db5bac701621082abbf949d3d405c648a4f4b0b98a70081d0670a2ea54428583
+size 1771636
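Since the commit message says the folder was uploaded with huggingface_hub, the matching way to fetch it is snapshot_download, pinned to this commit. The repo id below is a placeholder; substitute the actual Hub repo this commit belongs to:

```python
from huggingface_hub import snapshot_download

# Placeholder repo id; revision 4f939b2 is the commit hash shown above.
local_dir = snapshot_download(repo_id="<user>/<repo>", revision="4f939b2")
print(local_dir)  # local path containing baseline-*/, k6-c2/, config.json, ...
```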