unassigned commited on
Commit
c833498
1 Parent(s): 41c65b6

add 2b token runs

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v15.pt +3 -0
  2. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v15_cfg.json +1 -0
  3. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v23.pt +3 -0
  4. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v23_cfg.json +1 -0
  5. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v31.pt +3 -0
  6. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v31_cfg.json +1 -0
  7. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v39.pt +3 -0
  8. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v39_cfg.json +1 -0
  9. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v47.pt +3 -0
  10. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v47_cfg.json +1 -0
  11. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v55.pt +3 -0
  12. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v55_cfg.json +1 -0
  13. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v63.pt +3 -0
  14. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v63_cfg.json +1 -0
  15. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v7.pt +3 -0
  16. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v71.pt +3 -0
  17. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v71_cfg.json +1 -0
  18. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v79.pt +3 -0
  19. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v79_cfg.json +1 -0
  20. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v7_cfg.json +1 -0
  21. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v14.pt +3 -0
  22. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v14_cfg.json +1 -0
  23. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v22.pt +3 -0
  24. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v22_cfg.json +1 -0
  25. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v30.pt +3 -0
  26. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v30_cfg.json +1 -0
  27. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v38.pt +3 -0
  28. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v38_cfg.json +1 -0
  29. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v46.pt +3 -0
  30. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v46_cfg.json +1 -0
  31. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v54.pt +3 -0
  32. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v54_cfg.json +1 -0
  33. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v6.pt +3 -0
  34. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v62.pt +3 -0
  35. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v62_cfg.json +1 -0
  36. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v6_cfg.json +1 -0
  37. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v70.pt +3 -0
  38. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v70_cfg.json +1 -0
  39. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v78.pt +3 -0
  40. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v78_cfg.json +1 -0
  41. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v11.pt +3 -0
  42. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v11_cfg.json +1 -0
  43. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v19.pt +3 -0
  44. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v19_cfg.json +1 -0
  45. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v27.pt +3 -0
  46. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v27_cfg.json +1 -0
  47. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v3.pt +3 -0
  48. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v35.pt +3 -0
  49. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v35_cfg.json +1 -0
  50. concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v3_cfg.json +1 -0
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v15.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e97c72914e51b45d477e4d3d081d1fdd144d02384f8e31b03fba2fb7d72a40b
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v15_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v23.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c382c9450e29c3f14c6e9b640640103ec92e56839f6e17996638c9a57a190eb
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v23_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v31.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f465f20e848bb3715e9cc8624db956df5fef6c6fcc90b41ea5c5557ed93abb4e
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v31_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v39.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf89777c8e6616c4189c0f392b5c1393792ce582ebc0d8daee1c5a3b5bf12d33
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v39_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v47.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:032b1130fe9077742abe44f7903346057dc3af9a5954bf07cdf42e449a7f44ac
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v47_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v55.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d46a18cba74e90cae299762ed1431774a78f8e278427710a064ecf0dd6ab202
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v55_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v63.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3273fd7876079820290f4fdc3f5bb209dad7a518bb8ca24bfd398ae7576be599
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v63_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v7.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03ea8f38194fc7fc3c6fc34138aed35f307e4cbccaf7b168cea152d7848e4db0
3
+ size 67179616
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v71.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5733bad4d356667db55867a411d8b2d45d3fa996c632fb08b10fc7a4a997e482
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v71_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v79.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3e0bf49f971047ce76fbc2c7c3e103873da31fc8b037eccfa9875e2577b71c3
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v79_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v7_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v14.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7afacacac684a397a856fe55389a4dcee08470100e679d5a2e0874ae1e481585
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v14_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v22.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6523e76ebd27aeb7931533f7a624b66093b7aa3154498b42472adddfcd9c68fc
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v22_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v30.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88d8b2fd55028a37e567736fe502fd23d6acfc4908c3a63345c712b3480f090a
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v30_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v38.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5233bbfdfe40a9312d5d6133ad877eb0b0d183e47b2eb20c27bbfd8292afab4
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v38_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v46.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e47de6b3c846d938bfbf5bef41d8b9d97362e1b9ecb4bfad331c4dc66b31e87f
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v46_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v54.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f39ce5d0eb3c49ecdfc948062a28bf81cc88ca93aa09e110945d8c1b4a2ac04
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v54_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:846bfb4552ffd78868642f38a6909033a03c5528a7d33016808fa528c311c961
3
+ size 67179616
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v62.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91521b5decd333ab26ad70c593e4305d3bc74980a39669fb78257438787c4f79
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v62_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v6_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v70.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73bb85f80ff54fa2ee77d7a9ddf089f62bab1294e89bc092859cb259cef90004
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v70_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v78.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e2604a5383ab7cfed42a0309a9f45fcf603e9a4953458551dbb1bbd1554150d
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v78_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v11.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4d74e31c0826fce61245aec45202d4a41411ac755c92247eeb51375b9baf793
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v11_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v19.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:197afa3bec7503155d55caeb9ceaef6a73d82290c07152c0366e582524c3f723
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v19_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v27.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e51babc82c8c8d3dd01ed0b659fd4ed4e92f07a09e9a00e0c57b508729a9680
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v27_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f806c03038bd695370b08bc1fa9e16256b32ea47681d92f603941a6a15fdf71
3
+ size 67179616
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v35.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:297df2b990f03aa3d4b7a12ca68d36d43fb68a8fbd4e9dd8920b1ef6c632771e
3
+ size 67179624
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v35_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}
concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v3_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"}