diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v15.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v15.pt new file mode 100644 index 0000000000000000000000000000000000000000..51e5752a1f3c0053bff9a17f6af0179a5975d18e --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v15.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e97c72914e51b45d477e4d3d081d1fdd144d02384f8e31b03fba2fb7d72a40b +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v15_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v15_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..19e79d2b6eecba88e2049f897a3a91beeaadb634 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v15_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v23.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v23.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8b7995ff98f2238c498b8884917bc1c245dadd6 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v23.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c382c9450e29c3f14c6e9b640640103ec92e56839f6e17996638c9a57a190eb +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v23_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v23_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..19e79d2b6eecba88e2049f897a3a91beeaadb634 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v23_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v31.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v31.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e1df2e4b9decdd2aceb2a4736a899c17fb413ee --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v31.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f465f20e848bb3715e9cc8624db956df5fef6c6fcc90b41ea5c5557ed93abb4e +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v31_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v31_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..19e79d2b6eecba88e2049f897a3a91beeaadb634 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v31_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v39.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v39.pt new file mode 100644 index 0000000000000000000000000000000000000000..f9a0dcb03256d8da154ced32b66a41e5ec83a935 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v39.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf89777c8e6616c4189c0f392b5c1393792ce582ebc0d8daee1c5a3b5bf12d33 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v39_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v39_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..19e79d2b6eecba88e2049f897a3a91beeaadb634 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v39_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v47.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v47.pt new file mode 100644 index 0000000000000000000000000000000000000000..e502015035c3d20d1d94a0b808fde5f6294ed5b3 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v47.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:032b1130fe9077742abe44f7903346057dc3af9a5954bf07cdf42e449a7f44ac +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v47_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v47_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..19e79d2b6eecba88e2049f897a3a91beeaadb634 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v47_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v55.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v55.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa25243ad298f6a4b8e3d924435680a2470ffd82 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v55.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d46a18cba74e90cae299762ed1431774a78f8e278427710a064ecf0dd6ab202 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v55_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v55_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..19e79d2b6eecba88e2049f897a3a91beeaadb634 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v55_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v63.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v63.pt new file mode 100644 index 0000000000000000000000000000000000000000..38efadc60ad60288646ced38a2417f41a84a4872 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v63.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3273fd7876079820290f4fdc3f5bb209dad7a518bb8ca24bfd398ae7576be599 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v63_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v63_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..19e79d2b6eecba88e2049f897a3a91beeaadb634 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v63_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v7.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v7.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ebc653e1dd9ac20fdb5b054e9fb99c40fb8357f --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03ea8f38194fc7fc3c6fc34138aed35f307e4cbccaf7b168cea152d7848e4db0 +size 67179616 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v71.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v71.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bbd4e3e954d121b21a4a97d8efa74c6df21d1c3 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v71.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5733bad4d356667db55867a411d8b2d45d3fa996c632fb08b10fc7a4a997e482 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v71_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v71_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..19e79d2b6eecba88e2049f897a3a91beeaadb634 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v71_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v79.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v79.pt new file mode 100644 index 0000000000000000000000000000000000000000..dabe292b7b65ed2eee303d6806684898c2490c7b --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v79.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e0bf49f971047ce76fbc2c7c3e103873da31fc8b037eccfa9875e2577b71c3 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v79_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v79_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..19e79d2b6eecba88e2049f897a3a91beeaadb634 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v79_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v7_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v7_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..19e79d2b6eecba88e2049f897a3a91beeaadb634 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v7_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v14.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v14.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cea11056b9449f3a87086b55453a3532ce2965e --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7afacacac684a397a856fe55389a4dcee08470100e679d5a2e0874ae1e481585 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v14_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v14_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bbbd3d41c8554ee48c02d1338354327606350a21 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v14_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v22.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v22.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ed8cba58929125c9e774faff6452dfb02efdc77 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6523e76ebd27aeb7931533f7a624b66093b7aa3154498b42472adddfcd9c68fc +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v22_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v22_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bbbd3d41c8554ee48c02d1338354327606350a21 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v22_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v30.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v30.pt new file mode 100644 index 0000000000000000000000000000000000000000..f481878e059e1afc30617c8e3bb437fdd0cd3762 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88d8b2fd55028a37e567736fe502fd23d6acfc4908c3a63345c712b3480f090a +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v30_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v30_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bbbd3d41c8554ee48c02d1338354327606350a21 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v30_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v38.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v38.pt new file mode 100644 index 0000000000000000000000000000000000000000..53cfa072dd10adfd159690e03bb85f876dae7e7f --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v38.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5233bbfdfe40a9312d5d6133ad877eb0b0d183e47b2eb20c27bbfd8292afab4 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v38_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v38_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bbbd3d41c8554ee48c02d1338354327606350a21 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v38_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v46.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v46.pt new file mode 100644 index 0000000000000000000000000000000000000000..47575ae013b25ade19ee07b93c1e6108ce8e29fd --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v46.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e47de6b3c846d938bfbf5bef41d8b9d97362e1b9ecb4bfad331c4dc66b31e87f +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v46_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v46_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bbbd3d41c8554ee48c02d1338354327606350a21 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v46_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v54.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v54.pt new file mode 100644 index 0000000000000000000000000000000000000000..93f987e7251d6817ef8eaa07f16c78d19505335e --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v54.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f39ce5d0eb3c49ecdfc948062a28bf81cc88ca93aa09e110945d8c1b4a2ac04 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v54_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v54_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bbbd3d41c8554ee48c02d1338354327606350a21 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v54_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v6.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v6.pt new file mode 100644 index 0000000000000000000000000000000000000000..9503fd7c65c6368a4d26ca8c75cba47f886d575f --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:846bfb4552ffd78868642f38a6909033a03c5528a7d33016808fa528c311c961 +size 67179616 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v62.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v62.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec6d45cc59feeda9b9a8d1117de0e16c14148255 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v62.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91521b5decd333ab26ad70c593e4305d3bc74980a39669fb78257438787c4f79 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v62_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v62_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bbbd3d41c8554ee48c02d1338354327606350a21 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v62_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v6_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v6_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bbbd3d41c8554ee48c02d1338354327606350a21 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v6_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v70.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v70.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed2c775101e8b6b2d14d121726080bbd6cb6d044 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v70.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73bb85f80ff54fa2ee77d7a9ddf089f62bab1294e89bc092859cb259cef90004 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v70_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v70_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bbbd3d41c8554ee48c02d1338354327606350a21 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v70_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v78.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v78.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7d545dd898ae843dfbec4f652a9f6c123d63d9d --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v78.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e2604a5383ab7cfed42a0309a9f45fcf603e9a4953458551dbb1bbd1554150d +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v78_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v78_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..bbbd3d41c8554ee48c02d1338354327606350a21 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v78_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v11.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v11.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6f8c6fb3d23f54ec92c146b20fb95ab69a7f2f0 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v11.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4d74e31c0826fce61245aec45202d4a41411ac755c92247eeb51375b9baf793 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v11_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v11_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..82a9a8daa2d22f8e4b6bb796db6645b870811a4d --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v11_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v19.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v19.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a24bd176a2e052956886b80709521a61b3400dc --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v19.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:197afa3bec7503155d55caeb9ceaef6a73d82290c07152c0366e582524c3f723 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v19_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v19_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..82a9a8daa2d22f8e4b6bb796db6645b870811a4d --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v19_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v27.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v27.pt new file mode 100644 index 0000000000000000000000000000000000000000..48c3f3aa350cd4769f7d336fd2f2307c3bbbb7c9 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v27.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e51babc82c8c8d3dd01ed0b659fd4ed4e92f07a09e9a00e0c57b508729a9680 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v27_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v27_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..82a9a8daa2d22f8e4b6bb796db6645b870811a4d --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v27_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v3.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v3.pt new file mode 100644 index 0000000000000000000000000000000000000000..c985e7a01cc9f6371393c30689c43bb6784da288 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f806c03038bd695370b08bc1fa9e16256b32ea47681d92f603941a6a15fdf71 +size 67179616 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v35.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v35.pt new file mode 100644 index 0000000000000000000000000000000000000000..c54ddfa8798ef3208acfc2cc91f37fa5966cf010 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v35.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297df2b990f03aa3d4b7a12ca68d36d43fb68a8fbd4e9dd8920b1ef6c632771e +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v35_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v35_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..82a9a8daa2d22f8e4b6bb796db6645b870811a4d --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v35_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v3_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v3_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..82a9a8daa2d22f8e4b6bb796db6645b870811a4d --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v3_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v43.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v43.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a53a0c655b969327f344f41a92abaafe70f7b4f --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v43.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05338c0c88477141720258a98413b7439db04ac69f6da98c07153fb17d8862b +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v43_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v43_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..82a9a8daa2d22f8e4b6bb796db6645b870811a4d --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v43_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v50.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v50.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee7c40274ee5f6be2c4cf070f972a89fed0f4707 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v50.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b54f11a7dc9852eb563a23f8a783b1f8ff0951eadbf36a15a12b86e558c082 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v50_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v50_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..82a9a8daa2d22f8e4b6bb796db6645b870811a4d --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v50_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v59.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v59.pt new file mode 100644 index 0000000000000000000000000000000000000000..52b291aedc366a6cff3f62883b2eb7cbfb8a4301 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v59.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56f4bf52c777a7138c4805d21b0238cd85a73df31e191baf7d3c6f91238d4d6 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v59_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v59_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..82a9a8daa2d22f8e4b6bb796db6645b870811a4d --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v59_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v66.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v66.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe90ba29aedefcb16bc35d7444e937a217bb404c --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v66.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16c8a5bd1357ef760f3a2b8a21050be444c1f48584a0b039e25cda9bb4eb4343 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v66_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v66_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..82a9a8daa2d22f8e4b6bb796db6645b870811a4d --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v66_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v74.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v74.pt new file mode 100644 index 0000000000000000000000000000000000000000..98f371a4c6d09c4c29cfe9592465daf9d803cc9a --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v74.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9995a97e7400274c963c020a51cb9d0827d270063c396661807a060ded721cd0 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v74_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v74_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..82a9a8daa2d22f8e4b6bb796db6645b870811a4d --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v74_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v0.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v0.pt new file mode 100644 index 0000000000000000000000000000000000000000..61645c5a8b6342642d2b0585f7b250b59fdb3303 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c490b5d18752713810b5241c23ed4d638bdf0c4e22802f586454a7c222e88db +size 67179616 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v0_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v0_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7142e4b3b84976c30c15532847187911e04ce29e --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v0_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v17.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v17.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c6340f14b04e2f1aba5716a62027ff069b5b449 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v17.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac8d107ef524efd3da75e4f66b996685c31c6379ec0a98339f1f582cbe6cc0b2 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v17_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v17_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7142e4b3b84976c30c15532847187911e04ce29e --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v17_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v25.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v25.pt new file mode 100644 index 0000000000000000000000000000000000000000..a925a5667f0868298fc118e3c70682d9355843e6 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v25.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5706c773cfcb785f363d3ba1ef9a5b1a9e7f99f08f555f9d32a6e0b277c67fdc +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v25_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v25_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7142e4b3b84976c30c15532847187911e04ce29e --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v25_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v33.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v33.pt new file mode 100644 index 0000000000000000000000000000000000000000..07c42b6e19a7ec5b69dd5b279d1b3277be05bdbf --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v33.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:032915f3d9cbbfe633b5f75250c50f01ea0343ff4a9d61c7658936cec886449b +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v33_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v33_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7142e4b3b84976c30c15532847187911e04ce29e --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v33_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v41.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v41.pt new file mode 100644 index 0000000000000000000000000000000000000000..4595471a7f486b06a4505545e33aaff9b31a7fa8 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v41.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bce147f6e80b42b9e2dc30524cd9235779b69f74ae63b4e66d842e6dc30caa1 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v41_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v41_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7142e4b3b84976c30c15532847187911e04ce29e --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v41_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v49.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v49.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1996cee9cd2583124ccba9f3f66cd9c456133f2 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v49.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a9cb5bac63fde6b346063dcd3615cbf47b95443073c8d59828f3e642970c742 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v49_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v49_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7142e4b3b84976c30c15532847187911e04ce29e --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v49_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v57.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v57.pt new file mode 100644 index 0000000000000000000000000000000000000000..01e22d4c6acf56c3c9efe947fe2c17f3ab725eda --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v57.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a95b947aa80892dfa11004623b98d2ee633e84aa8eaa67b1ddd2c595f684c093 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v57_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v57_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7142e4b3b84976c30c15532847187911e04ce29e --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v57_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v65.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v65.pt new file mode 100644 index 0000000000000000000000000000000000000000..b86639974c77c6a0b8b157228fb33f5fae6c3b4d --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v65.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51bb71424a0fb1b7ab17d2df89798d389b8f508110f8eab7c22ab218b88bfc95 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v65_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v65_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7142e4b3b84976c30c15532847187911e04ce29e --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v65_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v73.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v73.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3222a9fbe2c70851ad6aed6955fb24c0cfe8c35 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v73.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:448f10a9553ed7aaf01c4e26a7bddeb043f66efab0ef9ac5a6bb88b44907eeba +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v73_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v73_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7142e4b3b84976c30c15532847187911e04ce29e --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v73_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v8.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v8.pt new file mode 100644 index 0000000000000000000000000000000000000000..d74719d296fceab160712c9878292877d79d45f9 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a22663444c9726b28e18cbdc4ed9fa1ca6b679ebd934eb3e17420477ec64db38 +size 67179616 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v8_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v8_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..7142e4b3b84976c30c15532847187911e04ce29e --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-03_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v8_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v12.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v12.pt new file mode 100644 index 0000000000000000000000000000000000000000..7bf7c2dc281c7de35f28f16b69019d156ae75551 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b77b278d2e09f6cbf86ae45795e9d1151e64bdf16afd039610d63bf29816d929 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v12_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v12_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1c3df5185463be2c8fa99a20ef29c4c618313526 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v12_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v20.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v20.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f0f19204a3bf728edf94ff33b80d1450448a36c --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccb10e05cea5e4228b1a9f6c758c1a876b382ae362410b89a5a50acadc1ad97d +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v20_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v20_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1c3df5185463be2c8fa99a20ef29c4c618313526 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v20_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v28.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v28.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2e792a3af13cd6e02cca57a1e4e5181395cd2ef --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31f773e149de3038fc948d9dcab3dc36d93cc2fcd2a904c0a4b011dbb24ef6ff +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v28_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v28_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1c3df5185463be2c8fa99a20ef29c4c618313526 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v28_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v36.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v36.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b210662701da513e4ea63d592e75eadcd042d7e --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v36.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2a31d048936e0e4a91d28079f6902e9d18b021a9a141e89cdb7f53529cf09a4 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v36_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v36_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1c3df5185463be2c8fa99a20ef29c4c618313526 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v36_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v4.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v4.pt new file mode 100644 index 0000000000000000000000000000000000000000..32174f3da1b55aa58c81f06969544d71160685a3 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1f3be6ecd3bac1db6e85cc8c5249e1f623a7d97d5b6cd3e9e8018295bcd613b +size 67179616 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v44.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v44.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb5a33984df25be2944d7baf97bb0c02ec361936 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v44.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbfb45c9eec13f54721cd5e4bb484c8e2b441fc4935a51e9fd9ec8ee62ac18f8 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v44_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v44_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1c3df5185463be2c8fa99a20ef29c4c618313526 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v44_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v4_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v4_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1c3df5185463be2c8fa99a20ef29c4c618313526 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v4_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v52.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v52.pt new file mode 100644 index 0000000000000000000000000000000000000000..2489a6df5d6a90740e00a2d62258ace84ce54ba9 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v52.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6c514d4860bb31fe3d03ffa777105eaa601216c1c8e62fba85bc25d9f5419a0 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v52_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v52_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1c3df5185463be2c8fa99a20ef29c4c618313526 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v52_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v60.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v60.pt new file mode 100644 index 0000000000000000000000000000000000000000..c723ff6e68ebd3091aa56879afbc27e46bfb1fb6 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v60.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aafd74a01a0502f802baa3d3e176d74bc695000821fb2ca54086ab0d4fb1c709 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v60_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v60_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1c3df5185463be2c8fa99a20ef29c4c618313526 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v60_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v68.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v68.pt new file mode 100644 index 0000000000000000000000000000000000000000..317064b0ae9827b292fdafe503facc3f030044bb --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v68.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8934f89cd338241e5070d193dc46683db53773b7e416ecceb467dad5e870c3a2 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v68_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v68_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1c3df5185463be2c8fa99a20ef29c4c618313526 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v68_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v76.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v76.pt new file mode 100644 index 0000000000000000000000000000000000000000..af8d47e923ab7761364e57b76f2665683469ea4a --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v76.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04f2a32651245b3a7b22f25ad517707ddfb37dcbe4aeb91e52d93dcd53383ed4 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v76_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v76_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1c3df5185463be2c8fa99a20ef29c4c618313526 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l11.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v76_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 1.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v13.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v13.pt new file mode 100644 index 0000000000000000000000000000000000000000..79bf390ea7ac22a1c581fa94c884d5ea0c3baf20 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v13.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f97eb6415c0978602a7ab139e5b680b5536ee6080317981b7cfbbd4dd7fc0a89 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v13_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v13_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1ec0186a1eb0ce277832f07609065b3087588881 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v13_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v21.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v21.pt new file mode 100644 index 0000000000000000000000000000000000000000..80f016892e26a5253bc2362a74ade81006400f76 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v21.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d461c4b6e44cea867a54685490df08dbcd68c0a10eb762e673ddc67f10fb4971 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v21_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v21_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1ec0186a1eb0ce277832f07609065b3087588881 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v21_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v29.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v29.pt new file mode 100644 index 0000000000000000000000000000000000000000..0da1e643e723d97a89108b5555c7df4f52cf5a04 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v29.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba8c535687fc1f006609adbf45791a92dd563da27569f2a08e5dc2f2e2e96e51 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v29_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v29_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1ec0186a1eb0ce277832f07609065b3087588881 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v29_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v37.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v37.pt new file mode 100644 index 0000000000000000000000000000000000000000..072cc4afe64a9d6632eb0add51576a129e1f6cf3 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v37.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:101523cc28474679bb80c6c622478d9d3c7fb6557ed55ce8fec9c5723ea13e5e +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v37_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v37_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1ec0186a1eb0ce277832f07609065b3087588881 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v37_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v45.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v45.pt new file mode 100644 index 0000000000000000000000000000000000000000..de9ab77bbf6ef1ff018da81e68f1936ae6a9643c --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v45.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:306169830b08cd12d46aa2c8421d0712a1cf69dc43e78a088103cbe25cc1942b +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v45_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v45_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1ec0186a1eb0ce277832f07609065b3087588881 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v45_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v5.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v5.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d8a2fd9b7edea1a3361ead9983ab93fd8aaa49a --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:567b5f4541ecfe2339c5964f1479ba640c335f1e223ddd06325d65628b083879 +size 67179616 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v53.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v53.pt new file mode 100644 index 0000000000000000000000000000000000000000..c26395c53c01745297e0f03eca5affd4147734ea --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v53.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d430d97012e06add27b1b2c8907799ffc7a5e1361128cb76830ea9833b374957 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v53_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v53_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1ec0186a1eb0ce277832f07609065b3087588881 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v53_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v5_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v5_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1ec0186a1eb0ce277832f07609065b3087588881 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v5_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v61.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v61.pt new file mode 100644 index 0000000000000000000000000000000000000000..5255c0c6eeda5a10888a0a41bf240040936a330c --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v61.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b332d4c3d0469267e85384541bb4ed743a1c2b7e0e834a03093e33d1bd773ea +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v61_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v61_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1ec0186a1eb0ce277832f07609065b3087588881 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v61_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v69.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v69.pt new file mode 100644 index 0000000000000000000000000000000000000000..3bcb44c3c933f34c36d69b68644af8fdae1839d9 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v69.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13421c9ffd77d391c94da4d39e06557cc387c264f73e75ab5016a41c0af794d5 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v69_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v69_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1ec0186a1eb0ce277832f07609065b3087588881 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v69_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v77.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v77.pt new file mode 100644 index 0000000000000000000000000000000000000000..539c958c18e9688a667f58a540178bf818ef4840 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v77.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38cc0114714ec01c2e85014363d854cd1b31ba5d74719ce3210560b8ce742efc +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v77_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v77_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..1ec0186a1eb0ce277832f07609065b3087588881 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l12.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v77_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 2.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v10.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v10.pt new file mode 100644 index 0000000000000000000000000000000000000000..4363884a00ee210f85645a9528494a7d06f0929a --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd331da49452e914b5cb6c8cc67f8ad5eed5d328ccfa8453dbf9666bf507edd3 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v10_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v10_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4efc5a5c1a8f6cf1aaa938137eef0cab5d6073fc --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v10_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v18.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v18.pt new file mode 100644 index 0000000000000000000000000000000000000000..a71adb5da5c008e4dac13587a2bd4dda8f8c720f --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cf74aecea5a8669ae80c0e58b7de0f87569074ede04dc700ee99881e17e6154 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v18_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v18_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4efc5a5c1a8f6cf1aaa938137eef0cab5d6073fc --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v18_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v2.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v2.pt new file mode 100644 index 0000000000000000000000000000000000000000..748a09d341abca16b495c7ac61c6d834b696a354 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecd6541effaa3a16dff5dfe155cbda9340dd4736ea9fd990d0460dd6bdd2a25b +size 67179616 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v26.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v26.pt new file mode 100644 index 0000000000000000000000000000000000000000..86c7679a1784ed059eee473b68c217a81885cc34 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a607eb1711b5747a747cc3eeab16b4460bdf74b35d59606fb4a28a48843adaed +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v26_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v26_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4efc5a5c1a8f6cf1aaa938137eef0cab5d6073fc --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v26_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v2_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v2_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4efc5a5c1a8f6cf1aaa938137eef0cab5d6073fc --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v2_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v34.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v34.pt new file mode 100644 index 0000000000000000000000000000000000000000..abef67c2902405e53c9527bc47619cec3cabe071 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v34.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36134bd0b41ca977214fbab9688987047ed10add5077cb3d6e378f1f6777f158 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v34_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v34_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4efc5a5c1a8f6cf1aaa938137eef0cab5d6073fc --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v34_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v42.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v42.pt new file mode 100644 index 0000000000000000000000000000000000000000..811786332a5da375eb24b07311e4e08019e63c67 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v42.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc4db758d49a5646e2d9388dd2d55751096437e0967fa7f72f7c4c30fa0c6ff +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v42_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v42_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4efc5a5c1a8f6cf1aaa938137eef0cab5d6073fc --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v42_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v51.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v51.pt new file mode 100644 index 0000000000000000000000000000000000000000..7436a093c3c80fd661ec8ea2a9a35398f31cf128 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v51.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2598a516d39645005ad574e425ea5ebbb6585f99020e8293f3d967a356691f04 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v51_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v51_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4efc5a5c1a8f6cf1aaa938137eef0cab5d6073fc --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v51_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v58.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v58.pt new file mode 100644 index 0000000000000000000000000000000000000000..973f37a44c0b964256293fac78cef015eed2ec1f --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v58.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43dc2be3cff3f635de3f8801a1a61a426988b7c1aff80adfa92099e1f9ee1b18 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v58_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v58_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4efc5a5c1a8f6cf1aaa938137eef0cab5d6073fc --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v58_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v67.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v67.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0ad1a60961fdb91d38b700715c9489a2a0e329e --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v67.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dffc9c4839f813679475c928a2f41f36444680712241fae7dda36ab0cbb82200 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v67_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v67_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4efc5a5c1a8f6cf1aaa938137eef0cab5d6073fc --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v67_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v75.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v75.pt new file mode 100644 index 0000000000000000000000000000000000000000..eac6c38d5d58d0b342661aa827a831fbc0dbc566 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v75.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c9f599e45dc7e5652ef5b7c74cf7ae72acceecc0c7cebd2d0b8ac028c31181a +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v75_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v75_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4efc5a5c1a8f6cf1aaa938137eef0cab5d6073fc --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l14.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v75_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 4.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v1.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v1.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f7e5a2a489cbcdb8745f45f37670598157ab1b5 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb06d007c309e33560e62a0cec98738c2e1ebb51c4fb9611f8ecadb8d34321c3 +size 67179616 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v16.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v16.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebbff4fb2d8930444676b683d970e669ceda1d0b --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afc9a10828afd2e6aa00ec8822d5f7f134b5adaf3370082940a32880a204beb0 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v16_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v16_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4b4b1ab7b61856b6fe36b61417229d3ec18d1e2c --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v16_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v1_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v1_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4b4b1ab7b61856b6fe36b61417229d3ec18d1e2c --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v1_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v24.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v24.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e0f9a37c8cd86484926df76ef0b8cd46298d14c --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f20c9a348d335308420fefd1da44c48e5d03ea237a99691a05f010afb90720 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v24_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v24_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4b4b1ab7b61856b6fe36b61417229d3ec18d1e2c --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v24_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v32.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v32.pt new file mode 100644 index 0000000000000000000000000000000000000000..423fecdf7b3be44aea25106e36bd8baf5722c850 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5777c5c46ad28e14ecfe15b38a18fa01e18f2f45bcb985804b1c83662e1071c5 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v32_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v32_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4b4b1ab7b61856b6fe36b61417229d3ec18d1e2c --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v32_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v40.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v40.pt new file mode 100644 index 0000000000000000000000000000000000000000..6186d92f253d90f89e56e9a550501a195d5a2604 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v40.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72544ee283e30d452e04b3c6053f14ab66786f0a3f68a73d60536198c6fc9878 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v40_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v40_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4b4b1ab7b61856b6fe36b61417229d3ec18d1e2c --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v40_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v48.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v48.pt new file mode 100644 index 0000000000000000000000000000000000000000..93ab3111a5a6265b3364e5b78a01ba1951fd471d --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v48.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d1943b88f53ee5d7581854627d6d22d46cb28f72174008da1f392f0e563c80 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v48_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v48_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4b4b1ab7b61856b6fe36b61417229d3ec18d1e2c --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v48_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v56.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v56.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4255c651732a8b12674f600c0b79065fcd56406 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v56.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2711b65e24f077e5be54c46b0ff8763c2fb3dd5688eb32d1bc2dfce9be0915a +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v56_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v56_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4b4b1ab7b61856b6fe36b61417229d3ec18d1e2c --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v56_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v64.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v64.pt new file mode 100644 index 0000000000000000000000000000000000000000..42047d2b2e6be388ee8aacbe6845762bb7b5fc67 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v64.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eec20cb90dbed26ad6b3562523ac0f4f4aa4a94c41e2cd6a5f1c693c0bfa6b29 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v64_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v64_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4b4b1ab7b61856b6fe36b61417229d3ec18d1e2c --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v64_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v72.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v72.pt new file mode 100644 index 0000000000000000000000000000000000000000..538de31d151ee78f3d7ada40f5cca4569232aceb --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v72.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:778b1ddd909269053eb1c4e273169169c0dbeeaf19af1b21dde861daefd85314 +size 67179624 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v72_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v72_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4b4b1ab7b61856b6fe36b61417229d3ec18d1e2c --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v72_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v9.pt b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v9.pt new file mode 100644 index 0000000000000000000000000000000000000000..62841b2e58f2901b29c6af560e23fdfb6200fed0 --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c2d0fa743c220fc252f3c150e917cff2847972b398dd1d2ac2c0293d6e4a0fb +size 67179616 diff --git a/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v9_cfg.json b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v9_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..4b4b1ab7b61856b6fe36b61417229d3ec18d1e2c --- /dev/null +++ b/concat-z-gelu-21-l1-lr-sweep-3/gelu-2l_L1_Hcat_z_lr1.00e-04_l18.00e+00_ds16384_bs4096_dc1.00e-07_rie50000_nr4_v9_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 8.0, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "model_name": "gelu-2l", "site": "z", "layer": 1, "device": "cuda", "reinit": "reinit", "head": "cat", "concat_heads": true, "dead_direction_cutoff": 1e-07, "re_init_every": 50000, "anthropic_resample_last": 25000, "resample_factor": 0.01, "num_resamples": 4, "wandb_project_name": "concat-z-gelu-21-l1-lr-sweep-3", "wandb_entity": "ckkissane", "save_state_dict_every": 50000, "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.1.attn.hook_z", "act_size": 512, "dict_size": 16384, "name": "gelu-2l_1_16384_z"} \ No newline at end of file