diff --git a/0.pt b/0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3967f75d7a67d6d7a3b97b97d13282872979df58 --- /dev/null +++ b/0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ea7a53d76d4e1d906a6199c50bfc04a11337c29b920e0e842750b2001c41662 +size 2102587 diff --git a/0_cfg.json b/0_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/0_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/1.pt b/1.pt new file mode 100644 index 0000000000000000000000000000000000000000..5bd0412d26e897dce5003303bb9622c34e9209b3 --- /dev/null +++ b/1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd478941ba7f7e271cec82f5de601531299363867808749392eb1bf1b88a7399 +size 2102587 diff --git a/10.pt b/10.pt new file mode 100644 index 0000000000000000000000000000000000000000..123321099094a351430ada418400329ef2a433e9 --- /dev/null +++ b/10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5ffa7d3e78d182583b2d343847c755c822b6b9f4c9165e9935dc65094070a3 +size 2102593 diff --git a/10_cfg.json b/10_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/10_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/11.pt b/11.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4cc1ea2ad99c435aac41b3ce977532dfc7844fa --- /dev/null +++ b/11.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eff5686a9ea7f2099f865c93debfaad8763731c73e2aa2ae5d685bf47ccca6b +size 2102593 diff --git a/11_cfg.json b/11_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/11_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/12.pt b/12.pt new file mode 100644 index 0000000000000000000000000000000000000000..cfae8a0a95dcaa3da59a2807a283faebf8b11d4f --- /dev/null +++ b/12.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92802a4c504c4eee03a7d1c1bf0025f1c5ff25892c44c85fa07fe184748e91de +size 2102593 diff --git a/12_cfg.json b/12_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/12_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/13.pt b/13.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0470562c3bff89cced227bb4c2ba208a974fea0 --- /dev/null +++ b/13.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:194f4b321aafad58a04a24a2ba9db27d2c036d6ec317e6ef2db6f61e32e80310 +size 2102593 diff --git a/13_cfg.json b/13_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/13_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/14.pt b/14.pt new file mode 100644 index 0000000000000000000000000000000000000000..a755b60a81fd98518be039b3c49f6366f9bb62c8 --- /dev/null +++ b/14.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac371d2573979709a717e702aa7af567fed2b8285493f28f7f2500e47c421ce +size 2102593 diff --git a/14_cfg.json b/14_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/14_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/15.pt b/15.pt new file mode 100644 index 0000000000000000000000000000000000000000..ffe4dcf31600a4e0d5a01e12719df9b32936d190 --- /dev/null +++ b/15.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12664d3b9b56bfcc7f175b623e2c83ae2466705f52772f8a571369d83c261836 +size 2102593 diff --git a/15_cfg.json b/15_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/15_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/16.pt b/16.pt new file mode 100644 index 0000000000000000000000000000000000000000..e18c24ec3f46e702130932ceb29508ed858c5b53 --- /dev/null +++ b/16.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c86371bcb89f0178d5874484b09bb250818c8adab9f3a1e9cd8af2b90471b55f +size 2102593 diff --git a/16_cfg.json b/16_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/16_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/17.pt b/17.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff7dc1d3e127367275ba121a58826ef6e1866bca --- /dev/null +++ b/17.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ab96c4592862ffd21a85b3f5a6b4446641f550ee78fb49ebd2e7b6dd019aa5d +size 2102593 diff --git a/17_cfg.json b/17_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/17_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/18.pt b/18.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9836ebeac565505de5047a79b481528725e610e --- /dev/null +++ b/18.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9665a38d9c692621c7bfc973b6119d05c76f05e165edae4059df386fcc44e401 +size 2102593 diff --git a/18_cfg.json b/18_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/18_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/19.pt b/19.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ea835215783101d79485590c181eb7b00c4b02e --- /dev/null +++ b/19.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7e8ce1c3db1a50d1a39500e024e8ca180ecbdceaf9552f44f5aac3a2cc12a5 +size 2102593 diff --git a/19_cfg.json b/19_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/19_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/1_cfg.json b/1_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/1_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/2.pt b/2.pt new file mode 100644 index 0000000000000000000000000000000000000000..065bda81a3874a96f1a97cf85981a0b7aa1fdbec --- /dev/null +++ b/2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e85ac421763b164c0634a7fb696efa5faf31adc56699157a7ceee6f53ec4c852 +size 2102587 diff --git a/20.pt b/20.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ae13424922271a578d99c5a00c746bcee7dd527 --- /dev/null +++ b/20.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9abab491b76ad658df1c869213fc2733d9d2d4dc6b95efb747753a2277292f90 +size 2102593 diff --git a/20_cfg.json b/20_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/20_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/21.pt b/21.pt new file mode 100644 index 0000000000000000000000000000000000000000..17f8dee9b5188e977fd7d947189477ec3c1624c0 --- /dev/null +++ b/21.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:027b1b770b383dbac814ee3f63f3749c3a572d63b3feb7a5b2d4fb8dc842a879 +size 2102593 diff --git a/21_cfg.json b/21_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/21_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/22.pt b/22.pt new file mode 100644 index 0000000000000000000000000000000000000000..426e34e6391e16f6c423375b6041a4aae1aed70d --- /dev/null +++ b/22.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aa7278e56bcb37281291aa865c57387c98f3909f7c74e5c6d7f245dbd59cc04 +size 2102593 diff --git a/22_cfg.json b/22_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/22_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/23.pt b/23.pt new file mode 100644 index 0000000000000000000000000000000000000000..866941f3f0c8067833a295750d8b55c61a460c94 --- /dev/null +++ b/23.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4fce8ee30f0dd2038cb7772b49ae75718f917123cd727613748fd8552b540da +size 2102593 diff --git a/23_cfg.json b/23_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/23_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/24.pt b/24.pt new file mode 100644 index 0000000000000000000000000000000000000000..3176cde38c23cab46a3610cd1ab2d9f03cbc9318 --- /dev/null +++ b/24.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dc894d1279e796213ab42457226d328446a9e4043de190fbfdb771dbe3264ff +size 2102593 diff --git a/24_cfg.json b/24_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/24_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/25.pt b/25.pt new file mode 100644 index 0000000000000000000000000000000000000000..96a3e1bcafaaeff0060f2ce7df53fc78c31cfdba --- /dev/null +++ b/25.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7131889ea47fe29f8e95728f7a66be3077672b5095ec306f2721b98d9958d083 +size 2102593 diff --git a/25_cfg.json b/25_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/25_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/26.pt b/26.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b6accad7111838c6e9ec53f52cd8a36f8b68723 --- /dev/null +++ b/26.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c06c0717cb65dd62b7c2d7509ebc8d428a67dcc2a0095878d422c5f3dccd2a19 +size 2102593 diff --git a/26_cfg.json b/26_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/26_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/27.pt b/27.pt new file mode 100644 index 0000000000000000000000000000000000000000..2195d88e1be93dd30628e21abdc856f971176f76 --- /dev/null +++ b/27.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bc7532528a79d0cde2fa092092400ffa2f1b16f80e1574db051eddede40e993 +size 2102593 diff --git a/27_cfg.json b/27_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/27_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/28.pt b/28.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8e4422feb5eace28fb5d523b286a602cf80e1b4 --- /dev/null +++ b/28.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c4ea9ff96ec5104239d71e2587a27cafd3aa3a97f82c4adbaffe1d974133714 +size 2102593 diff --git a/28_cfg.json b/28_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/28_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/29.pt b/29.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa492f7a7467b66d0b5f5cfa068804fa5e974bd2 --- /dev/null +++ b/29.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac47f54b7c3e2d5616fdc723717660efe5a4fc0711a53d4b8909192f6f3e5afb +size 2102593 diff --git a/29_cfg.json b/29_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/29_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/2_cfg.json b/2_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/2_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/3.pt b/3.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3d7060a8520f1dd3c3691f81e71118abfda6a79 --- /dev/null +++ b/3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a7981bf4d4721ae69d76458c1fc6759afe6f7afd12214f0b8b477eab3d04636 +size 2102587 diff --git a/30.pt b/30.pt new file mode 100644 index 0000000000000000000000000000000000000000..6baed6aaf2e9117ef61ad62f68b0a3d2a8d44e3f --- /dev/null +++ b/30.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ee24664f0f1de2380b12ce4e5b079838e9ec61863abfc9a648c2a7e304c95ee +size 2102593 diff --git a/30_cfg.json b/30_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/30_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/31.pt b/31.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6538f7846aa43c2cbd80b81ce52665f0d31d1e9 --- /dev/null +++ b/31.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c95fa718c05fd386a42bee04b55d9add73e9ec10520e7fff59ec3f02787175b7 +size 2102593 diff --git a/31_cfg.json b/31_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/31_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/32.pt b/32.pt new file mode 100644 index 0000000000000000000000000000000000000000..29faf9711d67842e6957d756767adb91c57b1a69 --- /dev/null +++ b/32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ee1ee6bc3d952e6062aeccbf6649420ffc4c1075d559a9d062d6fccd61fb4e +size 2102593 diff --git a/32_cfg.json b/32_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/32_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/33.pt b/33.pt new file mode 100644 index 0000000000000000000000000000000000000000..43099ee584912bce707ff16831822c91ee111100 --- /dev/null +++ b/33.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60d4d28bec573d2487f1aeddf73218b6ddb169e65d549ff76519a73ea6ea701e +size 2102593 diff --git a/33_cfg.json b/33_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/33_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/34.pt b/34.pt new file mode 100644 index 0000000000000000000000000000000000000000..5dc2e7bd63a9be961672455ab26538ca02edeac3 --- /dev/null +++ b/34.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10c824e18f8610403b1c61276419597b337784a3c100c4171c3673f3dfa46b57 +size 2102593 diff --git a/34_cfg.json b/34_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/34_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/35.pt b/35.pt new file mode 100644 index 0000000000000000000000000000000000000000..5902ae7ef85bfc4227f92c496300526fddde41b5 --- /dev/null +++ b/35.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a09f0f4069fb0a4f835e4ca9ae73359e5951168bc213669a3bf40583d86d8418 +size 2102593 diff --git a/35_cfg.json b/35_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/35_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/36.pt b/36.pt new file mode 100644 index 0000000000000000000000000000000000000000..40c6c9efe12ae3ee250bba3768abde788817591a --- /dev/null +++ b/36.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a284600c419c4f893d100b5d26c3535b8e47c1794b3f79763b76cecea61a21eb +size 2102593 diff --git a/36_cfg.json b/36_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/36_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/37.pt b/37.pt new file mode 100644 index 0000000000000000000000000000000000000000..e66a1e8de73c2d800e94a7c6f87e475632186fa1 --- /dev/null +++ b/37.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfe9dcc9021cb7bb6598208aa535b7ecd9a6a18eb848f33e75a0abec84e124b6 +size 2102593 diff --git a/37_cfg.json b/37_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/37_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/38.pt b/38.pt new file mode 100644 index 0000000000000000000000000000000000000000..05610c6730720b7a70dd603b68c63951c6760b11 --- /dev/null +++ b/38.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff092fb75e29f0167ae16f711aa4c3c3f6341f8594454f7235c7dac2ad6b9a31 +size 2102593 diff --git a/38_cfg.json b/38_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/38_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/39.pt b/39.pt new file mode 100644 index 0000000000000000000000000000000000000000..12410af7d428aea09a7bc3371ec2642df8aecb4d --- /dev/null +++ b/39.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a89eb984a11f8bf3d2895f9d2cccf53fe445bde801b083acd3e4c0c4eb9e2076 +size 2102593 diff --git a/39_cfg.json b/39_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/39_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/3_cfg.json b/3_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/3_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/4.pt b/4.pt new file mode 100644 index 0000000000000000000000000000000000000000..ece51e8e57b7c46b75a1f08c2570944dd0b32b84 --- /dev/null +++ b/4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41c6ea103207e75377c4d8f5981616401cb1609dcbf61b4cb79bd1cd169c691f +size 2102587 diff --git a/40.pt b/40.pt new file mode 100644 index 0000000000000000000000000000000000000000..62bef6da77e0cc3d914345e85f3768870e10971c --- /dev/null +++ b/40.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45b6aa3f068be2a13c642bb35e5beb0e4f55853445eaa72189ae198f5d12d721 +size 2102593 diff --git a/40_cfg.json b/40_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/40_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/41.pt b/41.pt new file mode 100644 index 0000000000000000000000000000000000000000..42addd228d105254078b6a643642c6bb111d83d6 --- /dev/null +++ b/41.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0877c19d282392bade514f6f7509146999b11b49f52fab7772c888bcfc6d43a0 +size 2102593 diff --git a/41_cfg.json b/41_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/41_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/42.pt b/42.pt new file mode 100644 index 0000000000000000000000000000000000000000..df962f19b559b309b31549c107dd1497f5a4f9ab --- /dev/null +++ b/42.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd1343d900beb70735b1d0a9cfa00bd471159ce2a77ca91670ef693081030e82 +size 2102593 diff --git a/42_cfg.json b/42_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/42_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/43.pt b/43.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e87b6f519b407776d8c5a3b51e6d770ff669666 --- /dev/null +++ b/43.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfb26e05f6b2c0baac7897f6522b45e5a6745f7466ab3173c7e323dfd887db00 +size 2102593 diff --git a/43_cfg.json b/43_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/43_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/44.pt b/44.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e4163a4f205aee4c1c17500b917933d31ff1186 --- /dev/null +++ b/44.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c9e332beccccdd32a94d7cd884b070d0135c45d8cd8a7311af8b7e136ca0546 +size 2102593 diff --git a/44_cfg.json b/44_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/44_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/45.pt b/45.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0d7e7f645d82bcf9d4060c59507f00d13af4c71 --- /dev/null +++ b/45.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:858839c4aae25637186b6c5efe03731fe2d4c79d27fdf8df687f530cb6cbc44b +size 2102593 diff --git a/45_cfg.json b/45_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/45_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/46.pt b/46.pt new file mode 100644 index 0000000000000000000000000000000000000000..06a44d63c88bdbf1857f883aed31d8a9d747ff36 --- /dev/null +++ b/46.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad732964f0d9af0a12acebe32f80b9c24514e7ea2c4b3a02eb80699eb3b176ec +size 2102593 diff --git a/46_cfg.json b/46_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/46_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/47.pt b/47.pt new file mode 100644 index 0000000000000000000000000000000000000000..cfcfb695392ad420b80c448167a59502b487e70c --- /dev/null +++ b/47.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57f24f700bcbccf4e4dd7e83d8306e476e8866ca3bf97077d11b5505a8217a60 +size 2102593 diff --git a/47_cfg.json b/47_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/47_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/48.pt b/48.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e5469b4896392ffc30c4bbabd1b9fbd56a97870 --- /dev/null +++ b/48.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:264602fb8195daee384e1a070d21ae6639ed3879268e74de78bce13f40945177 +size 2102593 diff --git a/48_cfg.json b/48_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/48_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/49.pt b/49.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1ac234304b21949b9690bba8fd359788314d361 --- /dev/null +++ b/49.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e653c902b374ea5e03470afab839662293c6140c16df0d6a407f80694fb3874 +size 2102593 diff --git a/49_cfg.json b/49_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/49_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/4_cfg.json b/4_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/4_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/5.pt b/5.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e0dc77ccb71dfd071c983678638864f0b2b24d8 --- /dev/null +++ b/5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:001c8b83ed62c3ae18fc5c59b51efb6e2a7526c1e657d0a01de2ff997a023c60 +size 2102587 diff --git a/50.pt b/50.pt new file mode 100644 index 0000000000000000000000000000000000000000..e35b3b0bf3175506a454cd8295313390c13d8879 --- /dev/null +++ b/50.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297c7922ffb8779eafacb95d87964b074e45975f026c5c68479fd7647969a08a +size 2102593 diff --git a/50_cfg.json b/50_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/50_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/51.pt b/51.pt new file mode 100644 index 0000000000000000000000000000000000000000..591067c4d129887eb002c841a80cf5c167e5ab19 --- /dev/null +++ b/51.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dbd6cc9212aa89fa47897c14053767b954ebefa76112f4315a92da75ab4bae8 +size 2102593 diff --git a/51_cfg.json b/51_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/51_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/52.pt b/52.pt new file mode 100644 index 0000000000000000000000000000000000000000..18a81fd7dc001b26bd173a3c6c4763f2ccc043d0 --- /dev/null +++ b/52.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e48d9ba2316a50e30983559af1d6bf8f0538e6b2c9542cd08b675a0bb7cca854 +size 2102593 diff --git a/52_cfg.json b/52_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/52_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/53.pt b/53.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b0ca3f597801ba069dd2c0ee807aa8d79eaf258 --- /dev/null +++ b/53.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:658c78578f856b57228afba095485b6ed5ce8ee727218ac0c95eb41d8a016f36 +size 2102593 diff --git a/53_cfg.json b/53_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/53_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/54.pt b/54.pt new file mode 100644 index 0000000000000000000000000000000000000000..a896a5dc11e1bbc917f8e8eb815ce9e175c5d9f6 --- /dev/null +++ b/54.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93cc840a86c31619a29425d99609c722cf4ebc5e0aa463d8e7c27cfeed5c5d2e +size 2102593 diff --git a/54_cfg.json b/54_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/54_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/55.pt b/55.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ae6213a3b9608c198217272a9a93a567df5337b --- /dev/null +++ b/55.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b84621424a408674b2c3efd2d13e02beab21acffbea3f87d065dfbbfe86c17 +size 2102593 diff --git a/55_cfg.json b/55_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/55_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/56.pt b/56.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e855592d350b78c253e1c5161167acb92058db1 --- /dev/null +++ b/56.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d2c1b05500e2d9c8affb3d2d5628ddda31e332681d6baf9bb2bfb29b9aa430f +size 2102593 diff --git a/56_cfg.json b/56_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/56_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/57.pt b/57.pt new file mode 100644 index 0000000000000000000000000000000000000000..198e7150fbf16947f3888cc7f7f8a8499d93d65f --- /dev/null +++ b/57.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:043e1bb513bf50b8f60432d10e0db89edffd7e3c5452107813f26b6d55f3db41 +size 2102593 diff --git a/57_cfg.json b/57_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/57_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/58.pt b/58.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8ebf8283d8fcb83cb8240b35c7219fe122ad614 --- /dev/null +++ b/58.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ec7f56c489b553159edb9276654f273e73667ee438dec967d4b471c229f72b8 +size 2102593 diff --git a/58_cfg.json b/58_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/58_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/59.pt b/59.pt new file mode 100644 index 0000000000000000000000000000000000000000..18304025955224cecdb0f9b7fdaf7e246fc0a07b --- /dev/null +++ b/59.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3da6a3f46c7739998ac4fbd2142a7cdcf9253ae3ffd843296194316380a4e87f +size 2102593 diff --git a/59_cfg.json b/59_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/59_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/5_cfg.json b/5_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/5_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/6.pt b/6.pt new file mode 100644 index 0000000000000000000000000000000000000000..94884fa2c8186be5d91a06e5f072e89a00973673 --- /dev/null +++ b/6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12aaa3a217c38486abf5246bc615a0ec63b40f6133588be2a279b0474ff1a726 +size 2102587 diff --git a/60.pt b/60.pt new file mode 100644 index 0000000000000000000000000000000000000000..0fd79a3354e08c6d1d6c47ebbb52c791782c6487 --- /dev/null +++ b/60.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9be83ea5eeb6e2efda14605bbba80d8e3c4dba4d9a9202c603119dc8a9bceef +size 2102593 diff --git a/60_cfg.json b/60_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/60_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/61.pt b/61.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3584b8e6cd29af7fe03f3882310f165b45ac978 --- /dev/null +++ b/61.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a212bbcc8b278ada8766ae7cb1a640446073adde0435dd615067af9e239831b +size 2102593 diff --git a/61_cfg.json b/61_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/61_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/62.pt b/62.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8e3d7666725bcba38639578bf68c9c4cc9f20ca --- /dev/null +++ b/62.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8adb34d1e84e2c63d4237bddd9aeb97eaa0c77fa149b5446f50bb443bb653b43 +size 2102593 diff --git a/62_cfg.json b/62_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/62_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/63.pt b/63.pt new file mode 100644 index 0000000000000000000000000000000000000000..0564e9f4403a55f7f7fdf0d556c9634ea44c737e --- /dev/null +++ b/63.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d566962f4730f4bb18769f5fc1a0c1a2cac7b30bef79ec2c84c2299eef939e9 +size 2102593 diff --git a/63_cfg.json b/63_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/63_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/64.pt b/64.pt new file mode 100644 index 0000000000000000000000000000000000000000..124f6566e626643190fb9958b306d0f7227ce830 --- /dev/null +++ b/64.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0033ea7ac9443d2e4527afe03ff969e766f5105fa5e889f4c0044ac15f68d9b +size 2102593 diff --git a/64_cfg.json b/64_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/64_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/65.pt b/65.pt new file mode 100644 index 0000000000000000000000000000000000000000..d415e42107c88fd04fbba904960ef9bef3a87bc3 --- /dev/null +++ b/65.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bdd30cd146bbd2d7db3a008b21289ea51f7955ebd61cc6554f9a73747f7fbd8 +size 2102593 diff --git a/65_cfg.json b/65_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/65_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/66.pt b/66.pt new file mode 100644 index 0000000000000000000000000000000000000000..795036dbf2a9e04835e5a9d3dbe60934bf443f06 --- /dev/null +++ b/66.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ceea20a7bf17687bb29565ef619262ea07f3c8aff2839690473c481eced4626 +size 2102593 diff --git a/66_cfg.json b/66_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/66_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/67.pt b/67.pt new file mode 100644 index 0000000000000000000000000000000000000000..00ae45f612aa476fcb53b11bb18b3089627b81e8 --- /dev/null +++ b/67.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6daf0b3a2a8b6875933fbab2e6e9fcf6c5f9be8ef842796f9b5e89e84fe601c +size 2102593 diff --git a/67_cfg.json b/67_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/67_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/68.pt b/68.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb981649313b173cc8ba27e47e2a3fa7a459ae55 --- /dev/null +++ b/68.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:136ba454035741ed94fdbd208049ecc4a1827cd67a45cdd53425ae4141ea03a4 +size 2102593 diff --git a/68_cfg.json b/68_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/68_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/69.pt b/69.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f127ae4fda9b1a5f9bc3dfb819ed0418293a3c7 --- /dev/null +++ b/69.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:952edf9a48611d66fe8a36bdb30fdb415122ba18647b0e7a9b7463ebacddb2f3 +size 2102593 diff --git a/69_cfg.json b/69_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/69_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/6_cfg.json b/6_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/6_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/7.pt b/7.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d4f5522e084259966f28ce7b439d59125ec68e0 --- /dev/null +++ b/7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a6fb82a60d493028439f7f631c6dc3e456f8310e2d8936085f71071e1771579 +size 2102587 diff --git a/70.pt b/70.pt new file mode 100644 index 0000000000000000000000000000000000000000..eef520b9fb071f03b8488cb887926f6409c2f442 --- /dev/null +++ b/70.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5900b35ac4ff2872b3ca3b7d8cf9917a7fe64c9acefa1f1f62bbe28e20311bc1 +size 2102593 diff --git a/70_cfg.json b/70_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/70_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/71.pt b/71.pt new file mode 100644 index 0000000000000000000000000000000000000000..404dadfa3c8ea16250969b665728696970640446 --- /dev/null +++ b/71.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d9af71e0745189007e0c1357b17e123b1cab7523fcb41f8c482247271c38afb +size 2102593 diff --git a/71_cfg.json b/71_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/71_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/72.pt b/72.pt new file mode 100644 index 0000000000000000000000000000000000000000..7055899152d000020c52f2f84ef49d7318c26be3 --- /dev/null +++ b/72.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:103b7716342c0b65856a48a3f2d1378741859ea05265e7f80b784339f42a40e3 +size 2102593 diff --git a/72_cfg.json b/72_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/72_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/73.pt b/73.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f91660cccf331d8abe15b96034b9fa9f2943e4f --- /dev/null +++ b/73.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b66f577a45360768d914c5869d6aba519ec93b2c220ca316109795ea97975ff5 +size 2102593 diff --git a/73_cfg.json b/73_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/73_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/74.pt b/74.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb29898b57ee35b5269340224052e6605de4deb1 --- /dev/null +++ b/74.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2586a4f8c5b12d9a5fda9e5777b3ca98924f3479b3644510520795a886bf34e +size 2102593 diff --git a/74_cfg.json b/74_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/74_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/75.pt b/75.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1145ea00946f53e1d8085366f2e101981455788 --- /dev/null +++ b/75.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f8284036736642d4aaa547516ad6ce981279f69da563b6c46fd3106d5302fbe +size 2102593 diff --git a/75_cfg.json b/75_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/75_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/76.pt b/76.pt new file mode 100644 index 0000000000000000000000000000000000000000..60df451404d612648b2e9101a181feaabd5c8536 --- /dev/null +++ b/76.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09fbe8590d290e006427c4f006b68161674b413a8fdc14a8be2e450c73711d6b +size 2102593 diff --git a/76_cfg.json b/76_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/76_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/77.pt b/77.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a6e61356e04e4fee698f45904f62094fb9d723c --- /dev/null +++ b/77.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcdde57e672c4f53dae22508d00a8daff22afee624961dd593f6839ce3b0dc8a +size 2102593 diff --git a/77_cfg.json b/77_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/77_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/78.pt b/78.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9d9b6640888fba6d41c4801d89d07975df479b4 --- /dev/null +++ b/78.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bd5eaaf3d43d5edc4be282647c22ab613ff25f195ce7f02de26e57a5056e357 +size 2102593 diff --git a/78_cfg.json b/78_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/78_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/79.pt b/79.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbe4a31d6a1a7cf7969eea33fd526358a201664a --- /dev/null +++ b/79.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9101542034759d513a4f485b495fb2fda008f1ece5c84e6b95bb4452355a32ad +size 2102593 diff --git a/79_cfg.json b/79_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/79_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/7_cfg.json b/7_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/7_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/8.pt b/8.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a496ec0da8fb7a4b8ba21736b7a364037f37432 --- /dev/null +++ b/8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:000df1f4877240e5695719e935deaf9af25f64c196d212c82e3332b0640cc50a +size 2102587 diff --git a/80.pt b/80.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fb0797d0eb0e6d3d7474551cf70e62d383d9d03 --- /dev/null +++ b/80.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce8164700664d9adba83ad5f0368702dcebe54878a97b8c800d7a9a08bef08d2 +size 2102593 diff --git a/80_cfg.json b/80_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/80_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/81.pt b/81.pt new file mode 100644 index 0000000000000000000000000000000000000000..85779f42d72b6f68698f42ea1707a0c295f7bc80 --- /dev/null +++ b/81.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dee961665f41dd85bbd700b08171df66eae2220841fe57b6da29afb932ba620 +size 2102593 diff --git a/81_cfg.json b/81_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/81_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/82.pt b/82.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4b351de06c25697f972f87505d91d3917b56f1c --- /dev/null +++ b/82.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01ab7fdd8aaa9548092cc7a5173ffc5e6db48603e373d2329eb22f60676cd52a +size 2102593 diff --git a/82_cfg.json b/82_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/82_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/83.pt b/83.pt new file mode 100644 index 0000000000000000000000000000000000000000..5146e498f31f45499423e9dd84fcf75a101ea32f --- /dev/null +++ b/83.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3da3f4467372c3fe6d50beeba8336528a5de50c57d25aa16807cb8f22b557c47 +size 2102593 diff --git a/83_cfg.json b/83_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/83_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/84.pt b/84.pt new file mode 100644 index 0000000000000000000000000000000000000000..6db6e77bf1c5bcd62e2d12144fb156624bbc2e5d --- /dev/null +++ b/84.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c2800e6a4b53ea0714cc470bed1336ae6aac8dac2b509b9c81592f2a992b7a1 +size 2102593 diff --git a/84_cfg.json b/84_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/84_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/85.pt b/85.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0cf8b64ffabeb0aea0f8f72759a7aac7a33b14e --- /dev/null +++ b/85.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899f975021fea4a2bf71f7f8691ce35a0a8729b8d4b4e1b04fe1dc158322410b +size 2102593 diff --git a/85_cfg.json b/85_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/85_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/86.pt b/86.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee00d8a9d205a3830b986a2247c30a3571ee376b --- /dev/null +++ b/86.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d382626e222c32b472172e1ba503ed4ae4991f79ede9228525bbcb16ae6859d +size 2102593 diff --git a/86_cfg.json b/86_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/86_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/87.pt b/87.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b9fc014efc2fd730132611818714d1936239c3b --- /dev/null +++ b/87.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ae3b9a55fa71b90042a2f286f1f23f461fb6ac19f018e1d69d1ef8204c2e674 +size 2102593 diff --git a/87_cfg.json b/87_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/87_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/88.pt b/88.pt new file mode 100644 index 0000000000000000000000000000000000000000..47ab2e06889f61696239d6f114537eb86442b249 --- /dev/null +++ b/88.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65d53c77e70174fbd92a6df80466c90ea8e1e03ae5ee65102f22d34c60911023 +size 2102593 diff --git a/88_cfg.json b/88_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/88_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/89.pt b/89.pt new file mode 100644 index 0000000000000000000000000000000000000000..f70f0e888875040a29686092070fdeb1e68beea8 --- /dev/null +++ b/89.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41869fa472bce5a177c2348284c84c6e2fa63ad814a8f581dc4a08346cd90f09 +size 2102593 diff --git a/89_cfg.json b/89_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/89_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/8_cfg.json b/8_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/8_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/9.pt b/9.pt new file mode 100644 index 0000000000000000000000000000000000000000..8392255438eddebd61ea254bc7b78937d725c2e0 --- /dev/null +++ b/9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7285fffbeef01a38eaca5a355500d4f25aaa4f929a32d05ce1ef1a0f49d148c9 +size 2102587 diff --git a/90.pt b/90.pt new file mode 100644 index 0000000000000000000000000000000000000000..52c91465abcc80665c941e652cdf03971d3c63c4 --- /dev/null +++ b/90.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17bc3bf125f01752b341fe7b25fdbc91a1bf1e296fab874c969af659f58ee588 +size 2102593 diff --git a/90_cfg.json b/90_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/90_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/91.pt b/91.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbce54f99fd54fc5802384b27115a0665e08f4ab --- /dev/null +++ b/91.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49f20e137a80eeac353e9c993d3f70a338677d9a7d282c42241f43a881c39b4 +size 2102593 diff --git a/91_cfg.json b/91_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/91_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/92.pt b/92.pt new file mode 100644 index 0000000000000000000000000000000000000000..041714efefcbbdd8852b52cc486a3424bbe2cc26 --- /dev/null +++ b/92.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4624df161b825b6ecca6dbd5811a49963b2e804e0a126c51a20ef8b9e493c732 +size 2102593 diff --git a/92_cfg.json b/92_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/92_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/93.pt b/93.pt new file mode 100644 index 0000000000000000000000000000000000000000..98ab115b7a8d3bb05e6ed26b74c9250dfe6cea8c --- /dev/null +++ b/93.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1078133bcbb13557f72ec13a06539dd3610b1390962c0b1a9f86b774818e2ea3 +size 2102593 diff --git a/93_cfg.json b/93_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/93_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/94.pt b/94.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d0190466f721beadd1415b47c57da99a8ae1863 --- /dev/null +++ b/94.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2653d78d9811e6aa3ad970eb39acd5e9c498b30b6b4929fbbdb5ebef75a5279 +size 2102593 diff --git a/94_cfg.json b/94_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/94_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/95.pt b/95.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e3e2397e632ae1c6fa44647cb9fe2a57e0d5cc8 --- /dev/null +++ b/95.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d28b070ef1c2ad42d234b0a8e13a44cdc90a28256bda611cca8bdfed3701745 +size 2102593 diff --git a/95_cfg.json b/95_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/95_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/96.pt b/96.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab2a5f1bf3cefbf3e73793497558cace42924a19 --- /dev/null +++ b/96.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee7b6a23b18fdc2f775cbb9e32f40c2a1407fa6e3dc66f5a0bddfa77aa65cf1f +size 2102593 diff --git a/96_cfg.json b/96_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/96_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/97.pt b/97.pt new file mode 100644 index 0000000000000000000000000000000000000000..867d5a7f31c794f2a278fe11fb5240cadb1fab15 --- /dev/null +++ b/97.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd6324da27c19ff391faf3cc235508bd75ffc8b6de390abbcffef919600b13cb +size 2102593 diff --git a/97_cfg.json b/97_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/97_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/98.pt b/98.pt new file mode 100644 index 0000000000000000000000000000000000000000..7297c72c0ef57bd1d53598bc4770f966f81e3dfa --- /dev/null +++ b/98.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c59eeb3242559b093698d6e35d3ae897a7aaf0d0e19ffa797f00f1e2cc875623 +size 2102593 diff --git a/98_cfg.json b/98_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..371490d7b80a272842d4bf4caa050f69bd5b2927 --- /dev/null +++ b/98_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file diff --git a/9_cfg.json b/9_cfg.json new file mode 100644 index 0000000000000000000000000000000000000000..77a66c994d353e2baa3d30a4fbf9d91b739f0373 --- /dev/null +++ b/9_cfg.json @@ -0,0 +1 @@ +{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"} \ No newline at end of file