YX-S-Z
commited on
Commit
·
ac7afda
1
Parent(s):
ec6702e
push
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- 0.pt +3 -0
- 0_cfg.json +1 -0
- 1.pt +3 -0
- 10.pt +3 -0
- 10_cfg.json +1 -0
- 11.pt +3 -0
- 11_cfg.json +1 -0
- 12.pt +3 -0
- 12_cfg.json +1 -0
- 13.pt +3 -0
- 13_cfg.json +1 -0
- 14.pt +3 -0
- 14_cfg.json +1 -0
- 15.pt +3 -0
- 15_cfg.json +1 -0
- 16.pt +3 -0
- 16_cfg.json +1 -0
- 17.pt +3 -0
- 17_cfg.json +1 -0
- 18.pt +3 -0
- 18_cfg.json +1 -0
- 19.pt +3 -0
- 19_cfg.json +1 -0
- 1_cfg.json +1 -0
- 2.pt +3 -0
- 20.pt +3 -0
- 20_cfg.json +1 -0
- 21.pt +3 -0
- 21_cfg.json +1 -0
- 22.pt +3 -0
- 22_cfg.json +1 -0
- 23.pt +3 -0
- 23_cfg.json +1 -0
- 24.pt +3 -0
- 24_cfg.json +1 -0
- 25.pt +3 -0
- 25_cfg.json +1 -0
- 26.pt +3 -0
- 26_cfg.json +1 -0
- 27.pt +3 -0
- 27_cfg.json +1 -0
- 28.pt +3 -0
- 28_cfg.json +1 -0
- 29.pt +3 -0
- 29_cfg.json +1 -0
- 2_cfg.json +1 -0
- 3.pt +3 -0
- 30.pt +3 -0
- 30_cfg.json +1 -0
- 31.pt +3 -0
0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ea7a53d76d4e1d906a6199c50bfc04a11337c29b920e0e842750b2001c41662
|
3 |
+
size 2102587
|
0_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
1.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd478941ba7f7e271cec82f5de601531299363867808749392eb1bf1b88a7399
|
3 |
+
size 2102587
|
10.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd5ffa7d3e78d182583b2d343847c755c822b6b9f4c9165e9935dc65094070a3
|
3 |
+
size 2102593
|
10_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
11.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5eff5686a9ea7f2099f865c93debfaad8763731c73e2aa2ae5d685bf47ccca6b
|
3 |
+
size 2102593
|
11_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
12.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92802a4c504c4eee03a7d1c1bf0025f1c5ff25892c44c85fa07fe184748e91de
|
3 |
+
size 2102593
|
12_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
13.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:194f4b321aafad58a04a24a2ba9db27d2c036d6ec317e6ef2db6f61e32e80310
|
3 |
+
size 2102593
|
13_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
14.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eac371d2573979709a717e702aa7af567fed2b8285493f28f7f2500e47c421ce
|
3 |
+
size 2102593
|
14_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
15.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12664d3b9b56bfcc7f175b623e2c83ae2466705f52772f8a571369d83c261836
|
3 |
+
size 2102593
|
15_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
16.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c86371bcb89f0178d5874484b09bb250818c8adab9f3a1e9cd8af2b90471b55f
|
3 |
+
size 2102593
|
16_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
17.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ab96c4592862ffd21a85b3f5a6b4446641f550ee78fb49ebd2e7b6dd019aa5d
|
3 |
+
size 2102593
|
17_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
18.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9665a38d9c692621c7bfc973b6119d05c76f05e165edae4059df386fcc44e401
|
3 |
+
size 2102593
|
18_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
19.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f7e8ce1c3db1a50d1a39500e024e8ca180ecbdceaf9552f44f5aac3a2cc12a5
|
3 |
+
size 2102593
|
19_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
1_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
2.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e85ac421763b164c0634a7fb696efa5faf31adc56699157a7ceee6f53ec4c852
|
3 |
+
size 2102587
|
20.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9abab491b76ad658df1c869213fc2733d9d2d4dc6b95efb747753a2277292f90
|
3 |
+
size 2102593
|
20_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
21.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:027b1b770b383dbac814ee3f63f3749c3a572d63b3feb7a5b2d4fb8dc842a879
|
3 |
+
size 2102593
|
21_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
22.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6aa7278e56bcb37281291aa865c57387c98f3909f7c74e5c6d7f245dbd59cc04
|
3 |
+
size 2102593
|
22_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
23.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4fce8ee30f0dd2038cb7772b49ae75718f917123cd727613748fd8552b540da
|
3 |
+
size 2102593
|
23_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
24.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3dc894d1279e796213ab42457226d328446a9e4043de190fbfdb771dbe3264ff
|
3 |
+
size 2102593
|
24_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
25.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7131889ea47fe29f8e95728f7a66be3077672b5095ec306f2721b98d9958d083
|
3 |
+
size 2102593
|
25_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
26.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c06c0717cb65dd62b7c2d7509ebc8d428a67dcc2a0095878d422c5f3dccd2a19
|
3 |
+
size 2102593
|
26_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
27.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5bc7532528a79d0cde2fa092092400ffa2f1b16f80e1574db051eddede40e993
|
3 |
+
size 2102593
|
27_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c4ea9ff96ec5104239d71e2587a27cafd3aa3a97f82c4adbaffe1d974133714
|
3 |
+
size 2102593
|
28_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
29.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac47f54b7c3e2d5616fdc723717660efe5a4fc0711a53d4b8909192f6f3e5afb
|
3 |
+
size 2102593
|
29_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
2_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
3.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a7981bf4d4721ae69d76458c1fc6759afe6f7afd12214f0b8b477eab3d04636
|
3 |
+
size 2102587
|
30.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ee24664f0f1de2380b12ce4e5b079838e9ec61863abfc9a648c2a7e304c95ee
|
3 |
+
size 2102593
|
30_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
|
31.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c95fa718c05fd386a42bee04b55d9add73e9ec10520e7fff59ec3f02787175b7
|
3 |
+
size 2102593
|