YX-S-Z commited on
Commit
ac7afda
·
1 Parent(s): ec6702e
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. 0.pt +3 -0
  2. 0_cfg.json +1 -0
  3. 1.pt +3 -0
  4. 10.pt +3 -0
  5. 10_cfg.json +1 -0
  6. 11.pt +3 -0
  7. 11_cfg.json +1 -0
  8. 12.pt +3 -0
  9. 12_cfg.json +1 -0
  10. 13.pt +3 -0
  11. 13_cfg.json +1 -0
  12. 14.pt +3 -0
  13. 14_cfg.json +1 -0
  14. 15.pt +3 -0
  15. 15_cfg.json +1 -0
  16. 16.pt +3 -0
  17. 16_cfg.json +1 -0
  18. 17.pt +3 -0
  19. 17_cfg.json +1 -0
  20. 18.pt +3 -0
  21. 18_cfg.json +1 -0
  22. 19.pt +3 -0
  23. 19_cfg.json +1 -0
  24. 1_cfg.json +1 -0
  25. 2.pt +3 -0
  26. 20.pt +3 -0
  27. 20_cfg.json +1 -0
  28. 21.pt +3 -0
  29. 21_cfg.json +1 -0
  30. 22.pt +3 -0
  31. 22_cfg.json +1 -0
  32. 23.pt +3 -0
  33. 23_cfg.json +1 -0
  34. 24.pt +3 -0
  35. 24_cfg.json +1 -0
  36. 25.pt +3 -0
  37. 25_cfg.json +1 -0
  38. 26.pt +3 -0
  39. 26_cfg.json +1 -0
  40. 27.pt +3 -0
  41. 27_cfg.json +1 -0
  42. 28.pt +3 -0
  43. 28_cfg.json +1 -0
  44. 29.pt +3 -0
  45. 29_cfg.json +1 -0
  46. 2_cfg.json +1 -0
  47. 3.pt +3 -0
  48. 30.pt +3 -0
  49. 30_cfg.json +1 -0
  50. 31.pt +3 -0
0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ea7a53d76d4e1d906a6199c50bfc04a11337c29b920e0e842750b2001c41662
3
+ size 2102587
0_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd478941ba7f7e271cec82f5de601531299363867808749392eb1bf1b88a7399
3
+ size 2102587
10.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd5ffa7d3e78d182583b2d343847c755c822b6b9f4c9165e9935dc65094070a3
3
+ size 2102593
10_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
11.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eff5686a9ea7f2099f865c93debfaad8763731c73e2aa2ae5d685bf47ccca6b
3
+ size 2102593
11_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
12.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92802a4c504c4eee03a7d1c1bf0025f1c5ff25892c44c85fa07fe184748e91de
3
+ size 2102593
12_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
13.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:194f4b321aafad58a04a24a2ba9db27d2c036d6ec317e6ef2db6f61e32e80310
3
+ size 2102593
13_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
14.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eac371d2573979709a717e702aa7af567fed2b8285493f28f7f2500e47c421ce
3
+ size 2102593
14_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
15.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12664d3b9b56bfcc7f175b623e2c83ae2466705f52772f8a571369d83c261836
3
+ size 2102593
15_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
16.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c86371bcb89f0178d5874484b09bb250818c8adab9f3a1e9cd8af2b90471b55f
3
+ size 2102593
16_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
17.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ab96c4592862ffd21a85b3f5a6b4446641f550ee78fb49ebd2e7b6dd019aa5d
3
+ size 2102593
17_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
18.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9665a38d9c692621c7bfc973b6119d05c76f05e165edae4059df386fcc44e401
3
+ size 2102593
18_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
19.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f7e8ce1c3db1a50d1a39500e024e8ca180ecbdceaf9552f44f5aac3a2cc12a5
3
+ size 2102593
19_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
1_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e85ac421763b164c0634a7fb696efa5faf31adc56699157a7ceee6f53ec4c852
3
+ size 2102587
20.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9abab491b76ad658df1c869213fc2733d9d2d4dc6b95efb747753a2277292f90
3
+ size 2102593
20_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
21.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:027b1b770b383dbac814ee3f63f3749c3a572d63b3feb7a5b2d4fb8dc842a879
3
+ size 2102593
21_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
22.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6aa7278e56bcb37281291aa865c57387c98f3909f7c74e5c6d7f245dbd59cc04
3
+ size 2102593
22_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
23.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4fce8ee30f0dd2038cb7772b49ae75718f917123cd727613748fd8552b540da
3
+ size 2102593
23_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
24.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dc894d1279e796213ab42457226d328446a9e4043de190fbfdb771dbe3264ff
3
+ size 2102593
24_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
25.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7131889ea47fe29f8e95728f7a66be3077672b5095ec306f2721b98d9958d083
3
+ size 2102593
25_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
26.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c06c0717cb65dd62b7c2d7509ebc8d428a67dcc2a0095878d422c5f3dccd2a19
3
+ size 2102593
26_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
27.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bc7532528a79d0cde2fa092092400ffa2f1b16f80e1574db051eddede40e993
3
+ size 2102593
27_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
28.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c4ea9ff96ec5104239d71e2587a27cafd3aa3a97f82c4adbaffe1d974133714
3
+ size 2102593
28_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
29.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac47f54b7c3e2d5616fdc723717660efe5a4fc0711a53d4b8909192f6f3e5afb
3
+ size 2102593
29_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
2_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a7981bf4d4721ae69d76458c1fc6759afe6f7afd12214f0b8b477eab3d04636
3
+ size 2102587
30.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ee24664f0f1de2380b12ce4e5b079838e9ec61863abfc9a648c2a7e304c95ee
3
+ size 2102593
30_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 10000000000, "l1_coeff": 3e-05, "beta1": 0.9, "beta2": 0.99, "dict_mult": 1, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-3l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 512, "name": "crate-3l_0_512_post"}
31.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c95fa718c05fd386a42bee04b55d9add73e9ec10520e7fff59ec3f02787175b7
3
+ size 2102593