noanabeshima commited on
Commit
7e1f6a5
·
verified ·
1 Parent(s): 332b11a

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. small_mlp_out/Mo0_N100_S-1.pt +3 -0
  2. small_mlp_out/Mo0_N100_S-10.pt +3 -0
  3. small_mlp_out/Mo0_N100_S-10_config.json +38 -0
  4. small_mlp_out/Mo0_N100_S-1_config.json +38 -0
  5. small_mlp_out/Mo0_N100_S-2.pt +3 -0
  6. small_mlp_out/Mo0_N100_S-2_config.json +38 -0
  7. small_mlp_out/Mo0_N100_S-3.pt +3 -0
  8. small_mlp_out/Mo0_N100_S-3_config.json +38 -0
  9. small_mlp_out/Mo0_N100_S-4.pt +3 -0
  10. small_mlp_out/Mo0_N100_S-4_config.json +38 -0
  11. small_mlp_out/Mo0_N100_S-5.pt +3 -0
  12. small_mlp_out/Mo0_N100_S-5_config.json +38 -0
  13. small_mlp_out/Mo0_N100_S-6.pt +3 -0
  14. small_mlp_out/Mo0_N100_S-6_config.json +38 -0
  15. small_mlp_out/Mo0_N100_S-7.pt +3 -0
  16. small_mlp_out/Mo0_N100_S-7_config.json +38 -0
  17. small_mlp_out/Mo0_N100_S-8.pt +3 -0
  18. small_mlp_out/Mo0_N100_S-8_config.json +38 -0
  19. small_mlp_out/Mo0_N100_S-9.pt +3 -0
  20. small_mlp_out/Mo0_N100_S-9_config.json +38 -0
  21. small_mlp_out/Mo0_N100_S0.pt +3 -0
  22. small_mlp_out/Mo0_N100_S0_config.json +38 -0
  23. small_mlp_out/Mo0_N100_S1.pt +3 -0
  24. small_mlp_out/Mo0_N100_S1_config.json +38 -0
  25. small_mlp_out/Mo0_N100_S2.pt +3 -0
  26. small_mlp_out/Mo0_N100_S2_config.json +38 -0
  27. small_mlp_out/Mo1_N100_S-1.pt +3 -0
  28. small_mlp_out/Mo1_N100_S-10.pt +3 -0
  29. small_mlp_out/Mo1_N100_S-10_config.json +38 -0
  30. small_mlp_out/Mo1_N100_S-1_config.json +38 -0
  31. small_mlp_out/Mo1_N100_S-2.pt +3 -0
  32. small_mlp_out/Mo1_N100_S-2_config.json +38 -0
  33. small_mlp_out/Mo1_N100_S-3.pt +3 -0
  34. small_mlp_out/Mo1_N100_S-3_config.json +38 -0
  35. small_mlp_out/Mo1_N100_S-4.pt +3 -0
  36. small_mlp_out/Mo1_N100_S-4_config.json +38 -0
  37. small_mlp_out/Mo1_N100_S-5.pt +3 -0
  38. small_mlp_out/Mo1_N100_S-5_config.json +38 -0
  39. small_mlp_out/Mo1_N100_S-6.pt +3 -0
  40. small_mlp_out/Mo1_N100_S-6_config.json +38 -0
  41. small_mlp_out/Mo1_N100_S-7.pt +3 -0
  42. small_mlp_out/Mo1_N100_S-7_config.json +38 -0
  43. small_mlp_out/Mo1_N100_S-8.pt +3 -0
  44. small_mlp_out/Mo1_N100_S-8_config.json +38 -0
  45. small_mlp_out/Mo1_N100_S-9.pt +3 -0
  46. small_mlp_out/Mo1_N100_S-9_config.json +38 -0
  47. small_mlp_out/Mo1_N100_S0.pt +3 -0
  48. small_mlp_out/Mo1_N100_S0_config.json +38 -0
  49. small_mlp_out/Mo1_N100_S1.pt +3 -0
  50. small_mlp_out/Mo1_N100_S1_config.json +38 -0
small_mlp_out/Mo0_N100_S-1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af05b5086ef6459f0ba1f53ac36b1be4728306ad6e5717f8ccabad01a0c30a4d
3
+ size 619896
small_mlp_out/Mo0_N100_S-10.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f8641706d85ae2322d6e897ba585c87597c45382716e00d231923e30f544486
3
+ size 619904
small_mlp_out/Mo0_N100_S-10_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -10,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N100_S-10"
38
+ }
small_mlp_out/Mo0_N100_S-1_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -1,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N100_S-1"
38
+ }
small_mlp_out/Mo0_N100_S-2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25e8772eea4c3a267847387c189ca2688ad687b2ace325fa23df939b33c20794
3
+ size 619896
small_mlp_out/Mo0_N100_S-2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N100_S-2"
38
+ }
small_mlp_out/Mo0_N100_S-3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1e7d311122c25a89e59cfe847d770db0b789336e3198783dddd726f8e699dc0
3
+ size 619896
small_mlp_out/Mo0_N100_S-3_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -3,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N100_S-3"
38
+ }
small_mlp_out/Mo0_N100_S-4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62ffe69e8783b55f4f602c0ed4932717af23e29541789f214407ce97050c58de
3
+ size 619896
small_mlp_out/Mo0_N100_S-4_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N100_S-4"
38
+ }
small_mlp_out/Mo0_N100_S-5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec97df2666e65b8ea931140c874d275d25cc7f843cd88d0071c54d5219f366d
3
+ size 619896
small_mlp_out/Mo0_N100_S-5_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -5,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N100_S-5"
38
+ }
small_mlp_out/Mo0_N100_S-6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffa89c6746abe2576841458b4a77287285487450480bc56c059c21b31fa91e32
3
+ size 619896
small_mlp_out/Mo0_N100_S-6_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -6,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N100_S-6"
38
+ }
small_mlp_out/Mo0_N100_S-7.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c3076f2a9ae2b1fcea12b3850de9e0fdf02781d4e6b373b79f2e198ae7025fd
3
+ size 619896
small_mlp_out/Mo0_N100_S-7_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -7,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N100_S-7"
38
+ }
small_mlp_out/Mo0_N100_S-8.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b321f175fcb8b0e62da6d3b26bd34ddde62c553de7d6e5add3cf45cde84fd7fe
3
+ size 619896
small_mlp_out/Mo0_N100_S-8_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -8,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N100_S-8"
38
+ }
small_mlp_out/Mo0_N100_S-9.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eac62f06d7682d0c0cccd5ad74fdf75dec9485843aed47877524d6adc970d9ef
3
+ size 619896
small_mlp_out/Mo0_N100_S-9_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -9,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N100_S-9"
38
+ }
small_mlp_out/Mo0_N100_S0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c52f40ba9aa6bb249e2ea562de4a2db109a58ab592601051f17cd7c22bf8875
3
+ size 619888
small_mlp_out/Mo0_N100_S0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": 0,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N100_S0"
38
+ }
small_mlp_out/Mo0_N100_S1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52538b9a86801cebb661e9db7fb2b94352e7cfe791324ef6a3c8fc3c2e98a645
3
+ size 619888
small_mlp_out/Mo0_N100_S1_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": 1,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N100_S1"
38
+ }
small_mlp_out/Mo0_N100_S2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e4ae6de80c7c941677af277440b611e3fe5e4a29bcd6bc0845e909710e7d1fc
3
+ size 619888
small_mlp_out/Mo0_N100_S2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": 2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N100_S2"
38
+ }
small_mlp_out/Mo1_N100_S-1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ae27e97e8cd3eddd1046adfbf85eca5a2fcf577240c58d511b567055490a656
3
+ size 619896
small_mlp_out/Mo1_N100_S-10.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89c6721b55d29dc0756d6cabd10d22bc88c0e151b372378b0b2aafd89785c262
3
+ size 619904
small_mlp_out/Mo1_N100_S-10_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -10,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo1_N100_S-10"
38
+ }
small_mlp_out/Mo1_N100_S-1_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -1,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo1_N100_S-1"
38
+ }
small_mlp_out/Mo1_N100_S-2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d36ccb1fc71fb2c1e8d25e6dee60a9077df13d97a5c76186e340233bb4a8a9e
3
+ size 619896
small_mlp_out/Mo1_N100_S-2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo1_N100_S-2"
38
+ }
small_mlp_out/Mo1_N100_S-3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab9ea0f87d47b3d050fb8f1d688da77e682eb9aaf3ff0eaa8516cd2af5b88a97
3
+ size 619896
small_mlp_out/Mo1_N100_S-3_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -3,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo1_N100_S-3"
38
+ }
small_mlp_out/Mo1_N100_S-4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e611abec7d2d6bf4abce5bcc9accf787df7a75dca13694e77575f7dd7c6a364b
3
+ size 619896
small_mlp_out/Mo1_N100_S-4_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo1_N100_S-4"
38
+ }
small_mlp_out/Mo1_N100_S-5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1487f02437b087ca73fbe5efc06a24f83be478714b4bd39b75c76ba426d4d3c4
3
+ size 619896
small_mlp_out/Mo1_N100_S-5_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -5,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo1_N100_S-5"
38
+ }
small_mlp_out/Mo1_N100_S-6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64fea0f7373b118c234f57372682a6272637828245d439bce1489762188e88c3
3
+ size 619896
small_mlp_out/Mo1_N100_S-6_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -6,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo1_N100_S-6"
38
+ }
small_mlp_out/Mo1_N100_S-7.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69eb1cfdd040be79a5b5738a23fac9b88dff72af88656dfa99789ac1d7c7f5bb
3
+ size 619896
small_mlp_out/Mo1_N100_S-7_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -7,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo1_N100_S-7"
38
+ }
small_mlp_out/Mo1_N100_S-8.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad75daf6562638331d20c3f86fdb93917bf0a1aaf64c8c67acec709f1f335c04
3
+ size 619896
small_mlp_out/Mo1_N100_S-8_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -8,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo1_N100_S-8"
38
+ }
small_mlp_out/Mo1_N100_S-9.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a2b6cc9dbfbe4ec2f3519a8854b996c63051477dd94a690f416af53c8cab335
3
+ size 619896
small_mlp_out/Mo1_N100_S-9_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -9,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo1_N100_S-9"
38
+ }
small_mlp_out/Mo1_N100_S0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e842201d494a66f266a99bbf43ea5b03465873335a817094074267ab9e3dc857
3
+ size 619888
small_mlp_out/Mo1_N100_S0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": 0,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo1_N100_S0"
38
+ }
small_mlp_out/Mo1_N100_S1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8343acec8e1ee07400aa2376f4797783f35b7ec7a536b908b833b30c1ee63314
3
+ size 619888
small_mlp_out/Mo1_N100_S1_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 100,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": 1,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo1_N100_S1"
38
+ }