noanabeshima commited on
Commit
22505e8
·
verified ·
1 Parent(s): 762b92d

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. small_mlp_out/Mo0_N10_S-1.pt +3 -0
  2. small_mlp_out/Mo0_N10_S-10.pt +3 -0
  3. small_mlp_out/Mo0_N10_S-10_config.json +38 -0
  4. small_mlp_out/Mo0_N10_S-1_config.json +38 -0
  5. small_mlp_out/Mo0_N10_S-2.pt +3 -0
  6. small_mlp_out/Mo0_N10_S-2_config.json +38 -0
  7. small_mlp_out/Mo0_N10_S-3.pt +3 -0
  8. small_mlp_out/Mo0_N10_S-3_config.json +38 -0
  9. small_mlp_out/Mo0_N10_S-4.pt +3 -0
  10. small_mlp_out/Mo0_N10_S-4_config.json +38 -0
  11. small_mlp_out/Mo0_N10_S-5.pt +3 -0
  12. small_mlp_out/Mo0_N10_S-5_config.json +38 -0
  13. small_mlp_out/Mo0_N10_S-6.pt +3 -0
  14. small_mlp_out/Mo0_N10_S-6_config.json +38 -0
  15. small_mlp_out/Mo0_N10_S-7.pt +3 -0
  16. small_mlp_out/Mo0_N10_S-7_config.json +38 -0
  17. small_mlp_out/Mo0_N10_S-8.pt +3 -0
  18. small_mlp_out/Mo0_N10_S-8_config.json +38 -0
  19. small_mlp_out/Mo0_N10_S-9.pt +3 -0
  20. small_mlp_out/Mo0_N10_S-9_config.json +38 -0
  21. small_mlp_out/Mo0_N10_S0.pt +3 -0
  22. small_mlp_out/Mo0_N10_S0_config.json +38 -0
  23. small_mlp_out/Mo0_N10_S1.pt +3 -0
  24. small_mlp_out/Mo0_N10_S1_config.json +38 -0
  25. small_mlp_out/Mo0_N10_S2.pt +3 -0
  26. small_mlp_out/Mo0_N10_S2_config.json +38 -0
  27. small_mlp_out/Mo0_N30_S-1.pt +3 -0
  28. small_mlp_out/Mo0_N30_S-10.pt +3 -0
  29. small_mlp_out/Mo0_N30_S-10_config.json +38 -0
  30. small_mlp_out/Mo0_N30_S-1_config.json +38 -0
  31. small_mlp_out/Mo0_N30_S-2.pt +3 -0
  32. small_mlp_out/Mo0_N30_S-2_config.json +38 -0
  33. small_mlp_out/Mo0_N30_S-3.pt +3 -0
  34. small_mlp_out/Mo0_N30_S-3_config.json +38 -0
  35. small_mlp_out/Mo0_N30_S-4.pt +3 -0
  36. small_mlp_out/Mo0_N30_S-4_config.json +38 -0
  37. small_mlp_out/Mo0_N30_S-5.pt +3 -0
  38. small_mlp_out/Mo0_N30_S-5_config.json +38 -0
  39. small_mlp_out/Mo0_N30_S-6.pt +3 -0
  40. small_mlp_out/Mo0_N30_S-6_config.json +38 -0
  41. small_mlp_out/Mo0_N30_S-7.pt +3 -0
  42. small_mlp_out/Mo0_N30_S-7_config.json +38 -0
  43. small_mlp_out/Mo0_N30_S-8.pt +3 -0
  44. small_mlp_out/Mo0_N30_S-8_config.json +38 -0
  45. small_mlp_out/Mo0_N30_S-9.pt +3 -0
  46. small_mlp_out/Mo0_N30_S-9_config.json +38 -0
  47. small_mlp_out/Mo0_N30_S0.pt +3 -0
  48. small_mlp_out/Mo0_N30_S0_config.json +38 -0
  49. small_mlp_out/Mo0_N30_S1.pt +3 -0
  50. small_mlp_out/Mo0_N30_S1_config.json +38 -0
small_mlp_out/Mo0_N10_S-1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:458a809ab4a62025fa0b4abe189fa7331908ff585e3222a5b898d51e5c25ee15
3
+ size 66544
small_mlp_out/Mo0_N10_S-10.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:511ceb80e559315a776781c8d0388451b8870893da59c5eacc1cb2e3d9eb8dd5
3
+ size 66552
small_mlp_out/Mo0_N10_S-10_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 10,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -10,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N10_S-10"
38
+ }
small_mlp_out/Mo0_N10_S-1_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 10,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -1,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N10_S-1"
38
+ }
small_mlp_out/Mo0_N10_S-2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8cc041b750c1bbff4b3633f570bd162f9aea4e34e4c580fd290d6f2f303cc04
3
+ size 66544
small_mlp_out/Mo0_N10_S-2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 10,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N10_S-2"
38
+ }
small_mlp_out/Mo0_N10_S-3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1266400972eb50c5a2bed21b2baa7b56ecf72c2edcbb96ba2a1810a005d129c5
3
+ size 66544
small_mlp_out/Mo0_N10_S-3_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 10,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -3,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N10_S-3"
38
+ }
small_mlp_out/Mo0_N10_S-4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49751a1614e4a1317adec012e0754068d07a36f1c90723d94c07f089a7b3afc3
3
+ size 66544
small_mlp_out/Mo0_N10_S-4_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 10,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N10_S-4"
38
+ }
small_mlp_out/Mo0_N10_S-5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16a7a3074052a8a48288793220d83537a2e6556b5a7f54e8d41b285aad9d27b3
3
+ size 66544
small_mlp_out/Mo0_N10_S-5_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 10,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -5,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N10_S-5"
38
+ }
small_mlp_out/Mo0_N10_S-6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94e4e3e1b4330d8b33ee007796b6f877ce91fb599abbb897c0cb78cf28391724
3
+ size 66544
small_mlp_out/Mo0_N10_S-6_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 10,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -6,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N10_S-6"
38
+ }
small_mlp_out/Mo0_N10_S-7.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a46250ae1f16105a8568d74d07528d2acb7143a5ea2829723e2b15fa99bbfdc8
3
+ size 66544
small_mlp_out/Mo0_N10_S-7_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 10,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -7,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N10_S-7"
38
+ }
small_mlp_out/Mo0_N10_S-8.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:972ac77cac6c62343db17b4d1dabba855053baa32a9688cb24b386527a251132
3
+ size 66544
small_mlp_out/Mo0_N10_S-8_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 10,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -8,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N10_S-8"
38
+ }
small_mlp_out/Mo0_N10_S-9.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55246d9b098836432ed8012c13a348505fc5504fef4776f84fce94b3f98c4a2f
3
+ size 66544
small_mlp_out/Mo0_N10_S-9_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 10,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -9,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N10_S-9"
38
+ }
small_mlp_out/Mo0_N10_S0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dc4108bbf165d7a3f7eca1a1e8ad0d9964e10ac968428cbc878cd2acf1dbc26
3
+ size 66536
small_mlp_out/Mo0_N10_S0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 10,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": 0,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N10_S0"
38
+ }
small_mlp_out/Mo0_N10_S1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10e66a513465bebcb6dce048f21410bd6a2a4067584308f24d4daba2c9432e71
3
+ size 66536
small_mlp_out/Mo0_N10_S1_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 10,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": 1,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N10_S1"
38
+ }
small_mlp_out/Mo0_N10_S2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdd85acc5f3f8b071c4c83f35594133e1074d4af6732a036dd79c41efc41aea3
3
+ size 66536
small_mlp_out/Mo0_N10_S2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 10,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": 2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N10_S2"
38
+ }
small_mlp_out/Mo0_N30_S-1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfd41d42403d49daa12aaf310f980973b4bbcb1de528352a78be1285f307ff85
3
+ size 189488
small_mlp_out/Mo0_N30_S-10.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a71e085a5fe9150bde61878ee9928f28c413e56b6b050ee0101d7d01c428466
3
+ size 189496
small_mlp_out/Mo0_N30_S-10_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 30,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -10,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N30_S-10"
38
+ }
small_mlp_out/Mo0_N30_S-1_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 30,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -1,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N30_S-1"
38
+ }
small_mlp_out/Mo0_N30_S-2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08fa0c288d36c23878e28a33698cd7826da569fcb4891ee31ba2c152ad365b9b
3
+ size 189488
small_mlp_out/Mo0_N30_S-2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 30,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N30_S-2"
38
+ }
small_mlp_out/Mo0_N30_S-3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea1e4d57073cf941664981cc94cea6f43196c9ae058f1dce14b099b45d40bf7c
3
+ size 189488
small_mlp_out/Mo0_N30_S-3_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 30,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -3,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N30_S-3"
38
+ }
small_mlp_out/Mo0_N30_S-4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73ccaa416995bac6c910fa7882202bc07c2479236135d9a52c97b9434c7111c9
3
+ size 189488
small_mlp_out/Mo0_N30_S-4_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 30,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N30_S-4"
38
+ }
small_mlp_out/Mo0_N30_S-5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8b7cf9076021e50a85168f97dbb03d7203423421365a025501a364b3c05659e
3
+ size 189488
small_mlp_out/Mo0_N30_S-5_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 30,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -5,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N30_S-5"
38
+ }
small_mlp_out/Mo0_N30_S-6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2970b40e439c10e90b2d17a22898122c94faca0b107ff204b329234a3f1210fe
3
+ size 189488
small_mlp_out/Mo0_N30_S-6_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 30,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -6,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N30_S-6"
38
+ }
small_mlp_out/Mo0_N30_S-7.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4322ab097b24adfbc4f61a73f265169817c88b7b451d408419927a1e1446efd5
3
+ size 189488
small_mlp_out/Mo0_N30_S-7_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 30,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -7,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N30_S-7"
38
+ }
small_mlp_out/Mo0_N30_S-8.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85afcf08eb4b8fea4f6f5e6bfe220f3815ef6a4799dda1d746e4db44184be938
3
+ size 189488
small_mlp_out/Mo0_N30_S-8_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 30,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -8,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N30_S-8"
38
+ }
small_mlp_out/Mo0_N30_S-9.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dc244d7d427e24ff72ea5b183f9f23cdda5f891d5777b05fbe162a8ed07fa47
3
+ size 189488
small_mlp_out/Mo0_N30_S-9_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 30,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -9,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N30_S-9"
38
+ }
small_mlp_out/Mo0_N30_S0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e62e1bd82f0a2e724141e01b3e1d99e9ad34f091c174b2c8d09226a09723970
3
+ size 189480
small_mlp_out/Mo0_N30_S0_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 30,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": 0,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N30_S0"
38
+ }
small_mlp_out/Mo0_N30_S1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:951b06500e3e8ab90311feee24120235b15a81ee05b09f5fb04d3667488c5bc4
3
+ size 189480
small_mlp_out/Mo0_N30_S1_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 30,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": 1,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "error",
25
+ "sparse_adam": false,
26
+ "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}",
27
+ "project_name": "small_mlp_out",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 1,
32
+ "l1_p": 0,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_out",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "Mo0_N30_S1"
38
+ }