diff --git a/small_mlp_out/Mo0_N10_S-1.pt b/small_mlp_out/Mo0_N10_S-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0aa021281b862e965310aafc482d8530705d5c9c --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:458a809ab4a62025fa0b4abe189fa7331908ff585e3222a5b898d51e5c25ee15 +size 66544 diff --git a/small_mlp_out/Mo0_N10_S-10.pt b/small_mlp_out/Mo0_N10_S-10.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a413a250e94edfd0cc367b7d2e665aaf93d2c85 --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:511ceb80e559315a776781c8d0388451b8870893da59c5eacc1cb2e3d9eb8dd5 +size 66552 diff --git a/small_mlp_out/Mo0_N10_S-10_config.json b/small_mlp_out/Mo0_N10_S-10_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0de8825a6aa2fcfdab9e26a33fc1eeb6099a01c3 --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-10_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -10, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N10_S-10" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N10_S-1_config.json b/small_mlp_out/Mo0_N10_S-1_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d9caaac0e8c183d93c5ba3a02210e34939e13e22 --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-1_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -1, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N10_S-1" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N10_S-2.pt b/small_mlp_out/Mo0_N10_S-2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2d910b900252d39ee74e08743dee75e30dce884 --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8cc041b750c1bbff4b3633f570bd162f9aea4e34e4c580fd290d6f2f303cc04 +size 66544 diff --git a/small_mlp_out/Mo0_N10_S-2_config.json b/small_mlp_out/Mo0_N10_S-2_config.json new file mode 100644 index 0000000000000000000000000000000000000000..75b9d44d3bc3aaeaec4550cba3fe77fd8e71af60 --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-2_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -2, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N10_S-2" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N10_S-3.pt b/small_mlp_out/Mo0_N10_S-3.pt new file mode 100644 index 0000000000000000000000000000000000000000..01dcdfbb6bf4c77715efbc300552caa8fc863ce9 --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1266400972eb50c5a2bed21b2baa7b56ecf72c2edcbb96ba2a1810a005d129c5 +size 66544 diff --git a/small_mlp_out/Mo0_N10_S-3_config.json b/small_mlp_out/Mo0_N10_S-3_config.json new file mode 100644 index 0000000000000000000000000000000000000000..23f877d0d4c071114679d6724d167720bfad61ef --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-3_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -3, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N10_S-3" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N10_S-4.pt b/small_mlp_out/Mo0_N10_S-4.pt new file mode 100644 index 0000000000000000000000000000000000000000..43a49e89ba4166416c2d0c907e0affe30bcae71d --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49751a1614e4a1317adec012e0754068d07a36f1c90723d94c07f089a7b3afc3 +size 66544 diff --git a/small_mlp_out/Mo0_N10_S-4_config.json b/small_mlp_out/Mo0_N10_S-4_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c0e764dc6232cb5cdc586ecec81c7e686c8da160 --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-4_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -4, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N10_S-4" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N10_S-5.pt b/small_mlp_out/Mo0_N10_S-5.pt new file mode 100644 index 0000000000000000000000000000000000000000..308bf00e6879109c101442dd3a16c87cf464d8be --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16a7a3074052a8a48288793220d83537a2e6556b5a7f54e8d41b285aad9d27b3 +size 66544 diff --git a/small_mlp_out/Mo0_N10_S-5_config.json b/small_mlp_out/Mo0_N10_S-5_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f0866c9662acd417daf96f7acffce2b56f1c6c0e --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-5_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -5, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N10_S-5" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N10_S-6.pt b/small_mlp_out/Mo0_N10_S-6.pt new file mode 100644 index 0000000000000000000000000000000000000000..252c0e3cb56c3028dd7b9e7ca11220d5c27b9fec --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94e4e3e1b4330d8b33ee007796b6f877ce91fb599abbb897c0cb78cf28391724 +size 66544 diff --git a/small_mlp_out/Mo0_N10_S-6_config.json b/small_mlp_out/Mo0_N10_S-6_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6341c2096e1373eb1248dd6dfd3fa2956c11a7b3 --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-6_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -6, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N10_S-6" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N10_S-7.pt b/small_mlp_out/Mo0_N10_S-7.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e9ad3dfc4a4401a85363fb052b64beafdb8183f --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a46250ae1f16105a8568d74d07528d2acb7143a5ea2829723e2b15fa99bbfdc8 +size 66544 diff --git a/small_mlp_out/Mo0_N10_S-7_config.json b/small_mlp_out/Mo0_N10_S-7_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a583774cf1520007cd670616bfd78add6c4a94a7 --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-7_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -7, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N10_S-7" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N10_S-8.pt b/small_mlp_out/Mo0_N10_S-8.pt new file mode 100644 index 0000000000000000000000000000000000000000..de9c92c7d7e31d7a6437ea9abcaee96d0a4c7270 --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:972ac77cac6c62343db17b4d1dabba855053baa32a9688cb24b386527a251132 +size 66544 diff --git a/small_mlp_out/Mo0_N10_S-8_config.json b/small_mlp_out/Mo0_N10_S-8_config.json new file mode 100644 index 0000000000000000000000000000000000000000..da8f94ff49a54a665d48ef12c7a7d46e6cfa9cb8 --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-8_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -8, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N10_S-8" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N10_S-9.pt b/small_mlp_out/Mo0_N10_S-9.pt new file mode 100644 index 0000000000000000000000000000000000000000..e45d8661fb43847306eb0c700f086b9864365f6a --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55246d9b098836432ed8012c13a348505fc5504fef4776f84fce94b3f98c4a2f +size 66544 diff --git a/small_mlp_out/Mo0_N10_S-9_config.json b/small_mlp_out/Mo0_N10_S-9_config.json new file mode 100644 index 0000000000000000000000000000000000000000..95f391decef4353d6ae10ce50dd47e8e0fff31c1 --- /dev/null +++ b/small_mlp_out/Mo0_N10_S-9_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -9, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N10_S-9" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N10_S0.pt b/small_mlp_out/Mo0_N10_S0.pt new file mode 100644 index 0000000000000000000000000000000000000000..06125f462d0263192d7bd5e6ad2202a89f5fcdfe --- /dev/null +++ b/small_mlp_out/Mo0_N10_S0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dc4108bbf165d7a3f7eca1a1e8ad0d9964e10ac968428cbc878cd2acf1dbc26 +size 66536 diff --git a/small_mlp_out/Mo0_N10_S0_config.json b/small_mlp_out/Mo0_N10_S0_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ef7a7e8e1262042f0aed0d4896eba99f413ed90 --- /dev/null +++ b/small_mlp_out/Mo0_N10_S0_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": 0, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N10_S0" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N10_S1.pt b/small_mlp_out/Mo0_N10_S1.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbffb5399fc006ea2662ea16cf50fff10ecfcdf5 --- /dev/null +++ b/small_mlp_out/Mo0_N10_S1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10e66a513465bebcb6dce048f21410bd6a2a4067584308f24d4daba2c9432e71 +size 66536 diff --git a/small_mlp_out/Mo0_N10_S1_config.json b/small_mlp_out/Mo0_N10_S1_config.json new file mode 100644 index 0000000000000000000000000000000000000000..612f8b9364fc01a927ffff9943b766d890e6a1f2 --- /dev/null +++ b/small_mlp_out/Mo0_N10_S1_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": 1, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N10_S1" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N10_S2.pt b/small_mlp_out/Mo0_N10_S2.pt new file mode 100644 index 0000000000000000000000000000000000000000..c09713e457a021961d04a4a25e1de5d525cfa39d --- /dev/null +++ b/small_mlp_out/Mo0_N10_S2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdd85acc5f3f8b071c4c83f35594133e1074d4af6732a036dd79c41efc41aea3 +size 66536 diff --git a/small_mlp_out/Mo0_N10_S2_config.json b/small_mlp_out/Mo0_N10_S2_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1d6ad3349861265c76eb43653e735f53354ba3da --- /dev/null +++ b/small_mlp_out/Mo0_N10_S2_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": 2, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N10_S2" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N30_S-1.pt b/small_mlp_out/Mo0_N30_S-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..e28527eca4d13a916ffb34d78b453e9c6be83949 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfd41d42403d49daa12aaf310f980973b4bbcb1de528352a78be1285f307ff85 +size 189488 diff --git a/small_mlp_out/Mo0_N30_S-10.pt b/small_mlp_out/Mo0_N30_S-10.pt new file mode 100644 index 0000000000000000000000000000000000000000..52b8f91d10447afa1bb0e261dce3e00a28bf2e9c --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a71e085a5fe9150bde61878ee9928f28c413e56b6b050ee0101d7d01c428466 +size 189496 diff --git a/small_mlp_out/Mo0_N30_S-10_config.json b/small_mlp_out/Mo0_N30_S-10_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c7f6c05118a31601f823ae800b9f4af1ef2cfc84 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-10_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -10, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N30_S-10" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N30_S-1_config.json b/small_mlp_out/Mo0_N30_S-1_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e23f0c87a7edc48fee3cdc21077f2a2e605d5583 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-1_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -1, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N30_S-1" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N30_S-2.pt b/small_mlp_out/Mo0_N30_S-2.pt new file mode 100644 index 0000000000000000000000000000000000000000..efd7a935b434f45cc2a9073f9af8f44b588ba817 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08fa0c288d36c23878e28a33698cd7826da569fcb4891ee31ba2c152ad365b9b +size 189488 diff --git a/small_mlp_out/Mo0_N30_S-2_config.json b/small_mlp_out/Mo0_N30_S-2_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5cabd09e6b3281559b5cd6ba4cc3277d64402473 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-2_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -2, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N30_S-2" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N30_S-3.pt b/small_mlp_out/Mo0_N30_S-3.pt new file mode 100644 index 0000000000000000000000000000000000000000..7488c7054cd3706f23d04a60107a18d53eeea4bf --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea1e4d57073cf941664981cc94cea6f43196c9ae058f1dce14b099b45d40bf7c +size 189488 diff --git a/small_mlp_out/Mo0_N30_S-3_config.json b/small_mlp_out/Mo0_N30_S-3_config.json new file mode 100644 index 0000000000000000000000000000000000000000..af2782c4b976dbe6aaa2d8298bb4d76ab73050cb --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-3_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -3, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N30_S-3" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N30_S-4.pt b/small_mlp_out/Mo0_N30_S-4.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9aebf0f313714bd372a80f641be099cf65069fb --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73ccaa416995bac6c910fa7882202bc07c2479236135d9a52c97b9434c7111c9 +size 189488 diff --git a/small_mlp_out/Mo0_N30_S-4_config.json b/small_mlp_out/Mo0_N30_S-4_config.json new file mode 100644 index 0000000000000000000000000000000000000000..343853087e5341a05dfb21f9afcb323d64b9b4e8 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-4_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -4, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N30_S-4" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N30_S-5.pt b/small_mlp_out/Mo0_N30_S-5.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ba3d81850d8374e0ca3ec8b05f02f5f5b84f4fe --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8b7cf9076021e50a85168f97dbb03d7203423421365a025501a364b3c05659e +size 189488 diff --git a/small_mlp_out/Mo0_N30_S-5_config.json b/small_mlp_out/Mo0_N30_S-5_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ff6a212d818c7c7d1bda164a4a8cdf201bdc06b2 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-5_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -5, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N30_S-5" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N30_S-6.pt b/small_mlp_out/Mo0_N30_S-6.pt new file mode 100644 index 0000000000000000000000000000000000000000..03e87124296a517c34be097935d357caf9541512 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2970b40e439c10e90b2d17a22898122c94faca0b107ff204b329234a3f1210fe +size 189488 diff --git a/small_mlp_out/Mo0_N30_S-6_config.json b/small_mlp_out/Mo0_N30_S-6_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e0fae0fb5836fae6fdecf24f0889c03d08836106 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-6_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -6, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N30_S-6" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N30_S-7.pt b/small_mlp_out/Mo0_N30_S-7.pt new file mode 100644 index 0000000000000000000000000000000000000000..4171682792c10948259d55e882b8b95838a835d1 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4322ab097b24adfbc4f61a73f265169817c88b7b451d408419927a1e1446efd5 +size 189488 diff --git a/small_mlp_out/Mo0_N30_S-7_config.json b/small_mlp_out/Mo0_N30_S-7_config.json new file mode 100644 index 0000000000000000000000000000000000000000..dcf5163c4bbfe758a04404a9e48c36c3dc29d5c8 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-7_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -7, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N30_S-7" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N30_S-8.pt b/small_mlp_out/Mo0_N30_S-8.pt new file mode 100644 index 0000000000000000000000000000000000000000..3059ce36f2d7a799cf1f43e35a8f9f7d933e2947 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85afcf08eb4b8fea4f6f5e6bfe220f3815ef6a4799dda1d746e4db44184be938 +size 189488 diff --git a/small_mlp_out/Mo0_N30_S-8_config.json b/small_mlp_out/Mo0_N30_S-8_config.json new file mode 100644 index 0000000000000000000000000000000000000000..682b7f90d98ee52003591575dd7336601918e08a --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-8_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -8, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N30_S-8" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N30_S-9.pt b/small_mlp_out/Mo0_N30_S-9.pt new file mode 100644 index 0000000000000000000000000000000000000000..46afb907d07d7069df9f8163daf1a7d2df7d3fe5 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dc244d7d427e24ff72ea5b183f9f23cdda5f891d5777b05fbe162a8ed07fa47 +size 189488 diff --git a/small_mlp_out/Mo0_N30_S-9_config.json b/small_mlp_out/Mo0_N30_S-9_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6821f6a15bade7ab2b4d4e105ba5d7a6128dc36 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S-9_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": -9, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N30_S-9" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N30_S0.pt b/small_mlp_out/Mo0_N30_S0.pt new file mode 100644 index 0000000000000000000000000000000000000000..7bbc1a3ff8ee9eecff0ba7f33ffda81af9e966cc --- /dev/null +++ b/small_mlp_out/Mo0_N30_S0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e62e1bd82f0a2e724141e01b3e1d99e9ad34f091c174b2c8d09226a09723970 +size 189480 diff --git a/small_mlp_out/Mo0_N30_S0_config.json b/small_mlp_out/Mo0_N30_S0_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4fb34a7049e84014ade40ec066fba3bd7998b22d --- /dev/null +++ b/small_mlp_out/Mo0_N30_S0_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": 0, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N30_S0" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N30_S1.pt b/small_mlp_out/Mo0_N30_S1.pt new file mode 100644 index 0000000000000000000000000000000000000000..a34c989505f9a8ac1dd7bf6c2f04c6e872d6a695 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:951b06500e3e8ab90311feee24120235b15a81ee05b09f5fb04d3667488c5bc4 +size 189480 diff --git a/small_mlp_out/Mo0_N30_S1_config.json b/small_mlp_out/Mo0_N30_S1_config.json new file mode 100644 index 0000000000000000000000000000000000000000..248e5fbb1b4dcda3b3ea2477dddcdb96ccf6de8e --- /dev/null +++ b/small_mlp_out/Mo0_N30_S1_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": 1, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N30_S1" +} \ No newline at end of file diff --git a/small_mlp_out/Mo0_N30_S2.pt b/small_mlp_out/Mo0_N30_S2.pt new file mode 100644 index 0000000000000000000000000000000000000000..79d527a6eaf6c4ddc82dc5b8713f1da3662d9786 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:658b4dff10b285f5719ac06324c7a1f07dd6f96a6ad37f187ac4215e99717738 +size 189480 diff --git a/small_mlp_out/Mo0_N30_S2_config.json b/small_mlp_out/Mo0_N30_S2_config.json new file mode 100644 index 0000000000000000000000000000000000000000..09237128dd5e43fc146749f5fe0156f9c878f921 --- /dev/null +++ b/small_mlp_out/Mo0_N30_S2_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 0, + "l1_exp": 2, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo0_N30_S2" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N10_S-1.pt b/small_mlp_out/Mo1_N10_S-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d22f623bcf82408309b86f8a4bbc309b5bea846 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dddccde8cb31e848de0c7b8c369422140cdf1daba004912283313b03b6a47706 +size 66544 diff --git a/small_mlp_out/Mo1_N10_S-10.pt b/small_mlp_out/Mo1_N10_S-10.pt new file mode 100644 index 0000000000000000000000000000000000000000..231d8b33d84ba6a4e59f1ec755319323c51482e9 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:983e2e04756a1101bf3fd76f6ae5845e469785a72c7f6e87191a3aeb0ff0ddd3 +size 66552 diff --git a/small_mlp_out/Mo1_N10_S-10_config.json b/small_mlp_out/Mo1_N10_S-10_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3cb8d354b0227c45c5bf4d65db5537dbedbf30a1 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-10_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -10, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N10_S-10" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N10_S-1_config.json b/small_mlp_out/Mo1_N10_S-1_config.json new file mode 100644 index 0000000000000000000000000000000000000000..aec31016513a9d91eacd42d8e5f8996186a1f05b --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-1_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -1, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N10_S-1" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N10_S-2.pt b/small_mlp_out/Mo1_N10_S-2.pt new file mode 100644 index 0000000000000000000000000000000000000000..088a37e4ca8d5407d1abcdcbc5932bee148c82d6 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98a1852ccf168316a0b180bf038b6fcfceb52c2f7817479c9a514f017f1c8767 +size 66544 diff --git a/small_mlp_out/Mo1_N10_S-2_config.json b/small_mlp_out/Mo1_N10_S-2_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1eff9980a7948cb9b0e012f828a1f92f1f0a7d3 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-2_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -2, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N10_S-2" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N10_S-3.pt b/small_mlp_out/Mo1_N10_S-3.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c60dc60eacf63f53826e82030ab6ecdf904095b --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1468eeaa02da6c0481764cdb2a00e240ca4578fcabd8021e6420d5cf061d71b8 +size 66544 diff --git a/small_mlp_out/Mo1_N10_S-3_config.json b/small_mlp_out/Mo1_N10_S-3_config.json new file mode 100644 index 0000000000000000000000000000000000000000..441bcb17ebb13373069040fa1af58297d037cb05 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-3_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -3, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N10_S-3" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N10_S-4.pt b/small_mlp_out/Mo1_N10_S-4.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a36902324f72e255586a0cc58e1be3aaa2786c9 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d7ffbda8d1c6e94ea44d93e75499357ad4c8ba046eadef30d733939bc17fa12 +size 66544 diff --git a/small_mlp_out/Mo1_N10_S-4_config.json b/small_mlp_out/Mo1_N10_S-4_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0f865deb0424a1ad5636f7764adab51944540412 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-4_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -4, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N10_S-4" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N10_S-5.pt b/small_mlp_out/Mo1_N10_S-5.pt new file mode 100644 index 0000000000000000000000000000000000000000..44f8773c1915f4248df6aa6be1290c5a0526ed0c --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:985ca9fd165527c460710382a8d170f3f3153eb46e4bef70bb28ebaef2bb5858 +size 66544 diff --git a/small_mlp_out/Mo1_N10_S-5_config.json b/small_mlp_out/Mo1_N10_S-5_config.json new file mode 100644 index 0000000000000000000000000000000000000000..01e3ba7ab1ed3df5d00232b73a1569465ae98832 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-5_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -5, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N10_S-5" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N10_S-6.pt b/small_mlp_out/Mo1_N10_S-6.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f707786cf02f0d632983eee2ee37c931abe354c --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88e9d726d38cf35ac2db3b4044e40eafab7380d89d75111ebbc8a0254ecb0e96 +size 66544 diff --git a/small_mlp_out/Mo1_N10_S-6_config.json b/small_mlp_out/Mo1_N10_S-6_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2193a12721690e48872d68dd08b42c5ed455ca00 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-6_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -6, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N10_S-6" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N10_S-7.pt b/small_mlp_out/Mo1_N10_S-7.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa9b845f3014c9df5bf05c02a1730b6dcee3edc5 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a19c232e307e01ba040decb78c631d4bad4adf8fccfd9666534186458e137649 +size 66544 diff --git a/small_mlp_out/Mo1_N10_S-7_config.json b/small_mlp_out/Mo1_N10_S-7_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5cd50c944364e1089611b655371731199b82f48a --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-7_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -7, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N10_S-7" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N10_S-8.pt b/small_mlp_out/Mo1_N10_S-8.pt new file mode 100644 index 0000000000000000000000000000000000000000..70a4b01a98cfc4fe5c0b6dbefadbeb912b9c7e14 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf4c984221f9d0b5f2bb1eed0cdef605f350db2e46b5c3c0308b7b2a242d0373 +size 66544 diff --git a/small_mlp_out/Mo1_N10_S-8_config.json b/small_mlp_out/Mo1_N10_S-8_config.json new file mode 100644 index 0000000000000000000000000000000000000000..02f3c6e800747c3f38482439f28c91220241abab --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-8_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -8, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N10_S-8" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N10_S-9.pt b/small_mlp_out/Mo1_N10_S-9.pt new file mode 100644 index 0000000000000000000000000000000000000000..4113c233e152df0696ec5e10e944c69c827a545b --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a5fb2ed0cf674fd73ad917ad2c8e16f479d122063d46e1135cafd1602d4d035 +size 66544 diff --git a/small_mlp_out/Mo1_N10_S-9_config.json b/small_mlp_out/Mo1_N10_S-9_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3146267fd8e3b2c25eeb3d5c6e10bced8ad98c73 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S-9_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -9, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N10_S-9" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N10_S0.pt b/small_mlp_out/Mo1_N10_S0.pt new file mode 100644 index 0000000000000000000000000000000000000000..71742bd94de5af89fa5f1d9cbbd90098097cc971 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3364895746a1060d0f6fe2a648841fad21321f2d78186b17ff5f2d47a1d8f451 +size 66536 diff --git a/small_mlp_out/Mo1_N10_S0_config.json b/small_mlp_out/Mo1_N10_S0_config.json new file mode 100644 index 0000000000000000000000000000000000000000..33214137855a3e9fa0babb4931785d856fdb553b --- /dev/null +++ b/small_mlp_out/Mo1_N10_S0_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": 0, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N10_S0" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N10_S1.pt b/small_mlp_out/Mo1_N10_S1.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef5afa1696c52d25ce33eaa056cb7c19cf6dbd12 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5aa388f3a7f98499f5cca01a5c9a242550c8b8dfb21b4e2842014c26297b865 +size 66536 diff --git a/small_mlp_out/Mo1_N10_S1_config.json b/small_mlp_out/Mo1_N10_S1_config.json new file mode 100644 index 0000000000000000000000000000000000000000..95a475ac91454846c586715886655118f45aef91 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S1_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": 1, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N10_S1" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N10_S2.pt b/small_mlp_out/Mo1_N10_S2.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d7ad5700eca8e08ae97cfeaa9fc836054c4bd98 --- /dev/null +++ b/small_mlp_out/Mo1_N10_S2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:357023ecdc99232256dc2a432b611e05a1765d00c461c848ee671fdcb9a5ef1d +size 66536 diff --git a/small_mlp_out/Mo1_N10_S2_config.json b/small_mlp_out/Mo1_N10_S2_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4d72f8d51a5f8569dd8da91f81fdeae3aa4ca58e --- /dev/null +++ b/small_mlp_out/Mo1_N10_S2_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 10, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": 2, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N10_S2" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N30_S-1.pt b/small_mlp_out/Mo1_N30_S-1.pt new file mode 100644 index 0000000000000000000000000000000000000000..67220af7f015b39e41157360d68d9c254598b725 --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73f7df36d959f0153861808177a799a6efb7e4129a75c4cc07c5aba8e7e6d979 +size 189488 diff --git a/small_mlp_out/Mo1_N30_S-10.pt b/small_mlp_out/Mo1_N30_S-10.pt new file mode 100644 index 0000000000000000000000000000000000000000..5dc55e793db6ec4803793cb721437defbf2734f1 --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-10.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d88ccc38bfe4e6bf8da860c13e64d223a52f605a5150c6f56bd0736d09b77814 +size 189496 diff --git a/small_mlp_out/Mo1_N30_S-10_config.json b/small_mlp_out/Mo1_N30_S-10_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3444a903e3a4bb63981b492644b43412377d0145 --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-10_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -10, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N30_S-10" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N30_S-1_config.json b/small_mlp_out/Mo1_N30_S-1_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5d6283017caaf4f056356b9a03e65c015b3b9479 --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-1_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -1, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N30_S-1" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N30_S-2.pt b/small_mlp_out/Mo1_N30_S-2.pt new file mode 100644 index 0000000000000000000000000000000000000000..f175e9cd64ab30b373c6a9c956bc55a27bcb130d --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee8c499fb7a2bc70d1b90b3368694bb624654dbd1fc1691e9d5a2179c9c147f7 +size 189488 diff --git a/small_mlp_out/Mo1_N30_S-2_config.json b/small_mlp_out/Mo1_N30_S-2_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0c138924e3a5b42c11bdbe0cb27a19ace31fc505 --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-2_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -2, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N30_S-2" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N30_S-3.pt b/small_mlp_out/Mo1_N30_S-3.pt new file mode 100644 index 0000000000000000000000000000000000000000..4da2307c7ca62a243253c94369493c9d4d846dce --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03364fd170abeaf49ff18aa0043fa44195ac45c38b22f654a36fb49b405d639d +size 189488 diff --git a/small_mlp_out/Mo1_N30_S-3_config.json b/small_mlp_out/Mo1_N30_S-3_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6becbcb1c8b65a1c0c6f1b16ab18bd7ef44c354c --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-3_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -3, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N30_S-3" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N30_S-4.pt b/small_mlp_out/Mo1_N30_S-4.pt new file mode 100644 index 0000000000000000000000000000000000000000..48f041d1d16b395eb15e9fd927057a1ebd122951 --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-4.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aea13990b44dca207c132d9e7a8d0a790b6231cb6a54207fb13cb88989632ed +size 189488 diff --git a/small_mlp_out/Mo1_N30_S-4_config.json b/small_mlp_out/Mo1_N30_S-4_config.json new file mode 100644 index 0000000000000000000000000000000000000000..377551d2e1492eed14909b411ba3fc227b385941 --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-4_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -4, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N30_S-4" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N30_S-5.pt b/small_mlp_out/Mo1_N30_S-5.pt new file mode 100644 index 0000000000000000000000000000000000000000..89f453b08d0830e93c139fabd136300680641b2f --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-5.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcc3ac95577813ab9885d84fddcaff8d473345d7fa91191155e6c0b8d26824bb +size 189488 diff --git a/small_mlp_out/Mo1_N30_S-5_config.json b/small_mlp_out/Mo1_N30_S-5_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2342f973777a17a6f2b2c9f8fef038f91c38fa3 --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-5_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -5, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N30_S-5" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N30_S-6.pt b/small_mlp_out/Mo1_N30_S-6.pt new file mode 100644 index 0000000000000000000000000000000000000000..150980015579fbf1e86956304f23a8d68ee4d95f --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-6.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43c823f7cd1e096f5e866a4ab27a8d310a75d6b4c3bcacf8b8360ea8925aead5 +size 189488 diff --git a/small_mlp_out/Mo1_N30_S-6_config.json b/small_mlp_out/Mo1_N30_S-6_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1bd3be1393772c8707a1a12592e90ae6187c96f2 --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-6_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -6, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N30_S-6" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N30_S-7.pt b/small_mlp_out/Mo1_N30_S-7.pt new file mode 100644 index 0000000000000000000000000000000000000000..96df471e4437914452810478b7345750424bc70c --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-7.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:804b443ffd9f1e264d4713ab9044bccb4f4fdc09e225c40da80392bf01a98412 +size 189488 diff --git a/small_mlp_out/Mo1_N30_S-7_config.json b/small_mlp_out/Mo1_N30_S-7_config.json new file mode 100644 index 0000000000000000000000000000000000000000..78b68c9e9600c64a57e5706cdd730bae862d9080 --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-7_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -7, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N30_S-7" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N30_S-8.pt b/small_mlp_out/Mo1_N30_S-8.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f46d6ae58e6cab8b352fcd1534f63b6278403fa --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-8.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38d57bdb20428e382914e85e9227e40e39a9c2409fbd700d3b41dfdfade416be +size 189488 diff --git a/small_mlp_out/Mo1_N30_S-8_config.json b/small_mlp_out/Mo1_N30_S-8_config.json new file mode 100644 index 0000000000000000000000000000000000000000..feb8f8d3988498d03fd72f21d6b244b4b0118297 --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-8_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -8, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N30_S-8" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N30_S-9.pt b/small_mlp_out/Mo1_N30_S-9.pt new file mode 100644 index 0000000000000000000000000000000000000000..69cb42bae84ae14cbc8d37c1e926ccb50eabb61a --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-9.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2831a641c6e07f8e380422c353372d60e5c6f8aa7c7c8c35780fccdb2c16b83f +size 189488 diff --git a/small_mlp_out/Mo1_N30_S-9_config.json b/small_mlp_out/Mo1_N30_S-9_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e53afedc52f392a93b4cddd45210b1c4ced12c1 --- /dev/null +++ b/small_mlp_out/Mo1_N30_S-9_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": -9, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N30_S-9" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N30_S0.pt b/small_mlp_out/Mo1_N30_S0.pt new file mode 100644 index 0000000000000000000000000000000000000000..30c51c25e806d4b860592c294e6e30fd1a8c4f3f --- /dev/null +++ b/small_mlp_out/Mo1_N30_S0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e429a45a8aecdaad834ba62f5d672f49512e0cf42858a978c1ca715aab787ef +size 189480 diff --git a/small_mlp_out/Mo1_N30_S0_config.json b/small_mlp_out/Mo1_N30_S0_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e6f0522ac4c5219f91c4a68994a7e1e6b86f8dc7 --- /dev/null +++ b/small_mlp_out/Mo1_N30_S0_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": 0, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N30_S0" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N30_S1.pt b/small_mlp_out/Mo1_N30_S1.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e4ff8c18a5678888bf42e1543a408630f66a16e --- /dev/null +++ b/small_mlp_out/Mo1_N30_S1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7deba5f80e8ef5e01184649f1e5d48ef36b025029c7b3447f2015d3dfa5cfb83 +size 189480 diff --git a/small_mlp_out/Mo1_N30_S1_config.json b/small_mlp_out/Mo1_N30_S1_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b5a6c27711ec81bcd68825619d0d7138f462c0ec --- /dev/null +++ b/small_mlp_out/Mo1_N30_S1_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": 1, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N30_S1" +} \ No newline at end of file diff --git a/small_mlp_out/Mo1_N30_S2.pt b/small_mlp_out/Mo1_N30_S2.pt new file mode 100644 index 0000000000000000000000000000000000000000..152153565c6272c12a373ab27d02c4da7b9a2585 --- /dev/null +++ b/small_mlp_out/Mo1_N30_S2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a67370d19391c521b27d48ed392d55626c27b03635da5a421688e64942e983b4 +size 189480 diff --git a/small_mlp_out/Mo1_N30_S2_config.json b/small_mlp_out/Mo1_N30_S2_config.json new file mode 100644 index 0000000000000000000000000000000000000000..46e79a760406434949664bfa141f442506aa9cc4 --- /dev/null +++ b/small_mlp_out/Mo1_N30_S2_config.json @@ -0,0 +1,38 @@ +{ + "n_features": 30, + "d_model": 768, + "lr_exp": -10, + "disable_comet": false, + "per_neuron_reinit_interval": 0, + "reservoir_time_discount": 0.995, + "reinit_interval": 800, + "max_reinit_neurons": 5000, + "reservoir_size": 5000, + "n_piles": 292, + "log_interval": 200, + "reinit_input_norm": "target_scaled", + "reinit_input": "x", + "reinit_norm_alpha": 0.3, + "data_loc": "mlp_data", + "reinit_threshold": -6, + "scheduler": "wsd", + "layer_idx": 1, + "l1_exp": 2, + "neuron_reinit_percent": 0.85, + "beta1": 1, + "beta2": 4, + "reinit_target": "error", + "sparse_adam": false, + "run_template": "Mo{layer_idx}_N{n_features}_S{l1_exp}", + "project_name": "small_mlp_out", + "decoder_bias": true, + "l1_beta": 0.99, + "alt_sparsity_loss": "log", + "l1_ratio": 1, + "l1_p": 0, + "optimizer": "sparse_adam", + "model_type": "mlp_out", + "adam_beta1": 0.5, + "adam_beta2": 0.9375, + "run_name": "Mo1_N30_S2" +} \ No newline at end of file