checkpoints for 42M models
- {softmax1-42m-2023_12_13_10_37_41-43k → softmax0-42m-2023_12_14_19_09_34/iter_1000}/ckpt.pt +2 -2
- softmax0-42m-2023_12_14_19_09_34/iter_1000/config.json +1 -0
- softmax0-42m-2023_12_14_19_09_34/iter_16000/ckpt.pt +3 -0
- softmax0-42m-2023_12_14_19_09_34/iter_2000/ckpt.pt +3 -0
- softmax0-42m-2023_12_14_19_09_34/iter_32000/ckpt.pt +3 -0
- softmax0-42m-2023_12_14_19_09_34/iter_32000/config.json +1 -0
- softmax0-42m-2023_12_14_19_09_34/iter_4000/ckpt.pt +3 -0
- softmax0-42m-2023_12_14_19_09_34/iter_70000/ckpt.pt +3 -0
- softmax0-42m-2023_12_14_19_09_34/iter_70000/config.json +1 -0
- softmax0-42m-2023_12_14_19_09_34/iter_8000/ckpt.pt +3 -0
- softmax1-42m-2023_12_13_10_37_41-43k/config.json +0 -1
- softmax1-42m-2023_12_13_10_37_41-43k/iter_60000/ckpt.pt +3 -0
- softmax1-42m-2023_12_13_10_37_41-43k/{model.bin → iter_60000/model.bin} +1 -1
- softmax1-42m-2023_12_13_10_37_41-43k/iter_70000/ckpt.pt +3 -0
- softmax1-42m-2023_12_13_10_37_41-43k/iter_70000/config.json +1 -0
- softmax1-42m-2023_12_13_10_37_41-43k/iter_70000/model.bin +3 -0
{softmax1-42m-2023_12_13_10_37_41-43k → softmax0-42m-2023_12_14_19_09_34/iter_1000}/ckpt.pt
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:fc89b2a8c84807fab8d73612687a40d442f588b7f99acc46effcb53361e99323
+size 502454262
softmax0-42m-2023_12_14_19_09_34/iter_1000/config.json
ADDED
@@ -0,0 +1 @@
+{"out_dir": "out/softmax0-42m-2023_12_14_19_09_34/iter_1000", "eval_interval": 1000, "log_interval": 20, "eval_iters": 20, "eval_only": true, "always_save_checkpoint": false, "n_checkpoints": 1, "init_from": "resume", "wandb_log": false, "wandb_project": "softermax-eval", "wandb_run_name": "softmax0-42m-2023_12_14_19_09_34/iter_1000", "batch_size": 32, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "total_tokens": 1000000000.0, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "cpu", "dtype": "qint8", "compile": false, "softmax1": true, "softmaxn_param": 1}
softmax0-42m-2023_12_14_19_09_34/iter_16000/ckpt.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bfcbeaaa27b989e32bc6919ca60dcf538a0a896c477fa1d49cc82312ea77ec1
+size 502454262
softmax0-42m-2023_12_14_19_09_34/iter_2000/ckpt.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33f2ed80dd0e5820056805cea431dae5b746d91702f7ae935d0d296f36685f2f
+size 502454262
softmax0-42m-2023_12_14_19_09_34/iter_32000/ckpt.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad9b2167ea758862d508d4c0602e7963a7ffc3dbb8d550b3a87835551a484bbd
+size 502454262
softmax0-42m-2023_12_14_19_09_34/iter_32000/config.json
ADDED
@@ -0,0 +1 @@
+{"out_dir": "out/softmax0-42m-2023_12_14_19_09_34/iter_32000", "eval_interval": 1000, "log_interval": 20, "eval_iters": 20, "eval_only": true, "always_save_checkpoint": false, "n_checkpoints": 1, "init_from": "resume", "wandb_log": false, "wandb_project": "softermax-eval", "wandb_run_name": "softmax0-42m-2023_12_14_19_09_34/iter_32000", "batch_size": 32, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "total_tokens": 1000000000.0, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "cpu", "dtype": "qint8", "compile": false, "softmax1": true, "softmaxn_param": 1}
softmax0-42m-2023_12_14_19_09_34/iter_4000/ckpt.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6716df13a50ff85972f47a9643dd55d06b5dbf233bc9972069cfbdc2701747bf
+size 502454262
softmax0-42m-2023_12_14_19_09_34/iter_70000/ckpt.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a73fb580a49f10616f7098dfb16a0ab2c31d75e8fafa2b7e16fd1e322ac1f0f
+size 502454262
softmax0-42m-2023_12_14_19_09_34/iter_70000/config.json
ADDED
@@ -0,0 +1 @@
+{"out_dir": "out/softmax0-42m-2023_12_14_19_09_34/iter_70000", "eval_interval": 1000, "log_interval": 20, "eval_iters": 20, "eval_only": true, "always_save_checkpoint": false, "n_checkpoints": 1, "init_from": "resume", "wandb_log": false, "wandb_project": "softermax-eval", "wandb_run_name": "softmax0-42m-2023_12_14_19_09_34/iter_70000", "batch_size": 32, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "total_tokens": 1000000000.0, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "cpu", "dtype": "qint8", "compile": false, "softmax1": true, "softmaxn_param": 1}
softmax0-42m-2023_12_14_19_09_34/iter_8000/ckpt.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37c2423f70a2d442f9622eb83a062bc1e2167a076c72a355f4d085e36cc85f29
+size 502454262
softmax1-42m-2023_12_13_10_37_41-43k/config.json
DELETED
@@ -1 +0,0 @@
-{"out_dir": "out/softmax1-42m-2023_12_13_10_37_41-43k", "eval_interval": 1000, "log_interval": 20, "eval_iters": 10, "eval_only": true, "always_save_checkpoint": false, "n_checkpoints": 1, "checkpoint_interval": 20000, "init_from": "resume", "wandb_log": false, "wandb_project": "softermax-eval", "wandb_run_name": "softmax1-42m-2023_12_13_10_37_41-43k", "batch_size": 32, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "mps", "dtype": "float16", "compile": false, "softmax1": true, "softmaxn_param": 1}
softmax1-42m-2023_12_13_10_37_41-43k/iter_60000/ckpt.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4c9c2d1152a2c8517aa2dce9b4e68b6435c694ef3340b7d71d6d86203798148
+size 502455286
softmax1-42m-2023_12_13_10_37_41-43k/{model.bin → iter_60000/model.bin}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0904ddb79cfeca18eb3ab996de20775aec1aaf44315a1243fdb00efe02d57b02
 size 166823964
softmax1-42m-2023_12_13_10_37_41-43k/iter_70000/ckpt.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:deb1eaf42415d35a29d2a0b0c14d863a79ca5aad60360b7d5d9c635f5bc58a5f
+size 502455286
softmax1-42m-2023_12_13_10_37_41-43k/iter_70000/config.json
ADDED
@@ -0,0 +1 @@
+{"out_dir": "out/softmax1-42m-2023_12_13_10_37_41-43k/iter_70000", "eval_interval": 1000, "log_interval": 20, "eval_iters": 20, "eval_only": true, "always_save_checkpoint": false, "n_checkpoints": 1, "init_from": "resume", "wandb_log": false, "wandb_project": "softermax-eval", "wandb_run_name": "softmax1-42m-2023_12_13_10_37_41-43k/iter_70000", "batch_size": 32, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "total_tokens": 1000000000.0, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "cpu", "dtype": "qint8", "compile": false, "softmax1": true, "softmaxn_param": 1}
softmax1-42m-2023_12_13_10_37_41-43k/iter_70000/model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e40a1ad5f910fa8049c3f02ac14c55be94170b7dcfa731a3e1ffba5482923597
+size 166823964
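
Note: every ckpt.pt and model.bin entry above is a Git LFS pointer, so the actual weights must be pulled with git lfs before loading. Below is a minimal sketch (Python) of reading one iter_70000 checkpoint together with its sidecar config.json; it only assumes the standard json/torch APIs, and the internal key layout of ckpt.pt is not shown in this commit, so it is an assumption to verify by inspection.

import json
import torch

# Fetch the LFS objects for one checkpoint directory first, e.g.:
#   git lfs pull --include="softmax0-42m-2023_12_14_19_09_34/iter_70000/*"
ckpt_path = "softmax0-42m-2023_12_14_19_09_34/iter_70000/ckpt.pt"
config_path = "softmax0-42m-2023_12_14_19_09_34/iter_70000/config.json"

# config.json is a single JSON object with the eval settings shown above
# (dim=288, n_layers=6, n_heads=6, softmax1=true, softmaxn_param=1, ...).
with open(config_path) as f:
    config = json.load(f)
print(config["dim"], config["n_layers"], config["softmaxn_param"])

# Assumption: ckpt.pt is a torch.save'd object (the diff only shows the LFS
# pointer); with torch >= 2.6 you may need weights_only=False for pickled
# training state. Inspect the keys before wiring it into a model definition.
checkpoint = torch.load(ckpt_path, map_location="cpu")
print(sorted(checkpoint.keys()) if isinstance(checkpoint, dict) else type(checkpoint))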