softmax0/1 eos@100k steps
Browse files
- softmax0-15m-2023_08_26_00_08_49/config.json +1 -1
- {softmax0-15m-2023_12_11_10_05_14-70% → softmax0-15m-2023_12_11_10_05_14-100k}/ckpt.pt +2 -2
- softmax0-15m-2023_12_11_10_05_14-100k/config.json +1 -0
- {softmax0-15m-2023_12_11_10_05_14-70% → softmax0-15m-2023_12_11_10_05_14-100k}/model.bin +1 -1
- softmax0-15m-2023_12_11_10_05_14-70%/config.json +0 -1
- softmax0-15m-2023_12_11_10_05_14-70k/ckpt.pt +3 -0
- softmax0-15m-2023_12_11_10_05_14-70k/config.json +1 -0
- softmax0-15m-2023_12_11_10_05_14-70k/model.bin +3 -0
- softmax1-15m-2023_08_25_11_47_04/config.json +1 -1
- softmax1-15m-2023_12_11_08_53_43-100k/ckpt.pt +3 -0
- softmax1-15m-2023_12_11_08_53_43-100k/config.json +1 -0
- softmax1-15m-2023_12_11_08_53_43-100k/model.bin +3 -0
- {softmax1-15m-2023_12_11_08_53_43-70% → softmax1-15m-2023_12_11_08_53_43-70k}/ckpt.pt +0 -0
- {softmax1-15m-2023_12_11_08_53_43-70% → softmax1-15m-2023_12_11_08_53_43-70k}/config.json +0 -0
- {softmax1-15m-2023_12_11_08_53_43-70% → softmax1-15m-2023_12_11_08_53_43-70k}/model.bin +0 -0
softmax0-15m-2023_08_26_00_08_49/config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"out_dir": "out/softmax0-15m-2023_08_26_00_08_49", "eval_interval": 1000, "log_interval":
|
|
|
1 |
+
{"out_dir": "out/softmax0-15m-2023_08_26_00_08_49", "eval_interval": 1000, "log_interval": 20, "eval_iters": 50, "eval_only": true, "always_save_checkpoint": false, "n_checkpoints": 1, "init_from": "resume", "wandb_log": false, "wandb_project": "softermax-eval", "wandb_run_name": "softmax0-15m-2023_08_26_00_08_49", "batch_size": 8, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "cpu", "dtype": "qint8", "compile": false, "softmax1": false, "softmaxn_param": 1}
|
{softmax0-15m-2023_12_11_10_05_14-70% → softmax0-15m-2023_12_11_10_05_14-100k}/ckpt.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c96f5cdfbacbe392fefc90701ee4a273c55568b8bd226cbaef8ae220c7a37120
|
3 |
+
size 183932925
|
softmax0-15m-2023_12_11_10_05_14-100k/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"out_dir": "out/softmax0-15m-2023_12_11_10_05_14-100%", "eval_interval": 1000, "log_interval": 20, "eval_iters": 50, "eval_only": true, "always_save_checkpoint": false, "init_from": "resume", "wandb_log": false, "wandb_project": "softermax-eval", "wandb_run_name": "softmax0-15m-2023_12_11_10_05_14-100%", "batch_size": 8, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "cpu", "dtype": "qint8", "compile": false, "softmax1": false, "softmaxn_param": 1}
|
{softmax0-15m-2023_12_11_10_05_14-70% → softmax0-15m-2023_12_11_10_05_14-100k}/model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 60816028
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a567de125ceeb2c3974359ac27f2d7b2e7b201899dfa6211f328db2055f4b7f3
|
3 |
size 60816028
|
softmax0-15m-2023_12_11_10_05_14-70%/config.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"out_dir": "out/softmax0-15m-2023_12_11_10_05_14", "eval_interval": 1000, "log_interval": 1, "eval_iters": 50, "eval_only": false, "always_save_checkpoint": true, "init_from": "scratch", "wandb_log": true, "wandb_project": "softmax1-tinystories", "wandb_run_name": "softmax0-15m-2023_12_11_10_05_14", "batch_size": 72, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "cuda", "dtype": "float16", "compile": false, "softmax1": false, "softmaxn_param": 1}
|
|
|
|
softmax0-15m-2023_12_11_10_05_14-70k/ckpt.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e567620f4a9c199b7f15878557fd04ac212f92c1d4f4bc0a6cf97fe87ca87479
|
3 |
+
size 62360491
|
softmax0-15m-2023_12_11_10_05_14-70k/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"out_dir": "out/softmax0-15m-2023_12_11_10_05_14-70%", "eval_interval": 1000, "log_interval": 20, "eval_iters": 50, "eval_only": true, "always_save_checkpoint": false, "init_from": "resume", "wandb_log": false, "wandb_project": "softermax-eval", "wandb_run_name": "softmax0-15m-2023_12_11_10_05_14-70%", "batch_size": 8, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "cpu", "dtype": "float16", "compile": false, "softmax1": false, "softmaxn_param": 1}
|
softmax0-15m-2023_12_11_10_05_14-70k/model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d39bbf312489db97d3fc723c5904256b4e737ddc039ffc393b494ba8f1fc17f
|
3 |
+
size 60816028
|
softmax1-15m-2023_08_25_11_47_04/config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"out_dir": "out/softmax1-15m-2023_08_25_11_47_04", "eval_interval": 1000, "log_interval":
|
|
|
1 |
+
{"out_dir": "out/softmax1-15m-2023_08_25_11_47_04", "eval_interval": 1000, "log_interval": 20, "eval_iters": 50, "eval_only": true, "always_save_checkpoint": false, "n_checkpoints": 1, "init_from": "resume", "wandb_log": false, "wandb_project": "softermax-eval", "wandb_run_name": "softmax1-15m-2023_08_25_11_47_04", "batch_size": 32, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "cpu", "dtype": "qint8", "compile": false, "softmax1": false, "softmaxn_param": 1}
|
softmax1-15m-2023_12_11_08_53_43-100k/ckpt.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31cded6211f85a86bc66e8521b94741bb5ba8970ea1d398765eefea0b4d367eb
|
3 |
+
size 183932925
|
softmax1-15m-2023_12_11_08_53_43-100k/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"out_dir": "out/softmax1-15m-2023_12_11_08_53_43-100%", "eval_interval": 1000, "log_interval": 20, "eval_iters": 50, "eval_only": true, "always_save_checkpoint": false, "n_checkpoints": 1, "init_from": "resume", "wandb_log": false, "wandb_project": "softermax-eval", "wandb_run_name": "softmax1-15m-2023_12_11_08_53_43-100%", "batch_size": 8, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "cpu", "dtype": "qint8", "compile": false, "softmax1": false, "softmaxn_param": 1}
|
softmax1-15m-2023_12_11_08_53_43-100k/model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6485438e40d18614eeedfa67e114376d233dcded504c50c3508c344fc7d27e43
|
3 |
+
size 60816028
|
{softmax1-15m-2023_12_11_08_53_43-70% → softmax1-15m-2023_12_11_08_53_43-70k}/ckpt.pt
RENAMED
File without changes
|
{softmax1-15m-2023_12_11_08_53_43-70% → softmax1-15m-2023_12_11_08_53_43-70k}/config.json
RENAMED
File without changes
|
{softmax1-15m-2023_12_11_08_53_43-70% → softmax1-15m-2023_12_11_08_53_43-70k}/model.bin
RENAMED
File without changes
|