eos runs @ 70%
Browse files- quantized/softmax0-15m-2023_08_26_00_08_49.pt +3 -0
- quantized/softmax1-15m-2023_08_25_11_47_04.pt +3 -0
- softmax0-15m-2023_12_11_10_05_14-70%/ckpt.pt +3 -0
- softmax0-15m-2023_12_11_10_05_14-70%/config.json +1 -0
- softmax0-15m-2023_12_11_10_05_14-70%/model.bin +3 -0
- softmax1-15m-2023_12_11_08_53_43-70%/ckpt.pt +3 -0
- softmax1-15m-2023_12_11_08_53_43-70%/config.json +1 -0
- softmax1-15m-2023_12_11_08_53_43-70%/model.bin +3 -0
quantized/softmax0-15m-2023_08_26_00_08_49.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37eab91fc3f052dd5af6d5c68e0623d6ac0af069b17331c9f5e8e0ff61483fe1
|
3 |
+
size 26519969
|
quantized/softmax1-15m-2023_08_25_11_47_04.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d3d6c4d868891de7f17e19ae2a9a75f4145158704fd295e3d5dd3b1e6f64706
|
3 |
+
size 26519969
|
softmax0-15m-2023_12_11_10_05_14-70%/ckpt.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ab92c37b609d074710a3dd504c962b066fdd632ca142318f3a82c3f26127bb0
|
3 |
+
size 183936376
|
softmax0-15m-2023_12_11_10_05_14-70%/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"out_dir": "out/softmax0-15m-2023_12_11_10_05_14", "eval_interval": 1000, "log_interval": 1, "eval_iters": 50, "eval_only": false, "always_save_checkpoint": true, "init_from": "scratch", "wandb_log": true, "wandb_project": "softmax1-tinystories", "wandb_run_name": "softmax0-15m-2023_12_11_10_05_14", "batch_size": 72, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "cuda", "dtype": "float16", "compile": false, "softmax1": false, "softmaxn_param": 1}
|
softmax0-15m-2023_12_11_10_05_14-70%/model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd7ff4f646287d0ba05d654085036eeb1dd3ab5df3faf6645d7784bf66e935cc
|
3 |
+
size 60816028
|
softmax1-15m-2023_12_11_08_53_43-70%/ckpt.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:820e839f7bc9f64ad9556735d03072d21480ffca0597923dfe4149ff78aaccfa
|
3 |
+
size 183936376
|
softmax1-15m-2023_12_11_08_53_43-70%/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"out_dir": "out/softmax1-15m-2023_12_11_08_53_43", "eval_interval": 1000, "log_interval": 1, "eval_iters": 50, "eval_only": false, "always_save_checkpoint": true, "init_from": "scratch", "wandb_log": true, "wandb_project": "softmax1-tinystories", "wandb_run_name": "softmax1-15m-2023_12_11_08_53_43", "batch_size": 72, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "cuda", "dtype": "float16", "compile": false, "softmax1": true, "softmaxn_param": 1}
|
softmax1-15m-2023_12_11_08_53_43-70%/model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d20a21519b532d08c6b68be9e80db3794f45fde724f45cb2693542175169d17
|
3 |
+
size 60816028
|