photonmz
/

softmax1-tinystories

Model card Files Files and versions Community

photonmz committed on Dec 12, 2023

Commit

632848c

1 Parent(s): d7d6155

eos runs @ 70%

Browse files

Files changed (8) hide show

quantized/softmax0-15m-2023_08_26_00_08_49.pt +3 -0
quantized/softmax1-15m-2023_08_25_11_47_04.pt +3 -0
softmax0-15m-2023_12_11_10_05_14-70%/ckpt.pt +3 -0
softmax0-15m-2023_12_11_10_05_14-70%/config.json +1 -0
softmax0-15m-2023_12_11_10_05_14-70%/model.bin +3 -0
softmax1-15m-2023_12_11_08_53_43-70%/ckpt.pt +3 -0
softmax1-15m-2023_12_11_08_53_43-70%/config.json +1 -0
softmax1-15m-2023_12_11_08_53_43-70%/model.bin +3 -0

quantized/softmax0-15m-2023_08_26_00_08_49.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37eab91fc3f052dd5af6d5c68e0623d6ac0af069b17331c9f5e8e0ff61483fe1
+size 26519969

quantized/softmax1-15m-2023_08_25_11_47_04.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d3d6c4d868891de7f17e19ae2a9a75f4145158704fd295e3d5dd3b1e6f64706
+size 26519969

softmax0-15m-2023_12_11_10_05_14-70%/ckpt.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ab92c37b609d074710a3dd504c962b066fdd632ca142318f3a82c3f26127bb0
+size 183936376

softmax0-15m-2023_12_11_10_05_14-70%/config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"out_dir": "out/softmax0-15m-2023_12_11_10_05_14", "eval_interval": 1000, "log_interval": 1, "eval_iters": 50, "eval_only": false, "always_save_checkpoint": true, "init_from": "scratch", "wandb_log": true, "wandb_project": "softmax1-tinystories", "wandb_run_name": "softmax0-15m-2023_12_11_10_05_14", "batch_size": 72, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "cuda", "dtype": "float16", "compile": false, "softmax1": false, "softmaxn_param": 1}

softmax0-15m-2023_12_11_10_05_14-70%/model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dd7ff4f646287d0ba05d654085036eeb1dd3ab5df3faf6645d7784bf66e935cc
+size 60816028

softmax1-15m-2023_12_11_08_53_43-70%/ckpt.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:820e839f7bc9f64ad9556735d03072d21480ffca0597923dfe4149ff78aaccfa
+size 183936376

softmax1-15m-2023_12_11_08_53_43-70%/config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"out_dir": "out/softmax1-15m-2023_12_11_08_53_43", "eval_interval": 1000, "log_interval": 1, "eval_iters": 50, "eval_only": false, "always_save_checkpoint": true, "init_from": "scratch", "wandb_log": true, "wandb_project": "softmax1-tinystories", "wandb_run_name": "softmax1-15m-2023_12_11_08_53_43", "batch_size": 72, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "cuda", "dtype": "float16", "compile": false, "softmax1": true, "softmaxn_param": 1}

softmax1-15m-2023_12_11_08_53_43-70%/model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8d20a21519b532d08c6b68be9e80db3794f45fde724f45cb2693542175169d17
+size 60816028