Upload folder using huggingface_hub
Browse files- checkpoints/embed_0.pt +3 -0
- checkpoints/embed_1024.pt +3 -0
- checkpoints/embed_1536.pt +3 -0
- checkpoints/embed_2048.pt +3 -0
- checkpoints/embed_512.pt +3 -0
- checkpoints/torso[0].attn_0.pt +3 -0
- checkpoints/torso[0].attn_1024.pt +3 -0
- checkpoints/torso[0].attn_1536.pt +3 -0
- checkpoints/torso[0].attn_2048.pt +3 -0
- checkpoints/torso[0].attn_512.pt +3 -0
- checkpoints/torso[0].res_final_0.pt +3 -0
- checkpoints/torso[0].res_final_1024.pt +3 -0
- checkpoints/torso[0].res_final_1536.pt +3 -0
- checkpoints/torso[0].res_final_2048.pt +3 -0
- checkpoints/torso[0].res_final_512.pt +3 -0
- checkpoints/torso[0].res_mlp_0.pt +3 -0
- checkpoints/torso[0].res_mlp_1024.pt +3 -0
- checkpoints/torso[0].res_mlp_1536.pt +3 -0
- checkpoints/torso[0].res_mlp_2048.pt +3 -0
- checkpoints/torso[0].res_mlp_512.pt +3 -0
- trainer_0/config.json +26 -0
- trainer_0/embed.pt +3 -0
- trainer_0/torso[0].attn.pt +3 -0
- trainer_0/torso[0].res_final.pt +3 -0
- trainer_0/torso[0].res_mlp.pt +3 -0
checkpoints/embed_0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78dbb0e1385cb5a48e0a8fec6416a7de71b9513d183816a2aa0d8a171b2ac6c1
|
3 |
+
size 37778320
|
checkpoints/embed_1024.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dadd993bd08acdd197960b70c20eb08168991cdb40dda37745885476c7cbf8ab
|
3 |
+
size 37778536
|
checkpoints/embed_1536.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff390cb25b0b38e864a95d03f9abfce3722c94487f72e3417a6429a04689c21c
|
3 |
+
size 37778536
|
checkpoints/embed_2048.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3147be9d26f96b895586026034af610304579c574f72d14db7f01de6d0fe89cd
|
3 |
+
size 37778536
|
checkpoints/embed_512.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21a159bb134341b92fc8277427dd7486d5bd8adc4a858bcaada76b04093137f7
|
3 |
+
size 37778528
|
checkpoints/torso[0].attn_0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffc354c3a5802b66bbccd7d7df0772ed0743cbd1cae5c9d84b80f3e2bdfbdc6f
|
3 |
+
size 37778576
|
checkpoints/torso[0].attn_1024.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e1ac7d98b3b7f33e0e782de518da05b8b2ffc38a88714a30c51e321289e209b
|
3 |
+
size 37778600
|
checkpoints/torso[0].attn_1536.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2cc2223784a03c7c7931bc205997eaed5a905d9daf36e789f6271db78504932
|
3 |
+
size 37778600
|
checkpoints/torso[0].attn_2048.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68a74b08694488633789e06f650eb9d403fe31ab878e195c148f88e2339728df
|
3 |
+
size 37778600
|
checkpoints/torso[0].attn_512.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ccd5923b1f50ffc97bbf808c3b4b05abb02709f897d0a8fa9acfc65fb8249a9f
|
3 |
+
size 37778592
|
checkpoints/torso[0].res_final_0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97c6e916cbcd13b1865aa13704d9d6b6bc63ea69b284fd56b0b1eb2c23936be2
|
3 |
+
size 37778616
|
checkpoints/torso[0].res_final_1024.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d2b3a6f1a646506656d7462119592d55a5846e1e32b8c0f2fb3b3a167730ab3
|
3 |
+
size 37778704
|
checkpoints/torso[0].res_final_1536.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:218017394a6b6384e0a97c473a735923dbc25fcc3b68bde10cedbce24e157105
|
3 |
+
size 37778704
|
checkpoints/torso[0].res_final_2048.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa5b8d6a3b2f4b6b4390c389511ba46827d6e723521b0e7f50a11f1578f38c9a
|
3 |
+
size 37778704
|
checkpoints/torso[0].res_final_512.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36fe68253dfaa1a7619c91637a4c600129b0db33ac01f6c17621ae1a13030c61
|
3 |
+
size 37778696
|
checkpoints/torso[0].res_mlp_0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f739e58039152b9ae31520b5c9ed20ab8dcf14095c3150eed62a9fe58d0f5217
|
3 |
+
size 37778600
|
checkpoints/torso[0].res_mlp_1024.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6015a2483beaa3487e9b983dfc9e0ba3d6715f9b7dcb6afe2f3f40a9e700eed
|
3 |
+
size 37778624
|
checkpoints/torso[0].res_mlp_1536.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b2b29c9d660d58ca51b693c94cde11d0331826f944e851137de789d2fc6a247
|
3 |
+
size 37778624
|
checkpoints/torso[0].res_mlp_2048.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f0576b1b2da36f243a7e808887e69231efaed86636065b5217d17b429bf2793
|
3 |
+
size 37778624
|
checkpoints/torso[0].res_mlp_512.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07205336068e1eae08e77626b71aa8527cd6df5fe788991141963302f8b5771a
|
3 |
+
size 37778616
|
trainer_0/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.00032659863237109043,
|
6 |
+
"steps": 30000,
|
7 |
+
"seed": null,
|
8 |
+
"activation_dim": 768,
|
9 |
+
"dict_size": 6144,
|
10 |
+
"k": 30,
|
11 |
+
"device": "cuda",
|
12 |
+
"layer": 0,
|
13 |
+
"lm_name": "TinyModel_2L_3E",
|
14 |
+
"wandb_name": "AutoEncoderTopK",
|
15 |
+
"submodule_name": null
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 768,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 128,
|
21 |
+
"ctx_len": 256,
|
22 |
+
"refresh_batch_size": 512,
|
23 |
+
"out_batch_size": 1024,
|
24 |
+
"device": "cuda"
|
25 |
+
}
|
26 |
+
}
|
trainer_0/embed.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be740a6797d3219641415c5879b370690ed598cf1ac9525d2e3d7c859cd82e98
|
3 |
+
size 37778240
|
trainer_0/torso[0].attn.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:564152669b865a3b3aa866870f3754423da56bebf9b82be5007e63c755233a88
|
3 |
+
size 37778560
|
trainer_0/torso[0].res_final.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c29b4b9d062f6f5f073ef524944735f80e7109657f030c1ed702db431437591e
|
3 |
+
size 37778600
|
trainer_0/torso[0].res_mlp.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da4ee56f6ffa65286f7809884941c484f31670971ba5a2c0ed17ab077d7c51e9
|
3 |
+
size 37778584
|