Upload 148 files
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +52 -0
- moe-3b-qdp/param_split/decoder.layers.0.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.0.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.0.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.1.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.1.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.1.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.10.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.10.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.10.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.11.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.11.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.11.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.12.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.12.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.12.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.13.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.13.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.13.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.14.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.14.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.14.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.15.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.15.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.15.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.16.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.16.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.16.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.17.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.17.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.17.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.18.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.18.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.18.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.19.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.19.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.19.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.2.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.2.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.2.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.20.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.20.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.20.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.21.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.21.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.21.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.22.ffn.ffn.w_in.w.weight +0 -0
- moe-3b-qdp/param_split/decoder.layers.22.ffn.ffn.w_in.w.weight_input_compl +3 -0
- moe-3b-qdp/param_split/decoder.layers.22.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
- moe-3b-qdp/param_split/decoder.layers.23.ffn.ffn.w_in.w.weight +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,55 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
moe-3b-qdp/param_split/decoder.layers.0.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
37 |
+
moe-3b-qdp/param_split/decoder.layers.1.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
38 |
+
moe-3b-qdp/param_split/decoder.layers.10.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
39 |
+
moe-3b-qdp/param_split/decoder.layers.11.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
40 |
+
moe-3b-qdp/param_split/decoder.layers.12.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
41 |
+
moe-3b-qdp/param_split/decoder.layers.13.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
42 |
+
moe-3b-qdp/param_split/decoder.layers.14.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
43 |
+
moe-3b-qdp/param_split/decoder.layers.15.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
44 |
+
moe-3b-qdp/param_split/decoder.layers.16.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
45 |
+
moe-3b-qdp/param_split/decoder.layers.17.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
46 |
+
moe-3b-qdp/param_split/decoder.layers.18.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
47 |
+
moe-3b-qdp/param_split/decoder.layers.19.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
48 |
+
moe-3b-qdp/param_split/decoder.layers.2.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
49 |
+
moe-3b-qdp/param_split/decoder.layers.20.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
50 |
+
moe-3b-qdp/param_split/decoder.layers.21.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
51 |
+
moe-3b-qdp/param_split/decoder.layers.22.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
52 |
+
moe-3b-qdp/param_split/decoder.layers.23.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
53 |
+
moe-3b-qdp/param_split/decoder.layers.3.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
54 |
+
moe-3b-qdp/param_split/decoder.layers.4.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
55 |
+
moe-3b-qdp/param_split/decoder.layers.5.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
56 |
+
moe-3b-qdp/param_split/decoder.layers.6.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
57 |
+
moe-3b-qdp/param_split/decoder.layers.7.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
58 |
+
moe-3b-qdp/param_split/decoder.layers.8.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
59 |
+
moe-3b-qdp/param_split/decoder.layers.9.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
60 |
+
moe-3b-qdp/param_split/encoder.layers.0.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
61 |
+
moe-3b-qdp/param_split/encoder.layers.1.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
62 |
+
moe-3b-qdp/param_split/encoder.layers.10.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
63 |
+
moe-3b-qdp/param_split/encoder.layers.11.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
64 |
+
moe-3b-qdp/param_split/encoder.layers.12.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
65 |
+
moe-3b-qdp/param_split/encoder.layers.13.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
66 |
+
moe-3b-qdp/param_split/encoder.layers.14.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
67 |
+
moe-3b-qdp/param_split/encoder.layers.15.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
68 |
+
moe-3b-qdp/param_split/encoder.layers.16.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
69 |
+
moe-3b-qdp/param_split/encoder.layers.17.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
70 |
+
moe-3b-qdp/param_split/encoder.layers.18.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
71 |
+
moe-3b-qdp/param_split/encoder.layers.19.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
72 |
+
moe-3b-qdp/param_split/encoder.layers.2.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
73 |
+
moe-3b-qdp/param_split/encoder.layers.20.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
74 |
+
moe-3b-qdp/param_split/encoder.layers.21.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
75 |
+
moe-3b-qdp/param_split/encoder.layers.22.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
76 |
+
moe-3b-qdp/param_split/encoder.layers.23.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
77 |
+
moe-3b-qdp/param_split/encoder.layers.3.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
78 |
+
moe-3b-qdp/param_split/encoder.layers.4.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
79 |
+
moe-3b-qdp/param_split/encoder.layers.5.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
80 |
+
moe-3b-qdp/param_split/encoder.layers.6.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
81 |
+
moe-3b-qdp/param_split/encoder.layers.6.ffn.ffn.w_in.w.weight_input_compl_11 filter=lfs diff=lfs merge=lfs -text
|
82 |
+
moe-3b-qdp/param_split/encoder.layers.6.ffn.ffn.w_in.w.weight_input_compl_17 filter=lfs diff=lfs merge=lfs -text
|
83 |
+
moe-3b-qdp/param_split/encoder.layers.6.ffn.ffn.w_in.w.weight_input_compl_6 filter=lfs diff=lfs merge=lfs -text
|
84 |
+
moe-3b-qdp/param_split/encoder.layers.6.ffn.ffn.w_in.w.weight_input_compl_9 filter=lfs diff=lfs merge=lfs -text
|
85 |
+
moe-3b-qdp/param_split/encoder.layers.7.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
86 |
+
moe-3b-qdp/param_split/encoder.layers.8.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
87 |
+
moe-3b-qdp/param_split/encoder.layers.9.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
|
moe-3b-qdp/param_split/decoder.layers.0.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.0.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb85d40c345f9a9af446393b4f586bf2ce65fa452d9dcc4c87d3f8edb5524dbe
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.0.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.7182807573412037
|
moe-3b-qdp/param_split/decoder.layers.1.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.1.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20bed1e67c3ee1fe4f7cc7ef473860255aa95583f509134bbcc77f4fb0cdf063
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.1.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.7568720467744474
|
moe-3b-qdp/param_split/decoder.layers.10.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.10.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1cfc61d7231941624afb21ec3a335a567f6354bd3baaf614d859829fe7d5cbaa
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.10.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.748058017777638
|
moe-3b-qdp/param_split/decoder.layers.11.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.11.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3068e0abe0fe9d83bd07bbc7064606e6032ddb5a4902e3421066a87fb0011513
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.11.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.7405886863413804
|
moe-3b-qdp/param_split/decoder.layers.12.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.12.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2af635c227579f92a27d37848a14c1b05b6d94db24a3f2b5626adfa1cd07a6d
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.12.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.7240940497555308
|
moe-3b-qdp/param_split/decoder.layers.13.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.13.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e77bab9ec7867aa7368ba578158e213993b9db288b38b91ec92df78e528cd53
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.13.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.7124260153281037
|
moe-3b-qdp/param_split/decoder.layers.14.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.14.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d7df88eb6c2c54408411dde7b833169ab32bff6afcbdca26472cf1c504f5d3d
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.14.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.7323668953459915
|
moe-3b-qdp/param_split/decoder.layers.15.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.15.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f16e465cd271ee72ce95d1e21ea1c32ef65b5296ff65557e4d0a029be41a4a93
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.15.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.7322201105149965
|
moe-3b-qdp/param_split/decoder.layers.16.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.16.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bfbfb4bfc22ca4587d1ac4e78e53b71cac7352de8c03d30a87165e5c2b2a8e2
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.16.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.731212756898174
|
moe-3b-qdp/param_split/decoder.layers.17.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.17.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a1ddb9e1cafccbf2b8eab8b81558f59126005230c99253426e14aca5f72744d
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.17.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.7087740436002287
|
moe-3b-qdp/param_split/decoder.layers.18.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.18.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a28c5d6ec9002cf88f6795fbaee9c77721eeae13aaf3abf283443db176c62c8f
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.18.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.7103819609011953
|
moe-3b-qdp/param_split/decoder.layers.19.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.19.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91801389212fdef862996cb249248b4f3b01e407d3effcb99ff954ce22a1c548
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.19.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.6948271535597236
|
moe-3b-qdp/param_split/decoder.layers.2.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.2.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4297fe8c193d47f269e3d0e5cdb69163defd1a14c5503f8c773e93ef179d7a1d
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.2.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.7546966849540423
|
moe-3b-qdp/param_split/decoder.layers.20.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.20.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fa933e0a510a761361b52ad631b867692ca6159b17d3fad94b3791ddf2fc87d
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.20.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.7048745886978236
|
moe-3b-qdp/param_split/decoder.layers.21.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.21.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d918044c9d0aff83da39d103a70ee4623c495a54f41ff6c537b1d2476b0b9fe
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.21.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.6884838024893459
|
moe-3b-qdp/param_split/decoder.layers.22.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|
moe-3b-qdp/param_split/decoder.layers.22.ffn.ffn.w_in.w.weight_input_compl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed3070812d08c2f1b0a60bd5e6caf40010e6fd11485ad450e30b9ddbae727a8b
|
3 |
+
size 3147751
|
moe-3b-qdp/param_split/decoder.layers.22.ffn.ffn.w_in.w.weight_input_compl_acc
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
0.695699293593393
|
moe-3b-qdp/param_split/decoder.layers.23.ffn.ffn.w_in.w.weight
ADDED
Binary file (762 kB). View file
|
|