hyx21 committed on
Commit
c1ee8f5
1 Parent(s): bf31b5d

Upload 148 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +52 -0
  2. moe-3b-qdp/param_split/decoder.layers.0.ffn.ffn.w_in.w.weight +0 -0
  3. moe-3b-qdp/param_split/decoder.layers.0.ffn.ffn.w_in.w.weight_input_compl +3 -0
  4. moe-3b-qdp/param_split/decoder.layers.0.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  5. moe-3b-qdp/param_split/decoder.layers.1.ffn.ffn.w_in.w.weight +0 -0
  6. moe-3b-qdp/param_split/decoder.layers.1.ffn.ffn.w_in.w.weight_input_compl +3 -0
  7. moe-3b-qdp/param_split/decoder.layers.1.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  8. moe-3b-qdp/param_split/decoder.layers.10.ffn.ffn.w_in.w.weight +0 -0
  9. moe-3b-qdp/param_split/decoder.layers.10.ffn.ffn.w_in.w.weight_input_compl +3 -0
  10. moe-3b-qdp/param_split/decoder.layers.10.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  11. moe-3b-qdp/param_split/decoder.layers.11.ffn.ffn.w_in.w.weight +0 -0
  12. moe-3b-qdp/param_split/decoder.layers.11.ffn.ffn.w_in.w.weight_input_compl +3 -0
  13. moe-3b-qdp/param_split/decoder.layers.11.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  14. moe-3b-qdp/param_split/decoder.layers.12.ffn.ffn.w_in.w.weight +0 -0
  15. moe-3b-qdp/param_split/decoder.layers.12.ffn.ffn.w_in.w.weight_input_compl +3 -0
  16. moe-3b-qdp/param_split/decoder.layers.12.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  17. moe-3b-qdp/param_split/decoder.layers.13.ffn.ffn.w_in.w.weight +0 -0
  18. moe-3b-qdp/param_split/decoder.layers.13.ffn.ffn.w_in.w.weight_input_compl +3 -0
  19. moe-3b-qdp/param_split/decoder.layers.13.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  20. moe-3b-qdp/param_split/decoder.layers.14.ffn.ffn.w_in.w.weight +0 -0
  21. moe-3b-qdp/param_split/decoder.layers.14.ffn.ffn.w_in.w.weight_input_compl +3 -0
  22. moe-3b-qdp/param_split/decoder.layers.14.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  23. moe-3b-qdp/param_split/decoder.layers.15.ffn.ffn.w_in.w.weight +0 -0
  24. moe-3b-qdp/param_split/decoder.layers.15.ffn.ffn.w_in.w.weight_input_compl +3 -0
  25. moe-3b-qdp/param_split/decoder.layers.15.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  26. moe-3b-qdp/param_split/decoder.layers.16.ffn.ffn.w_in.w.weight +0 -0
  27. moe-3b-qdp/param_split/decoder.layers.16.ffn.ffn.w_in.w.weight_input_compl +3 -0
  28. moe-3b-qdp/param_split/decoder.layers.16.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  29. moe-3b-qdp/param_split/decoder.layers.17.ffn.ffn.w_in.w.weight +0 -0
  30. moe-3b-qdp/param_split/decoder.layers.17.ffn.ffn.w_in.w.weight_input_compl +3 -0
  31. moe-3b-qdp/param_split/decoder.layers.17.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  32. moe-3b-qdp/param_split/decoder.layers.18.ffn.ffn.w_in.w.weight +0 -0
  33. moe-3b-qdp/param_split/decoder.layers.18.ffn.ffn.w_in.w.weight_input_compl +3 -0
  34. moe-3b-qdp/param_split/decoder.layers.18.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  35. moe-3b-qdp/param_split/decoder.layers.19.ffn.ffn.w_in.w.weight +0 -0
  36. moe-3b-qdp/param_split/decoder.layers.19.ffn.ffn.w_in.w.weight_input_compl +3 -0
  37. moe-3b-qdp/param_split/decoder.layers.19.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  38. moe-3b-qdp/param_split/decoder.layers.2.ffn.ffn.w_in.w.weight +0 -0
  39. moe-3b-qdp/param_split/decoder.layers.2.ffn.ffn.w_in.w.weight_input_compl +3 -0
  40. moe-3b-qdp/param_split/decoder.layers.2.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  41. moe-3b-qdp/param_split/decoder.layers.20.ffn.ffn.w_in.w.weight +0 -0
  42. moe-3b-qdp/param_split/decoder.layers.20.ffn.ffn.w_in.w.weight_input_compl +3 -0
  43. moe-3b-qdp/param_split/decoder.layers.20.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  44. moe-3b-qdp/param_split/decoder.layers.21.ffn.ffn.w_in.w.weight +0 -0
  45. moe-3b-qdp/param_split/decoder.layers.21.ffn.ffn.w_in.w.weight_input_compl +3 -0
  46. moe-3b-qdp/param_split/decoder.layers.21.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  47. moe-3b-qdp/param_split/decoder.layers.22.ffn.ffn.w_in.w.weight +0 -0
  48. moe-3b-qdp/param_split/decoder.layers.22.ffn.ffn.w_in.w.weight_input_compl +3 -0
  49. moe-3b-qdp/param_split/decoder.layers.22.ffn.ffn.w_in.w.weight_input_compl_acc +1 -0
  50. moe-3b-qdp/param_split/decoder.layers.23.ffn.ffn.w_in.w.weight +0 -0
.gitattributes CHANGED
@@ -33,3 +33,55 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ moe-3b-qdp/param_split/decoder.layers.0.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
37
+ moe-3b-qdp/param_split/decoder.layers.1.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
38
+ moe-3b-qdp/param_split/decoder.layers.10.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
39
+ moe-3b-qdp/param_split/decoder.layers.11.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
40
+ moe-3b-qdp/param_split/decoder.layers.12.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
41
+ moe-3b-qdp/param_split/decoder.layers.13.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
42
+ moe-3b-qdp/param_split/decoder.layers.14.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
43
+ moe-3b-qdp/param_split/decoder.layers.15.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
44
+ moe-3b-qdp/param_split/decoder.layers.16.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
45
+ moe-3b-qdp/param_split/decoder.layers.17.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
46
+ moe-3b-qdp/param_split/decoder.layers.18.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
47
+ moe-3b-qdp/param_split/decoder.layers.19.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
48
+ moe-3b-qdp/param_split/decoder.layers.2.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
49
+ moe-3b-qdp/param_split/decoder.layers.20.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
50
+ moe-3b-qdp/param_split/decoder.layers.21.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
51
+ moe-3b-qdp/param_split/decoder.layers.22.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
52
+ moe-3b-qdp/param_split/decoder.layers.23.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
53
+ moe-3b-qdp/param_split/decoder.layers.3.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
54
+ moe-3b-qdp/param_split/decoder.layers.4.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
55
+ moe-3b-qdp/param_split/decoder.layers.5.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
56
+ moe-3b-qdp/param_split/decoder.layers.6.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
57
+ moe-3b-qdp/param_split/decoder.layers.7.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
58
+ moe-3b-qdp/param_split/decoder.layers.8.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
59
+ moe-3b-qdp/param_split/decoder.layers.9.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
60
+ moe-3b-qdp/param_split/encoder.layers.0.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
61
+ moe-3b-qdp/param_split/encoder.layers.1.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
62
+ moe-3b-qdp/param_split/encoder.layers.10.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
63
+ moe-3b-qdp/param_split/encoder.layers.11.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
64
+ moe-3b-qdp/param_split/encoder.layers.12.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
65
+ moe-3b-qdp/param_split/encoder.layers.13.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
66
+ moe-3b-qdp/param_split/encoder.layers.14.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
67
+ moe-3b-qdp/param_split/encoder.layers.15.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
68
+ moe-3b-qdp/param_split/encoder.layers.16.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
69
+ moe-3b-qdp/param_split/encoder.layers.17.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
70
+ moe-3b-qdp/param_split/encoder.layers.18.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
71
+ moe-3b-qdp/param_split/encoder.layers.19.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
72
+ moe-3b-qdp/param_split/encoder.layers.2.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
73
+ moe-3b-qdp/param_split/encoder.layers.20.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
74
+ moe-3b-qdp/param_split/encoder.layers.21.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
75
+ moe-3b-qdp/param_split/encoder.layers.22.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
76
+ moe-3b-qdp/param_split/encoder.layers.23.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
77
+ moe-3b-qdp/param_split/encoder.layers.3.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
78
+ moe-3b-qdp/param_split/encoder.layers.4.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
79
+ moe-3b-qdp/param_split/encoder.layers.5.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
80
+ moe-3b-qdp/param_split/encoder.layers.6.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
81
+ moe-3b-qdp/param_split/encoder.layers.6.ffn.ffn.w_in.w.weight_input_compl_11 filter=lfs diff=lfs merge=lfs -text
82
+ moe-3b-qdp/param_split/encoder.layers.6.ffn.ffn.w_in.w.weight_input_compl_17 filter=lfs diff=lfs merge=lfs -text
83
+ moe-3b-qdp/param_split/encoder.layers.6.ffn.ffn.w_in.w.weight_input_compl_6 filter=lfs diff=lfs merge=lfs -text
84
+ moe-3b-qdp/param_split/encoder.layers.6.ffn.ffn.w_in.w.weight_input_compl_9 filter=lfs diff=lfs merge=lfs -text
85
+ moe-3b-qdp/param_split/encoder.layers.7.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
86
+ moe-3b-qdp/param_split/encoder.layers.8.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
87
+ moe-3b-qdp/param_split/encoder.layers.9.ffn.ffn.w_in.w.weight_input_compl filter=lfs diff=lfs merge=lfs -text
moe-3b-qdp/param_split/decoder.layers.0.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.0.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb85d40c345f9a9af446393b4f586bf2ce65fa452d9dcc4c87d3f8edb5524dbe
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.0.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.7182807573412037
moe-3b-qdp/param_split/decoder.layers.1.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.1.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20bed1e67c3ee1fe4f7cc7ef473860255aa95583f509134bbcc77f4fb0cdf063
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.1.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.7568720467744474
moe-3b-qdp/param_split/decoder.layers.10.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.10.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cfc61d7231941624afb21ec3a335a567f6354bd3baaf614d859829fe7d5cbaa
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.10.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.748058017777638
moe-3b-qdp/param_split/decoder.layers.11.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.11.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3068e0abe0fe9d83bd07bbc7064606e6032ddb5a4902e3421066a87fb0011513
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.11.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.7405886863413804
moe-3b-qdp/param_split/decoder.layers.12.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.12.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2af635c227579f92a27d37848a14c1b05b6d94db24a3f2b5626adfa1cd07a6d
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.12.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.7240940497555308
moe-3b-qdp/param_split/decoder.layers.13.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.13.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e77bab9ec7867aa7368ba578158e213993b9db288b38b91ec92df78e528cd53
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.13.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.7124260153281037
moe-3b-qdp/param_split/decoder.layers.14.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.14.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d7df88eb6c2c54408411dde7b833169ab32bff6afcbdca26472cf1c504f5d3d
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.14.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.7323668953459915
moe-3b-qdp/param_split/decoder.layers.15.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.15.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f16e465cd271ee72ce95d1e21ea1c32ef65b5296ff65557e4d0a029be41a4a93
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.15.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.7322201105149965
moe-3b-qdp/param_split/decoder.layers.16.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.16.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bfbfb4bfc22ca4587d1ac4e78e53b71cac7352de8c03d30a87165e5c2b2a8e2
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.16.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.731212756898174
moe-3b-qdp/param_split/decoder.layers.17.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.17.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a1ddb9e1cafccbf2b8eab8b81558f59126005230c99253426e14aca5f72744d
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.17.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.7087740436002287
moe-3b-qdp/param_split/decoder.layers.18.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.18.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a28c5d6ec9002cf88f6795fbaee9c77721eeae13aaf3abf283443db176c62c8f
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.18.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.7103819609011953
moe-3b-qdp/param_split/decoder.layers.19.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.19.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91801389212fdef862996cb249248b4f3b01e407d3effcb99ff954ce22a1c548
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.19.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.6948271535597236
moe-3b-qdp/param_split/decoder.layers.2.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.2.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4297fe8c193d47f269e3d0e5cdb69163defd1a14c5503f8c773e93ef179d7a1d
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.2.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.7546966849540423
moe-3b-qdp/param_split/decoder.layers.20.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.20.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fa933e0a510a761361b52ad631b867692ca6159b17d3fad94b3791ddf2fc87d
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.20.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.7048745886978236
moe-3b-qdp/param_split/decoder.layers.21.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.21.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d918044c9d0aff83da39d103a70ee4623c495a54f41ff6c537b1d2476b0b9fe
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.21.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.6884838024893459
moe-3b-qdp/param_split/decoder.layers.22.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file
 
moe-3b-qdp/param_split/decoder.layers.22.ffn.ffn.w_in.w.weight_input_compl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed3070812d08c2f1b0a60bd5e6caf40010e6fd11485ad450e30b9ddbae727a8b
3
+ size 3147751
moe-3b-qdp/param_split/decoder.layers.22.ffn.ffn.w_in.w.weight_input_compl_acc ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.695699293593393
moe-3b-qdp/param_split/decoder.layers.23.ffn.ffn.w_in.w.weight ADDED
Binary file (762 kB). View file