# Model-merge configuration (merge_method: breadcrumbs_ties).
#
# Layout: 16 slices, each covering two layers ([0, 2] through [30, 32]).
# Every slice lists three sources over the same layer_range:
#   * ./Yosegi-0601 and ./Ninja-2B_JP, each with per-filter density / gamma /
#     weight settings (one entry for self_attn, one for mlp, and a final entry
#     with no filter key -- presumably the fallback for all other tensors;
#     confirm against the merge tool's documentation).
#   * ./Yosegi-0603 (same path as base_model), listed without parameters.
#
# NOTE(review): block nesting was reconstructed from key order after the
# original indentation was lost; verify against the merge tool's schema.
# NOTE(review): int8_mask / normalize are given as 1.0 / 0.0 -- presumably
# consumed as on/off flags; values are kept exactly as found.
base_model: ./Yosegi-0603
dtype: bfloat16
merge_method: breadcrumbs_ties
parameters:
  int8_mask: 1.0
  normalize: 0.0
slices:
- sources:
  - layer_range: [0, 2]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.050387850856855765
      - filter: mlp
        value: -0.17075015661203768
      - value: -0.008041653902986862
      weight:
      - filter: self_attn
        value: 0.0999312941470471
      - filter: mlp
        value: 0.541727762184749
      - value: 0.6837012779994258
  - layer_range: [0, 2]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.8218846237599902
      - filter: mlp
        value: 1.0
      - value: 0.9254078866667358
      gamma:
      - filter: self_attn
        value: -0.11213758231875963
      - filter: mlp
        value: 0.021586098873668948
      - value: -0.12827998218659437
      weight:
      - filter: self_attn
        value: 0.40391646444657003
      - filter: mlp
        value: 0.623121864641881
      - value: 0.5967833694632534
  - layer_range: [0, 2]
    model: ./Yosegi-0603
- sources:
  - layer_range: [2, 4]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.8079479346300947
      - filter: mlp
        value: 1.0
      - value: 0.710146185559419
      gamma:
      - filter: self_attn
        value: 0.1383609589681566
      - filter: mlp
        value: 0.21188532059635062
      - value: 0.2994723556443468
      weight:
      - filter: self_attn
        value: 0.48107070906079974
      - filter: mlp
        value: 0.5848073552919492
      - value: 0.4583842493359253
  - layer_range: [2, 4]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.934378153535579
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.073192612278188
      - filter: mlp
        value: 0.07939126555063317
      - value: -0.06891845030175699
      weight:
      - filter: self_attn
        value: 0.32120386994101
      - filter: mlp
        value: 0.5001108459121922
      - value: 0.9138710221666694
  - layer_range: [2, 4]
    model: ./Yosegi-0603
- sources:
  - layer_range: [4, 6]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.7237519222177541
      - value: 0.776951124863642
      gamma:
      - filter: self_attn
        value: -0.2265121048274062
      - filter: mlp
        value: -0.1757947421960496
      - value: -0.11401593728931929
      weight:
      - filter: self_attn
        value: 0.6448742737026658
      - filter: mlp
        value: 0.13809748641457986
      - value: 0.3950550285769662
  - layer_range: [4, 6]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.9649359194114893
      - filter: mlp
        value: 0.916637032428399
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.16291684846287688
      - filter: mlp
        value: -0.19013548712121703
      - value: 0.038409066391918795
      weight:
      - filter: self_attn
        value: 0.1977358472772336
      - filter: mlp
        value: 0.22661167907612348
      - value: 0.6426575016448257
  - layer_range: [4, 6]
    model: ./Yosegi-0603
- sources:
  - layer_range: [6, 8]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.8727809666891416
      - filter: mlp
        value: 1.0
      - value: 0.5160677785559116
      gamma:
      - filter: self_attn
        value: 0.14245180617134273
      - filter: mlp
        value: 0.08189992601998919
      - value: -0.1038827997670827
      weight:
      - filter: self_attn
        value: 0.23575676914257698
      - filter: mlp
        value: 0.4047231670507743
      - value: 0.34207794631274374
  - layer_range: [6, 8]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.576775501046583
      - filter: mlp
        value: -0.046028636298718645
      - value: -0.024161321403060265
      weight:
      - filter: self_attn
        value: 0.833089842843994
      - filter: mlp
        value: 0.5434667434613458
      - value: 0.2946693008513797
  - layer_range: [6, 8]
    model: ./Yosegi-0603
- sources:
  - layer_range: [8, 10]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 0.9930269337531187
      gamma:
      - filter: self_attn
        value: 0.4549980941970383
      - filter: mlp
        value: 0.10362988739411173
      - value: -0.43800391668559174
      weight:
      - filter: self_attn
        value: 0.19663450954683193
      - filter: mlp
        value: 0.16783989984505265
      - value: 0.7465091417598162
  - layer_range: [8, 10]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.797370597380894
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.0665958634205702
      - filter: mlp
        value: -0.058297473060129834
      - value: -0.38206760673090134
      weight:
      - filter: self_attn
        value: 0.7015967347604024
      - filter: mlp
        value: 0.7733694864324641
      - value: 0.7636921732342238
  - layer_range: [8, 10]
    model: ./Yosegi-0603
- sources:
  - layer_range: [10, 12]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.8047576867589878
      - filter: mlp
        value: 0.8852533319203653
      - value: 0.7707342647603538
      gamma:
      - filter: self_attn
        value: -0.054343999574509694
      - filter: mlp
        value: -0.3465154355167133
      - value: 0.022315854655582765
      weight:
      - filter: self_attn
        value: 0.4396484757291151
      - filter: mlp
        value: 0.34318396468602314
      - value: 0.8236034746664869
  - layer_range: [10, 12]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.9058471193805165
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.1221058588826469
      - filter: mlp
        value: -0.4004985640890659
      - value: 0.3219195440395816
      weight:
      - filter: self_attn
        value: 0.3565443612269864
      - filter: mlp
        value: 0.2817057075232181
      - value: 0.5934890337808251
  - layer_range: [10, 12]
    model: ./Yosegi-0603
- sources:
  - layer_range: [12, 14]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.027897116191693133
      - filter: mlp
        value: -0.1765379388255607
      - value: 0.09108936063176161
      weight:
      - filter: self_attn
        value: 0.4499753137521779
      - filter: mlp
        value: 0.901296236087911
      - value: 0.3548680126954006
  - layer_range: [12, 14]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.8973815150776497
      - filter: mlp
        value: 0.6029953465961999
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.10393082898402586
      - filter: mlp
        value: 0.15993577688878796
      - value: 0.011410411917833683
      weight:
      - filter: self_attn
        value: 0.2211644023056492
      - filter: mlp
        value: 0.5677387594231849
      - value: 0.1316535663010981
  - layer_range: [12, 14]
    model: ./Yosegi-0603
- sources:
  - layer_range: [14, 16]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.9584597245055072
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.17789727632680347
      - filter: mlp
        value: 0.2182263440314275
      - value: 0.1449547656126498
      weight:
      - filter: self_attn
        value: 0.4551004762874224
      - filter: mlp
        value: 0.9182082826762857
      - value: 0.3736989395186422
  - layer_range: [14, 16]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.7414465107848625
      - filter: mlp
        value: 1.0
      - value: 0.7894887419395906
      gamma:
      - filter: self_attn
        value: -0.07343933395880992
      - filter: mlp
        value: 0.250800731630588
      - value: -0.2948778134297542
      weight:
      - filter: self_attn
        value: 0.43125199001016495
      - filter: mlp
        value: 0.6182726353394477
      - value: 0.838902157446268
  - layer_range: [14, 16]
    model: ./Yosegi-0603
- sources:
  - layer_range: [16, 18]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.9474287877268394
      - filter: mlp
        value: 1.0
      - value: 0.9613380133344519
      gamma:
      - filter: self_attn
        value: -0.08608895546593046
      - filter: mlp
        value: -0.07275416053291164
      - value: -0.5796137860399382
      weight:
      - filter: self_attn
        value: 0.5593420897751296
      - filter: mlp
        value: 0.7339447992880666
      - value: 0.5447558586689005
  - layer_range: [16, 18]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.9321536960575384
      - filter: mlp
        value: 1.0
      - value: 0.9613033408813294
      gamma:
      - filter: self_attn
        value: 0.20610728738224296
      - filter: mlp
        value: 0.2002206706624053
      - value: -0.45349278793293785
      weight:
      - filter: self_attn
        value: 0.16162975594196963
      - filter: mlp
        value: 0.21262726992327483
      - value: 0.061213622827234075
  - layer_range: [16, 18]
    model: ./Yosegi-0603
- sources:
  - layer_range: [18, 20]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.03922456593148313
      - filter: mlp
        value: 0.3318035822806869
      - value: -0.10373990685028205
      weight:
      - filter: self_attn
        value: 0.8254441016674987
      - filter: mlp
        value: 0.4568039342431161
      - value: 0.3152648515747969
  - layer_range: [18, 20]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 0.9807358937293073
      gamma:
      - filter: self_attn
        value: -0.22734036563128657
      - filter: mlp
        value: 0.26113222150270854
      - value: 0.17739039022957015
      weight:
      - filter: self_attn
        value: 0.33759130475641996
      - filter: mlp
        value: 0.616639215544168
      - value: 0.47560658618977714
  - layer_range: [18, 20]
    model: ./Yosegi-0603
- sources:
  - layer_range: [20, 22]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.9394514442960196
      - filter: mlp
        value: 1.0
      - value: 0.9885037757465567
      gamma:
      - filter: self_attn
        value: -0.17365709450334324
      - filter: mlp
        value: 0.0712279381144505
      - value: 0.11809665485306464
      weight:
      - filter: self_attn
        value: 0.485610337254665
      - filter: mlp
        value: 0.8406593173801935
      - value: 0.5024102481819739
  - layer_range: [20, 22]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.09980202641768818
      - filter: mlp
        value: 0.051454493742856926
      - value: 0.14619126408666103
      weight:
      - filter: self_attn
        value: 0.54772456079406
      - filter: mlp
        value: 0.3440893571099615
      - value: 0.3747271233512448
  - layer_range: [20, 22]
    model: ./Yosegi-0603
- sources:
  - layer_range: [22, 24]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.9474712362889293
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.16020032978118146
      - filter: mlp
        value: -0.025085248873309034
      - value: 0.06046174910893976
      weight:
      - filter: self_attn
        value: 0.8654189362345427
      - filter: mlp
        value: 0.6344956382288498
      - value: 0.6383979001549549
  - layer_range: [22, 24]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.8240762427167851
      - filter: mlp
        value: 1.0
      - value: 0.9004913821398048
      gamma:
      - filter: self_attn
        value: -0.12224186789525764
      - filter: mlp
        value: -0.25877585460700525
      - value: 0.35149388360871714
      weight:
      - filter: self_attn
        value: 0.4294356408713786
      - filter: mlp
        value: 0.3920647298630233
      - value: 0.795891295390721
  - layer_range: [22, 24]
    model: ./Yosegi-0603
- sources:
  - layer_range: [24, 26]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.16915580088030202
      - filter: mlp
        value: 0.2602652727555053
      - value: 0.16985672723305376
      weight:
      - filter: self_attn
        value: 0.420377024485687
      - filter: mlp
        value: 0.3401141209432324
      - value: 0.4953511256159331
  - layer_range: [24, 26]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.7290652609253236
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.1039167464696765
      - filter: mlp
        value: -0.18476572570059685
      - value: 0.1221387313921081
      weight:
      - filter: self_attn
        value: 0.2925002157134928
      - filter: mlp
        value: 0.3854740639588027
      - value: 0.555448110317977
  - layer_range: [24, 26]
    model: ./Yosegi-0603
- sources:
  - layer_range: [26, 28]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 1.0
      - filter: mlp
        value: 0.9104496350690235
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.24831264214235005
      - filter: mlp
        value: -0.03903149241855605
      - value: 0.14189425093398259
      weight:
      - filter: self_attn
        value: 0.7685811138035815
      - filter: mlp
        value: 0.06535011571274918
      - value: 0.696502559577317
  - layer_range: [26, 28]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.9236218028490522
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.2451400735890047
      - filter: mlp
        value: -0.21555851418482214
      - value: 0.020418471695148876
      weight:
      - filter: self_attn
        value: 0.451368534421561
      - filter: mlp
        value: 0.27412879847687055
      - value: 0.18339776770537336
  - layer_range: [26, 28]
    model: ./Yosegi-0603
- sources:
  - layer_range: [28, 30]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.8590812961904566
      - filter: mlp
        value: 1.0
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.06934549536310654
      - filter: mlp
        value: -0.28464693250998063
      - value: -0.0588491947891552
      weight:
      - filter: self_attn
        value: 0.26716389671655294
      - filter: mlp
        value: 0.8228280162386532
      - value: 0.24197568479527135
  - layer_range: [28, 30]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.7277181780542642
      - filter: mlp
        value: 0.74166025738732
      - value: 1.0
      gamma:
      - filter: self_attn
        value: 0.1772650150670655
      - filter: mlp
        value: 0.06545031487123437
      - value: -0.28681451125993446
      weight:
      - filter: self_attn
        value: 0.5781944040541174
      - filter: mlp
        value: 0.2288692970435767
      - value: 0.689751088930503
  - layer_range: [28, 30]
    model: ./Yosegi-0603
- sources:
  - layer_range: [30, 32]
    model: ./Yosegi-0601
    parameters:
      density:
      - filter: self_attn
        value: 0.8177341862620365
      - filter: mlp
        value: 0.8875629677599377
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.06572527259889459
      - filter: mlp
        value: -0.18979543285938766
      - value: -0.24122036571646263
      weight:
      - filter: self_attn
        value: 0.5818433594657613
      - filter: mlp
        value: 0.36676821100234736
      - value: 0.3580688869263428
  - layer_range: [30, 32]
    model: ./Ninja-2B_JP
    parameters:
      density:
      - filter: self_attn
        value: 0.8306036003344672
      - filter: mlp
        value: 0.6993970248745297
      - value: 1.0
      gamma:
      - filter: self_attn
        value: -0.20599853236581384
      - filter: mlp
        value: -0.2001187634455465
      - value: -0.07654635090020837
      weight:
      - filter: self_attn
        value: 0.37120677279712305
      - filter: mlp
        value: 0.13105486609905853
      - value: 0.7204857820148367
  - layer_range: [30, 32]
    model: ./Yosegi-0603
tokenizer_source: union