ggml_debug: inp_embd = (f32) GET_ROWS(token_embd.weight{6144, 100352, 1, 1}, inp_tokens{3, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0072, 3.9928, 7.9928, ...], [24575.9922, 24579.9922, 24583.9922, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: norm-0 = (f32) NORM(CUDA0#inp_embd#0{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.4884, 3.5116, 7.5116, ...], [24575.5117, 24579.5117, 24583.5117, ...], [49151.5117, 49155.5117, 49159.5117, ...], ], ] sum = 221215.609375 ggml_debug: attn_norm-0 = (f32) MUL(norm-0{6144, 3, 1, 1}, blk.0.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0000, 4.0000, 8.0000, ...], [24576.0000, 24580.0000, 24584.0000, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: wqkv-0 = (f32) MUL_MAT(blk.0.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-0{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.1989, 4.1989, 8.1989, ...], [32768.1992, 32772.1992, 32776.1992, ...], [65536.1953, 65540.1953, 65544.1953, ...], ], ] sum = 294949.781250 ggml_debug: wqkv_clamped-0 = (f32) CLAMP(wqkv-0{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.1989, 4.1989, 8.1989, ...], [32768.1992, 32772.1992, 32776.1992, ...], [65536.1953, 65540.1953, 65544.1953, ...], ], ] sum = 294949.781250 ggml_debug: wqkv_clamped-0 (view) = (f32) VIEW(wqkv_clamped-0{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.1989, 4.1989, 8.1989, ...], [32768.1992, 32772.1992, 32776.1992, ...], [65536.1953, 65540.1953, 65544.1953, ...], ], ] sum = 294949.781250 ggml_debug: Qcur-0 = (f32) CONT(wqkv_clamped-0 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.1989, 4.1989, 8.1989, ...], [24576.1992, 24580.1992, 24584.1992, ...], [49152.1992, 49156.1992, 49160.1992, ...], ], ] sum = 221221.796875 ggml_debug: Qcur-0 (reshaped) = (f32) RESHAPE(Qcur-0{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.1989, 4.1989, 8.1989, ...], [512.1989, 516.1989, 520.1989, ...], [1024.1989, 1028.1989, 1032.1989, ...], ... ], [ [24576.1992, 24580.1992, 24584.1992, ...], [25088.1992, 25092.1992, 25096.1992, ...], [25600.1992, 25604.1992, 25608.1992, ...], ... ], [ [49152.1992, 49156.1992, 49160.1992, ...], [49664.1992, 49668.1992, 49672.1992, ...], [50176.1992, 50180.1992, 50184.1992, ...], ... ], ] sum = 677489.312500 ggml_debug: Qcur-0 = (f32) ROPE(Qcur-0 (reshaped){128, 48, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.1989, 4.1989, 8.1989, ...], [512.1989, 516.1989, 520.1989, ...], [1024.1989, 1028.1989, 1032.1989, ...], ... ], [ [24576.1992, 24580.1992, 24584.1992, ...], [25088.1992, 25092.1992, 25096.1992, ...], [25600.1992, 25604.1992, 25608.1992, ...], ... ], [ [49152.1992, 49156.1992, 49160.1992, ...], [49664.1992, 49668.1992, 49672.1992, ...], [50176.1992, 50180.1992, 50184.1992, ...], ... ], ] sum = 677489.312500 ggml_debug: wqkv_clamped-0 (view) = (f32) VIEW(wqkv_clamped-0{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 1.3524, 5.3524, 9.3524, ...], [32769.3516, 32773.3516, 32777.3516, ...], [65537.3516, 65541.3516, 65545.3516, ...], ], ] sum = 294960.156250 ggml_debug: Kcur-0 = (f32) CONT(wqkv_clamped-0 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 1.3524, 5.3524, 9.3524, ...], [4097.3525, 4101.3525, 4105.3525, ...], [8193.3525, 8197.3525, 8201.3525, ...], ], ] sum = 36912.171875 ggml_debug: Kcur-0 (reshaped) = (f32) RESHAPE(Kcur-0{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 1.3524, 5.3524, 9.3524, ...], [513.3524, 517.3524, 521.3524, ...], [1025.3524, 1029.3524, 1033.3524, ...], ... ], [ [4097.3525, 4101.3525, 4105.3525, ...], [4609.3525, 4613.3525, 4617.3525, ...], [5121.3525, 5125.3525, 5129.3525, ...], ... ], [ [8193.3525, 8197.3525, 8201.3525, ...], [8705.3525, 8709.3525, 8713.3525, ...], [9217.3525, 9221.3525, 9225.3525, ...], ... ], ] sum = 124560.507812 ggml_debug: Kcur-0 = (f32) ROPE(Kcur-0 (reshaped){128, 8, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 1.3524, 5.3524, 9.3524, ...], [513.3524, 517.3524, 521.3524, ...], [1025.3524, 1029.3524, 1033.3524, ...], ... ], [ [4097.3525, 4101.3525, 4105.3525, ...], [4609.3525, 4613.3525, 4617.3525, ...], [5121.3525, 5125.3525, 5129.3525, ...], ... ], [ [8193.3525, 8197.3525, 8201.3525, ...], [8705.3525, 8709.3525, 8713.3525, ...], [9217.3525, 9221.3525, 9225.3525, ...], ... ], ] sum = 124560.507812 ggml_debug: wqkv_clamped-0 (view) = (f32) VIEW(wqkv_clamped-0{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0020, 3.9980, 7.9980, ...], [32767.9980, 32771.9961, 32775.9961, ...], [65535.9961, 65540.0000, 65544.0000, ...], ], ] sum = 294948.000000 ggml_debug: Vcur-0 = (f32) CONT(wqkv_clamped-0 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0020, 3.9980, 7.9980, ...], [4095.9980, 4099.9980, 4103.9980, ...], [8191.9980, 8195.9980, 8199.9980, ...], ], ] sum = 36899.984375 ggml_debug: k_cache_view-0 = (f16) VIEW(cache_k_l0{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-0 (copy of Kcur-0) = (f16) CPY(Kcur-0{128, 8, 3, 1}, k_cache_view-0{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 1.3525, 1.3545, 1.3564, ...], ], ] sum = 4.063477 ggml_debug: v_cur_t-0 = (f32) TRANSPOSE(Vcur-0{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.0020, 4095.9980, 8191.9980], [ 3.9980, 4099.9980, 8195.9980], [ 7.9980, 4103.9980, 8199.9980], ... ], ] sum = 36899.984375 ggml_debug: v_cache_view-0 = (f16) VIEW(cache_v_l0{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-0 (copy of v_cur_t-0) = (f16) CPY(v_cur_t-0{3, 1024, 1, 1}, v_cache_view-0{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.0020, -0.0020, -0.0020], [ -0.0041, -0.0041, -0.0041], [ -0.0082, -0.0082, -0.0082], ... ], ] sum = -0.042938 ggml_debug: v-0 = (f16) VIEW(cache_v_l0{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.0020, -0.0020, -0.0020, ...], [ -0.0041, -0.0041, -0.0041, ...], [ -0.0082, -0.0082, -0.0082, ...], ... ], [ [ -0.0020, -0.0020, -0.0020, ...], [ -0.0041, -0.0041, -0.0041, ...], [ -0.0082, -0.0082, -0.0082, ...], ... ], [ [ -0.0020, -0.0020, -0.0020, ...], [ -0.0041, -0.0041, -0.0041, ...], [ -0.0082, -0.0082, -0.0082, ...], ... ], ... ] sum = -0.128815 ggml_debug: k-0 = (f16) VIEW(cache_k_l0{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 1.3525, 1.3545, 1.3564, ...], [ 5.4102, 5.4180, 5.4258, ...], [ 21.6406, 21.6719, 21.7031, ...], ... ], [ [ 1.6025, 1.6045, 1.6064, ...], [ 6.4102, 6.4180, 6.4258, ...], [ 25.6406, 25.6719, 25.7031, ...], ... ], [ [ 1.8525, 1.8545, 1.8564, ...], [ 7.4102, 7.4180, 7.4258, ...], [ 29.6406, 29.6719, 29.7031, ...], ... ], ... ] sum = 303.249023 ggml_debug: q-0 = (f32) PERMUTE(Qcur-0{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.1989, 4.1989, 8.1989, ...], [24576.1992, 24580.1992, 24584.1992, ...], [49152.1992, 49156.1992, 49160.1992, ...], ], [ [512.1989, 516.1989, 520.1989, ...], [25088.1992, 25092.1992, 25096.1992, ...], [49664.1992, 49668.1992, 49672.1992, ...], ], [ [1024.1989, 1028.1989, 1032.1989, ...], [25600.1992, 25604.1992, 25608.1992, ...], [50176.1992, 50180.1992, 50184.1992, ...], ], ... ] sum = 677489.250000 ggml_debug: kq-0 = (f32) MUL_MAT(k-0{128, 32, 8, 1}, q-0{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 1.6367, 5.6367, 9.6367, ...], [129.6367, 133.6367, 137.6367, ...], [257.6367, 261.6367, 265.6367, ...], ], [ [385.6367, 389.6367, 393.6367, ...], [513.6367, 517.6367, 521.6367, ...], [641.6367, 645.6367, 649.6367, ...], ], [ [769.6367, 773.6367, 777.6367, ...], [897.6367, 901.6367, 905.6367, ...], [1025.6367, 1029.6367, 1033.6367, ...], ], ... ] sum = 13976.191406 ggml_debug: kq_soft_max_ext-0 = (f32) SOFT_MAX(kq-0{32, 3, 48, 1}, CUDA0#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-0 = (f32) MUL_MAT(v-0{32, 128, 8, 1}, kq_soft_max_ext-0{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.0020, 3.9980, 7.9980, ...], [511.9980, 515.9980, 519.9980, ...], [1023.9980, 1027.9979, 1031.9979, ...], ], [ [1535.9979, 1539.9979, 1543.9979, ...], [2047.9979, 2051.9980, 2055.9980, ...], [2559.9980, 2563.9980, 2567.9980, ...], ], [ [3071.9980, 3075.9980, 3079.9980, ...], [3583.9980, 3587.9980, 3591.9980, ...], [4095.9980, 4099.9980, 4103.9980, ...], ], ... ] sum = 55403.953125 ggml_debug: kqv_merged-0 = (f32) PERMUTE(kqv-0{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.0020, 3.9980, 7.9980, ...], [1535.9979, 1539.9979, 1543.9979, ...], [3071.9980, 3075.9980, 3079.9980, ...], ... ], [ [511.9980, 515.9980, 519.9980, ...], [2047.9979, 2051.9980, 2055.9980, ...], [3583.9980, 3587.9980, 3591.9980, ...], ... ], [ [1023.9980, 1027.9979, 1031.9979, ...], [2559.9980, 2563.9980, 2567.9980, ...], [4095.9980, 4099.9980, 4103.9980, ...], ... ], ] sum = 55403.953125 ggml_debug: kqv_merged_cont-0 = (f32) CONT(kqv_merged-0{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0020, 3.9980, 7.9980, ...], [24575.9980, 24579.9980, 24583.9980, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.984375 ggml_debug: kqv_out-0 = (f32) MUL_MAT(blk.0.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-0{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0102, 3.9898, 7.9898, ...], [24575.9902, 24579.9902, 24583.9902, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.890625 ggml_debug: norm-0 = (f32) NORM(kqv_out-0{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.9815, 3.0185, 7.0185, ...], [24575.0176, 24579.0176, 24583.0176, ...], [49151.0195, 49155.0195, 49159.0195, ...], ], ] sum = 221211.156250 ggml_debug: attn_out_norm-0 = (f32) MUL(norm-0{6144, 3, 1, 1}, blk.0.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0289, 3.9711, 7.9711, ...], [24575.9707, 24579.9707, 24583.9707, ...], [49151.9727, 49155.9727, 49159.9727, ...], ], ] sum = 221219.734375 ggml_debug: ffn_moe_logits-0 = (f32) MUL_MAT(blk.0.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-0{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.4526, 3.5474, 7.5474, ...], [ 63.5474, 67.5474, 71.5474, ...], [127.5474, 131.5474, 135.5474, ...], ], ] sum = 607.926270 ggml_debug: ffn_moe_probs-0 = (f32) SOFT_MAX(ffn_moe_logits-0{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0383, 4.0383, 8.0383, ...], [ 64.0383, 68.0383, 72.0383, ...], [128.0383, 132.0383, 136.0383, ...], ], ] sum = 612.344360 ggml_debug: ffn_moe_argsort-0 = (i32) ARGSORT(ffn_moe_probs-0{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 9.0000, 13.0000, 17.0000, ...], [ 73.0000, 77.0000, 81.0000, ...], [137.0000, 141.0000, 145.0000, ...], ], ] sum = 693.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-0{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 9.0000, 13.0000, 17.0000, ...], [ 73.0000, 77.0000, 81.0000, ...], [137.0000, 141.0000, 145.0000, ...], ], ] sum = 693.000000 ggml_debug: ffn_moe_up-0 = (f32) MUL_MAT_ID(blk.0.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-0{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1626, 4.1626, 8.1626, ...], [43008.1641, 43012.1641, 43016.1641, ...], [86016.1641, 86020.1641, 86024.1641, ...], ], ] sum = 387109.468750 ggml_debug: ffn_moe_gate-0 = (f32) MUL_MAT_ID(blk.0.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-0{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1012, 3.8988, 7.8988, ...], [43007.8984, 43011.8984, 43015.8984, ...], [86015.8984, 86019.8984, 86023.8984, ...], ], ] sum = 387107.093750 ggml_debug: ffn_moe_silu-0 = (f32) UNARY(ffn_moe_gate-0{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0481, 3.9519, 7.9519, ...], [43007.9531, 43011.9531, 43015.9531, ...], [86015.9531, 86019.9531, 86023.9531, ...], ], ] sum = 387107.562500 ggml_debug: ffn_moe_gate_par-0 = (f32) MUL(ffn_moe_up-0{10752, 3, 1, 1}, ffn_moe_silu-0{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0078, 3.9922, 7.9922, ...], [43007.9922, 43011.9922, 43015.9922, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.937500 ggml_debug: ffn_moe_down-0 = (f32) MUL_MAT_ID(blk.0.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-0{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0015, 4.0015, 8.0015, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_probs-0 (reshaped) = (f32) RESHAPE(ffn_moe_probs-0{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0383], [ 4.0383], [ 8.0383], ... ], [ [ 64.0383], [ 68.0383], [ 72.0383], ... ], [ [128.0383], [132.0383], [136.0383], ... ], ] sum = 612.344360 ggml_debug: ffn_moe_weights-0 = (f32) GET_ROWS(ffn_moe_probs-0 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1791], [ 4.1791], [ 8.1791], ... ], [ [ 16.1791], [ 20.1791], [ 24.1791], ... ], [ [ 32.1791], [ 36.1791], [ 40.1791], ... ], ] sum = 181.611862 ggml_debug: ffn_moe_weights-0 (reshaped) = (f32) RESHAPE(ffn_moe_weights-0{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1791, 4.1791, 8.1791, ...], [ 16.1791, 20.1791, 24.1791, ...], [ 32.1791, 36.1791, 40.1791, ...], ], ] sum = 181.611862 ggml_debug: ffn_moe_weights_sum-0 = (f32) SUM_ROWS(ffn_moe_weights-0 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.5413], [ 4.5413], [ 8.5413], ], ] sum = 13.623940 ggml_debug: ffn_moe_weights_norm-0 = (f32) DIV(ffn_moe_weights-0 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-0{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3309, 4.3309, 8.3309, ...], [ 16.3309, 20.3309, 24.3309, ...], [ 32.3309, 36.3309, 40.3309, ...], ], ] sum = 182.977707 ggml_debug: ffn_moe_weights_norm-0 (view) = (f32) VIEW(ffn_moe_weights_norm-0{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3309], [ 16.3309], [ 32.3309], ], ] sum = 48.992569 ggml_debug: ffn_moe_weighted-0 = (f32) MUL(ffn_moe_down-0{6144, 3, 1, 1}, ffn_moe_weights_norm-0 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0005, 4.0005, 8.0005, ...], [24576.0000, 24580.0000, 24584.0000, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_up-0 = (f32) MUL_MAT_ID(blk.0.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-0{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0805, 3.9195, 7.9195, ...], [43007.9180, 43011.9180, 43015.9180, ...], [86015.9219, 86019.9219, 86023.9219, ...], ], ] sum = 387107.312500 ggml_debug: ffn_moe_gate-0 = (f32) MUL_MAT_ID(blk.0.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-0{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0173, 3.9827, 7.9827, ...], [43007.9844, 43011.9844, 43015.9844, ...], [86015.9844, 86019.9844, 86023.9844, ...], ], ] sum = 387107.875000 ggml_debug: ffn_moe_silu-0 = (f32) UNARY(ffn_moe_gate-0{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0086, 3.9914, 7.9914, ...], [43007.9922, 43011.9922, 43015.9922, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.937500 ggml_debug: ffn_moe_gate_par-0 = (f32) MUL(ffn_moe_up-0{10752, 3, 1, 1}, ffn_moe_silu-0{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0007, 4.0007, 8.0007, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-0 = (f32) MUL_MAT_ID(blk.0.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-0{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0087, 4.0087, 8.0087, ...], [24576.0078, 24580.0078, 24584.0078, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: ffn_moe_weights_norm-0 (view) = (f32) VIEW(ffn_moe_weights_norm-0{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3120], [ 16.3120], [ 32.3120], ], ] sum = 48.936081 ggml_debug: ffn_moe_weighted-0 = (f32) MUL(ffn_moe_down-0{6144, 3, 1, 1}, ffn_moe_weights_norm-0 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0027, 4.0027, 8.0027, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.015625 ggml_debug: ffn_moe_out-0 = (f32) ADD(ffn_moe_weighted-0{6144, 3, 1, 1}, ffn_moe_weighted-0{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0032, 4.0032, 8.0032, ...], [24576.0039, 24580.0039, 24584.0039, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.015625 ggml_debug: ffn_moe_up-0 = (f32) MUL_MAT_ID(blk.0.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-0{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0274, 4.0274, 8.0274, ...], [43008.0273, 43012.0273, 43016.0273, ...], [86016.0312, 86020.0312, 86024.0312, ...], ], ] sum = 387108.281250 ggml_debug: ffn_moe_gate-0 = (f32) MUL_MAT_ID(blk.0.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-0{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0587, 3.9413, 7.9413, ...], [43007.9414, 43011.9414, 43015.9414, ...], [86015.9375, 86019.9375, 86023.9375, ...], ], ] sum = 387107.437500 ggml_debug: ffn_moe_silu-0 = (f32) UNARY(ffn_moe_gate-0{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0285, 3.9715, 7.9715, ...], [43007.9727, 43011.9727, 43015.9727, ...], [86015.9688, 86019.9688, 86023.9688, ...], ], ] sum = 387107.718750 ggml_debug: ffn_moe_gate_par-0 = (f32) MUL(ffn_moe_up-0{10752, 3, 1, 1}, ffn_moe_silu-0{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0008, 3.9992, 7.9992, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-0 = (f32) MUL_MAT_ID(blk.0.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-0{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0056, 3.9944, 7.9944, ...], [24575.9941, 24579.9941, 24583.9941, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.953125 ggml_debug: ffn_moe_weights_norm-0 (view) = (f32) VIEW(ffn_moe_weights_norm-0{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2732], [ 16.2732], [ 32.2732], ], ] sum = 48.819660 ggml_debug: ffn_moe_weighted-0 = (f32) MUL(ffn_moe_down-0{6144, 3, 1, 1}, ffn_moe_weights_norm-0 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0015, 3.9985, 7.9985, ...], [24575.9980, 24579.9980, 24583.9980, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_out-0 = (f32) ADD(ffn_moe_out-0{6144, 3, 1, 1}, ffn_moe_weighted-0{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0017, 4.0017, 8.0017, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_up-0 = (f32) MUL_MAT_ID(blk.0.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-0{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0263, 3.9737, 7.9737, ...], [43007.9727, 43011.9727, 43015.9727, ...], [86015.9766, 86019.9766, 86023.9766, ...], ], ] sum = 387107.750000 ggml_debug: ffn_moe_gate-0 = (f32) MUL_MAT_ID(blk.0.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-0{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0007, 3.9993, 7.9993, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_silu-0 = (f32) UNARY(ffn_moe_gate-0{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0004, 3.9996, 7.9996, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_gate_par-0 = (f32) MUL(ffn_moe_up-0{10752, 3, 1, 1}, ffn_moe_silu-0{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0000, 4.0000, 8.0000, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-0 = (f32) MUL_MAT_ID(blk.0.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-0{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0100, 4.0100, 8.0100, ...], [24576.0098, 24580.0098, 24584.0098, ...], [49152.0117, 49156.0117, 49160.0117, ...], ], ] sum = 221220.093750 ggml_debug: ffn_moe_weights_norm-0 (view) = (f32) VIEW(ffn_moe_weights_norm-0{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.0839], [ 16.0839], [ 32.0839], ], ] sum = 48.251698 ggml_debug: ffn_moe_weighted-0 = (f32) MUL(ffn_moe_down-0{6144, 3, 1, 1}, ffn_moe_weights_norm-0 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0008, 4.0008, 8.0008, ...], [24576.0000, 24580.0000, 24584.0000, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_out-0 = (f32) ADD(ffn_moe_out-0{6144, 3, 1, 1}, ffn_moe_weighted-0{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0025, 4.0025, 8.0025, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.015625 ggml_debug: ffn_inp-0 = (f32) ADD(kqv_out-0{6144, 3, 1, 1}, CUDA0#inp_embd#0{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0174, 3.9826, 7.9826, ...], [24575.9824, 24579.9824, 24583.9824, ...], [49151.9844, 49155.9844, 49159.9844, ...], ], ] sum = 221219.859375 ggml_debug: l_out-0 = (f32) ADD(ffn_moe_out-0{6144, 3, 1, 1}, ffn_inp-0{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0149, 3.9851, 7.9851, ...], [24575.9844, 24579.9844, 24583.9844, ...], [49151.9844, 49155.9844, 49159.9844, ...], ], ] sum = 221219.859375 ggml_debug: norm-1 = (f32) NORM(l_out-0{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.6330, 3.3670, 7.3670, ...], [24575.3672, 24579.3672, 24583.3672, ...], [49151.3672, 49155.3672, 49159.3672, ...], ], ] sum = 221214.312500 ggml_debug: attn_norm-1 = (f32) MUL(norm-1{6144, 3, 1, 1}, blk.1.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0001, 4.0001, 8.0001, ...], [24576.0000, 24580.0000, 24584.0000, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: wqkv-1 = (f32) MUL_MAT(blk.1.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-1{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -1.4474, 2.5526, 6.5526, ...], [32766.5527, 32770.5508, 32774.5508, ...], [65534.5508, 65538.5547, 65542.5547, ...], ], ] sum = 294934.968750 ggml_debug: wqkv_clamped-1 = (f32) CLAMP(wqkv-1{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -1.4474, 2.5526, 6.5526, ...], [32766.5527, 32770.5508, 32774.5508, ...], [65534.5508, 65538.5547, 65542.5547, ...], ], ] sum = 294934.968750 ggml_debug: wqkv_clamped-1 (view) = (f32) VIEW(wqkv_clamped-1{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -1.4474, 2.5526, 6.5526, ...], [32766.5527, 32770.5508, 32774.5508, ...], [65534.5508, 65538.5547, 65542.5547, ...], ], ] sum = 294934.968750 ggml_debug: Qcur-1 = (f32) CONT(wqkv_clamped-1 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -1.4474, 2.5526, 6.5526, ...], [24574.5527, 24578.5527, 24582.5527, ...], [49150.5508, 49154.5508, 49158.5508, ...], ], ] sum = 221206.968750 ggml_debug: Qcur-1 (reshaped) = (f32) RESHAPE(Qcur-1{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -1.4474, 2.5526, 6.5526, ...], [510.5526, 514.5527, 518.5527, ...], [1022.5527, 1026.5526, 1030.5526, ...], ... ], [ [24574.5527, 24578.5527, 24582.5527, ...], [25086.5527, 25090.5527, 25094.5527, ...], [25598.5527, 25602.5527, 25606.5527, ...], ... ], [ [49150.5508, 49154.5508, 49158.5508, ...], [49662.5508, 49666.5508, 49670.5508, ...], [50174.5508, 50178.5508, 50182.5508, ...], ... ], ] sum = 677445.000000 ggml_debug: Qcur-1 = (f32) ROPE(Qcur-1 (reshaped){128, 48, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -1.4474, 2.5526, 6.5526, ...], [510.5526, 514.5527, 518.5527, ...], [1022.5527, 1026.5526, 1030.5526, ...], ... ], [ [24574.5527, 24578.5527, 24582.5527, ...], [25086.5527, 25090.5527, 25094.5527, ...], [25598.5527, 25602.5527, 25606.5527, ...], ... ], [ [49150.5508, 49154.5508, 49158.5508, ...], [49662.5508, 49666.5508, 49670.5508, ...], [50174.5508, 50178.5508, 50182.5508, ...], ... ], ] sum = 677445.000000 ggml_debug: wqkv_clamped-1 (view) = (f32) VIEW(wqkv_clamped-1{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.2244, 3.7756, 7.7756, ...], [32767.7754, 32771.7773, 32775.7773, ...], [65535.7773, 65539.7734, 65543.7734, ...], ], ] sum = 294946.000000 ggml_debug: Kcur-1 = (f32) CONT(wqkv_clamped-1 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.2244, 3.7756, 7.7756, ...], [4095.7756, 4099.7759, 4103.7759, ...], [8191.7759, 8195.7754, 8199.7754, ...], ], ] sum = 36897.980469 ggml_debug: Kcur-1 (reshaped) = (f32) RESHAPE(Kcur-1{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ -0.2244, 3.7756, 7.7756, ...], [511.7756, 515.7756, 519.7756, ...], [1023.7756, 1027.7756, 1031.7756, ...], ... ], [ [4095.7756, 4099.7759, 4103.7759, ...], [4607.7759, 4611.7759, 4615.7759, ...], [5119.7759, 5123.7759, 5127.7759, ...], ... ], [ [8191.7759, 8195.7754, 8199.7754, ...], [8703.7754, 8707.7754, 8711.7754, ...], [9215.7754, 9219.7754, 9223.7754, ...], ... ], ] sum = 124517.929688 ggml_debug: Kcur-1 = (f32) ROPE(Kcur-1 (reshaped){128, 8, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ -0.2244, 3.7756, 7.7756, ...], [511.7756, 515.7756, 519.7756, ...], [1023.7756, 1027.7756, 1031.7756, ...], ... ], [ [4095.7756, 4099.7759, 4103.7759, ...], [4607.7759, 4611.7759, 4615.7759, ...], [5119.7759, 5123.7759, 5127.7759, ...], ... ], [ [8191.7759, 8195.7754, 8199.7754, ...], [8703.7754, 8707.7754, 8711.7754, ...], [9215.7754, 9219.7754, 9223.7754, ...], ... ], ] sum = 124517.929688 ggml_debug: wqkv_clamped-1 (view) = (f32) VIEW(wqkv_clamped-1{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0024, 4.0024, 8.0024, ...], [32768.0039, 32772.0039, 32776.0039, ...], [65536.0000, 65540.0000, 65544.0000, ...], ], ] sum = 294948.000000 ggml_debug: Vcur-1 = (f32) CONT(wqkv_clamped-1 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0024, 4.0024, 8.0024, ...], [4096.0024, 4100.0024, 4104.0024, ...], [8192.0020, 8196.0020, 8200.0020, ...], ], ] sum = 36900.019531 ggml_debug: k_cache_view-1 = (f16) VIEW(cache_k_l1{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-1 (copy of Kcur-1) = (f16) CPY(Kcur-1{128, 8, 3, 1}, k_cache_view-1{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ -0.2244, -0.2246, -0.2249, ...], ], ] sum = -0.673828 ggml_debug: v_cur_t-1 = (f32) TRANSPOSE(Vcur-1{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0024, 4096.0024, 8192.0020], [ 4.0024, 4100.0024, 8196.0020], [ 8.0024, 4104.0024, 8200.0020], ... ], ] sum = 36900.019531 ggml_debug: v_cache_view-1 = (f16) VIEW(cache_v_l1{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-1 (copy of v_cur_t-1) = (f16) CPY(v_cur_t-1{3, 1024, 1, 1}, v_cache_view-1{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.0024, 0.0024, 0.0024], [ 0.0048, 0.0048, 0.0048], [ 0.0095, 0.0095, 0.0096], ... ], ] sum = 0.050108 ggml_debug: v-1 = (f16) VIEW(cache_v_l1{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.0024, 0.0024, 0.0024, ...], [ 0.0048, 0.0048, 0.0048, ...], [ 0.0095, 0.0095, 0.0096, ...], ... ], [ [ 0.0024, 0.0024, 0.0024, ...], [ 0.0048, 0.0048, 0.0048, ...], [ 0.0095, 0.0095, 0.0096, ...], ... ], [ [ 0.0024, 0.0024, 0.0024, ...], [ 0.0048, 0.0048, 0.0048, ...], [ 0.0095, 0.0095, 0.0096, ...], ... ], ... ] sum = 0.150324 ggml_debug: k-1 = (f16) VIEW(cache_k_l1{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ -0.2244, -0.2246, -0.2249, ...], [ -0.8975, -0.8984, -0.8994, ...], [ -3.5898, -3.5938, -3.5977, ...], ... ], [ [ -0.2612, -0.2617, -0.2622, ...], [ -1.0449, -1.0469, -1.0488, ...], [ -4.1797, -4.1875, -4.1953, ...], ... ], [ [ -0.3237, -0.3242, -0.3247, ...], [ -1.2949, -1.2969, -1.2988, ...], [ -5.1797, -5.1875, -5.1953, ...], ... ], ... ] sum = -51.064453 ggml_debug: q-1 = (f32) PERMUTE(Qcur-1{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -1.4474, 2.5526, 6.5526, ...], [24574.5527, 24578.5527, 24582.5527, ...], [49150.5508, 49154.5508, 49158.5508, ...], ], [ [510.5526, 514.5527, 518.5527, ...], [25086.5527, 25090.5527, 25094.5527, ...], [49662.5508, 49666.5508, 49670.5508, ...], ], [ [1022.5527, 1026.5526, 1030.5526, ...], [25598.5527, 25602.5527, 25606.5527, ...], [50174.5508, 50178.5508, 50182.5508, ...], ], ... ] sum = 677445.000000 ggml_debug: kq-1 = (f32) MUL_MAT(k-1{128, 32, 8, 1}, q-1{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 47.0938, 51.0938, 55.0938, ...], [175.0938, 179.0938, 183.0938, ...], [303.0938, 307.0938, 311.0938, ...], ], [ [431.0938, 435.0938, 439.0938, ...], [559.0938, 563.0938, 567.0938, ...], [687.0938, 691.0938, 695.0938, ...], ], [ [815.0938, 819.0938, 823.0938, ...], [943.0938, 947.0938, 951.0938, ...], [1071.0938, 1075.0938, 1079.0938, ...], ], ... ] sum = 15203.531250 ggml_debug: kq_soft_max_ext-1 = (f32) SOFT_MAX(kq-1{32, 3, 48, 1}, CUDA0#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-1 = (f32) MUL_MAT(v-1{32, 128, 8, 1}, kq_soft_max_ext-1{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.0024, 4.0024, 8.0024, ...], [512.0024, 516.0024, 520.0024, ...], [1024.0024, 1028.0024, 1032.0024, ...], ], [ [1536.0024, 1540.0024, 1544.0024, ...], [2048.0024, 2052.0024, 2056.0024, ...], [2560.0024, 2564.0024, 2568.0024, ...], ], [ [3072.0024, 3076.0024, 3080.0024, ...], [3584.0024, 3588.0024, 3592.0024, ...], [4096.0024, 4100.0024, 4104.0024, ...], ], ... ] sum = 55404.070312 ggml_debug: kqv_merged-1 = (f32) PERMUTE(kqv-1{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0024, 4.0024, 8.0024, ...], [1536.0024, 1540.0024, 1544.0024, ...], [3072.0024, 3076.0024, 3080.0024, ...], ... ], [ [512.0024, 516.0024, 520.0024, ...], [2048.0024, 2052.0024, 2056.0024, ...], [3584.0024, 3588.0024, 3592.0024, ...], ... ], [ [1024.0024, 1028.0024, 1032.0024, ...], [2560.0024, 2564.0024, 2568.0024, ...], [4096.0024, 4100.0024, 4104.0024, ...], ... ], ] sum = 55404.074219 ggml_debug: kqv_merged_cont-1 = (f32) CONT(kqv_merged-1{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0024, 4.0024, 8.0024, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.015625 ggml_debug: kqv_out-1 = (f32) MUL_MAT(blk.1.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-1{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0052, 4.0052, 8.0052, ...], [24576.0059, 24580.0059, 24584.0059, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.031250 ggml_debug: norm-1 = (f32) NORM(kqv_out-1{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.6252, 4.6252, 8.6252, ...], [24576.6250, 24580.6250, 24584.6250, ...], [49152.6250, 49156.6250, 49160.6250, ...], ], ] sum = 221225.625000 ggml_debug: attn_out_norm-1 = (f32) MUL(norm-1{6144, 3, 1, 1}, blk.1.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0443, 4.0443, 8.0443, ...], [24576.0449, 24580.0449, 24584.0449, ...], [49152.0430, 49156.0430, 49160.0430, ...], ], ] sum = 221220.406250 ggml_debug: ffn_moe_logits-1 = (f32) MUL_MAT(blk.1.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-1{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.3013, 3.6987, 7.6987, ...], [ 63.6987, 67.6987, 71.6987, ...], [127.6987, 131.6987, 135.6987, ...], ], ] sum = 609.288574 ggml_debug: ffn_moe_probs-1 = (f32) SOFT_MAX(ffn_moe_logits-1{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0508, 4.0508, 8.0508, ...], [ 64.0508, 68.0508, 72.0508, ...], [128.0508, 132.0508, 136.0508, ...], ], ] sum = 612.457458 ggml_debug: ffn_moe_argsort-1 = (i32) ARGSORT(ffn_moe_probs-1{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 8.0000, 12.0000, 16.0000, ...], [ 72.0000, 76.0000, 80.0000, ...], [136.0000, 140.0000, 144.0000, ...], ], ] sum = 684.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-1{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 8.0000, 12.0000, 16.0000, ...], [ 72.0000, 76.0000, 80.0000, ...], [136.0000, 140.0000, 144.0000, ...], ], ] sum = 684.000000 ggml_debug: ffn_moe_up-1 = (f32) MUL_MAT_ID(blk.1.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-1{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0362, 4.0362, 8.0362, ...], [43008.0352, 43012.0352, 43016.0352, ...], [86016.0391, 86020.0391, 86024.0391, ...], ], ] sum = 387108.312500 ggml_debug: ffn_moe_gate-1 = (f32) MUL_MAT_ID(blk.1.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-1{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0283, 3.9717, 7.9717, ...], [43007.9727, 43011.9727, 43015.9727, ...], [86015.9688, 86019.9688, 86023.9688, ...], ], ] sum = 387107.718750 ggml_debug: ffn_moe_silu-1 = (f32) UNARY(ffn_moe_gate-1{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0140, 3.9860, 7.9860, ...], [43007.9844, 43011.9844, 43015.9844, ...], [86015.9844, 86019.9844, 86023.9844, ...], ], ] sum = 387107.875000 ggml_debug: ffn_moe_gate_par-1 = (f32) MUL(ffn_moe_up-1{10752, 3, 1, 1}, ffn_moe_silu-1{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0005, 3.9995, 7.9995, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-1 = (f32) MUL_MAT_ID(blk.1.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-1{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0084, 4.0084, 8.0084, ...], [24576.0078, 24580.0078, 24584.0078, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: ffn_moe_probs-1 (reshaped) = (f32) RESHAPE(ffn_moe_probs-1{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0508], [ 4.0508], [ 8.0508], ... ], [ [ 64.0508], [ 68.0508], [ 72.0508], ... ], [ [128.0508], [132.0508], [136.0508], ... ], ] sum = 612.457458 ggml_debug: ffn_moe_weights-1 = (f32) GET_ROWS(ffn_moe_probs-1 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.0771], [ 4.0771], [ 8.0771], ... ], [ [ 16.0771], [ 20.0771], [ 24.0771], ... ], [ [ 32.0771], [ 36.0771], [ 40.0771], ... ], ] sum = 180.693481 ggml_debug: ffn_moe_weights-1 (reshaped) = (f32) RESHAPE(ffn_moe_weights-1{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0771, 4.0771, 8.0771, ...], [ 16.0771, 20.0771, 24.0771, ...], [ 32.0771, 36.0771, 40.0771, ...], ], ] sum = 180.693481 ggml_debug: ffn_moe_weights_sum-1 = (f32) SUM_ROWS(ffn_moe_weights-1 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2942], [ 4.2942], [ 8.2942], ], ] sum = 12.882648 ggml_debug: ffn_moe_weights_norm-1 = (f32) DIV(ffn_moe_weights-1 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-1{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2619, 4.2619, 8.2619, ...], [ 16.2619, 20.2619, 24.2619, ...], [ 32.2619, 36.2619, 40.2619, ...], ], ] sum = 182.357010 ggml_debug: ffn_moe_weights_norm-1 (view) = (f32) VIEW(ffn_moe_weights_norm-1{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2619], [ 16.2619], [ 32.2619], ], ] sum = 48.785671 ggml_debug: ffn_moe_weighted-1 = (f32) MUL(ffn_moe_down-1{6144, 3, 1, 1}, ffn_moe_weights_norm-1 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0022, 4.0022, 8.0022, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.015625 ggml_debug: ffn_moe_up-1 = (f32) MUL_MAT_ID(blk.1.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-1{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0108, 4.0108, 8.0108, ...], [43008.0117, 43012.0117, 43016.0117, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.062500 ggml_debug: ffn_moe_gate-1 = (f32) MUL_MAT_ID(blk.1.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-1{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1285, 3.8715, 7.8715, ...], [43007.8711, 43011.8711, 43015.8711, ...], [86015.8750, 86019.8750, 86023.8750, ...], ], ] sum = 387106.875000 ggml_debug: ffn_moe_silu-1 = (f32) UNARY(ffn_moe_gate-1{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0601, 3.9399, 7.9399, ...], [43007.9414, 43011.9414, 43015.9414, ...], [86015.9375, 86019.9375, 86023.9375, ...], ], ] sum = 387107.437500 ggml_debug: ffn_moe_gate_par-1 = (f32) MUL(ffn_moe_up-1{10752, 3, 1, 1}, ffn_moe_silu-1{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0006, 3.9994, 7.9994, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-1 = (f32) MUL_MAT_ID(blk.1.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-1{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0027, 3.9973, 7.9973, ...], [24575.9980, 24579.9980, 24583.9980, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.984375 ggml_debug: ffn_moe_weights_norm-1 (view) = (f32) VIEW(ffn_moe_weights_norm-1{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2481], [ 16.2481], [ 32.2481], ], ] sum = 48.744312 ggml_debug: ffn_moe_weighted-1 = (f32) MUL(ffn_moe_down-1{6144, 3, 1, 1}, ffn_moe_weights_norm-1 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0007, 3.9993, 7.9993, ...], [24576.0000, 24580.0000, 24584.0000, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_out-1 = (f32) ADD(ffn_moe_weighted-1{6144, 3, 1, 1}, ffn_moe_weighted-1{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0015, 4.0015, 8.0015, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_up-1 = (f32) MUL_MAT_ID(blk.1.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-1{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0350, 4.0350, 8.0350, ...], [43008.0352, 43012.0352, 43016.0352, ...], [86016.0312, 86020.0312, 86024.0312, ...], ], ] sum = 387108.281250 ggml_debug: ffn_moe_gate-1 = (f32) MUL_MAT_ID(blk.1.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-1{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0989, 3.9011, 7.9011, ...], [43007.9023, 43011.9023, 43015.9023, ...], [86015.8984, 86019.8984, 86023.8984, ...], ], ] sum = 387107.125000 ggml_debug: ffn_moe_silu-1 = (f32) UNARY(ffn_moe_gate-1{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0470, 3.9530, 7.9530, ...], [43007.9531, 43011.9531, 43015.9531, ...], [86015.9531, 86019.9531, 86023.9531, ...], ], ] sum = 387107.562500 ggml_debug: ffn_moe_gate_par-1 = (f32) MUL(ffn_moe_up-1{10752, 3, 1, 1}, ffn_moe_silu-1{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0016, 3.9984, 7.9984, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-1 = (f32) MUL_MAT_ID(blk.1.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-1{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0231, 4.0231, 8.0231, ...], [24576.0234, 24580.0234, 24584.0234, ...], [49152.0234, 49156.0234, 49160.0234, ...], ], ] sum = 221220.218750 ggml_debug: ffn_moe_weights_norm-1 (view) = (f32) VIEW(ffn_moe_weights_norm-1{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2466], [ 16.2466], [ 32.2466], ], ] sum = 48.739761 ggml_debug: ffn_moe_weighted-1 = (f32) MUL(ffn_moe_down-1{6144, 3, 1, 1}, ffn_moe_weights_norm-1 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0057, 4.0057, 8.0057, ...], [24576.0059, 24580.0059, 24584.0059, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.046875 ggml_debug: ffn_moe_out-1 = (f32) ADD(ffn_moe_out-1{6144, 3, 1, 1}, ffn_moe_weighted-1{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0072, 4.0072, 8.0072, ...], [24576.0078, 24580.0078, 24584.0078, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: ffn_moe_up-1 = (f32) MUL_MAT_ID(blk.1.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-1{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0593, 4.0593, 8.0593, ...], [43008.0586, 43012.0586, 43016.0586, ...], [86016.0625, 86020.0625, 86024.0625, ...], ], ] sum = 387108.562500 ggml_debug: ffn_moe_gate-1 = (f32) MUL_MAT_ID(blk.1.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-1{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0540, 3.9460, 7.9460, ...], [43007.9453, 43011.9453, 43015.9453, ...], [86015.9453, 86019.9453, 86023.9453, ...], ], ] sum = 387107.500000 ggml_debug: ffn_moe_silu-1 = (f32) UNARY(ffn_moe_gate-1{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0263, 3.9737, 7.9737, ...], [43007.9727, 43011.9727, 43015.9727, ...], [86015.9766, 86019.9766, 86023.9766, ...], ], ] sum = 387107.750000 ggml_debug: ffn_moe_gate_par-1 = (f32) MUL(ffn_moe_up-1{10752, 3, 1, 1}, ffn_moe_silu-1{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0016, 3.9984, 7.9984, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-1 = (f32) MUL_MAT_ID(blk.1.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-1{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0125, 4.0125, 8.0125, ...], [24576.0117, 24580.0117, 24584.0117, ...], [49152.0117, 49156.0117, 49160.0117, ...], ], ] sum = 221220.125000 ggml_debug: ffn_moe_weights_norm-1 (view) = (f32) VIEW(ffn_moe_weights_norm-1{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2434], [ 16.2434], [ 32.2434], ], ] sum = 48.730251 ggml_debug: ffn_moe_weighted-1 = (f32) MUL(ffn_moe_down-1{6144, 3, 1, 1}, ffn_moe_weights_norm-1 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0030, 4.0030, 8.0030, ...], [24576.0039, 24580.0039, 24584.0039, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.015625 ggml_debug: ffn_moe_out-1 = (f32) ADD(ffn_moe_out-1{6144, 3, 1, 1}, ffn_moe_weighted-1{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0103, 4.0103, 8.0103, ...], [24576.0098, 24580.0098, 24584.0098, ...], [49152.0117, 49156.0117, 49160.0117, ...], ], ] sum = 221220.109375 ggml_debug: ffn_inp-1 = (f32) ADD(kqv_out-1{6144, 3, 1, 1}, l_out-0{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0097, 3.9903, 7.9903, ...], [24575.9902, 24579.9902, 24583.9902, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: l_out-1 = (f32) ADD(ffn_moe_out-1{6144, 3, 1, 1}, ffn_inp-1{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0006, 4.0006, 8.0006, ...], [24576.0000, 24580.0000, 24584.0000, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: norm-2 = (f32) NORM(l_out-1{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0203, 4.0203, 8.0203, ...], [24576.0195, 24580.0195, 24584.0195, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.171875 ggml_debug: attn_norm-2 = (f32) MUL(norm-2{6144, 3, 1, 1}, blk.2.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0000, 4.0000, 8.0000, ...], [24576.0000, 24580.0000, 24584.0000, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: wqkv-2 = (f32) MUL_MAT(blk.2.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-2{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.0468, 4.0468, 8.0468, ...], [32768.0469, 32772.0469, 32776.0469, ...], [65536.0469, 65540.0469, 65544.0469, ...], ], ] sum = 294948.437500 ggml_debug: wqkv_clamped-2 = (f32) CLAMP(wqkv-2{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.0468, 4.0468, 8.0468, ...], [32768.0469, 32772.0469, 32776.0469, ...], [65536.0469, 65540.0469, 65544.0469, ...], ], ] sum = 294948.437500 ggml_debug: wqkv_clamped-2 (view) = (f32) VIEW(wqkv_clamped-2{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0468, 4.0468, 8.0468, ...], [32768.0469, 32772.0469, 32776.0469, ...], [65536.0469, 65540.0469, 65544.0469, ...], ], ] sum = 294948.437500 ggml_debug: Qcur-2 = (f32) CONT(wqkv_clamped-2 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0468, 4.0468, 8.0468, ...], [24576.0469, 24580.0469, 24584.0469, ...], [49152.0469, 49156.0469, 49160.0469, ...], ], ] sum = 221220.421875 ggml_debug: Qcur-2 (reshaped) = (f32) RESHAPE(Qcur-2{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0468, 4.0468, 8.0468, ...], [512.0468, 516.0468, 520.0468, ...], [1024.0468, 1028.0468, 1032.0468, ...], ... ], [ [24576.0469, 24580.0469, 24584.0469, ...], [25088.0469, 25092.0469, 25096.0469, ...], [25600.0469, 25604.0469, 25608.0469, ...], ... ], [ [49152.0469, 49156.0469, 49160.0469, ...], [49664.0469, 49668.0469, 49672.0469, ...], [50176.0469, 50180.0469, 50184.0469, ...], ... ], ] sum = 677485.375000 ggml_debug: Qcur-2 = (f32) ROPE(Qcur-2 (reshaped){128, 48, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.0468, 4.0468, 8.0468, ...], [512.0468, 516.0468, 520.0468, ...], [1024.0468, 1028.0468, 1032.0468, ...], ... ], [ [24576.0469, 24580.0469, 24584.0469, ...], [25088.0469, 25092.0469, 25096.0469, ...], [25600.0469, 25604.0469, 25608.0469, ...], ... ], [ [49152.0469, 49156.0469, 49160.0469, ...], [49664.0469, 49668.0469, 49672.0469, ...], [50176.0469, 50180.0469, 50184.0469, ...], ... ], ] sum = 677485.375000 ggml_debug: wqkv_clamped-2 (view) = (f32) VIEW(wqkv_clamped-2{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -2.2315, 1.7685, 5.7685, ...], [32765.7676, 32769.7695, 32773.7695, ...], [65533.7695, 65537.7656, 65541.7656, ...], ], ] sum = 294927.906250 ggml_debug: Kcur-2 = (f32) CONT(wqkv_clamped-2 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -2.2315, 1.7685, 5.7685, ...], [4093.7686, 4097.7686, 4101.7686, ...], [8189.7686, 8193.7686, 8197.7686, ...], ], ] sum = 36879.917969 ggml_debug: Kcur-2 (reshaped) = (f32) RESHAPE(Kcur-2{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ -2.2315, 1.7685, 5.7685, ...], [509.7685, 513.7685, 517.7685, ...], [1021.7685, 1025.7684, 1029.7684, ...], ... ], [ [4093.7686, 4097.7686, 4101.7686, ...], [4605.7686, 4609.7686, 4613.7686, ...], [5117.7686, 5121.7686, 5125.7686, ...], ... ], [ [8189.7686, 8193.7686, 8197.7686, ...], [8701.7686, 8705.7686, 8709.7686, ...], [9213.7686, 9217.7686, 9221.7686, ...], ... ], ] sum = 124463.742188 ggml_debug: Kcur-2 = (f32) ROPE(Kcur-2 (reshaped){128, 8, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ -2.2315, 1.7685, 5.7685, ...], [509.7685, 513.7685, 517.7685, ...], [1021.7685, 1025.7684, 1029.7684, ...], ... ], [ [4093.7686, 4097.7686, 4101.7686, ...], [4605.7686, 4609.7686, 4613.7686, ...], [5117.7686, 5121.7686, 5125.7686, ...], ... ], [ [8189.7686, 8193.7686, 8197.7686, ...], [8701.7686, 8705.7686, 8709.7686, ...], [9213.7686, 9217.7686, 9221.7686, ...], ... ], ] sum = 124463.742188 ggml_debug: wqkv_clamped-2 (view) = (f32) VIEW(wqkv_clamped-2{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0193, 4.0193, 8.0193, ...], [32768.0195, 32772.0195, 32776.0195, ...], [65536.0156, 65540.0156, 65544.0156, ...], ], ] sum = 294948.156250 ggml_debug: Vcur-2 = (f32) CONT(wqkv_clamped-2 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0193, 4.0193, 8.0193, ...], [4096.0190, 4100.0190, 4104.0190, ...], [8192.0195, 8196.0195, 8200.0195, ...], ], ] sum = 36900.171875 ggml_debug: k_cache_view-2 = (f16) VIEW(cache_k_l2{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-2 (copy of Kcur-2) = (f16) CPY(Kcur-2{128, 8, 3, 1}, k_cache_view-2{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ -2.2324, -2.2363, -2.2402, ...], ], ] sum = -6.708984 ggml_debug: v_cur_t-2 = (f32) TRANSPOSE(Vcur-2{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0193, 4096.0190, 8192.0195], [ 4.0193, 4100.0190, 8196.0195], [ 8.0193, 4104.0190, 8200.0195], ... ], ] sum = 36900.171875 ggml_debug: v_cache_view-2 = (f16) VIEW(cache_v_l2{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-2 (copy of v_cur_t-2) = (f16) CPY(v_cur_t-2{3, 1024, 1, 1}, v_cache_view-2{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.0193, 0.0193, 0.0193], [ 0.0385, 0.0386, 0.0387], [ 0.0771, 0.0772, 0.0773], ... ], ] sum = 0.405350 ggml_debug: v-2 = (f16) VIEW(cache_v_l2{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.0193, 0.0193, 0.0193, ...], [ 0.0385, 0.0386, 0.0387, ...], [ 0.0771, 0.0772, 0.0773, ...], ... ], [ [ 0.0193, 0.0193, 0.0193, ...], [ 0.0385, 0.0386, 0.0387, ...], [ 0.0771, 0.0772, 0.0773, ...], ... ], [ [ 0.0193, 0.0193, 0.0193, ...], [ 0.0385, 0.0386, 0.0387, ...], [ 0.0771, 0.0772, 0.0773, ...], ... ], ... ] sum = 1.216049 ggml_debug: k-2 = (f16) VIEW(cache_k_l2{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ -2.2324, -2.2363, -2.2402, ...], [ -8.9297, -8.9453, -8.9609, ...], [-35.7188, -35.7812, -35.8438, ...], ... ], [ [ -2.7324, -2.7363, -2.7402, ...], [-10.9297, -10.9453, -10.9609, ...], [-43.7188, -43.7812, -43.8438, ...], ... ], [ [ -3.2324, -3.2363, -3.2402, ...], [-12.9297, -12.9453, -12.9609, ...], [-51.7188, -51.7812, -51.8438, ...], ... ], ... ] sum = -517.166016 ggml_debug: q-2 = (f32) PERMUTE(Qcur-2{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.0468, 4.0468, 8.0468, ...], [24576.0469, 24580.0469, 24584.0469, ...], [49152.0469, 49156.0469, 49160.0469, ...], ], [ [512.0468, 516.0468, 520.0468, ...], [25088.0469, 25092.0469, 25096.0469, ...], [49664.0469, 49668.0469, 49672.0469, ...], ], [ [1024.0468, 1028.0468, 1032.0468, ...], [25600.0469, 25604.0469, 25608.0469, ...], [50176.0469, 50180.0469, 50184.0469, ...], ], ... ] sum = 677485.312500 ggml_debug: kq-2 = (f32) MUL_MAT(k-2{128, 32, 8, 1}, q-2{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [-24.1562, -20.1562, -16.1562, ...], [103.8438, 107.8438, 111.8438, ...], [231.8438, 235.8438, 239.8438, ...], ], [ [359.8438, 363.8438, 367.8438, ...], [487.8438, 491.8438, 495.8438, ...], [615.8438, 619.8438, 623.8438, ...], ], [ [743.8438, 747.8438, 751.8438, ...], [871.8438, 875.8438, 879.8438, ...], [999.8438, 1003.8438, 1007.8438, ...], ], ... ] sum = 13279.781250 ggml_debug: kq_soft_max_ext-2 = (f32) SOFT_MAX(kq-2{32, 3, 48, 1}, CUDA0#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-2 = (f32) MUL_MAT(v-2{32, 128, 8, 1}, kq_soft_max_ext-2{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.0193, 4.0193, 8.0193, ...], [512.0193, 516.0193, 520.0193, ...], [1024.0193, 1028.0193, 1032.0193, ...], ], [ [1536.0193, 1540.0193, 1544.0193, ...], [2048.0193, 2052.0193, 2056.0193, ...], [2560.0193, 2564.0193, 2568.0193, ...], ], [ [3072.0193, 3076.0193, 3080.0193, ...], [3584.0193, 3588.0193, 3592.0193, ...], [4096.0190, 4100.0190, 4104.0190, ...], ], ... ] sum = 55404.523438 ggml_debug: kqv_merged-2 = (f32) PERMUTE(kqv-2{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0193, 4.0193, 8.0193, ...], [1536.0193, 1540.0193, 1544.0193, ...], [3072.0193, 3076.0193, 3080.0193, ...], ... ], [ [512.0193, 516.0193, 520.0193, ...], [2048.0193, 2052.0193, 2056.0193, ...], [3584.0193, 3588.0193, 3592.0193, ...], ... ], [ [1024.0193, 1028.0193, 1032.0193, ...], [2560.0193, 2564.0193, 2568.0193, ...], [4096.0190, 4100.0190, 4104.0190, ...], ... ], ] sum = 55404.523438 ggml_debug: kqv_merged_cont-2 = (f32) CONT(kqv_merged-2{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0193, 4.0193, 8.0193, ...], [24576.0195, 24580.0195, 24584.0195, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.171875 ggml_debug: kqv_out-2 = (f32) MUL_MAT(blk.2.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-2{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0256, 3.9744, 7.9744, ...], [24575.9746, 24579.9746, 24583.9746, ...], [49151.9727, 49155.9727, 49159.9727, ...], ], ] sum = 221219.765625 ggml_debug: norm-2 = (f32) NORM(kqv_out-2{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.7674, 3.2326, 7.2326, ...], [24575.2324, 24579.2324, 24583.2324, ...], [49151.2344, 49155.2344, 49159.2344, ...], ], ] sum = 221213.109375 ggml_debug: attn_out_norm-2 = (f32) MUL(norm-2{6144, 3, 1, 1}, blk.2.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0832, 3.9168, 7.9168, ...], [24575.9160, 24579.9160, 24583.9160, ...], [49151.9180, 49155.9180, 49159.9180, ...], ], ] sum = 221219.265625 ggml_debug: ffn_moe_logits-2 = (f32) MUL_MAT(blk.2.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-2{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.5483, 3.4517, 7.4517, ...], [ 63.4517, 67.4517, 71.4517, ...], [127.4517, 131.4517, 135.4517, ...], ], ] sum = 607.064941 ggml_debug: ffn_moe_probs-2 = (f32) SOFT_MAX(ffn_moe_logits-2{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0397, 4.0397, 8.0397, ...], [ 64.0397, 68.0397, 72.0397, ...], [128.0397, 132.0397, 136.0397, ...], ], ] sum = 612.357483 ggml_debug: ffn_moe_argsort-2 = (i32) ARGSORT(ffn_moe_probs-2{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 2.0000, 6.0000, 10.0000, ...], [ 66.0000, 70.0000, 74.0000, ...], [130.0000, 134.0000, 138.0000, ...], ], ] sum = 630.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-2{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 2.0000, 6.0000, 10.0000, ...], [ 66.0000, 70.0000, 74.0000, ...], [130.0000, 134.0000, 138.0000, ...], ], ] sum = 630.000000 ggml_debug: ffn_moe_up-2 = (f32) MUL_MAT_ID(blk.2.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-2{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2002, 4.2002, 8.2002, ...], [43008.1992, 43012.1992, 43016.1992, ...], [86016.2031, 86020.2031, 86024.2031, ...], ], ] sum = 387109.812500 ggml_debug: ffn_moe_gate-2 = (f32) MUL_MAT_ID(blk.2.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-2{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3248, 4.3248, 8.3248, ...], [43008.3242, 43012.3242, 43016.3242, ...], [86016.3281, 86020.3281, 86024.3281, ...], ], ] sum = 387110.937500 ggml_debug: ffn_moe_silu-2 = (f32) UNARY(ffn_moe_gate-2{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1886, 4.1886, 8.1886, ...], [43008.1875, 43012.1875, 43016.1875, ...], [86016.1875, 86020.1875, 86024.1875, ...], ], ] sum = 387109.687500 ggml_debug: ffn_moe_gate_par-2 = (f32) MUL(ffn_moe_up-2{10752, 3, 1, 1}, ffn_moe_silu-2{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0377, 4.0377, 8.0377, ...], [43008.0391, 43012.0391, 43016.0391, ...], [86016.0391, 86020.0391, 86024.0391, ...], ], ] sum = 387108.343750 ggml_debug: ffn_moe_down-2 = (f32) MUL_MAT_ID(blk.2.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-2{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0476, 4.0476, 8.0476, ...], [24576.0469, 24580.0469, 24584.0469, ...], [49152.0469, 49156.0469, 49160.0469, ...], ], ] sum = 221220.421875 ggml_debug: ffn_moe_probs-2 (reshaped) = (f32) RESHAPE(ffn_moe_probs-2{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0397], [ 4.0397], [ 8.0397], ... ], [ [ 64.0397], [ 68.0397], [ 72.0397], ... ], [ [128.0397], [132.0397], [136.0397], ... ], ] sum = 612.357483 ggml_debug: ffn_moe_weights-2 = (f32) GET_ROWS(ffn_moe_probs-2 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1085], [ 4.1085], [ 8.1085], ... ], [ [ 16.1085], [ 20.1085], [ 24.1085], ... ], [ [ 32.1085], [ 36.1085], [ 40.1085], ... ], ] sum = 180.976166 ggml_debug: ffn_moe_weights-2 (reshaped) = (f32) RESHAPE(ffn_moe_weights-2{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1085, 4.1085, 8.1085, ...], [ 16.1085, 20.1085, 24.1085, ...], [ 32.1085, 36.1085, 40.1085, ...], ], ] sum = 180.976166 ggml_debug: ffn_moe_weights_sum-2 = (f32) SUM_ROWS(ffn_moe_weights-2 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3421], [ 4.3421], [ 8.3421], ], ] sum = 13.026395 ggml_debug: ffn_moe_weights_norm-2 = (f32) DIV(ffn_moe_weights-2 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-2{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3170, 4.3170, 8.3170, ...], [ 16.3170, 20.3170, 24.3170, ...], [ 32.3170, 36.3170, 40.3170, ...], ], ] sum = 182.853180 ggml_debug: ffn_moe_weights_norm-2 (view) = (f32) VIEW(ffn_moe_weights_norm-2{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3170], [ 16.3170], [ 32.3170], ], ] sum = 48.951065 ggml_debug: ffn_moe_weighted-2 = (f32) MUL(ffn_moe_down-2{6144, 3, 1, 1}, ffn_moe_weights_norm-2 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0151, 4.0151, 8.0151, ...], [24576.0156, 24580.0156, 24584.0156, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: ffn_moe_up-2 = (f32) MUL_MAT_ID(blk.2.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-2{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0477, 4.0477, 8.0477, ...], [43008.0469, 43012.0469, 43016.0469, ...], [86016.0469, 86020.0469, 86024.0469, ...], ], ] sum = 387108.437500 ggml_debug: ffn_moe_gate-2 = (f32) MUL_MAT_ID(blk.2.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-2{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0146, 3.9854, 7.9854, ...], [43007.9844, 43011.9844, 43015.9844, ...], [86015.9844, 86019.9844, 86023.9844, ...], ], ] sum = 387107.875000 ggml_debug: ffn_moe_silu-2 = (f32) UNARY(ffn_moe_gate-2{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0072, 3.9928, 7.9928, ...], [43007.9922, 43011.9922, 43015.9922, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.937500 ggml_debug: ffn_moe_gate_par-2 = (f32) MUL(ffn_moe_up-2{10752, 3, 1, 1}, ffn_moe_silu-2{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0003, 3.9997, 7.9997, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-2 = (f32) MUL_MAT_ID(blk.2.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-2{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0080, 4.0080, 8.0080, ...], [24576.0078, 24580.0078, 24584.0078, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: ffn_moe_weights_norm-2 (view) = (f32) VIEW(ffn_moe_weights_norm-2{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2572], [ 16.2572], [ 32.2572], ], ] sum = 48.771515 ggml_debug: ffn_moe_weighted-2 = (f32) MUL(ffn_moe_down-2{6144, 3, 1, 1}, ffn_moe_weights_norm-2 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0021, 4.0021, 8.0021, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.015625 ggml_debug: ffn_moe_out-2 = (f32) ADD(ffn_moe_weighted-2{6144, 3, 1, 1}, ffn_moe_weighted-2{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0172, 4.0172, 8.0172, ...], [24576.0176, 24580.0176, 24584.0176, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: ffn_moe_up-2 = (f32) MUL_MAT_ID(blk.2.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-2{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1330, 3.8670, 7.8670, ...], [43007.8672, 43011.8672, 43015.8672, ...], [86015.8672, 86019.8672, 86023.8672, ...], ], ] sum = 387106.812500 ggml_debug: ffn_moe_gate-2 = (f32) MUL_MAT_ID(blk.2.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-2{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0024, 4.0024, 8.0024, ...], [43008.0039, 43012.0039, 43016.0039, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_silu-2 = (f32) UNARY(ffn_moe_gate-2{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0012, 4.0012, 8.0012, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_gate_par-2 = (f32) MUL(ffn_moe_up-2{10752, 3, 1, 1}, ffn_moe_silu-2{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0002, 3.9998, 7.9998, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-2 = (f32) MUL_MAT_ID(blk.2.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-2{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0056, 4.0056, 8.0056, ...], [24576.0059, 24580.0059, 24584.0059, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.046875 ggml_debug: ffn_moe_weights_norm-2 (view) = (f32) VIEW(ffn_moe_weights_norm-2{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2182], [ 16.2182], [ 32.2182], ], ] sum = 48.654575 ggml_debug: ffn_moe_weighted-2 = (f32) MUL(ffn_moe_down-2{6144, 3, 1, 1}, ffn_moe_weights_norm-2 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0012, 4.0012, 8.0012, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_out-2 = (f32) ADD(ffn_moe_out-2{6144, 3, 1, 1}, ffn_moe_weighted-2{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0184, 4.0184, 8.0184, ...], [24576.0176, 24580.0176, 24584.0176, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.156250 ggml_debug: ffn_moe_up-2 = (f32) MUL_MAT_ID(blk.2.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-2{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0164, 3.9836, 7.9836, ...], [43007.9844, 43011.9844, 43015.9844, ...], [86015.9844, 86019.9844, 86023.9844, ...], ], ] sum = 387107.875000 ggml_debug: ffn_moe_gate-2 = (f32) MUL_MAT_ID(blk.2.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-2{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0595, 4.0595, 8.0595, ...], [43008.0586, 43012.0586, 43016.0586, ...], [86016.0625, 86020.0625, 86024.0625, ...], ], ] sum = 387108.562500 ggml_debug: ffn_moe_silu-2 = (f32) UNARY(ffn_moe_gate-2{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0306, 4.0306, 8.0306, ...], [43008.0312, 43012.0312, 43016.0312, ...], [86016.0312, 86020.0312, 86024.0312, ...], ], ] sum = 387108.281250 ggml_debug: ffn_moe_gate_par-2 = (f32) MUL(ffn_moe_up-2{10752, 3, 1, 1}, ffn_moe_silu-2{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0005, 3.9995, 7.9995, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-2 = (f32) MUL_MAT_ID(blk.2.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-2{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0004, 4.0004, 8.0004, ...], [24576.0000, 24580.0000, 24584.0000, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_weights_norm-2 (view) = (f32) VIEW(ffn_moe_weights_norm-2{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2076], [ 16.2076], [ 32.2076], ], ] sum = 48.622849 ggml_debug: ffn_moe_weighted-2 = (f32) MUL(ffn_moe_down-2{6144, 3, 1, 1}, ffn_moe_weights_norm-2 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0001, 4.0001, 8.0001, ...], [24576.0000, 24580.0000, 24584.0000, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_out-2 = (f32) ADD(ffn_moe_out-2{6144, 3, 1, 1}, ffn_moe_weighted-2{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0185, 4.0185, 8.0185, ...], [24576.0176, 24580.0176, 24584.0176, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.156250 ggml_debug: ffn_inp-2 = (f32) ADD(kqv_out-2{6144, 3, 1, 1}, l_out-1{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0251, 3.9749, 7.9749, ...], [24575.9746, 24579.9746, 24583.9746, ...], [49151.9766, 49155.9766, 49159.9766, ...], ], ] sum = 221219.781250 ggml_debug: l_out-2 = (f32) ADD(ffn_moe_out-2{6144, 3, 1, 1}, ffn_inp-2{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0066, 3.9934, 7.9934, ...], [24575.9941, 24579.9941, 24583.9941, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: norm-3 = (f32) NORM(l_out-2{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0276, 3.9724, 7.9724, ...], [24575.9727, 24579.9727, 24583.9727, ...], [49151.9727, 49155.9727, 49159.9727, ...], ], ] sum = 221219.750000 ggml_debug: attn_norm-3 = (f32) MUL(norm-3{6144, 3, 1, 1}, blk.3.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0006, 3.9994, 7.9994, ...], [24576.0000, 24580.0000, 24584.0000, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: wqkv-3 = (f32) MUL_MAT(blk.3.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-3{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -0.0710, 3.9290, 7.9290, ...], [32767.9297, 32771.9297, 32775.9297, ...], [65535.9297, 65539.9297, 65543.9297, ...], ], ] sum = 294947.375000 ggml_debug: wqkv_clamped-3 = (f32) CLAMP(wqkv-3{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -0.0710, 3.9290, 7.9290, ...], [32767.9297, 32771.9297, 32775.9297, ...], [65535.9297, 65539.9297, 65543.9297, ...], ], ] sum = 294947.375000 ggml_debug: wqkv_clamped-3 (view) = (f32) VIEW(wqkv_clamped-3{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0710, 3.9290, 7.9290, ...], [32767.9297, 32771.9297, 32775.9297, ...], [65535.9297, 65539.9297, 65543.9297, ...], ], ] sum = 294947.375000 ggml_debug: Qcur-3 = (f32) CONT(wqkv_clamped-3 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0710, 3.9290, 7.9290, ...], [24575.9297, 24579.9297, 24583.9297, ...], [49151.9297, 49155.9297, 49159.9297, ...], ], ] sum = 221219.375000 ggml_debug: Qcur-3 (reshaped) = (f32) RESHAPE(Qcur-3{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -0.0710, 3.9290, 7.9290, ...], [511.9290, 515.9290, 519.9290, ...], [1023.9290, 1027.9290, 1031.9290, ...], ... ], [ [24575.9297, 24579.9297, 24583.9297, ...], [25087.9297, 25091.9297, 25095.9297, ...], [25599.9297, 25603.9297, 25607.9297, ...], ... ], [ [49151.9297, 49155.9297, 49159.9297, ...], [49663.9297, 49667.9297, 49671.9297, ...], [50175.9297, 50179.9297, 50183.9297, ...], ... ], ] sum = 677482.187500 ggml_debug: Qcur-3 = (f32) ROPE(Qcur-3 (reshaped){128, 48, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -0.0710, 3.9290, 7.9290, ...], [511.9290, 515.9290, 519.9290, ...], [1023.9290, 1027.9290, 1031.9290, ...], ... ], [ [24575.9297, 24579.9297, 24583.9297, ...], [25087.9297, 25091.9297, 25095.9297, ...], [25599.9297, 25603.9297, 25607.9297, ...], ... ], [ [49151.9297, 49155.9297, 49159.9297, ...], [49663.9297, 49667.9297, 49671.9297, ...], [50175.9297, 50179.9297, 50183.9297, ...], ... ], ] sum = 677482.187500 ggml_debug: wqkv_clamped-3 (view) = (f32) VIEW(wqkv_clamped-3{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.7314, 4.7314, 8.7314, ...], [32768.7305, 32772.7305, 32776.7305, ...], [65536.7344, 65540.7344, 65544.7344, ...], ], ] sum = 294954.593750 ggml_debug: Kcur-3 = (f32) CONT(wqkv_clamped-3 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.7314, 4.7314, 8.7314, ...], [4096.7314, 4100.7314, 4104.7314, ...], [8192.7314, 8196.7314, 8200.7314, ...], ], ] sum = 36906.582031 ggml_debug: Kcur-3 (reshaped) = (f32) RESHAPE(Kcur-3{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 0.7314, 4.7314, 8.7314, ...], [512.7314, 516.7314, 520.7314, ...], [1024.7314, 1028.7314, 1032.7314, ...], ... ], [ [4096.7314, 4100.7314, 4104.7314, ...], [4608.7314, 4612.7314, 4616.7314, ...], [5120.7314, 5124.7314, 5128.7314, ...], ... ], [ [8192.7314, 8196.7314, 8200.7314, ...], [8704.7314, 8708.7314, 8712.7314, ...], [9216.7314, 9220.7314, 9224.7314, ...], ... ], ] sum = 124543.757812 ggml_debug: Kcur-3 = (f32) ROPE(Kcur-3 (reshaped){128, 8, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 0.7314, 4.7314, 8.7314, ...], [512.7314, 516.7314, 520.7314, ...], [1024.7314, 1028.7314, 1032.7314, ...], ... ], [ [4096.7314, 4100.7314, 4104.7314, ...], [4608.7314, 4612.7314, 4616.7314, ...], [5120.7314, 5124.7314, 5128.7314, ...], ... ], [ [8192.7314, 8196.7314, 8200.7314, ...], [8704.7314, 8708.7314, 8712.7314, ...], [9216.7314, 9220.7314, 9224.7314, ...], ... ], ] sum = 124543.757812 ggml_debug: wqkv_clamped-3 (view) = (f32) VIEW(wqkv_clamped-3{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0920, 4.0920, 8.0920, ...], [32768.0938, 32772.0938, 32776.0938, ...], [65536.0938, 65540.0938, 65544.0938, ...], ], ] sum = 294948.843750 ggml_debug: Vcur-3 = (f32) CONT(wqkv_clamped-3 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0920, 4.0920, 8.0920, ...], [4096.0918, 4100.0918, 4104.0918, ...], [8192.0918, 8196.0918, 8200.0918, ...], ], ] sum = 36900.828125 ggml_debug: k_cache_view-3 = (f16) VIEW(cache_k_l3{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-3 (copy of Kcur-3) = (f16) CPY(Kcur-3{128, 8, 3, 1}, k_cache_view-3{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 0.7314, 0.7324, 0.7334, ...], ], ] sum = 2.197266 ggml_debug: v_cur_t-3 = (f32) TRANSPOSE(Vcur-3{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0920, 4096.0918, 8192.0918], [ 4.0920, 4100.0918, 8196.0918], [ 8.0920, 4104.0918, 8200.0918], ... ], ] sum = 36900.828125 ggml_debug: v_cache_view-3 = (f16) VIEW(cache_v_l3{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-3 (copy of v_cur_t-3) = (f16) CPY(v_cur_t-3{3, 1024, 1, 1}, v_cache_view-3{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.0920, 0.0921, 0.0922], [ 0.1840, 0.1842, 0.1844], [ 0.3679, 0.3684, 0.3689], ... ], ] sum = 1.934143 ggml_debug: v-3 = (f16) VIEW(cache_v_l3{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.0920, 0.0921, 0.0922, ...], [ 0.1840, 0.1842, 0.1844, ...], [ 0.3679, 0.3684, 0.3689, ...], ... ], [ [ 0.0920, 0.0921, 0.0922, ...], [ 0.1840, 0.1842, 0.1844, ...], [ 0.3679, 0.3684, 0.3689, ...], ... ], [ [ 0.0920, 0.0921, 0.0922, ...], [ 0.1840, 0.1842, 0.1844, ...], [ 0.3679, 0.3684, 0.3689, ...], ... ], ... ] sum = 5.802429 ggml_debug: k-3 = (f16) VIEW(cache_k_l3{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 0.7314, 0.7324, 0.7334, ...], [ 2.9258, 2.9297, 2.9336, ...], [ 11.7031, 11.7188, 11.7344, ...], ... ], [ [ 0.8564, 0.8574, 0.8584, ...], [ 3.4258, 3.4297, 3.4336, ...], [ 13.7031, 13.7188, 13.7344, ...], ... ], [ [ 0.9814, 0.9824, 0.9834, ...], [ 3.9258, 3.9297, 3.9336, ...], [ 15.7031, 15.7188, 15.7344, ...], ... ], ... ] sum = 162.052734 ggml_debug: q-3 = (f32) PERMUTE(Qcur-3{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -0.0710, 3.9290, 7.9290, ...], [24575.9297, 24579.9297, 24583.9297, ...], [49151.9297, 49155.9297, 49159.9297, ...], ], [ [511.9290, 515.9290, 519.9290, ...], [25087.9297, 25091.9297, 25095.9297, ...], [49663.9297, 49667.9297, 49671.9297, ...], ], [ [1023.9290, 1027.9290, 1031.9290, ...], [25599.9297, 25603.9297, 25607.9297, ...], [50175.9297, 50179.9297, 50183.9297, ...], ], ... ] sum = 677482.187500 ggml_debug: kq-3 = (f32) MUL_MAT(k-3{128, 32, 8, 1}, q-3{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 5.6172, 9.6172, 13.6172, ...], [133.6172, 137.6172, 141.6172, ...], [261.6172, 265.6172, 269.6172, ...], ], [ [389.6172, 393.6172, 397.6172, ...], [517.6172, 521.6172, 525.6172, ...], [645.6172, 649.6172, 653.6172, ...], ], [ [773.6172, 777.6172, 781.6172, ...], [901.6172, 905.6172, 909.6172, ...], [1029.6172, 1033.6172, 1037.6172, ...], ], ... ] sum = 14083.664062 ggml_debug: kq_soft_max_ext-3 = (f32) SOFT_MAX(kq-3{32, 3, 48, 1}, CUDA0#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-3 = (f32) MUL_MAT(v-3{32, 128, 8, 1}, kq_soft_max_ext-3{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.0920, 4.0920, 8.0920, ...], [512.0920, 516.0920, 520.0920, ...], [1024.0920, 1028.0920, 1032.0920, ...], ], [ [1536.0920, 1540.0920, 1544.0920, ...], [2048.0920, 2052.0920, 2056.0920, ...], [2560.0920, 2564.0920, 2568.0920, ...], ], [ [3072.0920, 3076.0920, 3080.0920, ...], [3584.0920, 3588.0920, 3592.0920, ...], [4096.0918, 4100.0918, 4104.0918, ...], ], ... ] sum = 55406.492188 ggml_debug: kqv_merged-3 = (f32) PERMUTE(kqv-3{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0920, 4.0920, 8.0920, ...], [1536.0920, 1540.0920, 1544.0920, ...], [3072.0920, 3076.0920, 3080.0920, ...], ... ], [ [512.0920, 516.0920, 520.0920, ...], [2048.0920, 2052.0920, 2056.0920, ...], [3584.0920, 3588.0920, 3592.0920, ...], ... ], [ [1024.0920, 1028.0920, 1032.0920, ...], [2560.0920, 2564.0920, 2568.0920, ...], [4096.0918, 4100.0918, 4104.0918, ...], ... ], ] sum = 55406.492188 ggml_debug: kqv_merged_cont-3 = (f32) CONT(kqv_merged-3{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0920, 4.0920, 8.0920, ...], [24576.0918, 24580.0918, 24584.0918, ...], [49152.0938, 49156.0938, 49160.0938, ...], ], ] sum = 221220.843750 ggml_debug: kqv_out-3 = (f32) MUL_MAT(blk.3.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-3{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0162, 3.9838, 7.9838, ...], [24575.9844, 24579.9844, 24583.9844, ...], [49151.9844, 49155.9844, 49159.9844, ...], ], ] sum = 221219.859375 ggml_debug: norm-3 = (f32) NORM(kqv_out-3{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.4369, 3.5631, 7.5631, ...], [24575.5625, 24579.5625, 24583.5625, ...], [49151.5625, 49155.5625, 49159.5625, ...], ], ] sum = 221216.062500 ggml_debug: attn_out_norm-3 = (f32) MUL(norm-3{6144, 3, 1, 1}, blk.3.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0559, 3.9441, 7.9441, ...], [24575.9434, 24579.9434, 24583.9434, ...], [49151.9453, 49155.9453, 49159.9453, ...], ], ] sum = 221219.500000 ggml_debug: ffn_moe_logits-3 = (f32) MUL_MAT(blk.3.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-3{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ 0.1444, 4.1444, 8.1444, ...], [ 64.1444, 68.1444, 72.1444, ...], [128.1444, 132.1444, 136.1444, ...], ], ] sum = 613.299683 ggml_debug: ffn_moe_probs-3 = (f32) SOFT_MAX(ffn_moe_logits-3{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0693, 4.0693, 8.0693, ...], [ 64.0693, 68.0693, 72.0693, ...], [128.0693, 132.0693, 136.0693, ...], ], ] sum = 612.624023 ggml_debug: ffn_moe_argsort-3 = (i32) ARGSORT(ffn_moe_probs-3{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 12.0000, 16.0000, 20.0000, ...], [ 76.0000, 80.0000, 84.0000, ...], [140.0000, 144.0000, 148.0000, ...], ], ] sum = 720.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-3{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 12.0000, 16.0000, 20.0000, ...], [ 76.0000, 80.0000, 84.0000, ...], [140.0000, 144.0000, 148.0000, ...], ], ] sum = 720.000000 ggml_debug: ffn_moe_up-3 = (f32) MUL_MAT_ID(blk.3.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-3{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1560, 3.8440, 7.8440, ...], [43007.8438, 43011.8438, 43015.8438, ...], [86015.8438, 86019.8438, 86023.8438, ...], ], ] sum = 387106.593750 ggml_debug: ffn_moe_gate-3 = (f32) MUL_MAT_ID(blk.3.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-3{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1301, 4.1301, 8.1301, ...], [43008.1289, 43012.1289, 43016.1289, ...], [86016.1328, 86020.1328, 86024.1328, ...], ], ] sum = 387109.156250 ggml_debug: ffn_moe_silu-3 = (f32) UNARY(ffn_moe_gate-3{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0693, 4.0693, 8.0693, ...], [43008.0703, 43012.0703, 43016.0703, ...], [86016.0703, 86020.0703, 86024.0703, ...], ], ] sum = 387108.625000 ggml_debug: ffn_moe_gate_par-3 = (f32) MUL(ffn_moe_up-3{10752, 3, 1, 1}, ffn_moe_silu-3{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0108, 3.9892, 7.9892, ...], [43007.9883, 43011.9883, 43015.9883, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.937500 ggml_debug: ffn_moe_down-3 = (f32) MUL_MAT_ID(blk.3.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-3{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0135, 4.0135, 8.0135, ...], [24576.0137, 24580.0137, 24584.0137, ...], [49152.0117, 49156.0117, 49160.0117, ...], ], ] sum = 221220.125000 ggml_debug: ffn_moe_probs-3 (reshaped) = (f32) RESHAPE(ffn_moe_probs-3{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0693], [ 4.0693], [ 8.0693], ... ], [ [ 64.0693], [ 68.0693], [ 72.0693], ... ], [ [128.0693], [132.0693], [136.0693], ... ], ] sum = 612.624023 ggml_debug: ffn_moe_weights-3 = (f32) GET_ROWS(ffn_moe_probs-3 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.0885], [ 4.0885], [ 8.0885], ... ], [ [ 16.0885], [ 20.0885], [ 24.0885], ... ], [ [ 32.0885], [ 36.0885], [ 40.0885], ... ], ] sum = 180.796692 ggml_debug: ffn_moe_weights-3 (reshaped) = (f32) RESHAPE(ffn_moe_weights-3{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0885, 4.0885, 8.0885, ...], [ 16.0885, 20.0885, 24.0885, ...], [ 32.0885, 36.0885, 40.0885, ...], ], ] sum = 180.796692 ggml_debug: ffn_moe_weights_sum-3 = (f32) SUM_ROWS(ffn_moe_weights-3 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3257], [ 4.3257], [ 8.3257], ], ] sum = 12.977154 ggml_debug: ffn_moe_weights_norm-3 = (f32) DIV(ffn_moe_weights-3 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-3{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2718, 4.2718, 8.2718, ...], [ 16.2718, 20.2718, 24.2718, ...], [ 32.2718, 36.2718, 40.2718, ...], ], ] sum = 182.445969 ggml_debug: ffn_moe_weights_norm-3 (view) = (f32) VIEW(ffn_moe_weights_norm-3{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2718], [ 16.2718], [ 32.2718], ], ] sum = 48.815323 ggml_debug: ffn_moe_weighted-3 = (f32) MUL(ffn_moe_down-3{6144, 3, 1, 1}, ffn_moe_weights_norm-3 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0037, 4.0037, 8.0037, ...], [24576.0039, 24580.0039, 24584.0039, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.031250 ggml_debug: ffn_moe_up-3 = (f32) MUL_MAT_ID(blk.3.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-3{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1200, 3.8800, 7.8800, ...], [43007.8789, 43011.8789, 43015.8789, ...], [86015.8828, 86019.8828, 86023.8828, ...], ], ] sum = 387106.906250 ggml_debug: ffn_moe_gate-3 = (f32) MUL_MAT_ID(blk.3.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-3{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1031, 3.8969, 7.8969, ...], [43007.8984, 43011.8984, 43015.8984, ...], [86015.8984, 86019.8984, 86023.8984, ...], ], ] sum = 387107.093750 ggml_debug: ffn_moe_silu-3 = (f32) UNARY(ffn_moe_gate-3{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0489, 3.9511, 7.9511, ...], [43007.9492, 43011.9492, 43015.9492, ...], [86015.9531, 86019.9531, 86023.9531, ...], ], ] sum = 387107.562500 ggml_debug: ffn_moe_gate_par-3 = (f32) MUL(ffn_moe_up-3{10752, 3, 1, 1}, ffn_moe_silu-3{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0059, 4.0059, 8.0059, ...], [43008.0078, 43012.0078, 43016.0078, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.062500 ggml_debug: ffn_moe_down-3 = (f32) MUL_MAT_ID(blk.3.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-3{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0028, 3.9972, 7.9972, ...], [24575.9980, 24579.9980, 24583.9980, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.984375 ggml_debug: ffn_moe_weights_norm-3 (view) = (f32) VIEW(ffn_moe_weights_norm-3{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2485], [ 16.2485], [ 32.2485], ], ] sum = 48.745445 ggml_debug: ffn_moe_weighted-3 = (f32) MUL(ffn_moe_down-3{6144, 3, 1, 1}, ffn_moe_weights_norm-3 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0007, 3.9993, 7.9993, ...], [24576.0000, 24580.0000, 24584.0000, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_out-3 = (f32) ADD(ffn_moe_weighted-3{6144, 3, 1, 1}, ffn_moe_weighted-3{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0030, 4.0030, 8.0030, ...], [24576.0039, 24580.0039, 24584.0039, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.015625 ggml_debug: ffn_moe_up-3 = (f32) MUL_MAT_ID(blk.3.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-3{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0588, 4.0588, 8.0588, ...], [43008.0586, 43012.0586, 43016.0586, ...], [86016.0625, 86020.0625, 86024.0625, ...], ], ] sum = 387108.562500 ggml_debug: ffn_moe_gate-3 = (f32) MUL_MAT_ID(blk.3.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-3{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0604, 3.9396, 7.9396, ...], [43007.9414, 43011.9414, 43015.9414, ...], [86015.9375, 86019.9375, 86023.9375, ...], ], ] sum = 387107.437500 ggml_debug: ffn_moe_silu-3 = (f32) UNARY(ffn_moe_gate-3{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0293, 3.9707, 7.9707, ...], [43007.9727, 43011.9727, 43015.9727, ...], [86015.9688, 86019.9688, 86023.9688, ...], ], ] sum = 387107.718750 ggml_debug: ffn_moe_gate_par-3 = (f32) MUL(ffn_moe_up-3{10752, 3, 1, 1}, ffn_moe_silu-3{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0017, 3.9983, 7.9983, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-3 = (f32) MUL_MAT_ID(blk.3.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-3{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0066, 3.9934, 7.9934, ...], [24575.9941, 24579.9941, 24583.9941, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: ffn_moe_weights_norm-3 (view) = (f32) VIEW(ffn_moe_weights_norm-3{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2446], [ 16.2446], [ 32.2446], ], ] sum = 48.733711 ggml_debug: ffn_moe_weighted-3 = (f32) MUL(ffn_moe_down-3{6144, 3, 1, 1}, ffn_moe_weights_norm-3 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0016, 3.9984, 7.9984, ...], [24575.9980, 24579.9980, 24583.9980, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_out-3 = (f32) ADD(ffn_moe_out-3{6144, 3, 1, 1}, ffn_moe_weighted-3{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0014, 4.0014, 8.0014, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_up-3 = (f32) MUL_MAT_ID(blk.3.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-3{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0855, 4.0855, 8.0855, ...], [43008.0859, 43012.0859, 43016.0859, ...], [86016.0859, 86020.0859, 86024.0859, ...], ], ] sum = 387108.781250 ggml_debug: ffn_moe_gate-3 = (f32) MUL_MAT_ID(blk.3.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-3{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0904, 4.0904, 8.0904, ...], [43008.0898, 43012.0898, 43016.0898, ...], [86016.0938, 86020.0938, 86024.0938, ...], ], ] sum = 387108.843750 ggml_debug: ffn_moe_silu-3 = (f32) UNARY(ffn_moe_gate-3{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0472, 4.0472, 8.0472, ...], [43008.0469, 43012.0469, 43016.0469, ...], [86016.0469, 86020.0469, 86024.0469, ...], ], ] sum = 387108.437500 ggml_debug: ffn_moe_gate_par-3 = (f32) MUL(ffn_moe_up-3{10752, 3, 1, 1}, ffn_moe_silu-3{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0040, 4.0040, 8.0040, ...], [43008.0039, 43012.0039, 43016.0039, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.031250 ggml_debug: ffn_moe_down-3 = (f32) MUL_MAT_ID(blk.3.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-3{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0170, 4.0170, 8.0170, ...], [24576.0176, 24580.0176, 24584.0176, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: ffn_moe_weights_norm-3 (view) = (f32) VIEW(ffn_moe_weights_norm-3{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2352], [ 16.2352], [ 32.2352], ], ] sum = 48.705517 ggml_debug: ffn_moe_weighted-3 = (f32) MUL(ffn_moe_down-3{6144, 3, 1, 1}, ffn_moe_weights_norm-3 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0040, 4.0040, 8.0040, ...], [24576.0039, 24580.0039, 24584.0039, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.031250 ggml_debug: ffn_moe_out-3 = (f32) ADD(ffn_moe_out-3{6144, 3, 1, 1}, ffn_moe_weighted-3{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0054, 4.0054, 8.0054, ...], [24576.0059, 24580.0059, 24584.0059, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.031250 ggml_debug: ffn_inp-3 = (f32) ADD(kqv_out-3{6144, 3, 1, 1}, l_out-2{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0228, 3.9772, 7.9772, ...], [24575.9766, 24579.9766, 24583.9766, ...], [49151.9766, 49155.9766, 49159.9766, ...], ], ] sum = 221219.781250 ggml_debug: l_out-3 = (f32) ADD(ffn_moe_out-3{6144, 3, 1, 1}, ffn_inp-3{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0174, 3.9826, 7.9826, ...], [24575.9824, 24579.9824, 24583.9824, ...], [49151.9844, 49155.9844, 49159.9844, ...], ], ] sum = 221219.859375 ggml_debug: norm-4 = (f32) NORM(l_out-3{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1933, 3.8067, 7.8067, ...], [24575.8066, 24579.8066, 24583.8066, ...], [49151.8086, 49155.8086, 49159.8086, ...], ], ] sum = 221218.265625 ggml_debug: attn_norm-4 = (f32) MUL(norm-4{6144, 3, 1, 1}, blk.4.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0072, 3.9928, 7.9928, ...], [24575.9922, 24579.9922, 24583.9922, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: wqkv-4 = (f32) MUL_MAT(blk.4.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-4{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -0.0058, 3.9942, 7.9942, ...], [32767.9941, 32771.9961, 32775.9961, ...], [65535.9961, 65539.9922, 65543.9922, ...], ], ] sum = 294947.968750 ggml_debug: wqkv_clamped-4 = (f32) CLAMP(wqkv-4{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -0.0058, 3.9942, 7.9942, ...], [32767.9941, 32771.9961, 32775.9961, ...], [65535.9961, 65539.9922, 65543.9922, ...], ], ] sum = 294947.968750 ggml_debug: wqkv_clamped-4 (view) = (f32) VIEW(wqkv_clamped-4{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0058, 3.9942, 7.9942, ...], [32767.9941, 32771.9961, 32775.9961, ...], [65535.9961, 65539.9922, 65543.9922, ...], ], ] sum = 294947.968750 ggml_debug: Qcur-4 = (f32) CONT(wqkv_clamped-4 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0058, 3.9942, 7.9942, ...], [24575.9941, 24579.9941, 24583.9941, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.953125 ggml_debug: Qcur-4 (reshaped) = (f32) RESHAPE(Qcur-4{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -0.0058, 3.9942, 7.9942, ...], [511.9942, 515.9941, 519.9941, ...], [1023.9941, 1027.9941, 1031.9941, ...], ... ], [ [24575.9941, 24579.9941, 24583.9941, ...], [25087.9941, 25091.9941, 25095.9941, ...], [25599.9941, 25603.9941, 25607.9941, ...], ... ], [ [49151.9961, 49155.9961, 49159.9961, ...], [49663.9961, 49667.9961, 49671.9961, ...], [50175.9961, 50179.9961, 50183.9961, ...], ... ], ] sum = 677483.875000 ggml_debug: Qcur-4 = (f32) ROPE(Qcur-4 (reshaped){128, 48, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -0.0058, 3.9942, 7.9942, ...], [511.9942, 515.9941, 519.9941, ...], [1023.9941, 1027.9941, 1031.9941, ...], ... ], [ [24575.9941, 24579.9941, 24583.9941, ...], [25087.9941, 25091.9941, 25095.9941, ...], [25599.9941, 25603.9941, 25607.9941, ...], ... ], [ [49151.9961, 49155.9961, 49159.9961, ...], [49663.9961, 49667.9961, 49671.9961, ...], [50175.9961, 50179.9961, 50183.9961, ...], ... ], ] sum = 677483.875000 ggml_debug: wqkv_clamped-4 (view) = (f32) VIEW(wqkv_clamped-4{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.3614, 4.3614, 8.3614, ...], [32768.3633, 32772.3633, 32776.3633, ...], [65536.3594, 65540.3594, 65544.3594, ...], ], ] sum = 294951.250000 ggml_debug: Kcur-4 = (f32) CONT(wqkv_clamped-4 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.3614, 4.3614, 8.3614, ...], [4096.3613, 4100.3613, 4104.3613, ...], [8192.3613, 8196.3613, 8200.3613, ...], ], ] sum = 36903.250000 ggml_debug: Kcur-4 (reshaped) = (f32) RESHAPE(Kcur-4{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 0.3614, 4.3614, 8.3614, ...], [512.3615, 516.3615, 520.3615, ...], [1024.3615, 1028.3615, 1032.3615, ...], ... ], [ [4096.3613, 4100.3613, 4104.3613, ...], [4608.3613, 4612.3613, 4616.3613, ...], [5120.3613, 5124.3613, 5128.3613, ...], ... ], [ [8192.3613, 8196.3613, 8200.3613, ...], [8704.3613, 8708.3613, 8712.3613, ...], [9216.3613, 9220.3613, 9224.3613, ...], ... ], ] sum = 124533.734375 ggml_debug: Kcur-4 = (f32) ROPE(Kcur-4 (reshaped){128, 8, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 0.3614, 4.3614, 8.3614, ...], [512.3615, 516.3615, 520.3615, ...], [1024.3615, 1028.3615, 1032.3615, ...], ... ], [ [4096.3613, 4100.3613, 4104.3613, ...], [4608.3613, 4612.3613, 4616.3613, ...], [5120.3613, 5124.3613, 5128.3613, ...], ... ], [ [8192.3613, 8196.3613, 8200.3613, ...], [8704.3613, 8708.3613, 8712.3613, ...], [9216.3613, 9220.3613, 9224.3613, ...], ... ], ] sum = 124533.734375 ggml_debug: wqkv_clamped-4 (view) = (f32) VIEW(wqkv_clamped-4{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0582, 3.9418, 7.9418, ...], [32767.9414, 32771.9414, 32775.9414, ...], [65535.9414, 65539.9453, 65543.9453, ...], ], ] sum = 294947.468750 ggml_debug: Vcur-4 = (f32) CONT(wqkv_clamped-4 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0582, 3.9418, 7.9418, ...], [4095.9419, 4099.9419, 4103.9419, ...], [8191.9419, 8195.9414, 8199.9414, ...], ], ] sum = 36899.476562 ggml_debug: k_cache_view-4 = (f16) VIEW(cache_k_l4{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-4 (copy of Kcur-4) = (f16) CPY(Kcur-4{128, 8, 3, 1}, k_cache_view-4{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 0.3613, 0.3618, 0.3623, ...], ], ] sum = 1.085449 ggml_debug: v_cur_t-4 = (f32) TRANSPOSE(Vcur-4{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.0582, 4095.9419, 8191.9419], [ 3.9418, 4099.9419, 8195.9414], [ 7.9418, 4103.9419, 8199.9414], ... ], ] sum = 36899.476562 ggml_debug: v_cache_view-4 = (f16) VIEW(cache_v_l4{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-4 (copy of v_cur_t-4) = (f16) CPY(v_cur_t-4{3, 1024, 1, 1}, v_cache_view-4{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.0582, -0.0582, -0.0583], [ -0.1163, -0.1165, -0.1166], [ -0.2327, -0.2329, -0.2332], ... ], ] sum = -1.222778 ggml_debug: v-4 = (f16) VIEW(cache_v_l4{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.0582, -0.0582, -0.0583, ...], [ -0.1163, -0.1165, -0.1166, ...], [ -0.2327, -0.2329, -0.2332, ...], ... ], [ [ -0.0582, -0.0582, -0.0583, ...], [ -0.1163, -0.1165, -0.1166, ...], [ -0.2327, -0.2329, -0.2332, ...], ... ], [ [ -0.0582, -0.0582, -0.0583, ...], [ -0.1163, -0.1165, -0.1166, ...], [ -0.2327, -0.2329, -0.2332, ...], ... ], ... ] sum = -3.668335 ggml_debug: k-4 = (f16) VIEW(cache_k_l4{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 0.3613, 0.3618, 0.3623, ...], [ 1.4453, 1.4473, 1.4492, ...], [ 5.7812, 5.7891, 5.7969, ...], ... ], [ [ 0.4238, 0.4243, 0.4248, ...], [ 1.6953, 1.6973, 1.6992, ...], [ 6.7812, 6.7891, 6.7969, ...], ... ], [ [ 0.4863, 0.4868, 0.4873, ...], [ 1.9453, 1.9473, 1.9492, ...], [ 7.7812, 7.7891, 7.7969, ...], ... ], ... ] sum = 80.195801 ggml_debug: q-4 = (f32) PERMUTE(Qcur-4{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -0.0058, 3.9942, 7.9942, ...], [24575.9941, 24579.9941, 24583.9941, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], [ [511.9942, 515.9941, 519.9941, ...], [25087.9941, 25091.9941, 25095.9941, ...], [49663.9961, 49667.9961, 49671.9961, ...], ], [ [1023.9941, 1027.9941, 1031.9941, ...], [25599.9941, 25603.9941, 25607.9941, ...], [50175.9961, 50179.9961, 50183.9961, ...], ], ... ] sum = 677483.937500 ggml_debug: kq-4 = (f32) MUL_MAT(k-4{128, 32, 8, 1}, q-4{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 4.1367, 8.1367, 12.1367, ...], [132.1367, 136.1367, 140.1367, ...], [260.1367, 264.1367, 268.1367, ...], ], [ [388.1367, 392.1367, 396.1367, ...], [516.1367, 520.1367, 524.1367, ...], [644.1367, 648.1367, 652.1367, ...], ], [ [772.1367, 776.1367, 780.1367, ...], [900.1367, 904.1367, 908.1367, ...], [1028.1367, 1032.1367, 1036.1367, ...], ], ... ] sum = 14043.691406 ggml_debug: kq_soft_max_ext-4 = (f32) SOFT_MAX(kq-4{32, 3, 48, 1}, CUDA0#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-4 = (f32) MUL_MAT(v-4{32, 128, 8, 1}, kq_soft_max_ext-4{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.0582, 3.9418, 7.9418, ...], [511.9418, 515.9418, 519.9418, ...], [1023.9418, 1027.9419, 1031.9419, ...], ], [ [1535.9419, 1539.9419, 1543.9419, ...], [2047.9419, 2051.9419, 2055.9419, ...], [2559.9419, 2563.9419, 2567.9419, ...], ], [ [3071.9419, 3075.9419, 3079.9419, ...], [3583.9419, 3587.9419, 3591.9419, ...], [4095.9419, 4099.9419, 4103.9419, ...], ], ... ] sum = 55402.425781 ggml_debug: kqv_merged-4 = (f32) PERMUTE(kqv-4{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.0582, 3.9418, 7.9418, ...], [1535.9419, 1539.9419, 1543.9419, ...], [3071.9419, 3075.9419, 3079.9419, ...], ... ], [ [511.9418, 515.9418, 519.9418, ...], [2047.9419, 2051.9419, 2055.9419, ...], [3583.9419, 3587.9419, 3591.9419, ...], ... ], [ [1023.9418, 1027.9419, 1031.9419, ...], [2559.9419, 2563.9419, 2567.9419, ...], [4095.9419, 4099.9419, 4103.9419, ...], ... ], ] sum = 55402.421875 ggml_debug: kqv_merged_cont-4 = (f32) CONT(kqv_merged-4{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0582, 3.9418, 7.9418, ...], [24575.9414, 24579.9414, 24583.9414, ...], [49151.9414, 49155.9414, 49159.9414, ...], ], ] sum = 221219.468750 ggml_debug: kqv_out-4 = (f32) MUL_MAT(blk.4.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-4{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0492, 4.0492, 8.0492, ...], [24576.0488, 24580.0488, 24584.0488, ...], [49152.0508, 49156.0508, 49160.0508, ...], ], ] sum = 221220.437500 ggml_debug: norm-4 = (f32) NORM(kqv_out-4{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.5389, 4.5389, 8.5389, ...], [24576.5391, 24580.5391, 24584.5391, ...], [49152.5391, 49156.5391, 49160.5391, ...], ], ] sum = 221224.843750 ggml_debug: attn_out_norm-4 = (f32) MUL(norm-4{6144, 3, 1, 1}, blk.4.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0826, 4.0826, 8.0826, ...], [24576.0820, 24580.0820, 24584.0820, ...], [49152.0820, 49156.0820, 49160.0820, ...], ], ] sum = 221220.734375 ggml_debug: ffn_moe_logits-4 = (f32) MUL_MAT(blk.4.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-4{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.1753, 3.8247, 7.8247, ...], [ 63.8247, 67.8247, 71.8247, ...], [127.8247, 131.8247, 135.8247, ...], ], ] sum = 610.422363 ggml_debug: ffn_moe_probs-4 = (f32) SOFT_MAX(ffn_moe_logits-4{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0538, 4.0538, 8.0538, ...], [ 64.0538, 68.0538, 72.0538, ...], [128.0538, 132.0538, 136.0538, ...], ], ] sum = 612.484192 ggml_debug: ffn_moe_argsort-4 = (i32) ARGSORT(ffn_moe_probs-4{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 15.0000, 19.0000, 23.0000, ...], [ 79.0000, 83.0000, 87.0000, ...], [143.0000, 147.0000, 151.0000, ...], ], ] sum = 747.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-4{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 15.0000, 19.0000, 23.0000, ...], [ 79.0000, 83.0000, 87.0000, ...], [143.0000, 147.0000, 151.0000, ...], ], ] sum = 747.000000 ggml_debug: ffn_moe_up-4 = (f32) MUL_MAT_ID(blk.4.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-4{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0137, 4.0137, 8.0137, ...], [43008.0156, 43012.0156, 43016.0156, ...], [86016.0156, 86020.0156, 86024.0156, ...], ], ] sum = 387108.125000 ggml_debug: ffn_moe_gate-4 = (f32) MUL_MAT_ID(blk.4.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-4{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2344, 4.2344, 8.2344, ...], [43008.2344, 43012.2344, 43016.2344, ...], [86016.2344, 86020.2344, 86024.2344, ...], ], ] sum = 387110.125000 ggml_debug: ffn_moe_silu-4 = (f32) UNARY(ffn_moe_gate-4{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1308, 4.1308, 8.1308, ...], [43008.1289, 43012.1289, 43016.1289, ...], [86016.1328, 86020.1328, 86024.1328, ...], ], ] sum = 387109.156250 ggml_debug: ffn_moe_gate_par-4 = (f32) MUL(ffn_moe_up-4{10752, 3, 1, 1}, ffn_moe_silu-4{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0018, 4.0018, 8.0018, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-4 = (f32) MUL_MAT_ID(blk.4.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-4{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0635, 4.0635, 8.0635, ...], [24576.0625, 24580.0625, 24584.0625, ...], [49152.0625, 49156.0625, 49160.0625, ...], ], ] sum = 221220.562500 ggml_debug: ffn_moe_probs-4 (reshaped) = (f32) RESHAPE(ffn_moe_probs-4{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0538], [ 4.0538], [ 8.0538], ... ], [ [ 64.0538], [ 68.0538], [ 72.0538], ... ], [ [128.0538], [132.0538], [136.0538], ... ], ] sum = 612.484192 ggml_debug: ffn_moe_weights-4 = (f32) GET_ROWS(ffn_moe_probs-4 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1026], [ 4.1026], [ 8.1026], ... ], [ [ 16.1026], [ 20.1026], [ 24.1026], ... ], [ [ 32.1026], [ 36.1026], [ 40.1026], ... ], ] sum = 180.923737 ggml_debug: ffn_moe_weights-4 (reshaped) = (f32) RESHAPE(ffn_moe_weights-4{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1026, 4.1026, 8.1026, ...], [ 16.1026, 20.1026, 24.1026, ...], [ 32.1026, 36.1026, 40.1026, ...], ], ] sum = 180.923737 ggml_debug: ffn_moe_weights_sum-4 = (f32) SUM_ROWS(ffn_moe_weights-4 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3314], [ 4.3314], [ 8.3314], ], ] sum = 12.994082 ggml_debug: ffn_moe_weights_norm-4 = (f32) DIV(ffn_moe_weights-4 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-4{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3097, 4.3097, 8.3097, ...], [ 16.3097, 20.3097, 24.3097, ...], [ 32.3097, 36.3097, 40.3097, ...], ], ] sum = 182.787720 ggml_debug: ffn_moe_weights_norm-4 (view) = (f32) VIEW(ffn_moe_weights_norm-4{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3097], [ 16.3097], [ 32.3097], ], ] sum = 48.929241 ggml_debug: ffn_moe_weighted-4 = (f32) MUL(ffn_moe_down-4{6144, 3, 1, 1}, ffn_moe_weights_norm-4 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0197, 4.0197, 8.0197, ...], [24576.0195, 24580.0195, 24584.0195, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.171875 ggml_debug: ffn_moe_up-4 = (f32) MUL_MAT_ID(blk.4.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-4{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.4564, 3.5436, 7.5436, ...], [43007.5430, 43011.5430, 43015.5430, ...], [86015.5469, 86019.5469, 86023.5469, ...], ], ] sum = 387103.937500 ggml_debug: ffn_moe_gate-4 = (f32) MUL_MAT_ID(blk.4.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-4{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1450, 4.1450, 8.1450, ...], [43008.1445, 43012.1445, 43016.1445, ...], [86016.1484, 86020.1484, 86024.1484, ...], ], ] sum = 387109.312500 ggml_debug: ffn_moe_silu-4 = (f32) UNARY(ffn_moe_gate-4{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0778, 4.0778, 8.0778, ...], [43008.0781, 43012.0781, 43016.0781, ...], [86016.0781, 86020.0781, 86024.0781, ...], ], ] sum = 387108.687500 ggml_debug: ffn_moe_gate_par-4 = (f32) MUL(ffn_moe_up-4{10752, 3, 1, 1}, ffn_moe_silu-4{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0355, 3.9645, 7.9645, ...], [43007.9648, 43011.9648, 43015.9648, ...], [86015.9609, 86019.9609, 86023.9609, ...], ], ] sum = 387107.687500 ggml_debug: ffn_moe_down-4 = (f32) MUL_MAT_ID(blk.4.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-4{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0344, 4.0344, 8.0344, ...], [24576.0352, 24580.0352, 24584.0352, ...], [49152.0352, 49156.0352, 49160.0352, ...], ], ] sum = 221220.296875 ggml_debug: ffn_moe_weights_norm-4 (view) = (f32) VIEW(ffn_moe_weights_norm-4{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2486], [ 16.2486], [ 32.2486], ], ] sum = 48.745754 ggml_debug: ffn_moe_weighted-4 = (f32) MUL(ffn_moe_down-4{6144, 3, 1, 1}, ffn_moe_weights_norm-4 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0086, 4.0086, 8.0086, ...], [24576.0078, 24580.0078, 24584.0078, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: ffn_moe_out-4 = (f32) ADD(ffn_moe_weighted-4{6144, 3, 1, 1}, ffn_moe_weighted-4{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0282, 4.0282, 8.0282, ...], [24576.0273, 24580.0273, 24584.0273, ...], [49152.0273, 49156.0273, 49160.0273, ...], ], ] sum = 221220.265625 ggml_debug: ffn_moe_up-4 = (f32) MUL_MAT_ID(blk.4.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-4{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2385, 3.7615, 7.7615, ...], [43007.7617, 43011.7617, 43015.7617, ...], [86015.7578, 86019.7578, 86023.7578, ...], ], ] sum = 387105.843750 ggml_debug: ffn_moe_gate-4 = (f32) MUL_MAT_ID(blk.4.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-4{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2650, 4.2650, 8.2650, ...], [43008.2656, 43012.2656, 43016.2656, ...], [86016.2656, 86020.2656, 86024.2656, ...], ], ] sum = 387110.375000 ggml_debug: ffn_moe_silu-4 = (f32) UNARY(ffn_moe_gate-4{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1500, 4.1500, 8.1500, ...], [43008.1484, 43012.1484, 43016.1484, ...], [86016.1484, 86020.1484, 86024.1484, ...], ], ] sum = 387109.343750 ggml_debug: ffn_moe_gate_par-4 = (f32) MUL(ffn_moe_up-4{10752, 3, 1, 1}, ffn_moe_silu-4{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0358, 3.9642, 7.9642, ...], [43007.9648, 43011.9648, 43015.9648, ...], [86015.9609, 86019.9609, 86023.9609, ...], ], ] sum = 387107.687500 ggml_debug: ffn_moe_down-4 = (f32) MUL_MAT_ID(blk.4.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-4{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0207, 3.9793, 7.9793, ...], [24575.9785, 24579.9785, 24583.9785, ...], [49151.9805, 49155.9805, 49159.9805, ...], ], ] sum = 221219.828125 ggml_debug: ffn_moe_weights_norm-4 (view) = (f32) VIEW(ffn_moe_weights_norm-4{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2273], [ 16.2273], [ 32.2273], ], ] sum = 48.681927 ggml_debug: ffn_moe_weighted-4 = (f32) MUL(ffn_moe_down-4{6144, 3, 1, 1}, ffn_moe_weights_norm-4 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0047, 3.9953, 7.9953, ...], [24575.9961, 24579.9961, 24583.9961, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.968750 ggml_debug: ffn_moe_out-4 = (f32) ADD(ffn_moe_out-4{6144, 3, 1, 1}, ffn_moe_weighted-4{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0235, 4.0235, 8.0235, ...], [24576.0234, 24580.0234, 24584.0234, ...], [49152.0234, 49156.0234, 49160.0234, ...], ], ] sum = 221220.218750 ggml_debug: ffn_moe_up-4 = (f32) MUL_MAT_ID(blk.4.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-4{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1635, 4.1635, 8.1635, ...], [43008.1641, 43012.1641, 43016.1641, ...], [86016.1641, 86020.1641, 86024.1641, ...], ], ] sum = 387109.468750 ggml_debug: ffn_moe_gate-4 = (f32) MUL_MAT_ID(blk.4.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-4{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0524, 3.9476, 7.9476, ...], [43007.9492, 43011.9492, 43015.9492, ...], [86015.9453, 86019.9453, 86023.9453, ...], ], ] sum = 387107.500000 ggml_debug: ffn_moe_silu-4 = (f32) UNARY(ffn_moe_gate-4{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0255, 3.9745, 7.9745, ...], [43007.9727, 43011.9727, 43015.9727, ...], [86015.9766, 86019.9766, 86023.9766, ...], ], ] sum = 387107.750000 ggml_debug: ffn_moe_gate_par-4 = (f32) MUL(ffn_moe_up-4{10752, 3, 1, 1}, ffn_moe_silu-4{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0042, 3.9958, 7.9958, ...], [43007.9961, 43011.9961, 43015.9961, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.968750 ggml_debug: ffn_moe_down-4 = (f32) MUL_MAT_ID(blk.4.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-4{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0113, 3.9887, 7.9887, ...], [24575.9883, 24579.9883, 24583.9883, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.890625 ggml_debug: ffn_moe_weights_norm-4 (view) = (f32) VIEW(ffn_moe_weights_norm-4{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2144], [ 16.2144], [ 32.2144], ], ] sum = 48.643078 ggml_debug: ffn_moe_weighted-4 = (f32) MUL(ffn_moe_down-4{6144, 3, 1, 1}, ffn_moe_weights_norm-4 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0024, 3.9976, 7.9976, ...], [24575.9980, 24579.9980, 24583.9980, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.984375 ggml_debug: ffn_moe_out-4 = (f32) ADD(ffn_moe_out-4{6144, 3, 1, 1}, ffn_moe_weighted-4{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0211, 4.0211, 8.0211, ...], [24576.0215, 24580.0215, 24584.0215, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.171875 ggml_debug: ffn_inp-4 = (f32) ADD(kqv_out-4{6144, 3, 1, 1}, l_out-3{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0318, 4.0318, 8.0318, ...], [24576.0312, 24580.0312, 24584.0312, ...], [49152.0312, 49156.0312, 49160.0312, ...], ], ] sum = 221220.281250 ggml_debug: l_out-4 = (f32) ADD(ffn_moe_out-4{6144, 3, 1, 1}, ffn_inp-4{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0529, 4.0529, 8.0529, ...], [24576.0527, 24580.0527, 24584.0527, ...], [49152.0547, 49156.0547, 49160.0547, ...], ], ] sum = 221220.500000 ggml_debug: norm-5 = (f32) NORM(l_out-4{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.5222, 4.5222, 8.5222, ...], [24576.5215, 24580.5215, 24584.5215, ...], [49152.5234, 49156.5234, 49160.5234, ...], ], ] sum = 221224.718750 ggml_debug: attn_norm-5 = (f32) MUL(norm-5{6144, 3, 1, 1}, blk.5.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0398, 4.0398, 8.0398, ...], [24576.0391, 24580.0391, 24584.0391, ...], [49152.0391, 49156.0391, 49160.0391, ...], ], ] sum = 221220.343750 ggml_debug: wqkv-5 = (f32) MUL_MAT(blk.5.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-5{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.4616, 4.4616, 8.4616, ...], [32768.4609, 32772.4609, 32776.4609, ...], [65536.4609, 65540.4609, 65544.4609, ...], ], ] sum = 294952.156250 ggml_debug: wqkv_clamped-5 = (f32) CLAMP(wqkv-5{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.4616, 4.4616, 8.4616, ...], [32768.4609, 32772.4609, 32776.4609, ...], [65536.4609, 65540.4609, 65544.4609, ...], ], ] sum = 294952.156250 ggml_debug: wqkv_clamped-5 (view) = (f32) VIEW(wqkv_clamped-5{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.4616, 4.4616, 8.4616, ...], [32768.4609, 32772.4609, 32776.4609, ...], [65536.4609, 65540.4609, 65544.4609, ...], ], ] sum = 294952.156250 ggml_debug: Qcur-5 = (f32) CONT(wqkv_clamped-5 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.4616, 4.4616, 8.4616, ...], [24576.4609, 24580.4609, 24584.4609, ...], [49152.4609, 49156.4609, 49160.4609, ...], ], ] sum = 221224.156250 ggml_debug: Qcur-5 (reshaped) = (f32) RESHAPE(Qcur-5{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.4616, 4.4616, 8.4616, ...], [512.4616, 516.4616, 520.4616, ...], [1024.4617, 1028.4617, 1032.4617, ...], ... ], [ [24576.4609, 24580.4609, 24584.4609, ...], [25088.4609, 25092.4609, 25096.4609, ...], [25600.4609, 25604.4609, 25608.4609, ...], ... ], [ [49152.4609, 49156.4609, 49160.4609, ...], [49664.4609, 49668.4609, 49672.4609, ...], [50176.4609, 50180.4609, 50184.4609, ...], ... ], ] sum = 677496.437500 ggml_debug: Qcur-5 = (f32) ROPE(Qcur-5 (reshaped){128, 48, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.4616, 4.4616, 8.4616, ...], [512.4616, 516.4616, 520.4616, ...], [1024.4617, 1028.4617, 1032.4617, ...], ... ], [ [24576.4609, 24580.4609, 24584.4609, ...], [25088.4609, 25092.4609, 25096.4609, ...], [25600.4609, 25604.4609, 25608.4609, ...], ... ], [ [49152.4609, 49156.4609, 49160.4609, ...], [49664.4609, 49668.4609, 49672.4609, ...], [50176.4609, 50180.4609, 50184.4609, ...], ... ], ] sum = 677496.437500 ggml_debug: wqkv_clamped-5 (view) = (f32) VIEW(wqkv_clamped-5{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 2.7017, 6.7017, 10.7017, ...], [32770.7031, 32774.7031, 32778.7031, ...], [65538.7031, 65542.7031, 65546.7031, ...], ], ] sum = 294972.312500 ggml_debug: Kcur-5 = (f32) CONT(wqkv_clamped-5 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 2.7017, 6.7017, 10.7017, ...], [4098.7017, 4102.7017, 4106.7017, ...], [8194.7021, 8198.7021, 8202.7021, ...], ], ] sum = 36924.316406 ggml_debug: Kcur-5 (reshaped) = (f32) RESHAPE(Kcur-5{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 2.7017, 6.7017, 10.7017, ...], [514.7017, 518.7017, 522.7017, ...], [1026.7017, 1030.7017, 1034.7017, ...], ... ], [ [4098.7017, 4102.7017, 4106.7017, ...], [4610.7017, 4614.7017, 4618.7017, ...], [5122.7017, 5126.7017, 5130.7017, ...], ... ], [ [8194.7021, 8198.7021, 8202.7021, ...], [8706.7021, 8710.7021, 8714.7021, ...], [9218.7021, 9222.7021, 9226.7021, ...], ... ], ] sum = 124596.960938 ggml_debug: Kcur-5 = (f32) ROPE(Kcur-5 (reshaped){128, 8, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 2.7017, 6.7017, 10.7017, ...], [514.7017, 518.7017, 522.7017, ...], [1026.7017, 1030.7017, 1034.7017, ...], ... ], [ [4098.7017, 4102.7017, 4106.7017, ...], [4610.7017, 4614.7017, 4618.7017, ...], [5122.7017, 5126.7017, 5130.7017, ...], ... ], [ [8194.7021, 8198.7021, 8202.7021, ...], [8706.7021, 8710.7021, 8714.7021, ...], [9218.7021, 9222.7021, 9226.7021, ...], ... ], ] sum = 124596.960938 ggml_debug: wqkv_clamped-5 (view) = (f32) VIEW(wqkv_clamped-5{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0595, 3.9405, 7.9405, ...], [32767.9414, 32771.9414, 32775.9414, ...], [65535.9414, 65539.9375, 65543.9375, ...], ], ] sum = 294947.437500 ggml_debug: Vcur-5 = (f32) CONT(wqkv_clamped-5 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0595, 3.9405, 7.9405, ...], [4095.9404, 4099.9404, 4103.9404, ...], [8191.9404, 8195.9404, 8199.9404, ...], ], ] sum = 36899.464844 ggml_debug: k_cache_view-5 = (f16) VIEW(cache_k_l5{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-5 (copy of Kcur-5) = (f16) CPY(Kcur-5{128, 8, 3, 1}, k_cache_view-5{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 2.7012, 2.7051, 2.7090, ...], ], ] sum = 8.115234 ggml_debug: v_cur_t-5 = (f32) TRANSPOSE(Vcur-5{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.0595, 4095.9404, 8191.9404], [ 3.9405, 4099.9404, 8195.9404], [ 7.9405, 4103.9404, 8199.9404], ... ], ] sum = 36899.468750 ggml_debug: v_cache_view-5 = (f16) VIEW(cache_v_l5{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-5 (copy of v_cur_t-5) = (f16) CPY(v_cur_t-5{3, 1024, 1, 1}, v_cache_view-5{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.0594, -0.0595, -0.0596], [ -0.1189, -0.1190, -0.1191], [ -0.2378, -0.2380, -0.2383], ... ], ] sum = -1.249695 ggml_debug: v-5 = (f16) VIEW(cache_v_l5{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.0594, -0.0595, -0.0596, ...], [ -0.1189, -0.1190, -0.1191, ...], [ -0.2378, -0.2380, -0.2383, ...], ... ], [ [ -0.0594, -0.0595, -0.0596, ...], [ -0.1189, -0.1190, -0.1191, ...], [ -0.2378, -0.2380, -0.2383, ...], ... ], [ [ -0.0594, -0.0595, -0.0596, ...], [ -0.1189, -0.1190, -0.1191, ...], [ -0.2378, -0.2380, -0.2383, ...], ... ], ... ] sum = -3.749084 ggml_debug: k-5 = (f16) VIEW(cache_k_l5{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 2.7012, 2.7051, 2.7090, ...], [ 10.8047, 10.8203, 10.8359, ...], [ 43.2188, 43.2812, 43.3438, ...], ... ], [ [ 3.2012, 3.2051, 3.2090, ...], [ 12.8047, 12.8203, 12.8359, ...], [ 51.2188, 51.2812, 51.3438, ...], ... ], [ [ 3.7012, 3.7051, 3.7090, ...], [ 14.8047, 14.8203, 14.8359, ...], [ 59.2188, 59.2812, 59.3438, ...], ... ], ... ] sum = 605.759766 ggml_debug: q-5 = (f32) PERMUTE(Qcur-5{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.4616, 4.4616, 8.4616, ...], [24576.4609, 24580.4609, 24584.4609, ...], [49152.4609, 49156.4609, 49160.4609, ...], ], [ [512.4616, 516.4616, 520.4616, ...], [25088.4609, 25092.4609, 25096.4609, ...], [49664.4609, 49668.4609, 49672.4609, ...], ], [ [1024.4617, 1028.4617, 1032.4617, ...], [25600.4609, 25604.4609, 25608.4609, ...], [50176.4609, 50180.4609, 50184.4609, ...], ], ... ] sum = 677496.500000 ggml_debug: kq-5 = (f32) MUL_MAT(k-5{128, 32, 8, 1}, q-5{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 26.5312, 30.5312, 34.5312, ...], [154.5312, 158.5312, 162.5312, ...], [282.5312, 286.5312, 290.5312, ...], ], [ [410.5312, 414.5312, 418.5312, ...], [538.5312, 542.5312, 546.5312, ...], [666.5312, 670.5312, 674.5312, ...], ], [ [794.5312, 798.5312, 802.5312, ...], [922.5312, 926.5312, 930.5312, ...], [1050.5312, 1054.5312, 1058.5312, ...], ], ... ] sum = 14648.343750 ggml_debug: kq_soft_max_ext-5 = (f32) SOFT_MAX(kq-5{32, 3, 48, 1}, CUDA0#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-5 = (f32) MUL_MAT(v-5{32, 128, 8, 1}, kq_soft_max_ext-5{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.0594, 3.9406, 7.9406, ...], [511.9406, 515.9406, 519.9406, ...], [1023.9406, 1027.9406, 1031.9406, ...], ], [ [1535.9406, 1539.9406, 1543.9406, ...], [2047.9406, 2051.9404, 2055.9404, ...], [2559.9404, 2563.9404, 2567.9404, ...], ], [ [3071.9404, 3075.9404, 3079.9404, ...], [3583.9404, 3587.9404, 3591.9404, ...], [4095.9404, 4099.9404, 4103.9404, ...], ], ... ] sum = 55402.402344 ggml_debug: kqv_merged-5 = (f32) PERMUTE(kqv-5{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.0594, 3.9406, 7.9406, ...], [1535.9406, 1539.9406, 1543.9406, ...], [3071.9404, 3075.9404, 3079.9404, ...], ... ], [ [511.9406, 515.9406, 519.9406, ...], [2047.9406, 2051.9404, 2055.9404, ...], [3583.9404, 3587.9404, 3591.9404, ...], ... ], [ [1023.9406, 1027.9406, 1031.9406, ...], [2559.9404, 2563.9404, 2567.9404, ...], [4095.9404, 4099.9404, 4103.9404, ...], ... ], ] sum = 55402.406250 ggml_debug: kqv_merged_cont-5 = (f32) CONT(kqv_merged-5{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0594, 3.9406, 7.9406, ...], [24575.9414, 24579.9414, 24583.9414, ...], [49151.9414, 49155.9414, 49159.9414, ...], ], ] sum = 221219.453125 ggml_debug: kqv_out-5 = (f32) MUL_MAT(blk.5.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-5{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0840, 3.9160, 7.9160, ...], [24575.9160, 24579.9160, 24583.9160, ...], [49151.9180, 49155.9180, 49159.9180, ...], ], ] sum = 221219.250000 ggml_debug: norm-5 = (f32) NORM(kqv_out-5{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -1.3984, 2.6016, 6.6016, ...], [24574.6016, 24578.6016, 24582.6016, ...], [49150.6016, 49154.6016, 49158.6016, ...], ], ] sum = 221207.406250 ggml_debug: attn_out_norm-5 = (f32) MUL(norm-5{6144, 3, 1, 1}, blk.5.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2390, 3.7610, 7.7610, ...], [24575.7617, 24579.7617, 24583.7617, ...], [49151.7617, 49155.7617, 49159.7617, ...], ], ] sum = 221217.859375 ggml_debug: ffn_moe_logits-5 = (f32) MUL_MAT(blk.5.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-5{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.0071, 3.9929, 7.9929, ...], [ 63.9929, 67.9929, 71.9929, ...], [127.9929, 131.9929, 135.9929, ...], ], ] sum = 611.936401 ggml_debug: ffn_moe_probs-5 = (f32) SOFT_MAX(ffn_moe_logits-5{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0617, 4.0617, 8.0617, ...], [ 64.0617, 68.0617, 72.0617, ...], [128.0617, 132.0617, 136.0617, ...], ], ] sum = 612.554932 ggml_debug: ffn_moe_argsort-5 = (i32) ARGSORT(ffn_moe_probs-5{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 2.0000, 6.0000, 10.0000, ...], [ 66.0000, 70.0000, 74.0000, ...], [130.0000, 134.0000, 138.0000, ...], ], ] sum = 630.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-5{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 2.0000, 6.0000, 10.0000, ...], [ 66.0000, 70.0000, 74.0000, ...], [130.0000, 134.0000, 138.0000, ...], ], ] sum = 630.000000 ggml_debug: ffn_moe_up-5 = (f32) MUL_MAT_ID(blk.5.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-5{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0763, 3.9237, 7.9237, ...], [43007.9219, 43011.9219, 43015.9219, ...], [86015.9219, 86019.9219, 86023.9219, ...], ], ] sum = 387107.312500 ggml_debug: ffn_moe_gate-5 = (f32) MUL_MAT_ID(blk.5.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-5{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1923, 4.1923, 8.1923, ...], [43008.1914, 43012.1914, 43016.1914, ...], [86016.1953, 86020.1953, 86024.1953, ...], ], ] sum = 387109.718750 ggml_debug: ffn_moe_silu-5 = (f32) UNARY(ffn_moe_gate-5{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1054, 4.1054, 8.1054, ...], [43008.1055, 43012.1055, 43016.1055, ...], [86016.1016, 86020.1016, 86024.1016, ...], ], ] sum = 387108.937500 ggml_debug: ffn_moe_gate_par-5 = (f32) MUL(ffn_moe_up-5{10752, 3, 1, 1}, ffn_moe_silu-5{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0080, 3.9920, 7.9920, ...], [43007.9922, 43011.9922, 43015.9922, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.937500 ggml_debug: ffn_moe_down-5 = (f32) MUL_MAT_ID(blk.5.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-5{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0075, 4.0075, 8.0075, ...], [24576.0078, 24580.0078, 24584.0078, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: ffn_moe_probs-5 (reshaped) = (f32) RESHAPE(ffn_moe_probs-5{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0617], [ 4.0617], [ 8.0617], ... ], [ [ 64.0617], [ 68.0617], [ 72.0617], ... ], [ [128.0617], [132.0617], [136.0617], ... ], ] sum = 612.554932 ggml_debug: ffn_moe_weights-5 = (f32) GET_ROWS(ffn_moe_probs-5 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.0901], [ 4.0901], [ 8.0901], ... ], [ [ 16.0901], [ 20.0901], [ 24.0901], ... ], [ [ 32.0901], [ 36.0901], [ 40.0901], ... ], ] sum = 180.810791 ggml_debug: ffn_moe_weights-5 (reshaped) = (f32) RESHAPE(ffn_moe_weights-5{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0901, 4.0901, 8.0901, ...], [ 16.0901, 20.0901, 24.0901, ...], [ 32.0901, 36.0901, 40.0901, ...], ], ] sum = 180.810791 ggml_debug: ffn_moe_weights_sum-5 = (f32) SUM_ROWS(ffn_moe_weights-5 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3095], [ 4.3095], [ 8.3095], ], ] sum = 12.928485 ggml_debug: ffn_moe_weights_norm-5 = (f32) DIV(ffn_moe_weights-5 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-5{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2911, 4.2911, 8.2911, ...], [ 16.2911, 20.2911, 24.2911, ...], [ 32.2911, 36.2911, 40.2911, ...], ], ] sum = 182.619751 ggml_debug: ffn_moe_weights_norm-5 (view) = (f32) VIEW(ffn_moe_weights_norm-5{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2911], [ 16.2911], [ 32.2911], ], ] sum = 48.873253 ggml_debug: ffn_moe_weighted-5 = (f32) MUL(ffn_moe_down-5{6144, 3, 1, 1}, ffn_moe_weights_norm-5 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0022, 4.0022, 8.0022, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.015625 ggml_debug: ffn_moe_up-5 = (f32) MUL_MAT_ID(blk.5.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-5{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0281, 4.0281, 8.0281, ...], [43008.0273, 43012.0273, 43016.0273, ...], [86016.0312, 86020.0312, 86024.0312, ...], ], ] sum = 387108.281250 ggml_debug: ffn_moe_gate-5 = (f32) MUL_MAT_ID(blk.5.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-5{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0214, 4.0214, 8.0214, ...], [43008.0195, 43012.0195, 43016.0195, ...], [86016.0234, 86020.0234, 86024.0234, ...], ], ] sum = 387108.218750 ggml_debug: ffn_moe_silu-5 = (f32) UNARY(ffn_moe_gate-5{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0108, 4.0108, 8.0108, ...], [43008.0117, 43012.0117, 43016.0117, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.062500 ggml_debug: ffn_moe_gate_par-5 = (f32) MUL(ffn_moe_up-5{10752, 3, 1, 1}, ffn_moe_silu-5{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0003, 4.0003, 8.0003, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-5 = (f32) MUL_MAT_ID(blk.5.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-5{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0178, 4.0178, 8.0178, ...], [24576.0176, 24580.0176, 24584.0176, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.156250 ggml_debug: ffn_moe_weights_norm-5 (view) = (f32) VIEW(ffn_moe_weights_norm-5{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2411], [ 16.2411], [ 32.2411], ], ] sum = 48.723335 ggml_debug: ffn_moe_weighted-5 = (f32) MUL(ffn_moe_down-5{6144, 3, 1, 1}, ffn_moe_weights_norm-5 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0043, 4.0043, 8.0043, ...], [24576.0039, 24580.0039, 24584.0039, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.031250 ggml_debug: ffn_moe_out-5 = (f32) ADD(ffn_moe_weighted-5{6144, 3, 1, 1}, ffn_moe_weighted-5{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0065, 4.0065, 8.0065, ...], [24576.0059, 24580.0059, 24584.0059, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: ffn_moe_up-5 = (f32) MUL_MAT_ID(blk.5.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-5{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3764, 3.6236, 7.6236, ...], [43007.6250, 43011.6250, 43015.6250, ...], [86015.6250, 86019.6250, 86023.6250, ...], ], ] sum = 387104.625000 ggml_debug: ffn_moe_gate-5 = (f32) MUL_MAT_ID(blk.5.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-5{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0441, 3.9559, 7.9559, ...], [43007.9570, 43011.9570, 43015.9570, ...], [86015.9531, 86019.9531, 86023.9531, ...], ], ] sum = 387107.562500 ggml_debug: ffn_moe_silu-5 = (f32) UNARY(ffn_moe_gate-5{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0215, 3.9785, 7.9785, ...], [43007.9766, 43011.9766, 43015.9766, ...], [86015.9766, 86019.9766, 86023.9766, ...], ], ] sum = 387107.781250 ggml_debug: ffn_moe_gate_par-5 = (f32) MUL(ffn_moe_up-5{10752, 3, 1, 1}, ffn_moe_silu-5{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0081, 4.0081, 8.0081, ...], [43008.0078, 43012.0078, 43016.0078, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.062500 ggml_debug: ffn_moe_down-5 = (f32) MUL_MAT_ID(blk.5.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-5{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0668, 4.0668, 8.0668, ...], [24576.0664, 24580.0664, 24584.0664, ...], [49152.0664, 49156.0664, 49160.0664, ...], ], ] sum = 221220.593750 ggml_debug: ffn_moe_weights_norm-5 (view) = (f32) VIEW(ffn_moe_weights_norm-5{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2360], [ 16.2360], [ 32.2360], ], ] sum = 48.707874 ggml_debug: ffn_moe_weighted-5 = (f32) MUL(ffn_moe_down-5{6144, 3, 1, 1}, ffn_moe_weights_norm-5 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0158, 4.0158, 8.0158, ...], [24576.0156, 24580.0156, 24584.0156, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: ffn_moe_out-5 = (f32) ADD(ffn_moe_out-5{6144, 3, 1, 1}, ffn_moe_weighted-5{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0222, 4.0222, 8.0222, ...], [24576.0215, 24580.0215, 24584.0215, ...], [49152.0234, 49156.0234, 49160.0234, ...], ], ] sum = 221220.218750 ggml_debug: ffn_moe_up-5 = (f32) MUL_MAT_ID(blk.5.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-5{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2038, 4.2038, 8.2038, ...], [43008.2031, 43012.2031, 43016.2031, ...], [86016.2031, 86020.2031, 86024.2031, ...], ], ] sum = 387109.812500 ggml_debug: ffn_moe_gate-5 = (f32) MUL_MAT_ID(blk.5.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-5{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2131, 4.2131, 8.2131, ...], [43008.2148, 43012.2148, 43016.2148, ...], [86016.2109, 86020.2109, 86024.2109, ...], ], ] sum = 387109.937500 ggml_debug: ffn_moe_silu-5 = (f32) UNARY(ffn_moe_gate-5{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1179, 4.1179, 8.1179, ...], [43008.1172, 43012.1172, 43016.1172, ...], [86016.1172, 86020.1172, 86024.1172, ...], ], ] sum = 387109.062500 ggml_debug: ffn_moe_gate_par-5 = (f32) MUL(ffn_moe_up-5{10752, 3, 1, 1}, ffn_moe_silu-5{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0240, 4.0240, 8.0240, ...], [43008.0234, 43012.0234, 43016.0234, ...], [86016.0234, 86020.0234, 86024.0234, ...], ], ] sum = 387108.218750 ggml_debug: ffn_moe_down-5 = (f32) MUL_MAT_ID(blk.5.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-5{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0502, 3.9498, 7.9498, ...], [24575.9492, 24579.9492, 24583.9492, ...], [49151.9492, 49155.9492, 49159.9492, ...], ], ] sum = 221219.562500 ggml_debug: ffn_moe_weights_norm-5 (view) = (f32) VIEW(ffn_moe_weights_norm-5{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2318], [ 16.2318], [ 32.2318], ], ] sum = 48.695538 ggml_debug: ffn_moe_weighted-5 = (f32) MUL(ffn_moe_down-5{6144, 3, 1, 1}, ffn_moe_weights_norm-5 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0116, 3.9884, 7.9884, ...], [24575.9883, 24579.9883, 24583.9883, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.890625 ggml_debug: ffn_moe_out-5 = (f32) ADD(ffn_moe_out-5{6144, 3, 1, 1}, ffn_moe_weighted-5{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0106, 4.0106, 8.0106, ...], [24576.0098, 24580.0098, 24584.0098, ...], [49152.0117, 49156.0117, 49160.0117, ...], ], ] sum = 221220.109375 ggml_debug: ffn_inp-5 = (f32) ADD(kqv_out-5{6144, 3, 1, 1}, l_out-4{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0311, 3.9689, 7.9689, ...], [24575.9688, 24579.9688, 24583.9688, ...], [49151.9688, 49155.9688, 49159.9688, ...], ], ] sum = 221219.718750 ggml_debug: l_out-5 = (f32) ADD(ffn_moe_out-5{6144, 3, 1, 1}, ffn_inp-5{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0205, 3.9795, 7.9795, ...], [24575.9805, 24579.9805, 24583.9805, ...], [49151.9805, 49155.9805, 49159.9805, ...], ], ] sum = 221219.828125 ggml_debug: norm-6 = (f32) NORM(l_out-5{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1718, 3.8282, 7.8282, ...], [24575.8281, 24579.8281, 24583.8281, ...], [49151.8281, 49155.8281, 49159.8281, ...], ], ] sum = 221218.453125 ggml_debug: attn_norm-6 = (f32) MUL(norm-6{6144, 3, 1, 1}, blk.6.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0150, 3.9850, 7.9850, ...], [24575.9844, 24579.9844, 24583.9844, ...], [49151.9844, 49155.9844, 49159.9844, ...], ], ] sum = 221219.859375 ggml_debug: wqkv-6 = (f32) MUL_MAT(blk.6.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-6{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.8074, 4.8074, 8.8074, ...], [32768.8086, 32772.8086, 32776.8086, ...], [65536.8047, 65540.8047, 65544.8047, ...], ], ] sum = 294955.281250 ggml_debug: wqkv_clamped-6 = (f32) CLAMP(wqkv-6{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.8074, 4.8074, 8.8074, ...], [32768.8086, 32772.8086, 32776.8086, ...], [65536.8047, 65540.8047, 65544.8047, ...], ], ] sum = 294955.281250 ggml_debug: wqkv_clamped-6 (view) = (f32) VIEW(wqkv_clamped-6{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.8074, 4.8074, 8.8074, ...], [32768.8086, 32772.8086, 32776.8086, ...], [65536.8047, 65540.8047, 65544.8047, ...], ], ] sum = 294955.281250 ggml_debug: Qcur-6 = (f32) CONT(wqkv_clamped-6 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.8074, 4.8074, 8.8074, ...], [24576.8066, 24580.8066, 24584.8066, ...], [49152.8086, 49156.8086, 49160.8086, ...], ], ] sum = 221227.281250 ggml_debug: Qcur-6 (reshaped) = (f32) RESHAPE(Qcur-6{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.8074, 4.8074, 8.8074, ...], [512.8074, 516.8074, 520.8074, ...], [1024.8074, 1028.8074, 1032.8074, ...], ... ], [ [24576.8066, 24580.8066, 24584.8066, ...], [25088.8066, 25092.8066, 25096.8066, ...], [25600.8066, 25604.8066, 25608.8066, ...], ... ], [ [49152.8086, 49156.8086, 49160.8086, ...], [49664.8086, 49668.8086, 49672.8086, ...], [50176.8086, 50180.8086, 50184.8086, ...], ... ], ] sum = 677505.812500 ggml_debug: Qcur-6 = (f32) ROPE(Qcur-6 (reshaped){128, 48, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.8074, 4.8074, 8.8074, ...], [512.8074, 516.8074, 520.8074, ...], [1024.8074, 1028.8074, 1032.8074, ...], ... ], [ [24576.8066, 24580.8066, 24584.8066, ...], [25088.8066, 25092.8066, 25096.8066, ...], [25600.8066, 25604.8066, 25608.8066, ...], ... ], [ [49152.8086, 49156.8086, 49160.8086, ...], [49664.8086, 49668.8086, 49672.8086, ...], [50176.8086, 50180.8086, 50184.8086, ...], ... ], ] sum = 677505.812500 ggml_debug: wqkv_clamped-6 (view) = (f32) VIEW(wqkv_clamped-6{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 1.7076, 5.7076, 9.7076, ...], [32769.7070, 32773.7070, 32777.7070, ...], [65537.7109, 65541.7109, 65545.7109, ...], ], ] sum = 294963.375000 ggml_debug: Kcur-6 = (f32) CONT(wqkv_clamped-6 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 1.7076, 5.7076, 9.7076, ...], [4097.7075, 4101.7075, 4105.7075, ...], [8193.7080, 8197.7080, 8201.7080, ...], ], ] sum = 36915.367188 ggml_debug: Kcur-6 (reshaped) = (f32) RESHAPE(Kcur-6{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 1.7076, 5.7076, 9.7076, ...], [513.7076, 517.7076, 521.7076, ...], [1025.7076, 1029.7076, 1033.7076, ...], ... ], [ [4097.7075, 4101.7075, 4105.7075, ...], [4609.7075, 4613.7075, 4617.7075, ...], [5121.7075, 5125.7075, 5129.7075, ...], ... ], [ [8193.7080, 8197.7080, 8201.7080, ...], [8705.7080, 8709.7080, 8713.7080, ...], [9217.7080, 9221.7080, 9225.7080, ...], ... ], ] sum = 124570.125000 ggml_debug: Kcur-6 = (f32) ROPE(Kcur-6 (reshaped){128, 8, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 1.7076, 5.7076, 9.7076, ...], [513.7076, 517.7076, 521.7076, ...], [1025.7076, 1029.7076, 1033.7076, ...], ... ], [ [4097.7075, 4101.7075, 4105.7075, ...], [4609.7075, 4613.7075, 4617.7075, ...], [5121.7075, 5125.7075, 5129.7075, ...], ... ], [ [8193.7080, 8197.7080, 8201.7080, ...], [8705.7080, 8709.7080, 8713.7080, ...], [9217.7080, 9221.7080, 9225.7080, ...], ... ], ] sum = 124570.125000 ggml_debug: wqkv_clamped-6 (view) = (f32) VIEW(wqkv_clamped-6{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0321, 3.9679, 7.9679, ...], [32767.9688, 32771.9688, 32775.9688, ...], [65535.9688, 65539.9688, 65543.9688, ...], ], ] sum = 294947.718750 ggml_debug: Vcur-6 = (f32) CONT(wqkv_clamped-6 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0321, 3.9679, 7.9679, ...], [4095.9678, 4099.9678, 4103.9678, ...], [8191.9678, 8195.9678, 8199.9678, ...], ], ] sum = 36899.710938 ggml_debug: k_cache_view-6 = (f16) VIEW(cache_k_l6{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-6 (copy of Kcur-6) = (f16) CPY(Kcur-6{128, 8, 3, 1}, k_cache_view-6{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 1.7080, 1.7100, 1.7119, ...], ], ] sum = 5.129883 ggml_debug: v_cur_t-6 = (f32) TRANSPOSE(Vcur-6{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.0321, 4095.9678, 8191.9678], [ 3.9679, 4099.9678, 8195.9678], [ 7.9679, 4103.9678, 8199.9678], ... ], ] sum = 36899.714844 ggml_debug: v_cache_view-6 = (f16) VIEW(cache_v_l6{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-6 (copy of v_cur_t-6) = (f16) CPY(v_cur_t-6{3, 1024, 1, 1}, v_cache_view-6{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.0321, -0.0322, -0.0323], [ -0.0643, -0.0644, -0.0645], [ -0.1285, -0.1288, -0.1290], ... ], ] sum = -0.676117 ggml_debug: v-6 = (f16) VIEW(cache_v_l6{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.0321, -0.0322, -0.0323, ...], [ -0.0643, -0.0644, -0.0645, ...], [ -0.1285, -0.1288, -0.1290, ...], ... ], [ [ -0.0321, -0.0322, -0.0323, ...], [ -0.0643, -0.0644, -0.0645, ...], [ -0.1285, -0.1288, -0.1290, ...], ... ], [ [ -0.0321, -0.0322, -0.0323, ...], [ -0.0643, -0.0644, -0.0645, ...], [ -0.1285, -0.1288, -0.1290, ...], ... ], ... ] sum = -2.028351 ggml_debug: k-6 = (f16) VIEW(cache_k_l6{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 1.7080, 1.7100, 1.7119, ...], [ 6.8320, 6.8398, 6.8477, ...], [ 27.3281, 27.3594, 27.3906, ...], ... ], [ [ 1.9580, 1.9600, 1.9619, ...], [ 7.8320, 7.8398, 7.8477, ...], [ 31.3281, 31.3594, 31.3906, ...], ... ], [ [ 2.4160, 2.4199, 2.4238, ...], [ 9.6641, 9.6797, 9.6953, ...], [ 38.6562, 38.7188, 38.7812, ...], ... ], ... ] sum = 383.660156 ggml_debug: q-6 = (f32) PERMUTE(Qcur-6{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.8074, 4.8074, 8.8074, ...], [24576.8066, 24580.8066, 24584.8066, ...], [49152.8086, 49156.8086, 49160.8086, ...], ], [ [512.8074, 516.8074, 520.8074, ...], [25088.8066, 25092.8066, 25096.8066, ...], [49664.8086, 49668.8086, 49672.8086, ...], ], [ [1024.8074, 1028.8074, 1032.8074, ...], [25600.8066, 25604.8066, 25608.8066, ...], [50176.8086, 50180.8086, 50184.8086, ...], ], ... ] sum = 677505.875000 ggml_debug: kq-6 = (f32) MUL_MAT(k-6{128, 32, 8, 1}, q-6{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 16.0156, 20.0156, 24.0156, ...], [144.0156, 148.0156, 152.0156, ...], [272.0156, 276.0156, 280.0156, ...], ], [ [400.0156, 404.0156, 408.0156, ...], [528.0156, 532.0156, 536.0156, ...], [656.0156, 660.0156, 664.0156, ...], ], [ [784.0156, 788.0156, 792.0156, ...], [912.0156, 916.0156, 920.0156, ...], [1040.0156, 1044.0156, 1048.0156, ...], ], ... ] sum = 14364.421875 ggml_debug: kq_soft_max_ext-6 = (f32) SOFT_MAX(kq-6{32, 3, 48, 1}, CUDA0#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-6 = (f32) MUL_MAT(v-6{32, 128, 8, 1}, kq_soft_max_ext-6{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.0321, 3.9679, 7.9679, ...], [511.9679, 515.9679, 519.9679, ...], [1023.9679, 1027.9679, 1031.9679, ...], ], [ [1535.9679, 1539.9679, 1543.9679, ...], [2047.9679, 2051.9678, 2055.9678, ...], [2559.9678, 2563.9678, 2567.9678, ...], ], [ [3071.9678, 3075.9678, 3079.9678, ...], [3583.9678, 3587.9678, 3591.9678, ...], [4095.9678, 4099.9678, 4103.9678, ...], ], ... ] sum = 55403.140625 ggml_debug: kqv_merged-6 = (f32) PERMUTE(kqv-6{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.0321, 3.9679, 7.9679, ...], [1535.9679, 1539.9679, 1543.9679, ...], [3071.9678, 3075.9678, 3079.9678, ...], ... ], [ [511.9679, 515.9679, 519.9679, ...], [2047.9679, 2051.9678, 2055.9678, ...], [3583.9678, 3587.9678, 3591.9678, ...], ... ], [ [1023.9679, 1027.9679, 1031.9679, ...], [2559.9678, 2563.9678, 2567.9678, ...], [4095.9678, 4099.9678, 4103.9678, ...], ... ], ] sum = 55403.144531 ggml_debug: kqv_merged_cont-6 = (f32) CONT(kqv_merged-6{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0321, 3.9679, 7.9679, ...], [24575.9688, 24579.9688, 24583.9688, ...], [49151.9688, 49155.9688, 49159.9688, ...], ], ] sum = 221219.718750 ggml_debug: kqv_out-6 = (f32) MUL_MAT(blk.6.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-6{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0485, 4.0485, 8.0485, ...], [24576.0488, 24580.0488, 24584.0488, ...], [49152.0469, 49156.0469, 49160.0469, ...], ], ] sum = 221220.421875 ggml_debug: norm-6 = (f32) NORM(kqv_out-6{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.6302, 4.6302, 8.6302, ...], [24576.6309, 24580.6309, 24584.6309, ...], [49152.6289, 49156.6289, 49160.6289, ...], ], ] sum = 221225.656250 ggml_debug: attn_out_norm-6 = (f32) MUL(norm-6{6144, 3, 1, 1}, blk.6.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1231, 4.1231, 8.1231, ...], [24576.1230, 24580.1230, 24584.1230, ...], [49152.1250, 49156.1250, 49160.1250, ...], ], ] sum = 221221.125000 ggml_debug: ffn_moe_logits-6 = (f32) MUL_MAT(blk.6.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-6{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.0844, 3.9156, 7.9156, ...], [ 63.9156, 67.9156, 71.9156, ...], [127.9156, 131.9156, 135.9156, ...], ], ] sum = 611.240845 ggml_debug: ffn_moe_probs-6 = (f32) SOFT_MAX(ffn_moe_logits-6{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0543, 4.0543, 8.0543, ...], [ 64.0543, 68.0543, 72.0543, ...], [128.0543, 132.0543, 136.0543, ...], ], ] sum = 612.488831 ggml_debug: ffn_moe_argsort-6 = (i32) ARGSORT(ffn_moe_probs-6{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 14.0000, 18.0000, 22.0000, ...], [ 78.0000, 82.0000, 86.0000, ...], [142.0000, 146.0000, 150.0000, ...], ], ] sum = 738.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-6{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 14.0000, 18.0000, 22.0000, ...], [ 78.0000, 82.0000, 86.0000, ...], [142.0000, 146.0000, 150.0000, ...], ], ] sum = 738.000000 ggml_debug: ffn_moe_up-6 = (f32) MUL_MAT_ID(blk.6.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-6{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1482, 3.8518, 7.8518, ...], [43007.8516, 43011.8516, 43015.8516, ...], [86015.8516, 86019.8516, 86023.8516, ...], ], ] sum = 387106.656250 ggml_debug: ffn_moe_gate-6 = (f32) MUL_MAT_ID(blk.6.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-6{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2295, 3.7705, 7.7705, ...], [43007.7695, 43011.7695, 43015.7695, ...], [86015.7734, 86019.7734, 86023.7734, ...], ], ] sum = 387105.968750 ggml_debug: ffn_moe_silu-6 = (f32) UNARY(ffn_moe_gate-6{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1016, 3.8984, 7.8984, ...], [43007.8984, 43011.8984, 43015.8984, ...], [86015.8984, 86019.8984, 86023.8984, ...], ], ] sum = 387107.093750 ggml_debug: ffn_moe_gate_par-6 = (f32) MUL(ffn_moe_up-6{10752, 3, 1, 1}, ffn_moe_silu-6{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0151, 4.0151, 8.0151, ...], [43008.0156, 43012.0156, 43016.0156, ...], [86016.0156, 86020.0156, 86024.0156, ...], ], ] sum = 387108.125000 ggml_debug: ffn_moe_down-6 = (f32) MUL_MAT_ID(blk.6.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-6{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0844, 3.9156, 7.9156, ...], [24575.9160, 24579.9160, 24583.9160, ...], [49151.9141, 49155.9141, 49159.9141, ...], ], ] sum = 221219.218750 ggml_debug: ffn_moe_probs-6 (reshaped) = (f32) RESHAPE(ffn_moe_probs-6{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0543], [ 4.0543], [ 8.0543], ... ], [ [ 64.0543], [ 68.0543], [ 72.0543], ... ], [ [128.0543], [132.0543], [136.0543], ... ], ] sum = 612.488831 ggml_debug: ffn_moe_weights-6 = (f32) GET_ROWS(ffn_moe_probs-6 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1119], [ 4.1119], [ 8.1119], ... ], [ [ 16.1119], [ 20.1119], [ 24.1119], ... ], [ [ 32.1119], [ 36.1119], [ 40.1119], ... ], ] sum = 181.007233 ggml_debug: ffn_moe_weights-6 (reshaped) = (f32) RESHAPE(ffn_moe_weights-6{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1119, 4.1119, 8.1119, ...], [ 16.1119, 20.1119, 24.1119, ...], [ 32.1119, 36.1119, 40.1119, ...], ], ] sum = 181.007233 ggml_debug: ffn_moe_weights_sum-6 = (f32) SUM_ROWS(ffn_moe_weights-6 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3842], [ 4.3842], [ 8.3842], ], ] sum = 13.152463 ggml_debug: ffn_moe_weights_norm-6 = (f32) DIV(ffn_moe_weights-6 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-6{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2913, 4.2913, 8.2913, ...], [ 16.2913, 20.2913, 24.2913, ...], [ 32.2913, 36.2913, 40.2913, ...], ], ] sum = 182.621948 ggml_debug: ffn_moe_weights_norm-6 (view) = (f32) VIEW(ffn_moe_weights_norm-6{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2913], [ 16.2913], [ 32.2913], ], ] sum = 48.873985 ggml_debug: ffn_moe_weighted-6 = (f32) MUL(ffn_moe_down-6{6144, 3, 1, 1}, ffn_moe_weights_norm-6 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0246, 3.9754, 7.9754, ...], [24575.9746, 24579.9746, 24583.9746, ...], [49151.9766, 49155.9766, 49159.9766, ...], ], ] sum = 221219.781250 ggml_debug: ffn_moe_up-6 = (f32) MUL_MAT_ID(blk.6.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-6{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2092, 3.7908, 7.7908, ...], [43007.7891, 43011.7891, 43015.7891, ...], [86015.7891, 86019.7891, 86023.7891, ...], ], ] sum = 387106.093750 ggml_debug: ffn_moe_gate-6 = (f32) MUL_MAT_ID(blk.6.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-6{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.5534, 4.5534, 8.5534, ...], [43008.5547, 43012.5547, 43016.5547, ...], [86016.5547, 86020.5547, 86024.5547, ...], ], ] sum = 387113.000000 ggml_debug: ffn_moe_silu-6 = (f32) UNARY(ffn_moe_gate-6{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.3514, 4.3514, 8.3514, ...], [43008.3516, 43012.3516, 43016.3516, ...], [86016.3516, 86020.3516, 86024.3516, ...], ], ] sum = 387111.156250 ggml_debug: ffn_moe_gate_par-6 = (f32) MUL(ffn_moe_up-6{10752, 3, 1, 1}, ffn_moe_silu-6{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0735, 3.9265, 7.9265, ...], [43007.9258, 43011.9258, 43015.9258, ...], [86015.9297, 86019.9297, 86023.9297, ...], ], ] sum = 387107.375000 ggml_debug: ffn_moe_down-6 = (f32) MUL_MAT_ID(blk.6.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-6{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0092, 3.9908, 7.9908, ...], [24575.9902, 24579.9902, 24583.9902, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: ffn_moe_weights_norm-6 (view) = (f32) VIEW(ffn_moe_weights_norm-6{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2641], [ 16.2641], [ 32.2641], ], ] sum = 48.792282 ggml_debug: ffn_moe_weighted-6 = (f32) MUL(ffn_moe_down-6{6144, 3, 1, 1}, ffn_moe_weights_norm-6 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0024, 3.9976, 7.9976, ...], [24575.9980, 24579.9980, 24583.9980, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.984375 ggml_debug: ffn_moe_out-6 = (f32) ADD(ffn_moe_weighted-6{6144, 3, 1, 1}, ffn_moe_weighted-6{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0270, 3.9730, 7.9730, ...], [24575.9727, 24579.9727, 24583.9727, ...], [49151.9727, 49155.9727, 49159.9727, ...], ], ] sum = 221219.750000 ggml_debug: ffn_moe_up-6 = (f32) MUL_MAT_ID(blk.6.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-6{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0189, 4.0189, 8.0189, ...], [43008.0195, 43012.0195, 43016.0195, ...], [86016.0156, 86020.0156, 86024.0156, ...], ], ] sum = 387108.125000 ggml_debug: ffn_moe_gate-6 = (f32) MUL_MAT_ID(blk.6.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-6{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0890, 4.0890, 8.0890, ...], [43008.0898, 43012.0898, 43016.0898, ...], [86016.0859, 86020.0859, 86024.0859, ...], ], ] sum = 387108.812500 ggml_debug: ffn_moe_silu-6 = (f32) UNARY(ffn_moe_gate-6{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0465, 4.0465, 8.0465, ...], [43008.0469, 43012.0469, 43016.0469, ...], [86016.0469, 86020.0469, 86024.0469, ...], ], ] sum = 387108.437500 ggml_debug: ffn_moe_gate_par-6 = (f32) MUL(ffn_moe_up-6{10752, 3, 1, 1}, ffn_moe_silu-6{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0009, 4.0009, 8.0009, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-6 = (f32) MUL_MAT_ID(blk.6.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-6{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0195, 4.0195, 8.0195, ...], [24576.0195, 24580.0195, 24584.0195, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.171875 ggml_debug: ffn_moe_weights_norm-6 (view) = (f32) VIEW(ffn_moe_weights_norm-6{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2461], [ 16.2461], [ 32.2461], ], ] sum = 48.738308 ggml_debug: ffn_moe_weighted-6 = (f32) MUL(ffn_moe_down-6{6144, 3, 1, 1}, ffn_moe_weights_norm-6 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0048, 4.0048, 8.0048, ...], [24576.0039, 24580.0039, 24584.0039, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.031250 ggml_debug: ffn_moe_out-6 = (f32) ADD(ffn_moe_out-6{6144, 3, 1, 1}, ffn_moe_weighted-6{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0222, 3.9778, 7.9778, ...], [24575.9785, 24579.9785, 24583.9785, ...], [49151.9766, 49155.9766, 49159.9766, ...], ], ] sum = 221219.781250 ggml_debug: ffn_moe_up-6 = (f32) MUL_MAT_ID(blk.6.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-6{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0666, 4.0666, 8.0666, ...], [43008.0664, 43012.0664, 43016.0664, ...], [86016.0703, 86020.0703, 86024.0703, ...], ], ] sum = 387108.593750 ggml_debug: ffn_moe_gate-6 = (f32) MUL_MAT_ID(blk.6.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-6{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1146, 4.1146, 8.1146, ...], [43008.1133, 43012.1133, 43016.1133, ...], [86016.1172, 86020.1172, 86024.1172, ...], ], ] sum = 387109.062500 ggml_debug: ffn_moe_silu-6 = (f32) UNARY(ffn_moe_gate-6{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0606, 4.0606, 8.0606, ...], [43008.0625, 43012.0625, 43016.0625, ...], [86016.0625, 86020.0625, 86024.0625, ...], ], ] sum = 387108.562500 ggml_debug: ffn_moe_gate_par-6 = (f32) MUL(ffn_moe_up-6{10752, 3, 1, 1}, ffn_moe_silu-6{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0040, 4.0040, 8.0040, ...], [43008.0039, 43012.0039, 43016.0039, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.031250 ggml_debug: ffn_moe_down-6 = (f32) MUL_MAT_ID(blk.6.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-6{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0342, 3.9658, 7.9658, ...], [24575.9648, 24579.9648, 24583.9648, ...], [49151.9648, 49155.9648, 49159.9648, ...], ], ] sum = 221219.703125 ggml_debug: ffn_moe_weights_norm-6 (view) = (f32) VIEW(ffn_moe_weights_norm-6{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1985], [ 16.1985], [ 32.1985], ], ] sum = 48.595425 ggml_debug: ffn_moe_weighted-6 = (f32) MUL(ffn_moe_down-6{6144, 3, 1, 1}, ffn_moe_weights_norm-6 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0068, 3.9932, 7.9932, ...], [24575.9941, 24579.9941, 24583.9941, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: ffn_moe_out-6 = (f32) ADD(ffn_moe_out-6{6144, 3, 1, 1}, ffn_moe_weighted-6{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0290, 3.9710, 7.9710, ...], [24575.9707, 24579.9707, 24583.9707, ...], [49151.9727, 49155.9727, 49159.9727, ...], ], ] sum = 221219.734375 ggml_debug: ffn_inp-6 = (f32) ADD(kqv_out-6{6144, 3, 1, 1}, l_out-5{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0280, 4.0280, 8.0280, ...], [24576.0273, 24580.0273, 24584.0273, ...], [49152.0273, 49156.0273, 49160.0273, ...], ], ] sum = 221220.265625 ggml_debug: l_out-6 = (f32) ADD(ffn_moe_out-6{6144, 3, 1, 1}, ffn_inp-6{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0010, 3.9990, 7.9990, ...], [24575.9980, 24579.9980, 24583.9980, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: norm-7 = (f32) NORM(l_out-6{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0071, 4.0071, 8.0071, ...], [24576.0078, 24580.0078, 24584.0078, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: attn_norm-7 = (f32) MUL(norm-7{6144, 3, 1, 1}, blk.7.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0008, 4.0008, 8.0008, ...], [24576.0000, 24580.0000, 24584.0000, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: wqkv-7 = (f32) MUL_MAT(blk.7.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-7{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.3076, 4.3076, 8.3076, ...], [32768.3086, 32772.3086, 32776.3086, ...], [65536.3047, 65540.3047, 65544.3047, ...], ], ] sum = 294950.781250 ggml_debug: wqkv_clamped-7 = (f32) CLAMP(wqkv-7{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.3076, 4.3076, 8.3076, ...], [32768.3086, 32772.3086, 32776.3086, ...], [65536.3047, 65540.3047, 65544.3047, ...], ], ] sum = 294950.781250 ggml_debug: wqkv_clamped-7 (view) = (f32) VIEW(wqkv_clamped-7{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.3076, 4.3076, 8.3076, ...], [32768.3086, 32772.3086, 32776.3086, ...], [65536.3047, 65540.3047, 65544.3047, ...], ], ] sum = 294950.781250 ggml_debug: Qcur-7 = (f32) CONT(wqkv_clamped-7 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.3076, 4.3076, 8.3076, ...], [24576.3066, 24580.3066, 24584.3066, ...], [49152.3086, 49156.3086, 49160.3086, ...], ], ] sum = 221222.781250 ggml_debug: Qcur-7 (reshaped) = (f32) RESHAPE(Qcur-7{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.3076, 4.3076, 8.3076, ...], [512.3076, 516.3076, 520.3076, ...], [1024.3076, 1028.3076, 1032.3076, ...], ... ], [ [24576.3066, 24580.3066, 24584.3066, ...], [25088.3066, 25092.3066, 25096.3066, ...], [25600.3066, 25604.3066, 25608.3066, ...], ... ], [ [49152.3086, 49156.3086, 49160.3086, ...], [49664.3086, 49668.3086, 49672.3086, ...], [50176.3086, 50180.3086, 50184.3086, ...], ... ], ] sum = 677492.312500 ggml_debug: Qcur-7 = (f32) ROPE(Qcur-7 (reshaped){128, 48, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.3076, 4.3076, 8.3076, ...], [512.3076, 516.3076, 520.3076, ...], [1024.3076, 1028.3076, 1032.3076, ...], ... ], [ [24576.3066, 24580.3066, 24584.3066, ...], [25088.3066, 25092.3066, 25096.3066, ...], [25600.3066, 25604.3066, 25608.3066, ...], ... ], [ [49152.3086, 49156.3086, 49160.3086, ...], [49664.3086, 49668.3086, 49672.3086, ...], [50176.3086, 50180.3086, 50184.3086, ...], ... ], ] sum = 677492.312500 ggml_debug: wqkv_clamped-7 (view) = (f32) VIEW(wqkv_clamped-7{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 3.6603, 7.6603, 11.6603, ...], [32771.6602, 32775.6602, 32779.6602, ...], [65539.6641, 65543.6641, 65547.6641, ...], ], ] sum = 294980.937500 ggml_debug: Kcur-7 = (f32) CONT(wqkv_clamped-7 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 3.6603, 7.6603, 11.6603, ...], [4099.6602, 4103.6602, 4107.6602, ...], [8195.6602, 8199.6602, 8203.6602, ...], ], ] sum = 36932.941406 ggml_debug: Kcur-7 (reshaped) = (f32) RESHAPE(Kcur-7{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 3.6603, 7.6603, 11.6603, ...], [515.6603, 519.6603, 523.6603, ...], [1027.6604, 1031.6604, 1035.6604, ...], ... ], [ [4099.6602, 4103.6602, 4107.6602, ...], [4611.6602, 4615.6602, 4619.6602, ...], [5123.6602, 5127.6602, 5131.6602, ...], ... ], [ [8195.6602, 8199.6602, 8203.6602, ...], [8707.6602, 8711.6602, 8715.6602, ...], [9219.6602, 9223.6602, 9227.6602, ...], ... ], ] sum = 124622.796875 ggml_debug: Kcur-7 = (f32) ROPE(Kcur-7 (reshaped){128, 8, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 3.6603, 7.6603, 11.6603, ...], [515.6603, 519.6603, 523.6603, ...], [1027.6604, 1031.6604, 1035.6604, ...], ... ], [ [4099.6602, 4103.6602, 4107.6602, ...], [4611.6602, 4615.6602, 4619.6602, ...], [5123.6602, 5127.6602, 5131.6602, ...], ... ], [ [8195.6602, 8199.6602, 8203.6602, ...], [8707.6602, 8711.6602, 8715.6602, ...], [9219.6602, 9223.6602, 9227.6602, ...], ... ], ] sum = 124622.796875 ggml_debug: wqkv_clamped-7 (view) = (f32) VIEW(wqkv_clamped-7{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.2822, 3.7178, 7.7178, ...], [32767.7188, 32771.7188, 32775.7188, ...], [65535.7188, 65539.7188, 65543.7188, ...], ], ] sum = 294945.468750 ggml_debug: Vcur-7 = (f32) CONT(wqkv_clamped-7 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.2822, 3.7178, 7.7178, ...], [4095.7178, 4099.7178, 4103.7178, ...], [8191.7178, 8195.7178, 8199.7178, ...], ], ] sum = 36897.460938 ggml_debug: k_cache_view-7 = (f16) VIEW(cache_k_l7{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-7 (copy of Kcur-7) = (f16) CPY(Kcur-7{128, 8, 3, 1}, k_cache_view-7{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 3.6602, 3.6641, 3.6680, ...], ], ] sum = 10.992188 ggml_debug: v_cur_t-7 = (f32) TRANSPOSE(Vcur-7{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.2822, 4095.7178, 8191.7178], [ 3.7178, 4099.7178, 8195.7178], [ 7.7178, 4103.7178, 8199.7178], ... ], ] sum = 36897.464844 ggml_debug: v_cache_view-7 = (f16) VIEW(cache_v_l7{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-7 (copy of v_cur_t-7) = (f16) CPY(v_cur_t-7{3, 1024, 1, 1}, v_cache_view-7{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.2822, -0.2827, -0.2832], [ -0.5645, -0.5654, -0.5664], [ -1.1289, -1.1309, -1.1328], ... ], ] sum = -5.937012 ggml_debug: v-7 = (f16) VIEW(cache_v_l7{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.2822, -0.2827, -0.2832, ...], [ -0.5645, -0.5654, -0.5664, ...], [ -1.1289, -1.1309, -1.1328, ...], ... ], [ [ -0.2822, -0.2827, -0.2832, ...], [ -0.5645, -0.5654, -0.5664, ...], [ -1.1289, -1.1309, -1.1328, ...], ... ], [ [ -0.2822, -0.2827, -0.2832, ...], [ -0.5645, -0.5654, -0.5664, ...], [ -1.1289, -1.1309, -1.1328, ...], ... ], ... ] sum = -17.811035 ggml_debug: k-7 = (f16) VIEW(cache_k_l7{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 3.6602, 3.6641, 3.6680, ...], [ 14.6406, 14.6562, 14.6719, ...], [ 58.5625, 58.6250, 58.6875, ...], ... ], [ [ 4.3203, 4.3281, 4.3359, ...], [ 17.2812, 17.3125, 17.3438, ...], [ 69.1250, 69.2500, 69.3750, ...], ... ], [ [ 5.3203, 5.3281, 5.3359, ...], [ 21.2812, 21.3125, 21.3438, ...], [ 85.1250, 85.2500, 85.3750, ...], ... ], ... ] sum = 839.179688 ggml_debug: q-7 = (f32) PERMUTE(Qcur-7{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.3076, 4.3076, 8.3076, ...], [24576.3066, 24580.3066, 24584.3066, ...], [49152.3086, 49156.3086, 49160.3086, ...], ], [ [512.3076, 516.3076, 520.3076, ...], [25088.3066, 25092.3066, 25096.3066, ...], [49664.3086, 49668.3086, 49672.3086, ...], ], [ [1024.3076, 1028.3076, 1032.3076, ...], [25600.3066, 25604.3066, 25608.3066, ...], [50176.3086, 50180.3086, 50184.3086, ...], ], ... ] sum = 677492.375000 ggml_debug: kq-7 = (f32) MUL_MAT(k-7{128, 32, 8, 1}, q-7{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 48.0625, 52.0625, 56.0625, ...], [176.0625, 180.0625, 184.0625, ...], [304.0625, 308.0625, 312.0625, ...], ], [ [432.0625, 436.0625, 440.0625, ...], [560.0625, 564.0625, 568.0625, ...], [688.0625, 692.0625, 696.0625, ...], ], [ [816.0625, 820.0625, 824.0625, ...], [944.0625, 948.0625, 952.0625, ...], [1072.0625, 1076.0625, 1080.0625, ...], ], ... ] sum = 15229.687500 ggml_debug: kq_soft_max_ext-7 = (f32) SOFT_MAX(kq-7{32, 3, 48, 1}, CUDA0#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-7 = (f32) MUL_MAT(v-7{32, 128, 8, 1}, kq_soft_max_ext-7{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.2822, 3.7178, 7.7178, ...], [511.7178, 515.7178, 519.7178, ...], [1023.7178, 1027.7178, 1031.7178, ...], ], [ [1535.7178, 1539.7178, 1543.7178, ...], [2047.7178, 2051.7178, 2055.7178, ...], [2559.7178, 2563.7178, 2567.7178, ...], ], [ [3071.7178, 3075.7178, 3079.7178, ...], [3583.7178, 3587.7178, 3591.7178, ...], [4095.7178, 4099.7178, 4103.7178, ...], ], ... ] sum = 55396.390625 ggml_debug: kqv_merged-7 = (f32) PERMUTE(kqv-7{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.2822, 3.7178, 7.7178, ...], [1535.7178, 1539.7178, 1543.7178, ...], [3071.7178, 3075.7178, 3079.7178, ...], ... ], [ [511.7178, 515.7178, 519.7178, ...], [2047.7178, 2051.7178, 2055.7178, ...], [3583.7178, 3587.7178, 3591.7178, ...], ... ], [ [1023.7178, 1027.7178, 1031.7178, ...], [2559.7178, 2563.7178, 2567.7178, ...], [4095.7178, 4099.7178, 4103.7178, ...], ... ], ] sum = 55396.394531 ggml_debug: kqv_merged_cont-7 = (f32) CONT(kqv_merged-7{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.2822, 3.7178, 7.7178, ...], [24575.7188, 24579.7188, 24583.7188, ...], [49151.7188, 49155.7188, 49159.7188, ...], ], ] sum = 221217.468750 ggml_debug: kqv_out-7 = (f32) MUL_MAT(blk.7.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-7{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2099, 4.2099, 8.2099, ...], [24576.2090, 24580.2090, 24584.2090, ...], [49152.2109, 49156.2109, 49160.2109, ...], ], ] sum = 221221.906250 ggml_debug: norm-7 = (f32) NORM(kqv_out-7{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 2.3835, 6.3835, 10.3835, ...], [24578.3828, 24582.3828, 24586.3828, ...], [49154.3828, 49158.3828, 49162.3828, ...], ], ] sum = 221241.437500 ggml_debug: attn_out_norm-7 = (f32) MUL(norm-7{6144, 3, 1, 1}, blk.7.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.5098, 4.5098, 8.5098, ...], [24576.5098, 24580.5098, 24584.5098, ...], [49152.5078, 49156.5078, 49160.5078, ...], ], ] sum = 221224.562500 ggml_debug: ffn_moe_logits-7 = (f32) MUL_MAT(blk.7.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-7{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.2113, 3.7887, 7.7887, ...], [ 63.7887, 67.7887, 71.7887, ...], [127.7887, 131.7887, 135.7887, ...], ], ] sum = 610.098267 ggml_debug: ffn_moe_probs-7 = (f32) SOFT_MAX(ffn_moe_logits-7{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0486, 4.0486, 8.0486, ...], [ 64.0486, 68.0486, 72.0486, ...], [128.0486, 132.0486, 136.0486, ...], ], ] sum = 612.437378 ggml_debug: ffn_moe_argsort-7 = (i32) ARGSORT(ffn_moe_probs-7{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 13.0000, 17.0000, 21.0000, ...], [ 77.0000, 81.0000, 85.0000, ...], [141.0000, 145.0000, 149.0000, ...], ], ] sum = 729.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-7{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 13.0000, 17.0000, 21.0000, ...], [ 77.0000, 81.0000, 85.0000, ...], [141.0000, 145.0000, 149.0000, ...], ], ] sum = 729.000000 ggml_debug: ffn_moe_up-7 = (f32) MUL_MAT_ID(blk.7.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-7{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1316, 4.1316, 8.1316, ...], [43008.1328, 43012.1328, 43016.1328, ...], [86016.1328, 86020.1328, 86024.1328, ...], ], ] sum = 387109.187500 ggml_debug: ffn_moe_gate-7 = (f32) MUL_MAT_ID(blk.7.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-7{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0768, 3.9232, 7.9232, ...], [43007.9219, 43011.9219, 43015.9219, ...], [86015.9219, 86019.9219, 86023.9219, ...], ], ] sum = 387107.312500 ggml_debug: ffn_moe_silu-7 = (f32) UNARY(ffn_moe_gate-7{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0369, 3.9631, 7.9631, ...], [43007.9648, 43011.9648, 43015.9648, ...], [86015.9609, 86019.9609, 86023.9609, ...], ], ] sum = 387107.687500 ggml_debug: ffn_moe_gate_par-7 = (f32) MUL(ffn_moe_up-7{10752, 3, 1, 1}, ffn_moe_silu-7{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0049, 3.9951, 7.9951, ...], [43007.9961, 43011.9961, 43015.9961, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.968750 ggml_debug: ffn_moe_down-7 = (f32) MUL_MAT_ID(blk.7.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-7{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0424, 3.9576, 7.9576, ...], [24575.9570, 24579.9570, 24583.9570, ...], [49151.9570, 49155.9570, 49159.9570, ...], ], ] sum = 221219.609375 ggml_debug: ffn_moe_probs-7 (reshaped) = (f32) RESHAPE(ffn_moe_probs-7{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0486], [ 4.0486], [ 8.0486], ... ], [ [ 64.0486], [ 68.0486], [ 72.0486], ... ], [ [128.0486], [132.0486], [136.0486], ... ], ] sum = 612.437378 ggml_debug: ffn_moe_weights-7 = (f32) GET_ROWS(ffn_moe_probs-7 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1229], [ 4.1229], [ 8.1229], ... ], [ [ 16.1229], [ 20.1229], [ 24.1229], ... ], [ [ 32.1229], [ 36.1229], [ 40.1229], ... ], ] sum = 181.105927 ggml_debug: ffn_moe_weights-7 (reshaped) = (f32) RESHAPE(ffn_moe_weights-7{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1229, 4.1229, 8.1229, ...], [ 16.1229, 20.1229, 24.1229, ...], [ 32.1229, 36.1229, 40.1229, ...], ], ] sum = 181.105927 ggml_debug: ffn_moe_weights_sum-7 = (f32) SUM_ROWS(ffn_moe_weights-7 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3701], [ 4.3701], [ 8.3701], ], ] sum = 13.110297 ggml_debug: ffn_moe_weights_norm-7 = (f32) DIV(ffn_moe_weights-7 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-7{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3320, 4.3320, 8.3320, ...], [ 16.3320, 20.3320, 24.3320, ...], [ 32.3320, 36.3320, 40.3320, ...], ], ] sum = 182.988220 ggml_debug: ffn_moe_weights_norm-7 (view) = (f32) VIEW(ffn_moe_weights_norm-7{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3320], [ 16.3320], [ 32.3320], ], ] sum = 48.996067 ggml_debug: ffn_moe_weighted-7 = (f32) MUL(ffn_moe_down-7{6144, 3, 1, 1}, ffn_moe_weights_norm-7 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0141, 3.9859, 7.9859, ...], [24575.9863, 24579.9863, 24583.9863, ...], [49151.9844, 49155.9844, 49159.9844, ...], ], ] sum = 221219.859375 ggml_debug: ffn_moe_up-7 = (f32) MUL_MAT_ID(blk.7.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-7{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2701, 3.7299, 7.7299, ...], [43007.7305, 43011.7305, 43015.7305, ...], [86015.7266, 86019.7266, 86023.7266, ...], ], ] sum = 387105.562500 ggml_debug: ffn_moe_gate-7 = (f32) MUL_MAT_ID(blk.7.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-7{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0008, 3.9992, 7.9992, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_silu-7 = (f32) UNARY(ffn_moe_gate-7{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0004, 3.9996, 7.9996, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_gate_par-7 = (f32) MUL(ffn_moe_up-7{10752, 3, 1, 1}, ffn_moe_silu-7{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0001, 4.0001, 8.0001, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-7 = (f32) MUL_MAT_ID(blk.7.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-7{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0299, 4.0299, 8.0299, ...], [24576.0293, 24580.0293, 24584.0293, ...], [49152.0312, 49156.0312, 49160.0312, ...], ], ] sum = 221220.281250 ggml_debug: ffn_moe_weights_norm-7 (view) = (f32) VIEW(ffn_moe_weights_norm-7{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2326], [ 16.2326], [ 32.2326], ], ] sum = 48.697918 ggml_debug: ffn_moe_weighted-7 = (f32) MUL(ffn_moe_down-7{6144, 3, 1, 1}, ffn_moe_weights_norm-7 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0070, 4.0070, 8.0070, ...], [24576.0078, 24580.0078, 24584.0078, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: ffn_moe_out-7 = (f32) ADD(ffn_moe_weighted-7{6144, 3, 1, 1}, ffn_moe_weighted-7{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0071, 3.9929, 7.9929, ...], [24575.9922, 24579.9922, 24583.9922, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: ffn_moe_up-7 = (f32) MUL_MAT_ID(blk.7.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-7{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.6076, 4.6076, 8.6076, ...], [43008.6094, 43012.6094, 43016.6094, ...], [86016.6094, 86020.6094, 86024.6094, ...], ], ] sum = 387113.500000 ggml_debug: ffn_moe_gate-7 = (f32) MUL_MAT_ID(blk.7.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-7{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3967, 4.3967, 8.3967, ...], [43008.3984, 43012.3984, 43016.3984, ...], [86016.3984, 86020.3984, 86024.3984, ...], ], ] sum = 387111.593750 ggml_debug: ffn_moe_silu-7 = (f32) UNARY(ffn_moe_gate-7{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.2372, 4.2372, 8.2372, ...], [43008.2383, 43012.2383, 43016.2383, ...], [86016.2344, 86020.2344, 86024.2344, ...], ], ] sum = 387110.125000 ggml_debug: ffn_moe_gate_par-7 = (f32) MUL(ffn_moe_up-7{10752, 3, 1, 1}, ffn_moe_silu-7{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1441, 4.1441, 8.1441, ...], [43008.1445, 43012.1445, 43016.1445, ...], [86016.1406, 86020.1406, 86024.1406, ...], ], ] sum = 387109.250000 ggml_debug: ffn_moe_down-7 = (f32) MUL_MAT_ID(blk.7.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-7{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1042, 3.8958, 7.8958, ...], [24575.8965, 24579.8965, 24583.8965, ...], [49151.8945, 49155.8945, 49159.8945, ...], ], ] sum = 221219.046875 ggml_debug: ffn_moe_weights_norm-7 (view) = (f32) VIEW(ffn_moe_weights_norm-7{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2226], [ 16.2226], [ 32.2226], ], ] sum = 48.667912 ggml_debug: ffn_moe_weighted-7 = (f32) MUL(ffn_moe_down-7{6144, 3, 1, 1}, ffn_moe_weights_norm-7 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0232, 3.9768, 7.9768, ...], [24575.9766, 24579.9766, 24583.9766, ...], [49151.9766, 49155.9766, 49159.9766, ...], ], ] sum = 221219.781250 ggml_debug: ffn_moe_out-7 = (f32) ADD(ffn_moe_out-7{6144, 3, 1, 1}, ffn_moe_weighted-7{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0303, 3.9697, 7.9697, ...], [24575.9688, 24579.9688, 24583.9688, ...], [49151.9688, 49155.9688, 49159.9688, ...], ], ] sum = 221219.718750 ggml_debug: ffn_moe_up-7 = (f32) MUL_MAT_ID(blk.7.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-7{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0300, 3.9700, 7.9700, ...], [43007.9688, 43011.9688, 43015.9688, ...], [86015.9688, 86019.9688, 86023.9688, ...], ], ] sum = 387107.718750 ggml_debug: ffn_moe_gate-7 = (f32) MUL_MAT_ID(blk.7.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-7{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3156, 4.3156, 8.3156, ...], [43008.3164, 43012.3164, 43016.3164, ...], [86016.3125, 86020.3125, 86024.3125, ...], ], ] sum = 387110.812500 ggml_debug: ffn_moe_silu-7 = (f32) UNARY(ffn_moe_gate-7{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1825, 4.1825, 8.1825, ...], [43008.1836, 43012.1836, 43016.1836, ...], [86016.1797, 86020.1797, 86024.1797, ...], ], ] sum = 387109.656250 ggml_debug: ffn_moe_gate_par-7 = (f32) MUL(ffn_moe_up-7{10752, 3, 1, 1}, ffn_moe_silu-7{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0055, 3.9945, 7.9945, ...], [43007.9961, 43011.9961, 43015.9961, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.968750 ggml_debug: ffn_moe_down-7 = (f32) MUL_MAT_ID(blk.7.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-7{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0437, 3.9563, 7.9563, ...], [24575.9570, 24579.9570, 24583.9570, ...], [49151.9570, 49155.9570, 49159.9570, ...], ], ] sum = 221219.593750 ggml_debug: ffn_moe_weights_norm-7 (view) = (f32) VIEW(ffn_moe_weights_norm-7{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2127], [ 16.2127], [ 32.2127], ], ] sum = 48.638103 ggml_debug: ffn_moe_weighted-7 = (f32) MUL(ffn_moe_down-7{6144, 3, 1, 1}, ffn_moe_weights_norm-7 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0093, 3.9907, 7.9907, ...], [24575.9902, 24579.9902, 24583.9902, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: ffn_moe_out-7 = (f32) ADD(ffn_moe_out-7{6144, 3, 1, 1}, ffn_moe_weighted-7{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0396, 3.9604, 7.9604, ...], [24575.9609, 24579.9609, 24583.9609, ...], [49151.9609, 49155.9609, 49159.9609, ...], ], ] sum = 221219.656250 ggml_debug: ffn_inp-7 = (f32) ADD(kqv_out-7{6144, 3, 1, 1}, l_out-6{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2089, 4.2089, 8.2089, ...], [24576.2090, 24580.2090, 24584.2090, ...], [49152.2070, 49156.2070, 49160.2070, ...], ], ] sum = 221221.875000 ggml_debug: l_out-7 = (f32) ADD(ffn_moe_out-7{6144, 3, 1, 1}, ffn_inp-7{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1693, 4.1693, 8.1693, ...], [24576.1699, 24580.1699, 24584.1699, ...], [49152.1680, 49156.1680, 49160.1680, ...], ], ] sum = 221221.531250 ggml_debug: norm-8 = (f32) NORM(l_out-7{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 1.1495, 5.1495, 9.1495, ...], [24577.1504, 24581.1504, 24585.1504, ...], [49153.1484, 49157.1484, 49161.1484, ...], ], ] sum = 221230.343750 ggml_debug: attn_norm-8 = (f32) MUL(norm-8{6144, 3, 1, 1}, blk.8.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1706, 4.1706, 8.1706, ...], [24576.1699, 24580.1699, 24584.1699, ...], [49152.1719, 49156.1719, 49160.1719, ...], ], ] sum = 221221.546875 ggml_debug: wqkv-8 = (f32) MUL_MAT(blk.8.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-8{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.0379, 4.0379, 8.0379, ...], [32768.0391, 32772.0391, 32776.0391, ...], [65536.0391, 65540.0391, 65544.0391, ...], ], ] sum = 294948.343750 ggml_debug: wqkv_clamped-8 = (f32) CLAMP(wqkv-8{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.0379, 4.0379, 8.0379, ...], [32768.0391, 32772.0391, 32776.0391, ...], [65536.0391, 65540.0391, 65544.0391, ...], ], ] sum = 294948.343750 ggml_debug: wqkv_clamped-8 (view) = (f32) VIEW(wqkv_clamped-8{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0379, 4.0379, 8.0379, ...], [32768.0391, 32772.0391, 32776.0391, ...], [65536.0391, 65540.0391, 65544.0391, ...], ], ] sum = 294948.343750 ggml_debug: Qcur-8 = (f32) CONT(wqkv_clamped-8 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0379, 4.0379, 8.0379, ...], [24576.0371, 24580.0371, 24584.0371, ...], [49152.0391, 49156.0391, 49160.0391, ...], ], ] sum = 221220.343750 ggml_debug: Qcur-8 (reshaped) = (f32) RESHAPE(Qcur-8{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0379, 4.0379, 8.0379, ...], [512.0380, 516.0380, 520.0380, ...], [1024.0380, 1028.0380, 1032.0380, ...], ... ], [ [24576.0371, 24580.0371, 24584.0371, ...], [25088.0371, 25092.0371, 25096.0371, ...], [25600.0371, 25604.0371, 25608.0371, ...], ... ], [ [49152.0391, 49156.0391, 49160.0391, ...], [49664.0391, 49668.0391, 49672.0391, ...], [50176.0391, 50180.0391, 50184.0391, ...], ... ], ] sum = 677485.062500 ggml_debug: Qcur-8 = (f32) ROPE(Qcur-8 (reshaped){128, 48, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.0379, 4.0379, 8.0379, ...], [512.0380, 516.0380, 520.0380, ...], [1024.0380, 1028.0380, 1032.0380, ...], ... ], [ [24576.0371, 24580.0371, 24584.0371, ...], [25088.0371, 25092.0371, 25096.0371, ...], [25600.0371, 25604.0371, 25608.0371, ...], ... ], [ [49152.0391, 49156.0391, 49160.0391, ...], [49664.0391, 49668.0391, 49672.0391, ...], [50176.0391, 50180.0391, 50184.0391, ...], ... ], ] sum = 677485.062500 ggml_debug: wqkv_clamped-8 (view) = (f32) VIEW(wqkv_clamped-8{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 1.4566, 5.4566, 9.4566, ...], [32769.4570, 32773.4570, 32777.4570, ...], [65537.4531, 65541.4531, 65545.4531, ...], ], ] sum = 294961.093750 ggml_debug: Kcur-8 = (f32) CONT(wqkv_clamped-8 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 1.4566, 5.4566, 9.4566, ...], [4097.4565, 4101.4565, 4105.4565, ...], [8193.4570, 8197.4570, 8201.4570, ...], ], ] sum = 36913.109375 ggml_debug: Kcur-8 (reshaped) = (f32) RESHAPE(Kcur-8{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 1.4566, 5.4566, 9.4566, ...], [513.4566, 517.4566, 521.4566, ...], [1025.4567, 1029.4567, 1033.4567, ...], ... ], [ [4097.4565, 4101.4565, 4105.4565, ...], [4609.4565, 4613.4565, 4617.4565, ...], [5121.4565, 5125.4565, 5129.4565, ...], ... ], [ [8193.4570, 8197.4570, 8201.4570, ...], [8705.4570, 8709.4570, 8713.4570, ...], [9217.4570, 9221.4570, 9225.4570, ...], ... ], ] sum = 124563.312500 ggml_debug: Kcur-8 = (f32) ROPE(Kcur-8 (reshaped){128, 8, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 1.4566, 5.4566, 9.4566, ...], [513.4566, 517.4566, 521.4566, ...], [1025.4567, 1029.4567, 1033.4567, ...], ... ], [ [4097.4565, 4101.4565, 4105.4565, ...], [4609.4565, 4613.4565, 4617.4565, ...], [5121.4565, 5125.4565, 5129.4565, ...], ... ], [ [8193.4570, 8197.4570, 8201.4570, ...], [8705.4570, 8709.4570, 8713.4570, ...], [9217.4570, 9221.4570, 9225.4570, ...], ... ], ] sum = 124563.312500 ggml_debug: wqkv_clamped-8 (view) = (f32) VIEW(wqkv_clamped-8{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0406, 4.0406, 8.0406, ...], [32768.0391, 32772.0391, 32776.0391, ...], [65536.0391, 65540.0391, 65544.0391, ...], ], ] sum = 294948.343750 ggml_debug: Vcur-8 = (f32) CONT(wqkv_clamped-8 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0406, 4.0406, 8.0406, ...], [4096.0405, 4100.0405, 4104.0405, ...], [8192.0410, 8196.0410, 8200.0410, ...], ], ] sum = 36900.367188 ggml_debug: k_cache_view-8 = (f16) VIEW(cache_k_l8{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-8 (copy of Kcur-8) = (f16) CPY(Kcur-8{128, 8, 3, 1}, k_cache_view-8{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 1.4570, 1.4590, 1.4609, ...], ], ] sum = 4.376953 ggml_debug: v_cur_t-8 = (f32) TRANSPOSE(Vcur-8{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0406, 4096.0405, 8192.0410], [ 4.0406, 4100.0405, 8196.0410], [ 8.0406, 4104.0405, 8200.0410], ... ], ] sum = 36900.367188 ggml_debug: v_cache_view-8 = (f16) VIEW(cache_v_l8{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-8 (copy of v_cur_t-8) = (f16) CPY(v_cur_t-8{3, 1024, 1, 1}, v_cache_view-8{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.0406, 0.0407, 0.0407], [ 0.0812, 0.0814, 0.0815], [ 0.1625, 0.1627, 0.1630], ... ], ] sum = 0.854279 ggml_debug: v-8 = (f16) VIEW(cache_v_l8{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.0406, 0.0407, 0.0407, ...], [ 0.0812, 0.0814, 0.0815, ...], [ 0.1625, 0.1627, 0.1630, ...], ... ], [ [ 0.0406, 0.0407, 0.0407, ...], [ 0.0812, 0.0814, 0.0815, ...], [ 0.1625, 0.1627, 0.1630, ...], ... ], [ [ 0.0406, 0.0407, 0.0407, ...], [ 0.0812, 0.0814, 0.0815, ...], [ 0.1625, 0.1627, 0.1630, ...], ... ], ... ] sum = 2.562836 ggml_debug: k-8 = (f16) VIEW(cache_k_l8{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 1.4570, 1.4590, 1.4609, ...], [ 5.8281, 5.8359, 5.8438, ...], [ 23.3125, 23.3438, 23.3750, ...], ... ], [ [ 1.7070, 1.7090, 1.7109, ...], [ 6.8281, 6.8359, 6.8438, ...], [ 27.3125, 27.3438, 27.3750, ...], ... ], [ [ 1.9570, 1.9590, 1.9609, ...], [ 7.8281, 7.8359, 7.8438, ...], [ 31.3125, 31.3438, 31.3750, ...], ... ], ... ] sum = 322.998047 ggml_debug: q-8 = (f32) PERMUTE(Qcur-8{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.0379, 4.0379, 8.0379, ...], [24576.0371, 24580.0371, 24584.0371, ...], [49152.0391, 49156.0391, 49160.0391, ...], ], [ [512.0380, 516.0380, 520.0380, ...], [25088.0371, 25092.0371, 25096.0371, ...], [49664.0391, 49668.0391, 49672.0391, ...], ], [ [1024.0380, 1028.0380, 1032.0380, ...], [25600.0371, 25604.0371, 25608.0371, ...], [50176.0391, 50180.0391, 50184.0391, ...], ], ... ] sum = 677485.000000 ggml_debug: kq-8 = (f32) MUL_MAT(k-8{128, 32, 8, 1}, q-8{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 30.7812, 34.7812, 38.7812, ...], [158.7812, 162.7812, 166.7812, ...], [286.7812, 290.7812, 294.7812, ...], ], [ [414.7812, 418.7812, 422.7812, ...], [542.7812, 546.7812, 550.7812, ...], [670.7812, 674.7812, 678.7812, ...], ], [ [798.7812, 802.7812, 806.7812, ...], [926.7812, 930.7812, 934.7812, ...], [1054.7812, 1058.7812, 1062.7812, ...], ], ... ] sum = 14763.093750 ggml_debug: kq_soft_max_ext-8 = (f32) SOFT_MAX(kq-8{32, 3, 48, 1}, CUDA0#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-8 = (f32) MUL_MAT(v-8{32, 128, 8, 1}, kq_soft_max_ext-8{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.0406, 4.0406, 8.0406, ...], [512.0406, 516.0406, 520.0406, ...], [1024.0406, 1028.0406, 1032.0406, ...], ], [ [1536.0406, 1540.0406, 1544.0406, ...], [2048.0405, 2052.0405, 2056.0405, ...], [2560.0405, 2564.0405, 2568.0405, ...], ], [ [3072.0405, 3076.0405, 3080.0405, ...], [3584.0405, 3588.0405, 3592.0405, ...], [4096.0405, 4100.0405, 4104.0405, ...], ], ... ] sum = 55405.089844 ggml_debug: kqv_merged-8 = (f32) PERMUTE(kqv-8{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0406, 4.0406, 8.0406, ...], [1536.0406, 1540.0406, 1544.0406, ...], [3072.0405, 3076.0405, 3080.0405, ...], ... ], [ [512.0406, 516.0406, 520.0406, ...], [2048.0405, 2052.0405, 2056.0405, ...], [3584.0405, 3588.0405, 3592.0405, ...], ... ], [ [1024.0406, 1028.0406, 1032.0406, ...], [2560.0405, 2564.0405, 2568.0405, ...], [4096.0405, 4100.0405, 4104.0405, ...], ... ], ] sum = 55405.085938 ggml_debug: kqv_merged_cont-8 = (f32) CONT(kqv_merged-8{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0406, 4.0406, 8.0406, ...], [24576.0410, 24580.0410, 24584.0410, ...], [49152.0391, 49156.0391, 49160.0391, ...], ], ] sum = 221220.343750 ggml_debug: kqv_out-8 = (f32) MUL_MAT(blk.8.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-8{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1503, 3.8497, 7.8497, ...], [24575.8496, 24579.8496, 24583.8496, ...], [49151.8516, 49155.8516, 49159.8516, ...], ], ] sum = 221218.656250 ggml_debug: norm-8 = (f32) NORM(kqv_out-8{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -1.1790, 2.8210, 6.8210, ...], [24574.8203, 24578.8203, 24582.8203, ...], [49150.8203, 49154.8203, 49158.8203, ...], ], ] sum = 221209.375000 ggml_debug: attn_out_norm-8 = (f32) MUL(norm-8{6144, 3, 1, 1}, blk.8.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2694, 3.7306, 7.7306, ...], [24575.7305, 24579.7305, 24583.7305, ...], [49151.7305, 49155.7305, 49159.7305, ...], ], ] sum = 221217.578125 ggml_debug: ffn_moe_logits-8 = (f32) MUL_MAT(blk.8.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-8{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.1785, 3.8215, 7.8215, ...], [ 63.8215, 67.8215, 71.8215, ...], [127.8215, 131.8215, 135.8215, ...], ], ] sum = 610.393799 ggml_debug: ffn_moe_probs-8 = (f32) SOFT_MAX(ffn_moe_logits-8{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0503, 4.0503, 8.0503, ...], [ 64.0503, 68.0503, 72.0503, ...], [128.0503, 132.0503, 136.0503, ...], ], ] sum = 612.452759 ggml_debug: ffn_moe_argsort-8 = (i32) ARGSORT(ffn_moe_probs-8{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 12.0000, 16.0000, 20.0000, ...], [ 76.0000, 80.0000, 84.0000, ...], [140.0000, 144.0000, 148.0000, ...], ], ] sum = 720.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-8{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 12.0000, 16.0000, 20.0000, ...], [ 76.0000, 80.0000, 84.0000, ...], [140.0000, 144.0000, 148.0000, ...], ], ] sum = 720.000000 ggml_debug: ffn_moe_up-8 = (f32) MUL_MAT_ID(blk.8.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-8{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1747, 4.1747, 8.1747, ...], [43008.1758, 43012.1758, 43016.1758, ...], [86016.1719, 86020.1719, 86024.1719, ...], ], ] sum = 387109.562500 ggml_debug: ffn_moe_gate-8 = (f32) MUL_MAT_ID(blk.8.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-8{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2161, 3.7839, 7.7839, ...], [43007.7852, 43011.7852, 43015.7852, ...], [86015.7812, 86019.7812, 86023.7812, ...], ], ] sum = 387106.031250 ggml_debug: ffn_moe_silu-8 = (f32) UNARY(ffn_moe_gate-8{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0964, 3.9036, 7.9036, ...], [43007.9023, 43011.9023, 43015.9023, ...], [86015.9062, 86019.9062, 86023.9062, ...], ], ] sum = 387107.156250 ggml_debug: ffn_moe_gate_par-8 = (f32) MUL(ffn_moe_up-8{10752, 3, 1, 1}, ffn_moe_silu-8{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0168, 3.9832, 7.9832, ...], [43007.9844, 43011.9844, 43015.9844, ...], [86015.9844, 86019.9844, 86023.9844, ...], ], ] sum = 387107.875000 ggml_debug: ffn_moe_down-8 = (f32) MUL_MAT_ID(blk.8.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-8{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0895, 4.0895, 8.0895, ...], [24576.0898, 24580.0898, 24584.0898, ...], [49152.0898, 49156.0898, 49160.0898, ...], ], ] sum = 221220.812500 ggml_debug: ffn_moe_probs-8 (reshaped) = (f32) RESHAPE(ffn_moe_probs-8{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0503], [ 4.0503], [ 8.0503], ... ], [ [ 64.0503], [ 68.0503], [ 72.0503], ... ], [ [128.0503], [132.0503], [136.0503], ... ], ] sum = 612.452759 ggml_debug: ffn_moe_weights-8 = (f32) GET_ROWS(ffn_moe_probs-8 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1101], [ 4.1101], [ 8.1101], ... ], [ [ 16.1101], [ 20.1101], [ 24.1101], ... ], [ [ 32.1101], [ 36.1101], [ 40.1101], ... ], ] sum = 180.990570 ggml_debug: ffn_moe_weights-8 (reshaped) = (f32) RESHAPE(ffn_moe_weights-8{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1101, 4.1101, 8.1101, ...], [ 16.1101, 20.1101, 24.1101, ...], [ 32.1101, 36.1101, 40.1101, ...], ], ] sum = 180.990570 ggml_debug: ffn_moe_weights_sum-8 = (f32) SUM_ROWS(ffn_moe_weights-8 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3367], [ 4.3367], [ 8.3367], ], ] sum = 13.010173 ggml_debug: ffn_moe_weights_norm-8 = (f32) DIV(ffn_moe_weights-8 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-8{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3269, 4.3269, 8.3269, ...], [ 16.3269, 20.3269, 24.3269, ...], [ 32.3269, 36.3269, 40.3269, ...], ], ] sum = 182.941833 ggml_debug: ffn_moe_weights_norm-8 (view) = (f32) VIEW(ffn_moe_weights_norm-8{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3269], [ 16.3269], [ 32.3269], ], ] sum = 48.980610 ggml_debug: ffn_moe_weighted-8 = (f32) MUL(ffn_moe_down-8{6144, 3, 1, 1}, ffn_moe_weights_norm-8 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0293, 4.0293, 8.0293, ...], [24576.0293, 24580.0293, 24584.0293, ...], [49152.0273, 49156.0273, 49160.0273, ...], ], ] sum = 221220.265625 ggml_debug: ffn_moe_up-8 = (f32) MUL_MAT_ID(blk.8.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-8{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1502, 4.1502, 8.1502, ...], [43008.1484, 43012.1484, 43016.1484, ...], [86016.1484, 86020.1484, 86024.1484, ...], ], ] sum = 387109.343750 ggml_debug: ffn_moe_gate-8 = (f32) MUL_MAT_ID(blk.8.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-8{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1631, 3.8369, 7.8369, ...], [43007.8359, 43011.8359, 43015.8359, ...], [86015.8359, 86019.8359, 86023.8359, ...], ], ] sum = 387106.531250 ggml_debug: ffn_moe_silu-8 = (f32) UNARY(ffn_moe_gate-8{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0749, 3.9251, 7.9251, ...], [43007.9258, 43011.9258, 43015.9258, ...], [86015.9219, 86019.9219, 86023.9219, ...], ], ] sum = 387107.312500 ggml_debug: ffn_moe_gate_par-8 = (f32) MUL(ffn_moe_up-8{10752, 3, 1, 1}, ffn_moe_silu-8{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0113, 3.9887, 7.9887, ...], [43007.9883, 43011.9883, 43015.9883, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.906250 ggml_debug: ffn_moe_down-8 = (f32) MUL_MAT_ID(blk.8.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-8{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0215, 3.9785, 7.9785, ...], [24575.9785, 24579.9785, 24583.9785, ...], [49151.9766, 49155.9766, 49159.9766, ...], ], ] sum = 221219.781250 ggml_debug: ffn_moe_weights_norm-8 (view) = (f32) VIEW(ffn_moe_weights_norm-8{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2393], [ 16.2393], [ 32.2393], ], ] sum = 48.717957 ggml_debug: ffn_moe_weighted-8 = (f32) MUL(ffn_moe_down-8{6144, 3, 1, 1}, ffn_moe_weights_norm-8 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0051, 3.9949, 7.9949, ...], [24575.9941, 24579.9941, 24583.9941, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.968750 ggml_debug: ffn_moe_out-8 = (f32) ADD(ffn_moe_weighted-8{6144, 3, 1, 1}, ffn_moe_weighted-8{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0241, 4.0241, 8.0241, ...], [24576.0234, 24580.0234, 24584.0234, ...], [49152.0234, 49156.0234, 49160.0234, ...], ], ] sum = 221220.218750 ggml_debug: ffn_moe_up-8 = (f32) MUL_MAT_ID(blk.8.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-8{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1241, 3.8759, 7.8759, ...], [43007.8750, 43011.8750, 43015.8750, ...], [86015.8750, 86019.8750, 86023.8750, ...], ], ] sum = 387106.875000 ggml_debug: ffn_moe_gate-8 = (f32) MUL_MAT_ID(blk.8.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-8{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1389, 4.1389, 8.1389, ...], [43008.1406, 43012.1406, 43016.1406, ...], [86016.1406, 86020.1406, 86024.1406, ...], ], ] sum = 387109.250000 ggml_debug: ffn_moe_silu-8 = (f32) UNARY(ffn_moe_gate-8{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0742, 4.0742, 8.0742, ...], [43008.0742, 43012.0742, 43016.0742, ...], [86016.0781, 86020.0781, 86024.0781, ...], ], ] sum = 387108.687500 ggml_debug: ffn_moe_gate_par-8 = (f32) MUL(ffn_moe_up-8{10752, 3, 1, 1}, ffn_moe_silu-8{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0092, 3.9908, 7.9908, ...], [43007.9922, 43011.9922, 43015.9922, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.937500 ggml_debug: ffn_moe_down-8 = (f32) MUL_MAT_ID(blk.8.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-8{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0221, 3.9779, 7.9779, ...], [24575.9785, 24579.9785, 24583.9785, ...], [49151.9766, 49155.9766, 49159.9766, ...], ], ] sum = 221219.781250 ggml_debug: ffn_moe_weights_norm-8 (view) = (f32) VIEW(ffn_moe_weights_norm-8{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2192], [ 16.2192], [ 32.2192], ], ] sum = 48.657707 ggml_debug: ffn_moe_weighted-8 = (f32) MUL(ffn_moe_down-8{6144, 3, 1, 1}, ffn_moe_weights_norm-8 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0049, 3.9951, 7.9951, ...], [24575.9961, 24579.9961, 24583.9961, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.968750 ggml_debug: ffn_moe_out-8 = (f32) ADD(ffn_moe_out-8{6144, 3, 1, 1}, ffn_moe_weighted-8{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0192, 4.0192, 8.0192, ...], [24576.0195, 24580.0195, 24584.0195, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.171875 ggml_debug: ffn_moe_up-8 = (f32) MUL_MAT_ID(blk.8.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-8{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2885, 4.2885, 8.2885, ...], [43008.2891, 43012.2891, 43016.2891, ...], [86016.2891, 86020.2891, 86024.2891, ...], ], ] sum = 387110.593750 ggml_debug: ffn_moe_gate-8 = (f32) MUL_MAT_ID(blk.8.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-8{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1056, 3.8944, 7.8944, ...], [43007.8945, 43011.8945, 43015.8945, ...], [86015.8906, 86019.8906, 86023.8906, ...], ], ] sum = 387107.000000 ggml_debug: ffn_moe_silu-8 = (f32) UNARY(ffn_moe_gate-8{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0500, 3.9500, 7.9500, ...], [43007.9492, 43011.9492, 43015.9492, ...], [86015.9531, 86019.9531, 86023.9531, ...], ], ] sum = 387107.562500 ggml_debug: ffn_moe_gate_par-8 = (f32) MUL(ffn_moe_up-8{10752, 3, 1, 1}, ffn_moe_silu-8{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0144, 3.9856, 7.9856, ...], [43007.9844, 43011.9844, 43015.9844, ...], [86015.9844, 86019.9844, 86023.9844, ...], ], ] sum = 387107.875000 ggml_debug: ffn_moe_down-8 = (f32) MUL_MAT_ID(blk.8.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-8{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0054, 3.9946, 7.9946, ...], [24575.9941, 24579.9941, 24583.9941, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.968750 ggml_debug: ffn_moe_weights_norm-8 (view) = (f32) VIEW(ffn_moe_weights_norm-8{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2146], [ 16.2146], [ 32.2146], ], ] sum = 48.643730 ggml_debug: ffn_moe_weighted-8 = (f32) MUL(ffn_moe_down-8{6144, 3, 1, 1}, ffn_moe_weights_norm-8 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0012, 3.9988, 7.9988, ...], [24575.9980, 24579.9980, 24583.9980, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_out-8 = (f32) ADD(ffn_moe_out-8{6144, 3, 1, 1}, ffn_moe_weighted-8{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0181, 4.0181, 8.0181, ...], [24576.0176, 24580.0176, 24584.0176, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.156250 ggml_debug: ffn_inp-8 = (f32) ADD(kqv_out-8{6144, 3, 1, 1}, l_out-7{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0190, 4.0190, 8.0190, ...], [24576.0195, 24580.0195, 24584.0195, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.156250 ggml_debug: l_out-8 = (f32) ADD(ffn_moe_out-8{6144, 3, 1, 1}, ffn_inp-8{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0371, 4.0371, 8.0371, ...], [24576.0371, 24580.0371, 24584.0371, ...], [49152.0352, 49156.0352, 49160.0352, ...], ], ] sum = 221220.328125 ggml_debug: norm-9 = (f32) NORM(l_out-8{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.2030, 4.2030, 8.2030, ...], [24576.2031, 24580.2031, 24584.2031, ...], [49152.2031, 49156.2031, 49160.2031, ...], ], ] sum = 221221.828125 ggml_debug: attn_norm-9 = (f32) MUL(norm-9{6144, 3, 1, 1}, blk.9.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0291, 4.0291, 8.0291, ...], [24576.0293, 24580.0293, 24584.0293, ...], [49152.0273, 49156.0273, 49160.0273, ...], ], ] sum = 221220.265625 ggml_debug: wqkv-9 = (f32) MUL_MAT(blk.9.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-9{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -0.2993, 3.7007, 7.7007, ...], [32767.7012, 32771.6992, 32775.6992, ...], [65535.6992, 65539.7031, 65543.7031, ...], ], ] sum = 294945.312500 ggml_debug: wqkv_clamped-9 = (f32) CLAMP(wqkv-9{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -0.2993, 3.7007, 7.7007, ...], [32767.7012, 32771.6992, 32775.6992, ...], [65535.6992, 65539.7031, 65543.7031, ...], ], ] sum = 294945.312500 ggml_debug: wqkv_clamped-9 (view) = (f32) VIEW(wqkv_clamped-9{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.2993, 3.7007, 7.7007, ...], [32767.7012, 32771.6992, 32775.6992, ...], [65535.6992, 65539.7031, 65543.7031, ...], ], ] sum = 294945.312500 ggml_debug: Qcur-9 = (f32) CONT(wqkv_clamped-9 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.2993, 3.7007, 7.7007, ...], [24575.7012, 24579.7012, 24583.7012, ...], [49151.6992, 49155.6992, 49159.6992, ...], ], ] sum = 221217.312500 ggml_debug: Qcur-9 (reshaped) = (f32) RESHAPE(Qcur-9{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -0.2993, 3.7007, 7.7007, ...], [511.7007, 515.7007, 519.7007, ...], [1023.7007, 1027.7007, 1031.7007, ...], ... ], [ [24575.7012, 24579.7012, 24583.7012, ...], [25087.7012, 25091.7012, 25095.7012, ...], [25599.7012, 25603.7012, 25607.7012, ...], ... ], [ [49151.6992, 49155.6992, 49159.6992, ...], [49663.6992, 49667.6992, 49671.6992, ...], [50175.6992, 50179.6992, 50183.6992, ...], ... ], ] sum = 677475.812500 ggml_debug: Qcur-9 = (f32) ROPE(Qcur-9 (reshaped){128, 48, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -0.2993, 3.7007, 7.7007, ...], [511.7007, 515.7007, 519.7007, ...], [1023.7007, 1027.7007, 1031.7007, ...], ... ], [ [24575.7012, 24579.7012, 24583.7012, ...], [25087.7012, 25091.7012, 25095.7012, ...], [25599.7012, 25603.7012, 25607.7012, ...], ... ], [ [49151.6992, 49155.6992, 49159.6992, ...], [49663.6992, 49667.6992, 49671.6992, ...], [50175.6992, 50179.6992, 50183.6992, ...], ... ], ] sum = 677475.812500 ggml_debug: wqkv_clamped-9 (view) = (f32) VIEW(wqkv_clamped-9{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -2.6714, 1.3286, 5.3286, ...], [32765.3281, 32769.3281, 32773.3281, ...], [65533.3281, 65537.3281, 65541.3281, ...], ], ] sum = 294923.937500 ggml_debug: Kcur-9 = (f32) CONT(wqkv_clamped-9 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -2.6714, 1.3286, 5.3286, ...], [4093.3286, 4097.3286, 4101.3286, ...], [8189.3286, 8193.3281, 8197.3281, ...], ], ] sum = 36875.953125 ggml_debug: Kcur-9 (reshaped) = (f32) RESHAPE(Kcur-9{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ -2.6714, 1.3286, 5.3286, ...], [509.3286, 513.3286, 517.3286, ...], [1021.3286, 1025.3286, 1029.3286, ...], ... ], [ [4093.3286, 4097.3286, 4101.3286, ...], [4605.3286, 4609.3286, 4613.3286, ...], [5117.3286, 5121.3286, 5125.3286, ...], ... ], [ [8189.3286, 8193.3281, 8197.3281, ...], [8701.3281, 8705.3281, 8709.3281, ...], [9213.3281, 9217.3281, 9221.3281, ...], ... ], ] sum = 124451.859375 ggml_debug: Kcur-9 = (f32) ROPE(Kcur-9 (reshaped){128, 8, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ -2.6714, 1.3286, 5.3286, ...], [509.3286, 513.3286, 517.3286, ...], [1021.3286, 1025.3286, 1029.3286, ...], ... ], [ [4093.3286, 4097.3286, 4101.3286, ...], [4605.3286, 4609.3286, 4613.3286, ...], [5117.3286, 5121.3286, 5125.3286, ...], ... ], [ [8189.3286, 8193.3281, 8197.3281, ...], [8701.3281, 8705.3281, 8709.3281, ...], [9213.3281, 9217.3281, 9221.3281, ...], ... ], ] sum = 124451.859375 ggml_debug: wqkv_clamped-9 (view) = (f32) VIEW(wqkv_clamped-9{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0192, 3.9808, 7.9808, ...], [32767.9805, 32771.9805, 32775.9805, ...], [65535.9805, 65539.9844, 65543.9844, ...], ], ] sum = 294947.843750 ggml_debug: Vcur-9 = (f32) CONT(wqkv_clamped-9 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0192, 3.9808, 7.9808, ...], [4095.9807, 4099.9810, 4103.9810, ...], [8191.9810, 8195.9805, 8199.9805, ...], ], ] sum = 36899.828125 ggml_debug: k_cache_view-9 = (f16) VIEW(cache_k_l9{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-9 (copy of Kcur-9) = (f16) CPY(Kcur-9{128, 8, 3, 1}, k_cache_view-9{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ -2.6719, -2.6758, -2.6797, ...], ], ] sum = -8.027344 ggml_debug: v_cur_t-9 = (f32) TRANSPOSE(Vcur-9{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.0192, 4095.9807, 8191.9810], [ 3.9808, 4099.9810, 8195.9805], [ 7.9808, 4103.9810, 8199.9805], ... ], ] sum = 36899.828125 ggml_debug: v_cache_view-9 = (f16) VIEW(cache_v_l9{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-9 (copy of v_cur_t-9) = (f16) CPY(v_cur_t-9{3, 1024, 1, 1}, v_cache_view-9{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.0192, -0.0193, -0.0193], [ -0.0385, -0.0385, -0.0386], [ -0.0769, -0.0770, -0.0771], ... ], ] sum = -0.404388 ggml_debug: v-9 = (f16) VIEW(cache_v_l9{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.0192, -0.0193, -0.0193, ...], [ -0.0385, -0.0385, -0.0386, ...], [ -0.0769, -0.0770, -0.0771, ...], ... ], [ [ -0.0192, -0.0193, -0.0193, ...], [ -0.0385, -0.0385, -0.0386, ...], [ -0.0769, -0.0770, -0.0771, ...], ... ], [ [ -0.0192, -0.0193, -0.0193, ...], [ -0.0385, -0.0385, -0.0386, ...], [ -0.0769, -0.0770, -0.0771, ...], ... ], ... ] sum = -1.213165 ggml_debug: k-9 = (f16) VIEW(cache_k_l9{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ -2.6719, -2.6758, -2.6797, ...], [-10.6875, -10.7031, -10.7188, ...], [-42.7500, -42.8125, -42.8750, ...], ... ], [ [ -3.1719, -3.1758, -3.1797, ...], [-12.6875, -12.7031, -12.7188, ...], [-50.7500, -50.8125, -50.8750, ...], ... ], [ [ -3.6719, -3.6758, -3.6797, ...], [-14.6875, -14.7031, -14.7188, ...], [-58.7500, -58.8125, -58.8750, ...], ... ], ... ] sum = -600.222656 ggml_debug: q-9 = (f32) PERMUTE(Qcur-9{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -0.2993, 3.7007, 7.7007, ...], [24575.7012, 24579.7012, 24583.7012, ...], [49151.6992, 49155.6992, 49159.6992, ...], ], [ [511.7007, 515.7007, 519.7007, ...], [25087.7012, 25091.7012, 25095.7012, ...], [49663.6992, 49667.6992, 49671.6992, ...], ], [ [1023.7007, 1027.7007, 1031.7007, ...], [25599.7012, 25603.7012, 25607.7012, ...], [50175.6992, 50179.6992, 50183.6992, ...], ], ... ] sum = 677475.750000 ggml_debug: kq-9 = (f32) MUL_MAT(k-9{128, 32, 8, 1}, q-9{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 18.7812, 22.7812, 26.7812, ...], [146.7812, 150.7812, 154.7812, ...], [274.7812, 278.7812, 282.7812, ...], ], [ [402.7812, 406.7812, 410.7812, ...], [530.7812, 534.7812, 538.7812, ...], [658.7812, 662.7812, 666.7812, ...], ], [ [786.7812, 790.7812, 794.7812, ...], [914.7812, 918.7812, 922.7812, ...], [1042.7812, 1046.7812, 1050.7812, ...], ], ... ] sum = 14439.093750 ggml_debug: kq_soft_max_ext-9 = (f32) SOFT_MAX(kq-9{32, 3, 48, 1}, CUDA0#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-9 = (f32) MUL_MAT(v-9{32, 128, 8, 1}, kq_soft_max_ext-9{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.0192, 3.9808, 7.9808, ...], [511.9808, 515.9808, 519.9808, ...], [1023.9808, 1027.9807, 1031.9807, ...], ], [ [1535.9807, 1539.9807, 1543.9807, ...], [2047.9807, 2051.9807, 2055.9807, ...], [2559.9807, 2563.9807, 2567.9807, ...], ], [ [3071.9807, 3075.9807, 3079.9807, ...], [3583.9807, 3587.9807, 3591.9807, ...], [4095.9807, 4099.9810, 4103.9810, ...], ], ... ] sum = 55403.476562 ggml_debug: kqv_merged-9 = (f32) PERMUTE(kqv-9{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.0192, 3.9808, 7.9808, ...], [1535.9807, 1539.9807, 1543.9807, ...], [3071.9807, 3075.9807, 3079.9807, ...], ... ], [ [511.9808, 515.9808, 519.9808, ...], [2047.9807, 2051.9807, 2055.9807, ...], [3583.9807, 3587.9807, 3591.9807, ...], ... ], [ [1023.9808, 1027.9807, 1031.9807, ...], [2559.9807, 2563.9807, 2567.9807, ...], [4095.9807, 4099.9810, 4103.9810, ...], ... ], ] sum = 55403.476562 ggml_debug: kqv_merged_cont-9 = (f32) CONT(kqv_merged-9{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0192, 3.9808, 7.9808, ...], [24575.9805, 24579.9805, 24583.9805, ...], [49151.9805, 49155.9805, 49159.9805, ...], ], ] sum = 221219.828125 ggml_debug: kqv_out-9 = (f32) MUL_MAT(blk.9.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-9{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1219, 3.8781, 7.8781, ...], [24575.8789, 24579.8789, 24583.8789, ...], [49151.8789, 49155.8789, 49159.8789, ...], ], ] sum = 221218.890625 ggml_debug: norm-9 = (f32) NORM(kqv_out-9{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -1.2966, 2.7034, 6.7034, ...], [24574.7031, 24578.7031, 24582.7031, ...], [49150.7031, 49154.7031, 49158.7031, ...], ], ] sum = 221208.328125 ggml_debug: attn_out_norm-9 = (f32) MUL(norm-9{6144, 3, 1, 1}, blk.9.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.3178, 3.6822, 7.6822, ...], [24575.6816, 24579.6816, 24583.6816, ...], [49151.6836, 49155.6836, 49159.6836, ...], ], ] sum = 221217.156250 ggml_debug: ffn_moe_logits-9 = (f32) MUL_MAT(blk.9.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-9{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ 0.2581, 4.2581, 8.2581, ...], [ 64.2581, 68.2581, 72.2581, ...], [128.2581, 132.2581, 136.2581, ...], ], ] sum = 614.322510 ggml_debug: ffn_moe_probs-9 = (f32) SOFT_MAX(ffn_moe_logits-9{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0780, 4.0780, 8.0780, ...], [ 64.0780, 68.0780, 72.0780, ...], [128.0780, 132.0780, 136.0780, ...], ], ] sum = 612.702332 ggml_debug: ffn_moe_argsort-9 = (i32) ARGSORT(ffn_moe_probs-9{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 8.0000, 12.0000, 16.0000, ...], [ 72.0000, 76.0000, 80.0000, ...], [136.0000, 140.0000, 144.0000, ...], ], ] sum = 684.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-9{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 8.0000, 12.0000, 16.0000, ...], [ 72.0000, 76.0000, 80.0000, ...], [136.0000, 140.0000, 144.0000, ...], ], ] sum = 684.000000 ggml_debug: ffn_moe_up-9 = (f32) MUL_MAT_ID(blk.9.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-9{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0843, 3.9157, 7.9157, ...], [43007.9141, 43011.9141, 43015.9141, ...], [86015.9141, 86019.9141, 86023.9141, ...], ], ] sum = 387107.218750 ggml_debug: ffn_moe_gate-9 = (f32) MUL_MAT_ID(blk.9.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-9{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0204, 4.0204, 8.0204, ...], [43008.0195, 43012.0195, 43016.0195, ...], [86016.0234, 86020.0234, 86024.0234, ...], ], ] sum = 387108.218750 ggml_debug: ffn_moe_silu-9 = (f32) UNARY(ffn_moe_gate-9{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0103, 4.0103, 8.0103, ...], [43008.0117, 43012.0117, 43016.0117, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.062500 ggml_debug: ffn_moe_gate_par-9 = (f32) MUL(ffn_moe_up-9{10752, 3, 1, 1}, ffn_moe_silu-9{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0009, 3.9991, 7.9991, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-9 = (f32) MUL_MAT_ID(blk.9.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-9{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0136, 3.9864, 7.9864, ...], [24575.9863, 24579.9863, 24583.9863, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.875000 ggml_debug: ffn_moe_probs-9 (reshaped) = (f32) RESHAPE(ffn_moe_probs-9{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0780], [ 4.0780], [ 8.0780], ... ], [ [ 64.0780], [ 68.0780], [ 72.0780], ... ], [ [128.0780], [132.0780], [136.0780], ... ], ] sum = 612.702332 ggml_debug: ffn_moe_weights-9 = (f32) GET_ROWS(ffn_moe_probs-9 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.0965], [ 4.0965], [ 8.0965], ... ], [ [ 16.0965], [ 20.0965], [ 24.0965], ... ], [ [ 32.0965], [ 36.0965], [ 40.0965], ... ], ] sum = 180.868774 ggml_debug: ffn_moe_weights-9 (reshaped) = (f32) RESHAPE(ffn_moe_weights-9{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0965, 4.0965, 8.0965, ...], [ 16.0965, 20.0965, 24.0965, ...], [ 32.0965, 36.0965, 40.0965, ...], ], ] sum = 180.868774 ggml_debug: ffn_moe_weights_sum-9 = (f32) SUM_ROWS(ffn_moe_weights-9 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3247], [ 4.3247], [ 8.3247], ], ] sum = 12.974040 ggml_debug: ffn_moe_weights_norm-9 = (f32) DIV(ffn_moe_weights-9 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-9{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2973, 4.2973, 8.2973, ...], [ 16.2973, 20.2973, 24.2973, ...], [ 32.2973, 36.2973, 40.2973, ...], ], ] sum = 182.675781 ggml_debug: ffn_moe_weights_norm-9 (view) = (f32) VIEW(ffn_moe_weights_norm-9{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2973], [ 16.2973], [ 32.2973], ], ] sum = 48.891933 ggml_debug: ffn_moe_weighted-9 = (f32) MUL(ffn_moe_down-9{6144, 3, 1, 1}, ffn_moe_weights_norm-9 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0040, 3.9960, 7.9960, ...], [24575.9961, 24579.9961, 24583.9961, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.968750 ggml_debug: ffn_moe_up-9 = (f32) MUL_MAT_ID(blk.9.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-9{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.4672, 4.4672, 8.4672, ...], [43008.4688, 43012.4688, 43016.4688, ...], [86016.4688, 86020.4688, 86024.4688, ...], ], ] sum = 387112.218750 ggml_debug: ffn_moe_gate-9 = (f32) MUL_MAT_ID(blk.9.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-9{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.5068, 4.5068, 8.5068, ...], [43008.5078, 43012.5078, 43016.5078, ...], [86016.5078, 86020.5078, 86024.5078, ...], ], ] sum = 387112.562500 ggml_debug: ffn_moe_silu-9 = (f32) UNARY(ffn_moe_gate-9{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.3163, 4.3163, 8.3163, ...], [43008.3164, 43012.3164, 43016.3164, ...], [86016.3125, 86020.3125, 86024.3125, ...], ], ] sum = 387110.812500 ggml_debug: ffn_moe_gate_par-9 = (f32) MUL(ffn_moe_up-9{10752, 3, 1, 1}, ffn_moe_silu-9{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1478, 4.1478, 8.1478, ...], [43008.1484, 43012.1484, 43016.1484, ...], [86016.1484, 86020.1484, 86024.1484, ...], ], ] sum = 387109.343750 ggml_debug: ffn_moe_down-9 = (f32) MUL_MAT_ID(blk.9.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-9{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0424, 4.0424, 8.0424, ...], [24576.0430, 24580.0430, 24584.0430, ...], [49152.0430, 49156.0430, 49160.0430, ...], ], ] sum = 221220.390625 ggml_debug: ffn_moe_weights_norm-9 (view) = (f32) VIEW(ffn_moe_weights_norm-9{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2404], [ 16.2404], [ 32.2404], ], ] sum = 48.721077 ggml_debug: ffn_moe_weighted-9 = (f32) MUL(ffn_moe_down-9{6144, 3, 1, 1}, ffn_moe_weights_norm-9 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0102, 4.0102, 8.0102, ...], [24576.0098, 24580.0098, 24584.0098, ...], [49152.0117, 49156.0117, 49160.0117, ...], ], ] sum = 221220.109375 ggml_debug: ffn_moe_out-9 = (f32) ADD(ffn_moe_weighted-9{6144, 3, 1, 1}, ffn_moe_weighted-9{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0062, 4.0062, 8.0062, ...], [24576.0059, 24580.0059, 24584.0059, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: ffn_moe_up-9 = (f32) MUL_MAT_ID(blk.9.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-9{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0574, 4.0574, 8.0574, ...], [43008.0586, 43012.0586, 43016.0586, ...], [86016.0547, 86020.0547, 86024.0547, ...], ], ] sum = 387108.531250 ggml_debug: ffn_moe_gate-9 = (f32) MUL_MAT_ID(blk.9.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-9{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2355, 3.7645, 7.7645, ...], [43007.7656, 43011.7656, 43015.7656, ...], [86015.7656, 86019.7656, 86023.7656, ...], ], ] sum = 387105.875000 ggml_debug: ffn_moe_silu-9 = (f32) UNARY(ffn_moe_gate-9{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1040, 3.8960, 7.8960, ...], [43007.8945, 43011.8945, 43015.8945, ...], [86015.8984, 86019.8984, 86023.8984, ...], ], ] sum = 387107.093750 ggml_debug: ffn_moe_gate_par-9 = (f32) MUL(ffn_moe_up-9{10752, 3, 1, 1}, ffn_moe_silu-9{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0060, 3.9940, 7.9940, ...], [43007.9922, 43011.9922, 43015.9922, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.937500 ggml_debug: ffn_moe_down-9 = (f32) MUL_MAT_ID(blk.9.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-9{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0543, 4.0543, 8.0543, ...], [24576.0547, 24580.0547, 24584.0547, ...], [49152.0547, 49156.0547, 49160.0547, ...], ], ] sum = 221220.500000 ggml_debug: ffn_moe_weights_norm-9 (view) = (f32) VIEW(ffn_moe_weights_norm-9{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2337], [ 16.2337], [ 32.2337], ], ] sum = 48.701199 ggml_debug: ffn_moe_weighted-9 = (f32) MUL(ffn_moe_down-9{6144, 3, 1, 1}, ffn_moe_weights_norm-9 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0127, 4.0127, 8.0127, ...], [24576.0137, 24580.0137, 24584.0137, ...], [49152.0117, 49156.0117, 49160.0117, ...], ], ] sum = 221220.125000 ggml_debug: ffn_moe_out-9 = (f32) ADD(ffn_moe_out-9{6144, 3, 1, 1}, ffn_moe_weighted-9{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0189, 4.0189, 8.0189, ...], [24576.0195, 24580.0195, 24584.0195, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.156250 ggml_debug: ffn_moe_up-9 = (f32) MUL_MAT_ID(blk.9.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-9{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0886, 4.0886, 8.0886, ...], [43008.0898, 43012.0898, 43016.0898, ...], [86016.0859, 86020.0859, 86024.0859, ...], ], ] sum = 387108.812500 ggml_debug: ffn_moe_gate-9 = (f32) MUL_MAT_ID(blk.9.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-9{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0276, 4.0276, 8.0276, ...], [43008.0273, 43012.0273, 43016.0273, ...], [86016.0312, 86020.0312, 86024.0312, ...], ], ] sum = 387108.281250 ggml_debug: ffn_moe_silu-9 = (f32) UNARY(ffn_moe_gate-9{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0140, 4.0140, 8.0140, ...], [43008.0156, 43012.0156, 43016.0156, ...], [86016.0156, 86020.0156, 86024.0156, ...], ], ] sum = 387108.125000 ggml_debug: ffn_moe_gate_par-9 = (f32) MUL(ffn_moe_up-9{10752, 3, 1, 1}, ffn_moe_silu-9{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0012, 4.0012, 8.0012, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-9 = (f32) MUL_MAT_ID(blk.9.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-9{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0322, 3.9678, 7.9678, ...], [24575.9668, 24579.9668, 24583.9668, ...], [49151.9688, 49155.9688, 49159.9688, ...], ], ] sum = 221219.718750 ggml_debug: ffn_moe_weights_norm-9 (view) = (f32) VIEW(ffn_moe_weights_norm-9{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2286], [ 16.2286], [ 32.2286], ], ] sum = 48.685791 ggml_debug: ffn_moe_weighted-9 = (f32) MUL(ffn_moe_down-9{6144, 3, 1, 1}, ffn_moe_weights_norm-9 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0074, 3.9926, 7.9926, ...], [24575.9922, 24579.9922, 24583.9922, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: ffn_moe_out-9 = (f32) ADD(ffn_moe_out-9{6144, 3, 1, 1}, ffn_moe_weighted-9{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0115, 4.0115, 8.0115, ...], [24576.0117, 24580.0117, 24584.0117, ...], [49152.0117, 49156.0117, 49160.0117, ...], ], ] sum = 221220.109375 ggml_debug: ffn_inp-9 = (f32) ADD(kqv_out-9{6144, 3, 1, 1}, l_out-8{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0848, 3.9152, 7.9152, ...], [24575.9160, 24579.9160, 24583.9160, ...], [49151.9141, 49155.9141, 49159.9141, ...], ], ] sum = 221219.218750 ggml_debug: l_out-9 = (f32) ADD(ffn_moe_out-9{6144, 3, 1, 1}, ffn_inp-9{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0733, 3.9267, 7.9267, ...], [24575.9258, 24579.9258, 24583.9258, ...], [49151.9258, 49155.9258, 49159.9258, ...], ], ] sum = 221219.328125 ggml_debug: norm-10 = (f32) NORM(l_out-9{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.3335, 3.6665, 7.6665, ...], [24575.6660, 24579.6660, 24583.6660, ...], [49151.6680, 49155.6680, 49159.6680, ...], ], ] sum = 221217.015625 ggml_debug: attn_norm-10 = (f32) MUL(norm-10{6144, 3, 1, 1}, blk.10.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0573, 3.9427, 7.9427, ...], [24575.9434, 24579.9434, 24583.9434, ...], [49151.9414, 49155.9414, 49159.9414, ...], ], ] sum = 221219.468750 ggml_debug: wqkv-10 = (f32) MUL_MAT(blk.10.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-10{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -0.7363, 3.2637, 7.2637, ...], [32767.2637, 32771.2656, 32775.2656, ...], [65535.2656, 65539.2656, 65543.2656, ...], ], ] sum = 294941.375000 ggml_debug: wqkv_clamped-10 = (f32) CLAMP(wqkv-10{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -0.7363, 3.2637, 7.2637, ...], [32767.2637, 32771.2656, 32775.2656, ...], [65535.2656, 65539.2656, 65543.2656, ...], ], ] sum = 294941.375000 ggml_debug: wqkv_clamped-10 (view) = (f32) VIEW(wqkv_clamped-10{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.7363, 3.2637, 7.2637, ...], [32767.2637, 32771.2656, 32775.2656, ...], [65535.2656, 65539.2656, 65543.2656, ...], ], ] sum = 294941.375000 ggml_debug: Qcur-10 = (f32) CONT(wqkv_clamped-10 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.7363, 3.2637, 7.2637, ...], [24575.2637, 24579.2637, 24583.2637, ...], [49151.2656, 49155.2656, 49159.2656, ...], ], ] sum = 221213.390625 ggml_debug: Qcur-10 (reshaped) = (f32) RESHAPE(Qcur-10{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -0.7363, 3.2637, 7.2637, ...], [511.2637, 515.2637, 519.2637, ...], [1023.2637, 1027.2637, 1031.2637, ...], ... ], [ [24575.2637, 24579.2637, 24583.2637, ...], [25087.2637, 25091.2637, 25095.2637, ...], [25599.2637, 25603.2637, 25607.2637, ...], ... ], [ [49151.2656, 49155.2656, 49159.2656, ...], [49663.2656, 49667.2656, 49671.2656, ...], [50175.2656, 50179.2656, 50183.2656, ...], ... ], ] sum = 677464.000000 ggml_debug: Qcur-10 = (f32) ROPE(Qcur-10 (reshaped){128, 48, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -0.7363, 3.2637, 7.2637, ...], [511.2637, 515.2637, 519.2637, ...], [1023.2637, 1027.2637, 1031.2637, ...], ... ], [ [24575.2637, 24579.2637, 24583.2637, ...], [25087.2637, 25091.2637, 25095.2637, ...], [25599.2637, 25603.2637, 25607.2637, ...], ... ], [ [49151.2656, 49155.2656, 49159.2656, ...], [49663.2656, 49667.2656, 49671.2656, ...], [50175.2656, 50179.2656, 50183.2656, ...], ... ], ] sum = 677464.000000 ggml_debug: wqkv_clamped-10 (view) = (f32) VIEW(wqkv_clamped-10{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0574, 3.9426, 7.9426, ...], [32767.9434, 32771.9414, 32775.9414, ...], [65535.9414, 65539.9453, 65543.9453, ...], ], ] sum = 294947.468750 ggml_debug: Kcur-10 = (f32) CONT(wqkv_clamped-10 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0574, 3.9426, 7.9426, ...], [4095.9426, 4099.9429, 4103.9429, ...], [8191.9429, 8195.9424, 8199.9424, ...], ], ] sum = 36899.484375 ggml_debug: Kcur-10 (reshaped) = (f32) RESHAPE(Kcur-10{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ -0.0574, 3.9426, 7.9426, ...], [511.9426, 515.9426, 519.9426, ...], [1023.9426, 1027.9426, 1031.9426, ...], ... ], [ [4095.9426, 4099.9429, 4103.9429, ...], [4607.9429, 4611.9429, 4615.9429, ...], [5119.9429, 5123.9429, 5127.9429, ...], ... ], [ [8191.9429, 8195.9424, 8199.9424, ...], [8703.9424, 8707.9424, 8711.9424, ...], [9215.9424, 9219.9424, 9223.9424, ...], ... ], ] sum = 124522.468750 ggml_debug: Kcur-10 = (f32) ROPE(Kcur-10 (reshaped){128, 8, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ -0.0574, 3.9426, 7.9426, ...], [511.9426, 515.9426, 519.9426, ...], [1023.9426, 1027.9426, 1031.9426, ...], ... ], [ [4095.9426, 4099.9429, 4103.9429, ...], [4607.9429, 4611.9429, 4615.9429, ...], [5119.9429, 5123.9429, 5127.9429, ...], ... ], [ [8191.9429, 8195.9424, 8199.9424, ...], [8703.9424, 8707.9424, 8711.9424, ...], [9215.9424, 9219.9424, 9223.9424, ...], ... ], ] sum = 124522.468750 ggml_debug: wqkv_clamped-10 (view) = (f32) VIEW(wqkv_clamped-10{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0800, 4.0800, 8.0800, ...], [32768.0781, 32772.0781, 32776.0781, ...], [65536.0781, 65540.0781, 65544.0781, ...], ], ] sum = 294948.687500 ggml_debug: Vcur-10 = (f32) CONT(wqkv_clamped-10 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0800, 4.0800, 8.0800, ...], [4096.0801, 4100.0801, 4104.0801, ...], [8192.0801, 8196.0801, 8200.0801, ...], ], ] sum = 36900.718750 ggml_debug: k_cache_view-10 = (f16) VIEW(cache_k_l10{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-10 (copy of Kcur-10) = (f16) CPY(Kcur-10{128, 8, 3, 1}, k_cache_view-10{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ -0.0574, -0.0574, -0.0575, ...], ], ] sum = -0.172302 ggml_debug: v_cur_t-10 = (f32) TRANSPOSE(Vcur-10{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0800, 4096.0801, 8192.0801], [ 4.0800, 4100.0801, 8196.0801], [ 8.0800, 4104.0801, 8200.0801], ... ], ] sum = 36900.718750 ggml_debug: v_cache_view-10 = (f16) VIEW(cache_v_l10{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-10 (copy of v_cur_t-10) = (f16) CPY(v_cur_t-10{3, 1024, 1, 1}, v_cache_view-10{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.0800, 0.0801, 0.0802], [ 0.1599, 0.1602, 0.1604], [ 0.3198, 0.3203, 0.3208], ... ], ] sum = 1.681641 ggml_debug: v-10 = (f16) VIEW(cache_v_l10{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.0800, 0.0801, 0.0802, ...], [ 0.1599, 0.1602, 0.1604, ...], [ 0.3198, 0.3203, 0.3208, ...], ... ], [ [ 0.0800, 0.0801, 0.0802, ...], [ 0.1599, 0.1602, 0.1604, ...], [ 0.3198, 0.3203, 0.3208, ...], ... ], [ [ 0.0800, 0.0801, 0.0802, ...], [ 0.1599, 0.1602, 0.1604, ...], [ 0.3198, 0.3203, 0.3208, ...], ... ], ... ] sum = 5.044922 ggml_debug: k-10 = (f16) VIEW(cache_k_l10{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ -0.0574, -0.0574, -0.0575, ...], [ -0.2295, -0.2297, -0.2300, ...], [ -0.9180, -0.9189, -0.9199, ...], ... ], [ [ -0.0679, -0.0680, -0.0681, ...], [ -0.2715, -0.2720, -0.2725, ...], [ -1.0859, -1.0879, -1.0898, ...], ... ], [ [ -0.0835, -0.0836, -0.0837, ...], [ -0.3340, -0.3345, -0.3350, ...], [ -1.3359, -1.3379, -1.3398, ...], ... ], ... ] sum = -13.169861 ggml_debug: q-10 = (f32) PERMUTE(Qcur-10{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -0.7363, 3.2637, 7.2637, ...], [24575.2637, 24579.2637, 24583.2637, ...], [49151.2656, 49155.2656, 49159.2656, ...], ], [ [511.2637, 515.2637, 519.2637, ...], [25087.2637, 25091.2637, 25095.2637, ...], [49663.2656, 49667.2656, 49671.2656, ...], ], [ [1023.2637, 1027.2637, 1031.2637, ...], [25599.2637, 25603.2637, 25607.2637, ...], [50175.2656, 50179.2656, 50183.2656, ...], ], ... ] sum = 677464.000000 ggml_debug: kq-10 = (f32) MUL_MAT(k-10{128, 32, 8, 1}, q-10{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 34.8125, 38.8125, 42.8125, ...], [162.8125, 166.8125, 170.8125, ...], [290.8125, 294.8125, 298.8125, ...], ], [ [418.8125, 422.8125, 426.8125, ...], [546.8125, 550.8125, 554.8125, ...], [674.8125, 678.8125, 682.8125, ...], ], [ [802.8125, 806.8125, 810.8125, ...], [930.8125, 934.8125, 938.8125, ...], [1058.8125, 1062.8125, 1066.8125, ...], ], ... ] sum = 14871.937500 ggml_debug: kq_soft_max_ext-10 = (f32) SOFT_MAX(kq-10{32, 3, 48, 1}, CUDA0#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-10 = (f32) MUL_MAT(v-10{32, 128, 8, 1}, kq_soft_max_ext-10{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.0800, 4.0800, 8.0800, ...], [512.0800, 516.0800, 520.0800, ...], [1024.0800, 1028.0800, 1032.0800, ...], ], [ [1536.0800, 1540.0800, 1544.0800, ...], [2048.0801, 2052.0801, 2056.0801, ...], [2560.0801, 2564.0801, 2568.0801, ...], ], [ [3072.0801, 3076.0801, 3080.0801, ...], [3584.0801, 3588.0801, 3592.0801, ...], [4096.0801, 4100.0801, 4104.0801, ...], ], ... ] sum = 55406.156250 ggml_debug: kqv_merged-10 = (f32) PERMUTE(kqv-10{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0800, 4.0800, 8.0800, ...], [1536.0800, 1540.0800, 1544.0800, ...], [3072.0801, 3076.0801, 3080.0801, ...], ... ], [ [512.0800, 516.0800, 520.0800, ...], [2048.0801, 2052.0801, 2056.0801, ...], [3584.0801, 3588.0801, 3592.0801, ...], ... ], [ [1024.0800, 1028.0800, 1032.0800, ...], [2560.0801, 2564.0801, 2568.0801, ...], [4096.0801, 4100.0801, 4104.0801, ...], ... ], ] sum = 55406.148438 ggml_debug: kqv_merged_cont-10 = (f32) CONT(kqv_merged-10{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0800, 4.0800, 8.0800, ...], [24576.0801, 24580.0801, 24584.0801, ...], [49152.0781, 49156.0781, 49160.0781, ...], ], ] sum = 221220.703125 ggml_debug: kqv_out-10 = (f32) MUL_MAT(blk.10.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-10{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0151, 4.0151, 8.0151, ...], [24576.0156, 24580.0156, 24584.0156, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: norm-10 = (f32) NORM(kqv_out-10{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.1463, 4.1463, 8.1463, ...], [24576.1465, 24580.1465, 24584.1465, ...], [49152.1445, 49156.1445, 49160.1445, ...], ], ] sum = 221221.312500 ggml_debug: attn_out_norm-10 = (f32) MUL(norm-10{6144, 3, 1, 1}, blk.10.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0383, 4.0383, 8.0383, ...], [24576.0391, 24580.0391, 24584.0391, ...], [49152.0391, 49156.0391, 49160.0391, ...], ], ] sum = 221220.343750 ggml_debug: ffn_moe_logits-10 = (f32) MUL_MAT(blk.10.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-10{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.2749, 3.7251, 7.7251, ...], [ 63.7251, 67.7251, 71.7251, ...], [127.7251, 131.7251, 135.7251, ...], ], ] sum = 609.525879 ggml_debug: ffn_moe_probs-10 = (f32) SOFT_MAX(ffn_moe_logits-10{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0477, 4.0477, 8.0477, ...], [ 64.0477, 68.0477, 72.0477, ...], [128.0477, 132.0477, 136.0477, ...], ], ] sum = 612.429260 ggml_debug: ffn_moe_argsort-10 = (i32) ARGSORT(ffn_moe_probs-10{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 6.0000, 10.0000, 14.0000, ...], [ 70.0000, 74.0000, 78.0000, ...], [134.0000, 138.0000, 142.0000, ...], ], ] sum = 666.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-10{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 6.0000, 10.0000, 14.0000, ...], [ 70.0000, 74.0000, 78.0000, ...], [134.0000, 138.0000, 142.0000, ...], ], ] sum = 666.000000 ggml_debug: ffn_moe_up-10 = (f32) MUL_MAT_ID(blk.10.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-10{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0425, 3.9575, 7.9575, ...], [43007.9570, 43011.9570, 43015.9570, ...], [86015.9609, 86019.9609, 86023.9609, ...], ], ] sum = 387107.625000 ggml_debug: ffn_moe_gate-10 = (f32) MUL_MAT_ID(blk.10.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-10{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0079, 4.0079, 8.0079, ...], [43008.0078, 43012.0078, 43016.0078, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.062500 ggml_debug: ffn_moe_silu-10 = (f32) UNARY(ffn_moe_gate-10{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0040, 4.0040, 8.0040, ...], [43008.0039, 43012.0039, 43016.0039, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.031250 ggml_debug: ffn_moe_gate_par-10 = (f32) MUL(ffn_moe_up-10{10752, 3, 1, 1}, ffn_moe_silu-10{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0002, 3.9998, 7.9998, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-10 = (f32) MUL_MAT_ID(blk.10.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-10{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0120, 3.9880, 7.9880, ...], [24575.9883, 24579.9883, 24583.9883, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.890625 ggml_debug: ffn_moe_probs-10 (reshaped) = (f32) RESHAPE(ffn_moe_probs-10{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0477], [ 4.0477], [ 8.0477], ... ], [ [ 64.0477], [ 68.0477], [ 72.0477], ... ], [ [128.0477], [132.0477], [136.0477], ... ], ] sum = 612.429260 ggml_debug: ffn_moe_weights-10 = (f32) GET_ROWS(ffn_moe_probs-10 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.0800], [ 4.0800], [ 8.0800], ... ], [ [ 16.0800], [ 20.0800], [ 24.0800], ... ], [ [ 32.0800], [ 36.0800], [ 40.0800], ... ], ] sum = 180.720093 ggml_debug: ffn_moe_weights-10 (reshaped) = (f32) RESHAPE(ffn_moe_weights-10{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0800, 4.0800, 8.0800, ...], [ 16.0800, 20.0800, 24.0800, ...], [ 32.0800, 36.0800, 40.0800, ...], ], ] sum = 180.720093 ggml_debug: ffn_moe_weights_sum-10 = (f32) SUM_ROWS(ffn_moe_weights-10 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3032], [ 4.3032], [ 8.3032], ], ] sum = 12.909569 ggml_debug: ffn_moe_weights_norm-10 = (f32) DIV(ffn_moe_weights-10 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-10{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2639, 4.2639, 8.2639, ...], [ 16.2639, 20.2639, 24.2639, ...], [ 32.2639, 36.2639, 40.2639, ...], ], ] sum = 182.374985 ggml_debug: ffn_moe_weights_norm-10 (view) = (f32) VIEW(ffn_moe_weights_norm-10{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2639], [ 16.2639], [ 32.2639], ], ] sum = 48.791664 ggml_debug: ffn_moe_weighted-10 = (f32) MUL(ffn_moe_down-10{6144, 3, 1, 1}, ffn_moe_weights_norm-10 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0032, 3.9968, 7.9968, ...], [24575.9961, 24579.9961, 24583.9961, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.984375 ggml_debug: ffn_moe_up-10 = (f32) MUL_MAT_ID(blk.10.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-10{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0565, 3.9435, 7.9435, ...], [43007.9453, 43011.9453, 43015.9453, ...], [86015.9453, 86019.9453, 86023.9453, ...], ], ] sum = 387107.500000 ggml_debug: ffn_moe_gate-10 = (f32) MUL_MAT_ID(blk.10.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-10{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3360, 3.6640, 7.6640, ...], [43007.6641, 43011.6641, 43015.6641, ...], [86015.6641, 86019.6641, 86023.6641, ...], ], ] sum = 387104.968750 ggml_debug: ffn_moe_silu-10 = (f32) UNARY(ffn_moe_gate-10{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1400, 3.8600, 7.8600, ...], [43007.8594, 43011.8594, 43015.8594, ...], [86015.8594, 86019.8594, 86023.8594, ...], ], ] sum = 387106.750000 ggml_debug: ffn_moe_gate_par-10 = (f32) MUL(ffn_moe_up-10{10752, 3, 1, 1}, ffn_moe_silu-10{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0079, 4.0079, 8.0079, ...], [43008.0078, 43012.0078, 43016.0078, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.062500 ggml_debug: ffn_moe_down-10 = (f32) MUL_MAT_ID(blk.10.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-10{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0675, 4.0675, 8.0675, ...], [24576.0684, 24580.0684, 24584.0684, ...], [49152.0664, 49156.0664, 49160.0664, ...], ], ] sum = 221220.593750 ggml_debug: ffn_moe_weights_norm-10 (view) = (f32) VIEW(ffn_moe_weights_norm-10{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2499], [ 16.2499], [ 32.2499], ], ] sum = 48.749622 ggml_debug: ffn_moe_weighted-10 = (f32) MUL(ffn_moe_down-10{6144, 3, 1, 1}, ffn_moe_weights_norm-10 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0169, 4.0169, 8.0169, ...], [24576.0176, 24580.0176, 24584.0176, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: ffn_moe_out-10 = (f32) ADD(ffn_moe_weighted-10{6144, 3, 1, 1}, ffn_moe_weighted-10{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0137, 4.0137, 8.0137, ...], [24576.0137, 24580.0137, 24584.0137, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: ffn_moe_up-10 = (f32) MUL_MAT_ID(blk.10.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-10{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.5129, 3.4871, 7.4871, ...], [43007.4883, 43011.4883, 43015.4883, ...], [86015.4844, 86019.4844, 86023.4844, ...], ], ] sum = 387103.375000 ggml_debug: ffn_moe_gate-10 = (f32) MUL_MAT_ID(blk.10.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-10{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.4490, 3.5510, 7.5510, ...], [43007.5508, 43011.5508, 43015.5508, ...], [86015.5547, 86019.5547, 86023.5547, ...], ], ] sum = 387103.968750 ggml_debug: ffn_moe_silu-10 = (f32) UNARY(ffn_moe_gate-10{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1749, 3.8251, 7.8251, ...], [43007.8242, 43011.8242, 43015.8242, ...], [86015.8281, 86019.8281, 86023.8281, ...], ], ] sum = 387106.437500 ggml_debug: ffn_moe_gate_par-10 = (f32) MUL(ffn_moe_up-10{10752, 3, 1, 1}, ffn_moe_silu-10{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0897, 4.0897, 8.0897, ...], [43008.0898, 43012.0898, 43016.0898, ...], [86016.0859, 86020.0859, 86024.0859, ...], ], ] sum = 387108.812500 ggml_debug: ffn_moe_down-10 = (f32) MUL_MAT_ID(blk.10.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-10{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0095, 4.0095, 8.0095, ...], [24576.0098, 24580.0098, 24584.0098, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: ffn_moe_weights_norm-10 (view) = (f32) VIEW(ffn_moe_weights_norm-10{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2491], [ 16.2491], [ 32.2491], ], ] sum = 48.747337 ggml_debug: ffn_moe_weighted-10 = (f32) MUL(ffn_moe_down-10{6144, 3, 1, 1}, ffn_moe_weights_norm-10 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0024, 4.0024, 8.0024, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.015625 ggml_debug: ffn_moe_out-10 = (f32) ADD(ffn_moe_out-10{6144, 3, 1, 1}, ffn_moe_weighted-10{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0161, 4.0161, 8.0161, ...], [24576.0156, 24580.0156, 24584.0156, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: ffn_moe_up-10 = (f32) MUL_MAT_ID(blk.10.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-10{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2558, 3.7442, 7.7442, ...], [43007.7461, 43011.7461, 43015.7461, ...], [86015.7422, 86019.7422, 86023.7422, ...], ], ] sum = 387105.718750 ggml_debug: ffn_moe_gate-10 = (f32) MUL_MAT_ID(blk.10.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-10{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.4147, 3.5853, 7.5853, ...], [43007.5859, 43011.5859, 43015.5859, ...], [86015.5859, 86019.5859, 86023.5859, ...], ], ] sum = 387104.281250 ggml_debug: ffn_moe_silu-10 = (f32) UNARY(ffn_moe_gate-10{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1650, 3.8350, 7.8350, ...], [43007.8359, 43011.8359, 43015.8359, ...], [86015.8359, 86019.8359, 86023.8359, ...], ], ] sum = 387106.531250 ggml_debug: ffn_moe_gate_par-10 = (f32) MUL(ffn_moe_up-10{10752, 3, 1, 1}, ffn_moe_silu-10{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0422, 4.0422, 8.0422, ...], [43008.0430, 43012.0430, 43016.0430, ...], [86016.0391, 86020.0391, 86024.0391, ...], ], ] sum = 387108.343750 ggml_debug: ffn_moe_down-10 = (f32) MUL_MAT_ID(blk.10.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-10{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0641, 3.9359, 7.9359, ...], [24575.9355, 24579.9355, 24583.9355, ...], [49151.9375, 49155.9375, 49159.9375, ...], ], ] sum = 221219.437500 ggml_debug: ffn_moe_weights_norm-10 (view) = (f32) VIEW(ffn_moe_weights_norm-10{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2371], [ 16.2371], [ 32.2371], ], ] sum = 48.711376 ggml_debug: ffn_moe_weighted-10 = (f32) MUL(ffn_moe_down-10{6144, 3, 1, 1}, ffn_moe_weights_norm-10 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0152, 3.9848, 7.9848, ...], [24575.9844, 24579.9844, 24583.9844, ...], [49151.9844, 49155.9844, 49159.9844, ...], ], ] sum = 221219.859375 ggml_debug: ffn_moe_out-10 = (f32) ADD(ffn_moe_out-10{6144, 3, 1, 1}, ffn_moe_weighted-10{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0009, 4.0009, 8.0009, ...], [24576.0000, 24580.0000, 24584.0000, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_inp-10 = (f32) ADD(kqv_out-10{6144, 3, 1, 1}, l_out-9{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0582, 3.9418, 7.9418, ...], [24575.9414, 24579.9414, 24583.9414, ...], [49151.9414, 49155.9414, 49159.9414, ...], ], ] sum = 221219.468750 ggml_debug: l_out-10 = (f32) ADD(ffn_moe_out-10{6144, 3, 1, 1}, ffn_inp-10{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0573, 3.9427, 7.9427, ...], [24575.9434, 24579.9434, 24583.9434, ...], [49151.9414, 49155.9414, 49159.9414, ...], ], ] sum = 221219.468750 ggml_debug: norm-11 = (f32) NORM(l_out-10{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.2172, 3.7828, 7.7828, ...], [24575.7832, 24579.7832, 24583.7832, ...], [49151.7812, 49155.7812, 49159.7812, ...], ], ] sum = 221218.031250 ggml_debug: attn_norm-11 = (f32) MUL(norm-11{6144, 3, 1, 1}, blk.11.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0426, 3.9574, 7.9574, ...], [24575.9570, 24579.9570, 24583.9570, ...], [49151.9570, 49155.9570, 49159.9570, ...], ], ] sum = 221219.609375 ggml_debug: wqkv-11 = (f32) MUL_MAT(blk.11.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-11{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.9729, 4.9729, 8.9729, ...], [32768.9727, 32772.9727, 32776.9727, ...], [65536.9766, 65540.9766, 65544.9766, ...], ], ] sum = 294956.750000 ggml_debug: wqkv_clamped-11 = (f32) CLAMP(wqkv-11{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.9729, 4.9729, 8.9729, ...], [32768.9727, 32772.9727, 32776.9727, ...], [65536.9766, 65540.9766, 65544.9766, ...], ], ] sum = 294956.750000 ggml_debug: wqkv_clamped-11 (view) = (f32) VIEW(wqkv_clamped-11{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.9729, 4.9729, 8.9729, ...], [32768.9727, 32772.9727, 32776.9727, ...], [65536.9766, 65540.9766, 65544.9766, ...], ], ] sum = 294956.750000 ggml_debug: Qcur-11 = (f32) CONT(wqkv_clamped-11 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.9729, 4.9729, 8.9729, ...], [24576.9727, 24580.9727, 24584.9727, ...], [49152.9727, 49156.9727, 49160.9727, ...], ], ] sum = 221228.750000 ggml_debug: Qcur-11 (reshaped) = (f32) RESHAPE(Qcur-11{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.9729, 4.9729, 8.9729, ...], [512.9729, 516.9729, 520.9729, ...], [1024.9729, 1028.9729, 1032.9729, ...], ... ], [ [24576.9727, 24580.9727, 24584.9727, ...], [25088.9727, 25092.9727, 25096.9727, ...], [25600.9727, 25604.9727, 25608.9727, ...], ... ], [ [49152.9727, 49156.9727, 49160.9727, ...], [49664.9727, 49668.9727, 49672.9727, ...], [50176.9727, 50180.9727, 50184.9727, ...], ... ], ] sum = 677510.312500 ggml_debug: Qcur-11 = (f32) ROPE(Qcur-11 (reshaped){128, 48, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.9729, 4.9729, 8.9729, ...], [512.9729, 516.9729, 520.9729, ...], [1024.9729, 1028.9729, 1032.9729, ...], ... ], [ [24576.9727, 24580.9727, 24584.9727, ...], [25088.9727, 25092.9727, 25096.9727, ...], [25600.9727, 25604.9727, 25608.9727, ...], ... ], [ [49152.9727, 49156.9727, 49160.9727, ...], [49664.9727, 49668.9727, 49672.9727, ...], [50176.9727, 50180.9727, 50184.9727, ...], ... ], ] sum = 677510.312500 ggml_debug: wqkv_clamped-11 (view) = (f32) VIEW(wqkv_clamped-11{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.7049, 4.7049, 8.7049, ...], [32768.7031, 32772.7031, 32776.7031, ...], [65536.7031, 65540.7031, 65544.7031, ...], ], ] sum = 294954.312500 ggml_debug: Kcur-11 = (f32) CONT(wqkv_clamped-11 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.7049, 4.7049, 8.7049, ...], [4096.7051, 4100.7051, 4104.7051, ...], [8192.7051, 8196.7051, 8200.7051, ...], ], ] sum = 36906.343750 ggml_debug: Kcur-11 (reshaped) = (f32) RESHAPE(Kcur-11{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 0.7049, 4.7049, 8.7049, ...], [512.7049, 516.7049, 520.7049, ...], [1024.7050, 1028.7050, 1032.7050, ...], ... ], [ [4096.7051, 4100.7051, 4104.7051, ...], [4608.7051, 4612.7051, 4616.7051, ...], [5120.7051, 5124.7051, 5128.7051, ...], ... ], [ [8192.7051, 8196.7051, 8200.7051, ...], [8704.7051, 8708.7051, 8712.7051, ...], [9216.7051, 9220.7051, 9224.7051, ...], ... ], ] sum = 124543.015625 ggml_debug: Kcur-11 = (f32) ROPE(Kcur-11 (reshaped){128, 8, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 0.7049, 4.7049, 8.7049, ...], [512.7049, 516.7049, 520.7049, ...], [1024.7050, 1028.7050, 1032.7050, ...], ... ], [ [4096.7051, 4100.7051, 4104.7051, ...], [4608.7051, 4612.7051, 4616.7051, ...], [5120.7051, 5124.7051, 5128.7051, ...], ... ], [ [8192.7051, 8196.7051, 8200.7051, ...], [8704.7051, 8708.7051, 8712.7051, ...], [9216.7051, 9220.7051, 9224.7051, ...], ... ], ] sum = 124543.015625 ggml_debug: wqkv_clamped-11 (view) = (f32) VIEW(wqkv_clamped-11{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0202, 4.0202, 8.0202, ...], [32768.0195, 32772.0195, 32776.0195, ...], [65536.0234, 65540.0234, 65544.0234, ...], ], ] sum = 294948.187500 ggml_debug: Vcur-11 = (f32) CONT(wqkv_clamped-11 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0202, 4.0202, 8.0202, ...], [4096.0200, 4100.0200, 4104.0200, ...], [8192.0205, 8196.0205, 8200.0205, ...], ], ] sum = 36900.179688 ggml_debug: k_cache_view-11 = (f16) VIEW(cache_k_l11{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-11 (copy of Kcur-11) = (f16) CPY(Kcur-11{128, 8, 3, 1}, k_cache_view-11{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 0.7051, 0.7061, 0.7070, ...], ], ] sum = 2.118164 ggml_debug: v_cur_t-11 = (f32) TRANSPOSE(Vcur-11{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0202, 4096.0200, 8192.0205], [ 4.0202, 4100.0200, 8196.0205], [ 8.0202, 4104.0200, 8200.0205], ... ], ] sum = 36900.179688 ggml_debug: v_cache_view-11 = (f16) VIEW(cache_v_l11{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-11 (copy of v_cur_t-11) = (f16) CPY(v_cur_t-11{3, 1024, 1, 1}, v_cache_view-11{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.0202, 0.0202, 0.0202], [ 0.0403, 0.0404, 0.0404], [ 0.0806, 0.0807, 0.0809], ... ], ] sum = 0.423935 ggml_debug: v-11 = (f16) VIEW(cache_v_l11{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.0202, 0.0202, 0.0202, ...], [ 0.0403, 0.0404, 0.0404, ...], [ 0.0806, 0.0807, 0.0809, ...], ... ], [ [ 0.0202, 0.0202, 0.0202, ...], [ 0.0403, 0.0404, 0.0404, ...], [ 0.0806, 0.0807, 0.0809, ...], ... ], [ [ 0.0202, 0.0202, 0.0202, ...], [ 0.0403, 0.0404, 0.0404, ...], [ 0.0806, 0.0807, 0.0809, ...], ... ], ... ] sum = 1.271805 ggml_debug: k-11 = (f16) VIEW(cache_k_l11{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 0.7051, 0.7061, 0.7070, ...], [ 2.8203, 2.8242, 2.8281, ...], [ 11.2812, 11.2969, 11.3125, ...], ... ], [ [ 0.8301, 0.8311, 0.8320, ...], [ 3.3203, 3.3242, 3.3281, ...], [ 13.2812, 13.2969, 13.3125, ...], ... ], [ [ 0.9551, 0.9561, 0.9570, ...], [ 3.8203, 3.8242, 3.8281, ...], [ 15.2812, 15.2969, 15.3125, ...], ... ], ... ] sum = 157.069336 ggml_debug: q-11 = (f32) PERMUTE(Qcur-11{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.9729, 4.9729, 8.9729, ...], [24576.9727, 24580.9727, 24584.9727, ...], [49152.9727, 49156.9727, 49160.9727, ...], ], [ [512.9729, 516.9729, 520.9729, ...], [25088.9727, 25092.9727, 25096.9727, ...], [49664.9727, 49668.9727, 49672.9727, ...], ], [ [1024.9729, 1028.9729, 1032.9729, ...], [25600.9727, 25604.9727, 25608.9727, ...], [50176.9727, 50180.9727, 50184.9727, ...], ], ... ] sum = 677510.312500 ggml_debug: kq-11 = (f32) MUL_MAT(k-11{128, 32, 8, 1}, q-11{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 13.1953, 17.1953, 21.1953, ...], [141.1953, 145.1953, 149.1953, ...], [269.1953, 273.1953, 277.1953, ...], ], [ [397.1953, 401.1953, 405.1953, ...], [525.1953, 529.1953, 533.1953, ...], [653.1953, 657.1953, 661.1953, ...], ], [ [781.1953, 785.1953, 789.1953, ...], [909.1953, 913.1953, 917.1953, ...], [1037.1953, 1041.1953, 1045.1953, ...], ], ... ] sum = 14288.273438 ggml_debug: kq_soft_max_ext-11 = (f32) SOFT_MAX(kq-11{32, 3, 48, 1}, CUDA0#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-11 = (f32) MUL_MAT(v-11{32, 128, 8, 1}, kq_soft_max_ext-11{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.0202, 4.0202, 8.0202, ...], [512.0201, 516.0201, 520.0201, ...], [1024.0201, 1028.0201, 1032.0201, ...], ], [ [1536.0201, 1540.0201, 1544.0201, ...], [2048.0203, 2052.0203, 2056.0203, ...], [2560.0203, 2564.0203, 2568.0203, ...], ], [ [3072.0203, 3076.0203, 3080.0203, ...], [3584.0203, 3588.0203, 3592.0203, ...], [4096.0200, 4100.0200, 4104.0200, ...], ], ... ] sum = 55404.539062 ggml_debug: kqv_merged-11 = (f32) PERMUTE(kqv-11{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0202, 4.0202, 8.0202, ...], [1536.0201, 1540.0201, 1544.0201, ...], [3072.0203, 3076.0203, 3080.0203, ...], ... ], [ [512.0201, 516.0201, 520.0201, ...], [2048.0203, 2052.0203, 2056.0203, ...], [3584.0203, 3588.0203, 3592.0203, ...], ... ], [ [1024.0201, 1028.0201, 1032.0201, ...], [2560.0203, 2564.0203, 2568.0203, ...], [4096.0200, 4100.0200, 4104.0200, ...], ... ], ] sum = 55404.539062 ggml_debug: kqv_merged_cont-11 = (f32) CONT(kqv_merged-11{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0202, 4.0202, 8.0202, ...], [24576.0195, 24580.0195, 24584.0195, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.171875 ggml_debug: kqv_out-11 = (f32) MUL_MAT(blk.11.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-11{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1041, 4.1041, 8.1041, ...], [24576.1035, 24580.1035, 24584.1035, ...], [49152.1055, 49156.1055, 49160.1055, ...], ], ] sum = 221220.953125 ggml_debug: norm-11 = (f32) NORM(kqv_out-11{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.8716, 4.8716, 8.8716, ...], [24576.8711, 24580.8711, 24584.8711, ...], [49152.8711, 49156.8711, 49160.8711, ...], ], ] sum = 221227.859375 ggml_debug: attn_out_norm-11 = (f32) MUL(norm-11{6144, 3, 1, 1}, blk.11.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2502, 4.2502, 8.2502, ...], [24576.2500, 24580.2500, 24584.2500, ...], [49152.2500, 49156.2500, 49160.2500, ...], ], ] sum = 221222.250000 ggml_debug: ffn_moe_logits-11 = (f32) MUL_MAT(blk.11.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-11{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ 0.0781, 4.0781, 8.0781, ...], [ 64.0781, 68.0781, 72.0781, ...], [128.0781, 132.0781, 136.0781, ...], ], ] sum = 612.703125 ggml_debug: ffn_moe_probs-11 = (f32) SOFT_MAX(ffn_moe_logits-11{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0607, 4.0607, 8.0607, ...], [ 64.0607, 68.0607, 72.0607, ...], [128.0607, 132.0607, 136.0607, ...], ], ] sum = 612.546265 ggml_debug: ffn_moe_argsort-11 = (i32) ARGSORT(ffn_moe_probs-11{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 9.0000, 13.0000, 17.0000, ...], [ 73.0000, 77.0000, 81.0000, ...], [137.0000, 141.0000, 145.0000, ...], ], ] sum = 693.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-11{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 9.0000, 13.0000, 17.0000, ...], [ 73.0000, 77.0000, 81.0000, ...], [137.0000, 141.0000, 145.0000, ...], ], ] sum = 693.000000 ggml_debug: ffn_moe_up-11 = (f32) MUL_MAT_ID(blk.11.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-11{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2893, 3.7107, 7.7107, ...], [43007.7109, 43011.7109, 43015.7109, ...], [86015.7109, 86019.7109, 86023.7109, ...], ], ] sum = 387105.406250 ggml_debug: ffn_moe_gate-11 = (f32) MUL_MAT_ID(blk.11.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-11{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3244, 4.3244, 8.3244, ...], [43008.3242, 43012.3242, 43016.3242, ...], [86016.3281, 86020.3281, 86024.3281, ...], ], ] sum = 387110.937500 ggml_debug: ffn_moe_silu-11 = (f32) UNARY(ffn_moe_gate-11{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1883, 4.1883, 8.1883, ...], [43008.1875, 43012.1875, 43016.1875, ...], [86016.1875, 86020.1875, 86024.1875, ...], ], ] sum = 387109.687500 ggml_debug: ffn_moe_gate_par-11 = (f32) MUL(ffn_moe_up-11{10752, 3, 1, 1}, ffn_moe_silu-11{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0545, 3.9455, 7.9455, ...], [43007.9453, 43011.9453, 43015.9453, ...], [86015.9453, 86019.9453, 86023.9453, ...], ], ] sum = 387107.500000 ggml_debug: ffn_moe_down-11 = (f32) MUL_MAT_ID(blk.11.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-11{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0860, 4.0860, 8.0860, ...], [24576.0859, 24580.0859, 24584.0859, ...], [49152.0859, 49156.0859, 49160.0859, ...], ], ] sum = 221220.781250 ggml_debug: ffn_moe_probs-11 (reshaped) = (f32) RESHAPE(ffn_moe_probs-11{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0607], [ 4.0607], [ 8.0607], ... ], [ [ 64.0607], [ 68.0607], [ 72.0607], ... ], [ [128.0607], [132.0607], [136.0607], ... ], ] sum = 612.546265 ggml_debug: ffn_moe_weights-11 = (f32) GET_ROWS(ffn_moe_probs-11 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1053], [ 4.1053], [ 8.1053], ... ], [ [ 16.1053], [ 20.1053], [ 24.1053], ... ], [ [ 32.1053], [ 36.1053], [ 40.1053], ... ], ] sum = 180.948120 ggml_debug: ffn_moe_weights-11 (reshaped) = (f32) RESHAPE(ffn_moe_weights-11{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1053, 4.1053, 8.1053, ...], [ 16.1053, 20.1053, 24.1053, ...], [ 32.1053, 36.1053, 40.1053, ...], ], ] sum = 180.948120 ggml_debug: ffn_moe_weights_sum-11 = (f32) SUM_ROWS(ffn_moe_weights-11 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3386], [ 4.3386], [ 8.3386], ], ] sum = 13.015935 ggml_debug: ffn_moe_weights_norm-11 = (f32) DIV(ffn_moe_weights-11 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-11{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3111, 4.3111, 8.3111, ...], [ 16.3111, 20.3111, 24.3111, ...], [ 32.3111, 36.3111, 40.3111, ...], ], ] sum = 182.799728 ggml_debug: ffn_moe_weights_norm-11 (view) = (f32) VIEW(ffn_moe_weights_norm-11{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3111], [ 16.3111], [ 32.3111], ], ] sum = 48.933243 ggml_debug: ffn_moe_weighted-11 = (f32) MUL(ffn_moe_down-11{6144, 3, 1, 1}, ffn_moe_weights_norm-11 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0267, 4.0267, 8.0267, ...], [24576.0273, 24580.0273, 24584.0273, ...], [49152.0273, 49156.0273, 49160.0273, ...], ], ] sum = 221220.250000 ggml_debug: ffn_moe_up-11 = (f32) MUL_MAT_ID(blk.11.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-11{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.3816, 2.6184, 6.6184, ...], [43006.6172, 43010.6172, 43014.6172, ...], [86014.6172, 86018.6172, 86022.6172, ...], ], ] sum = 387095.562500 ggml_debug: ffn_moe_gate-11 = (f32) MUL_MAT_ID(blk.11.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-11{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1530, 4.1530, 8.1530, ...], [43008.1523, 43012.1523, 43016.1523, ...], [86016.1562, 86020.1562, 86024.1562, ...], ], ] sum = 387109.406250 ggml_debug: ffn_moe_silu-11 = (f32) UNARY(ffn_moe_gate-11{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0823, 4.0823, 8.0823, ...], [43008.0820, 43012.0820, 43016.0820, ...], [86016.0859, 86020.0859, 86024.0859, ...], ], ] sum = 387108.750000 ggml_debug: ffn_moe_gate_par-11 = (f32) MUL(ffn_moe_up-11{10752, 3, 1, 1}, ffn_moe_silu-11{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1138, 3.8862, 7.8862, ...], [43007.8867, 43011.8867, 43015.8867, ...], [86015.8828, 86019.8828, 86023.8828, ...], ], ] sum = 387106.968750 ggml_debug: ffn_moe_down-11 = (f32) MUL_MAT_ID(blk.11.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-11{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0928, 3.9072, 7.9072, ...], [24575.9062, 24579.9062, 24583.9062, ...], [49151.9062, 49155.9062, 49159.9062, ...], ], ] sum = 221219.156250 ggml_debug: ffn_moe_weights_norm-11 (view) = (f32) VIEW(ffn_moe_weights_norm-11{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2693], [ 16.2693], [ 32.2693], ], ] sum = 48.807854 ggml_debug: ffn_moe_weighted-11 = (f32) MUL(ffn_moe_down-11{6144, 3, 1, 1}, ffn_moe_weights_norm-11 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0250, 3.9750, 7.9750, ...], [24575.9746, 24579.9746, 24583.9746, ...], [49151.9766, 49155.9766, 49159.9766, ...], ], ] sum = 221219.781250 ggml_debug: ffn_moe_out-11 = (f32) ADD(ffn_moe_weighted-11{6144, 3, 1, 1}, ffn_moe_weighted-11{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0017, 4.0017, 8.0017, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_up-11 = (f32) MUL_MAT_ID(blk.11.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-11{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1731, 3.8269, 7.8269, ...], [43007.8281, 43011.8281, 43015.8281, ...], [86015.8281, 86019.8281, 86023.8281, ...], ], ] sum = 387106.437500 ggml_debug: ffn_moe_gate-11 = (f32) MUL_MAT_ID(blk.11.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-11{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2104, 4.2104, 8.2104, ...], [43008.2109, 43012.2109, 43016.2109, ...], [86016.2109, 86020.2109, 86024.2109, ...], ], ] sum = 387109.906250 ggml_debug: ffn_moe_silu-11 = (f32) UNARY(ffn_moe_gate-11{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1162, 4.1162, 8.1162, ...], [43008.1172, 43012.1172, 43016.1172, ...], [86016.1172, 86020.1172, 86024.1172, ...], ], ] sum = 387109.062500 ggml_debug: ffn_moe_gate_par-11 = (f32) MUL(ffn_moe_up-11{10752, 3, 1, 1}, ffn_moe_silu-11{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0201, 3.9799, 7.9799, ...], [43007.9805, 43011.9805, 43015.9805, ...], [86015.9766, 86019.9766, 86023.9766, ...], ], ] sum = 387107.812500 ggml_debug: ffn_moe_down-11 = (f32) MUL_MAT_ID(blk.11.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-11{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0358, 3.9642, 7.9642, ...], [24575.9648, 24579.9648, 24583.9648, ...], [49151.9648, 49155.9648, 49159.9648, ...], ], ] sum = 221219.687500 ggml_debug: ffn_moe_weights_norm-11 (view) = (f32) VIEW(ffn_moe_weights_norm-11{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2153], [ 16.2153], [ 32.2153], ], ] sum = 48.645966 ggml_debug: ffn_moe_weighted-11 = (f32) MUL(ffn_moe_down-11{6144, 3, 1, 1}, ffn_moe_weights_norm-11 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0077, 3.9923, 7.9923, ...], [24575.9922, 24579.9922, 24583.9922, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: ffn_moe_out-11 = (f32) ADD(ffn_moe_out-11{6144, 3, 1, 1}, ffn_moe_weighted-11{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0060, 3.9940, 7.9940, ...], [24575.9941, 24579.9941, 24583.9941, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: ffn_moe_up-11 = (f32) MUL_MAT_ID(blk.11.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-11{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0445, 3.9555, 7.9555, ...], [43007.9570, 43011.9570, 43015.9570, ...], [86015.9531, 86019.9531, 86023.9531, ...], ], ] sum = 387107.562500 ggml_debug: ffn_moe_gate-11 = (f32) MUL_MAT_ID(blk.11.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-11{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.4189, 4.4189, 8.4189, ...], [43008.4180, 43012.4180, 43016.4180, ...], [86016.4219, 86020.4219, 86024.4219, ...], ], ] sum = 387111.812500 ggml_debug: ffn_moe_silu-11 = (f32) UNARY(ffn_moe_gate-11{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.2527, 4.2527, 8.2527, ...], [43008.2539, 43012.2539, 43016.2539, ...], [86016.2500, 86020.2500, 86024.2500, ...], ], ] sum = 387110.250000 ggml_debug: ffn_moe_gate_par-11 = (f32) MUL(ffn_moe_up-11{10752, 3, 1, 1}, ffn_moe_silu-11{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0113, 3.9887, 7.9887, ...], [43007.9883, 43011.9883, 43015.9883, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.906250 ggml_debug: ffn_moe_down-11 = (f32) MUL_MAT_ID(blk.11.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-11{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0521, 3.9479, 7.9479, ...], [24575.9473, 24579.9473, 24583.9473, ...], [49151.9492, 49155.9492, 49159.9492, ...], ], ] sum = 221219.546875 ggml_debug: ffn_moe_weights_norm-11 (view) = (f32) VIEW(ffn_moe_weights_norm-11{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2043], [ 16.2043], [ 32.2043], ], ] sum = 48.612934 ggml_debug: ffn_moe_weighted-11 = (f32) MUL(ffn_moe_down-11{6144, 3, 1, 1}, ffn_moe_weights_norm-11 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0106, 3.9894, 7.9894, ...], [24575.9902, 24579.9902, 24583.9902, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.890625 ggml_debug: ffn_moe_out-11 = (f32) ADD(ffn_moe_out-11{6144, 3, 1, 1}, ffn_moe_weighted-11{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0166, 3.9834, 7.9834, ...], [24575.9824, 24579.9824, 24583.9824, ...], [49151.9844, 49155.9844, 49159.9844, ...], ], ] sum = 221219.859375 ggml_debug: ffn_inp-11 = (f32) ADD(kqv_out-11{6144, 3, 1, 1}, l_out-10{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0468, 4.0468, 8.0468, ...], [24576.0469, 24580.0469, 24584.0469, ...], [49152.0469, 49156.0469, 49160.0469, ...], ], ] sum = 221220.421875 ggml_debug: l_out-11 = (f32) ADD(ffn_moe_out-11{6144, 3, 1, 1}, ffn_inp-11{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0302, 4.0302, 8.0302, ...], [24576.0293, 24580.0293, 24584.0293, ...], [49152.0312, 49156.0312, 49160.0312, ...], ], ] sum = 221220.281250 ggml_debug: norm-12 = (f32) NORM(l_out-11{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.1105, 4.1105, 8.1105, ...], [24576.1113, 24580.1113, 24584.1113, ...], [49152.1094, 49156.1094, 49160.1094, ...], ], ] sum = 221220.984375 ggml_debug: attn_norm-12 = (f32) MUL(norm-12{6144, 3, 1, 1}, blk.12.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0239, 4.0239, 8.0239, ...], [24576.0234, 24580.0234, 24584.0234, ...], [49152.0234, 49156.0234, 49160.0234, ...], ], ] sum = 221220.218750 ggml_debug: wqkv-12 = (f32) MUL_MAT(blk.12.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-12{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.5401, 4.5401, 8.5401, ...], [32768.5391, 32772.5391, 32776.5391, ...], [65536.5391, 65540.5391, 65544.5391, ...], ], ] sum = 294952.843750 ggml_debug: wqkv_clamped-12 = (f32) CLAMP(wqkv-12{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.5401, 4.5401, 8.5401, ...], [32768.5391, 32772.5391, 32776.5391, ...], [65536.5391, 65540.5391, 65544.5391, ...], ], ] sum = 294952.843750 ggml_debug: wqkv_clamped-12 (view) = (f32) VIEW(wqkv_clamped-12{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.5401, 4.5401, 8.5401, ...], [32768.5391, 32772.5391, 32776.5391, ...], [65536.5391, 65540.5391, 65544.5391, ...], ], ] sum = 294952.843750 ggml_debug: Qcur-12 = (f32) CONT(wqkv_clamped-12 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.5401, 4.5401, 8.5401, ...], [24576.5410, 24580.5410, 24584.5410, ...], [49152.5391, 49156.5391, 49160.5391, ...], ], ] sum = 221224.843750 ggml_debug: Qcur-12 (reshaped) = (f32) RESHAPE(Qcur-12{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.5401, 4.5401, 8.5401, ...], [512.5402, 516.5402, 520.5402, ...], [1024.5402, 1028.5402, 1032.5402, ...], ... ], [ [24576.5410, 24580.5410, 24584.5410, ...], [25088.5410, 25092.5410, 25096.5410, ...], [25600.5410, 25604.5410, 25608.5410, ...], ... ], [ [49152.5391, 49156.5391, 49160.5391, ...], [49664.5391, 49668.5391, 49672.5391, ...], [50176.5391, 50180.5391, 50184.5391, ...], ... ], ] sum = 677498.625000 ggml_debug: Qcur-12 = (f32) ROPE(Qcur-12 (reshaped){128, 48, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.5401, 4.5401, 8.5401, ...], [512.5402, 516.5402, 520.5402, ...], [1024.5402, 1028.5402, 1032.5402, ...], ... ], [ [24576.5410, 24580.5410, 24584.5410, ...], [25088.5410, 25092.5410, 25096.5410, ...], [25600.5410, 25604.5410, 25608.5410, ...], ... ], [ [49152.5391, 49156.5391, 49160.5391, ...], [49664.5391, 49668.5391, 49672.5391, ...], [50176.5391, 50180.5391, 50184.5391, ...], ... ], ] sum = 677498.625000 ggml_debug: wqkv_clamped-12 (view) = (f32) VIEW(wqkv_clamped-12{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 1.1332, 5.1332, 9.1332, ...], [32769.1328, 32773.1328, 32777.1328, ...], [65537.1328, 65541.1328, 65545.1328, ...], ], ] sum = 294958.187500 ggml_debug: Kcur-12 = (f32) CONT(wqkv_clamped-12 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 1.1332, 5.1332, 9.1332, ...], [4097.1333, 4101.1333, 4105.1333, ...], [8193.1328, 8197.1328, 8201.1328, ...], ], ] sum = 36910.195312 ggml_debug: Kcur-12 (reshaped) = (f32) RESHAPE(Kcur-12{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 1.1332, 5.1332, 9.1332, ...], [513.1332, 517.1332, 521.1332, ...], [1025.1332, 1029.1332, 1033.1332, ...], ... ], [ [4097.1333, 4101.1333, 4105.1333, ...], [4609.1333, 4613.1333, 4617.1333, ...], [5121.1333, 5125.1333, 5129.1333, ...], ... ], [ [8193.1328, 8197.1328, 8201.1328, ...], [8705.1328, 8709.1328, 8713.1328, ...], [9217.1328, 9221.1328, 9225.1328, ...], ... ], ] sum = 124554.593750 ggml_debug: Kcur-12 = (f32) ROPE(Kcur-12 (reshaped){128, 8, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 1.1332, 5.1332, 9.1332, ...], [513.1332, 517.1332, 521.1332, ...], [1025.1332, 1029.1332, 1033.1332, ...], ... ], [ [4097.1333, 4101.1333, 4105.1333, ...], [4609.1333, 4613.1333, 4617.1333, ...], [5121.1333, 5125.1333, 5129.1333, ...], ... ], [ [8193.1328, 8197.1328, 8201.1328, ...], [8705.1328, 8709.1328, 8713.1328, ...], [9217.1328, 9221.1328, 9225.1328, ...], ... ], ] sum = 124554.593750 ggml_debug: wqkv_clamped-12 (view) = (f32) VIEW(wqkv_clamped-12{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.3828, 4.3828, 8.3828, ...], [32768.3828, 32772.3828, 32776.3828, ...], [65536.3828, 65540.3828, 65544.3828, ...], ], ] sum = 294951.437500 ggml_debug: Vcur-12 = (f32) CONT(wqkv_clamped-12 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.3828, 4.3828, 8.3828, ...], [4096.3828, 4100.3828, 4104.3828, ...], [8192.3828, 8196.3828, 8200.3828, ...], ], ] sum = 36903.445312 ggml_debug: k_cache_view-12 = (f16) VIEW(cache_k_l12{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-12 (copy of Kcur-12) = (f16) CPY(Kcur-12{128, 8, 3, 1}, k_cache_view-12{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 1.1328, 1.1348, 1.1367, ...], ], ] sum = 3.404297 ggml_debug: v_cur_t-12 = (f32) TRANSPOSE(Vcur-12{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.3828, 4096.3828, 8192.3828], [ 4.3828, 4100.3828, 8196.3828], [ 8.3828, 4104.3828, 8200.3828], ... ], ] sum = 36903.445312 ggml_debug: v_cache_view-12 = (f16) VIEW(cache_v_l12{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-12 (copy of v_cur_t-12) = (f16) CPY(v_cur_t-12{3, 1024, 1, 1}, v_cache_view-12{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.3828, 0.3833, 0.3838], [ 0.7656, 0.7666, 0.7676], [ 1.5312, 1.5332, 1.5352], ... ], ] sum = 8.049316 ggml_debug: v-12 = (f16) VIEW(cache_v_l12{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.3828, 0.3833, 0.3838, ...], [ 0.7656, 0.7666, 0.7676, ...], [ 1.5312, 1.5332, 1.5352, ...], ... ], [ [ 0.3828, 0.3833, 0.3838, ...], [ 0.7656, 0.7666, 0.7676, ...], [ 1.5312, 1.5332, 1.5352, ...], ... ], [ [ 0.3828, 0.3833, 0.3838, ...], [ 0.7656, 0.7666, 0.7676, ...], [ 1.5312, 1.5332, 1.5352, ...], ... ], ... ] sum = 24.147949 ggml_debug: k-12 = (f16) VIEW(cache_k_l12{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 1.1328, 1.1348, 1.1367, ...], [ 4.5312, 4.5391, 4.5469, ...], [ 18.1250, 18.1562, 18.1875, ...], ... ], [ [ 1.3828, 1.3848, 1.3867, ...], [ 5.5312, 5.5391, 5.5469, ...], [ 22.1250, 22.1562, 22.1875, ...], ... ], [ [ 1.6328, 1.6348, 1.6367, ...], [ 6.5312, 6.5391, 6.5469, ...], [ 26.1250, 26.1562, 26.1875, ...], ... ], ... ] sum = 261.720703 ggml_debug: q-12 = (f32) PERMUTE(Qcur-12{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.5401, 4.5401, 8.5401, ...], [24576.5410, 24580.5410, 24584.5410, ...], [49152.5391, 49156.5391, 49160.5391, ...], ], [ [512.5402, 516.5402, 520.5402, ...], [25088.5410, 25092.5410, 25096.5410, ...], [49664.5391, 49668.5391, 49672.5391, ...], ], [ [1024.5402, 1028.5402, 1032.5402, ...], [25600.5410, 25604.5410, 25608.5410, ...], [50176.5391, 50180.5391, 50184.5391, ...], ], ... ] sum = 677498.562500 ggml_debug: kq-12 = (f32) MUL_MAT(k-12{128, 32, 8, 1}, q-12{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ -3.0918, 0.9082, 4.9082, ...], [124.9082, 128.9082, 132.9082, ...], [252.9082, 256.9082, 260.9082, ...], ], [ [380.9082, 384.9082, 388.9082, ...], [508.9082, 512.9082, 516.9082, ...], [636.9082, 640.9082, 644.9082, ...], ], [ [764.9082, 768.9082, 772.9082, ...], [892.9082, 896.9082, 900.9082, ...], [1020.9082, 1024.9082, 1028.9082, ...], ], ... ] sum = 13848.521484 ggml_debug: kq_soft_max_ext-12 = (f32) SOFT_MAX(kq-12{32, 3, 48, 1}, CUDA0#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-12 = (f32) MUL_MAT(v-12{32, 128, 8, 1}, kq_soft_max_ext-12{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.3828, 4.3828, 8.3828, ...], [512.3828, 516.3828, 520.3828, ...], [1024.3828, 1028.3828, 1032.3828, ...], ], [ [1536.3828, 1540.3828, 1544.3828, ...], [2048.3828, 2052.3828, 2056.3828, ...], [2560.3828, 2564.3828, 2568.3828, ...], ], [ [3072.3828, 3076.3828, 3080.3828, ...], [3584.3828, 3588.3828, 3592.3828, ...], [4096.3828, 4100.3828, 4104.3828, ...], ], ... ] sum = 55414.335938 ggml_debug: kqv_merged-12 = (f32) PERMUTE(kqv-12{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.3828, 4.3828, 8.3828, ...], [1536.3828, 1540.3828, 1544.3828, ...], [3072.3828, 3076.3828, 3080.3828, ...], ... ], [ [512.3828, 516.3828, 520.3828, ...], [2048.3828, 2052.3828, 2056.3828, ...], [3584.3828, 3588.3828, 3592.3828, ...], ... ], [ [1024.3828, 1028.3828, 1032.3828, ...], [2560.3828, 2564.3828, 2568.3828, ...], [4096.3828, 4100.3828, 4104.3828, ...], ... ], ] sum = 55414.335938 ggml_debug: kqv_merged_cont-12 = (f32) CONT(kqv_merged-12{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.3828, 4.3828, 8.3828, ...], [24576.3828, 24580.3828, 24584.3828, ...], [49152.3828, 49156.3828, 49160.3828, ...], ], ] sum = 221223.437500 ggml_debug: kqv_out-12 = (f32) MUL_MAT(blk.12.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-12{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0052, 3.9948, 7.9948, ...], [24575.9941, 24579.9941, 24583.9941, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.968750 ggml_debug: norm-12 = (f32) NORM(kqv_out-12{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0454, 3.9546, 7.9546, ...], [24575.9551, 24579.9551, 24583.9551, ...], [49151.9531, 49155.9531, 49159.9531, ...], ], ] sum = 221219.578125 ggml_debug: attn_out_norm-12 = (f32) MUL(norm-12{6144, 3, 1, 1}, blk.12.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0136, 3.9864, 7.9864, ...], [24575.9863, 24579.9863, 24583.9863, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.875000 ggml_debug: ffn_moe_logits-12 = (f32) MUL_MAT(blk.12.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-12{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.1974, 3.8026, 7.8026, ...], [ 63.8026, 67.8026, 71.8026, ...], [127.8026, 131.8026, 135.8026, ...], ], ] sum = 610.223511 ggml_debug: ffn_moe_probs-12 = (f32) SOFT_MAX(ffn_moe_logits-12{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0485, 4.0485, 8.0485, ...], [ 64.0485, 68.0485, 72.0485, ...], [128.0485, 132.0485, 136.0485, ...], ], ] sum = 612.436401 ggml_debug: ffn_moe_argsort-12 = (i32) ARGSORT(ffn_moe_probs-12{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 9.0000, 13.0000, 17.0000, ...], [ 73.0000, 77.0000, 81.0000, ...], [137.0000, 141.0000, 145.0000, ...], ], ] sum = 693.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-12{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 9.0000, 13.0000, 17.0000, ...], [ 73.0000, 77.0000, 81.0000, ...], [137.0000, 141.0000, 145.0000, ...], ], ] sum = 693.000000 ggml_debug: ffn_moe_up-12 = (f32) MUL_MAT_ID(blk.12.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-12{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0290, 4.0290, 8.0290, ...], [43008.0273, 43012.0273, 43016.0273, ...], [86016.0312, 86020.0312, 86024.0312, ...], ], ] sum = 387108.281250 ggml_debug: ffn_moe_gate-12 = (f32) MUL_MAT_ID(blk.12.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-12{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.5057, 3.4943, 7.4943, ...], [43007.4961, 43011.4961, 43015.4961, ...], [86015.4922, 86019.4922, 86023.4922, ...], ], ] sum = 387103.468750 ggml_debug: ffn_moe_silu-12 = (f32) UNARY(ffn_moe_gate-12{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1903, 3.8097, 7.8097, ...], [43007.8086, 43011.8086, 43015.8086, ...], [86015.8125, 86019.8125, 86023.8125, ...], ], ] sum = 387106.312500 ggml_debug: ffn_moe_gate_par-12 = (f32) MUL(ffn_moe_up-12{10752, 3, 1, 1}, ffn_moe_silu-12{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0055, 3.9945, 7.9945, ...], [43007.9961, 43011.9961, 43015.9961, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.968750 ggml_debug: ffn_moe_down-12 = (f32) MUL_MAT_ID(blk.12.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-12{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2309, 3.7691, 7.7691, ...], [24575.7695, 24579.7695, 24583.7695, ...], [49151.7695, 49155.7695, 49159.7695, ...], ], ] sum = 221217.906250 ggml_debug: ffn_moe_probs-12 (reshaped) = (f32) RESHAPE(ffn_moe_probs-12{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0485], [ 4.0485], [ 8.0485], ... ], [ [ 64.0485], [ 68.0485], [ 72.0485], ... ], [ [128.0485], [132.0485], [136.0485], ... ], ] sum = 612.436401 ggml_debug: ffn_moe_weights-12 = (f32) GET_ROWS(ffn_moe_probs-12 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1037], [ 4.1037], [ 8.1037], ... ], [ [ 16.1037], [ 20.1037], [ 24.1037], ... ], [ [ 32.1037], [ 36.1037], [ 40.1037], ... ], ] sum = 180.933014 ggml_debug: ffn_moe_weights-12 (reshaped) = (f32) RESHAPE(ffn_moe_weights-12{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1037, 4.1037, 8.1037, ...], [ 16.1037, 20.1037, 24.1037, ...], [ 32.1037, 36.1037, 40.1037, ...], ], ] sum = 180.933014 ggml_debug: ffn_moe_weights_sum-12 = (f32) SUM_ROWS(ffn_moe_weights-12 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3670], [ 4.3670], [ 8.3670], ], ] sum = 13.101138 ggml_debug: ffn_moe_weights_norm-12 = (f32) DIV(ffn_moe_weights-12 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-12{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2824, 4.2824, 8.2824, ...], [ 16.2824, 20.2824, 24.2824, ...], [ 32.2824, 36.2824, 40.2824, ...], ], ] sum = 182.541931 ggml_debug: ffn_moe_weights_norm-12 (view) = (f32) VIEW(ffn_moe_weights_norm-12{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2824], [ 16.2824], [ 32.2824], ], ] sum = 48.847309 ggml_debug: ffn_moe_weighted-12 = (f32) MUL(ffn_moe_down-12{6144, 3, 1, 1}, ffn_moe_weights_norm-12 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0652, 3.9348, 7.9348, ...], [24575.9355, 24579.9355, 24583.9355, ...], [49151.9336, 49155.9336, 49159.9336, ...], ], ] sum = 221219.421875 ggml_debug: ffn_moe_up-12 = (f32) MUL_MAT_ID(blk.12.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-12{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1305, 3.8695, 7.8695, ...], [43007.8711, 43011.8711, 43015.8711, ...], [86015.8672, 86019.8672, 86023.8672, ...], ], ] sum = 387106.843750 ggml_debug: ffn_moe_gate-12 = (f32) MUL_MAT_ID(blk.12.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-12{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1107, 3.8893, 7.8893, ...], [43007.8906, 43011.8906, 43015.8906, ...], [86015.8906, 86019.8906, 86023.8906, ...], ], ] sum = 387107.000000 ggml_debug: ffn_moe_silu-12 = (f32) UNARY(ffn_moe_gate-12{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0523, 3.9477, 7.9477, ...], [43007.9492, 43011.9492, 43015.9492, ...], [86015.9453, 86019.9453, 86023.9453, ...], ], ] sum = 387107.500000 ggml_debug: ffn_moe_gate_par-12 = (f32) MUL(ffn_moe_up-12{10752, 3, 1, 1}, ffn_moe_silu-12{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0068, 4.0068, 8.0068, ...], [43008.0078, 43012.0078, 43016.0078, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.062500 ggml_debug: ffn_moe_down-12 = (f32) MUL_MAT_ID(blk.12.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-12{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2936, 3.7064, 7.7064, ...], [24575.7070, 24579.7070, 24583.7070, ...], [49151.7070, 49155.7070, 49159.7070, ...], ], ] sum = 221217.343750 ggml_debug: ffn_moe_weights_norm-12 (view) = (f32) VIEW(ffn_moe_weights_norm-12{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2584], [ 16.2584], [ 32.2584], ], ] sum = 48.775257 ggml_debug: ffn_moe_weighted-12 = (f32) MUL(ffn_moe_down-12{6144, 3, 1, 1}, ffn_moe_weights_norm-12 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0759, 3.9241, 7.9241, ...], [24575.9238, 24579.9238, 24583.9238, ...], [49151.9258, 49155.9258, 49159.9258, ...], ], ] sum = 221219.312500 ggml_debug: ffn_moe_out-12 = (f32) ADD(ffn_moe_weighted-12{6144, 3, 1, 1}, ffn_moe_weighted-12{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1411, 3.8589, 7.8589, ...], [24575.8594, 24579.8594, 24583.8594, ...], [49151.8594, 49155.8594, 49159.8594, ...], ], ] sum = 221218.734375 ggml_debug: ffn_moe_up-12 = (f32) MUL_MAT_ID(blk.12.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-12{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2210, 3.7790, 7.7790, ...], [43007.7773, 43011.7773, 43015.7773, ...], [86015.7812, 86019.7812, 86023.7812, ...], ], ] sum = 387106.031250 ggml_debug: ffn_moe_gate-12 = (f32) MUL_MAT_ID(blk.12.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-12{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.5036, 4.5036, 8.5036, ...], [43008.5039, 43012.5039, 43016.5039, ...], [86016.5000, 86020.5000, 86024.5000, ...], ], ] sum = 387112.500000 ggml_debug: ffn_moe_silu-12 = (f32) UNARY(ffn_moe_gate-12{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.3139, 4.3139, 8.3139, ...], [43008.3125, 43012.3125, 43016.3125, ...], [86016.3125, 86020.3125, 86024.3125, ...], ], ] sum = 387110.812500 ggml_debug: ffn_moe_gate_par-12 = (f32) MUL(ffn_moe_up-12{10752, 3, 1, 1}, ffn_moe_silu-12{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0694, 3.9306, 7.9306, ...], [43007.9297, 43011.9297, 43015.9297, ...], [86015.9297, 86019.9297, 86023.9297, ...], ], ] sum = 387107.375000 ggml_debug: ffn_moe_down-12 = (f32) MUL_MAT_ID(blk.12.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-12{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0832, 4.0832, 8.0832, ...], [24576.0840, 24580.0840, 24584.0840, ...], [49152.0820, 49156.0820, 49160.0820, ...], ], ] sum = 221220.734375 ggml_debug: ffn_moe_weights_norm-12 (view) = (f32) VIEW(ffn_moe_weights_norm-12{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2441], [ 16.2441], [ 32.2441], ], ] sum = 48.732208 ggml_debug: ffn_moe_weighted-12 = (f32) MUL(ffn_moe_down-12{6144, 3, 1, 1}, ffn_moe_weights_norm-12 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0203, 4.0203, 8.0203, ...], [24576.0195, 24580.0195, 24584.0195, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.171875 ggml_debug: ffn_moe_out-12 = (f32) ADD(ffn_moe_out-12{6144, 3, 1, 1}, ffn_moe_weighted-12{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1208, 3.8792, 7.8792, ...], [24575.8789, 24579.8789, 24583.8789, ...], [49151.8789, 49155.8789, 49159.8789, ...], ], ] sum = 221218.906250 ggml_debug: ffn_moe_up-12 = (f32) MUL_MAT_ID(blk.12.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-12{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.4230, 3.5770, 7.5770, ...], [43007.5781, 43011.5781, 43015.5781, ...], [86015.5781, 86019.5781, 86023.5781, ...], ], ] sum = 387104.187500 ggml_debug: ffn_moe_gate-12 = (f32) MUL_MAT_ID(blk.12.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-12{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3653, 3.6347, 7.6347, ...], [43007.6328, 43011.6328, 43015.6328, ...], [86015.6328, 86019.6328, 86023.6328, ...], ], ] sum = 387104.687500 ggml_debug: ffn_moe_silu-12 = (f32) UNARY(ffn_moe_gate-12{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1497, 3.8503, 7.8503, ...], [43007.8516, 43011.8516, 43015.8516, ...], [86015.8516, 86019.8516, 86023.8516, ...], ], ] sum = 387106.656250 ggml_debug: ffn_moe_gate_par-12 = (f32) MUL(ffn_moe_up-12{10752, 3, 1, 1}, ffn_moe_silu-12{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0633, 4.0633, 8.0633, ...], [43008.0625, 43012.0625, 43016.0625, ...], [86016.0625, 86020.0625, 86024.0625, ...], ], ] sum = 387108.562500 ggml_debug: ffn_moe_down-12 = (f32) MUL_MAT_ID(blk.12.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-12{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0214, 3.9786, 7.9786, ...], [24575.9785, 24579.9785, 24583.9785, ...], [49151.9805, 49155.9805, 49159.9805, ...], ], ] sum = 221219.812500 ggml_debug: ffn_moe_weights_norm-12 (view) = (f32) VIEW(ffn_moe_weights_norm-12{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2151], [ 16.2151], [ 32.2151], ], ] sum = 48.645229 ggml_debug: ffn_moe_weighted-12 = (f32) MUL(ffn_moe_down-12{6144, 3, 1, 1}, ffn_moe_weights_norm-12 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0046, 3.9954, 7.9954, ...], [24575.9961, 24579.9961, 24583.9961, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.968750 ggml_debug: ffn_moe_out-12 = (f32) ADD(ffn_moe_out-12{6144, 3, 1, 1}, ffn_moe_weighted-12{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1254, 3.8746, 7.8746, ...], [24575.8750, 24579.8750, 24583.8750, ...], [49151.8750, 49155.8750, 49159.8750, ...], ], ] sum = 221218.875000 ggml_debug: ffn_inp-12 = (f32) ADD(kqv_out-12{6144, 3, 1, 1}, l_out-11{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0250, 4.0250, 8.0250, ...], [24576.0254, 24580.0254, 24584.0254, ...], [49152.0234, 49156.0234, 49160.0234, ...], ], ] sum = 221220.218750 ggml_debug: l_out-12 = (f32) ADD(ffn_moe_out-12{6144, 3, 1, 1}, ffn_inp-12{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1004, 3.8996, 7.8996, ...], [24575.9004, 24579.9004, 24583.9004, ...], [49151.8984, 49155.8984, 49159.8984, ...], ], ] sum = 221219.093750 ggml_debug: norm-13 = (f32) NORM(l_out-12{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.2920, 3.7080, 7.7080, ...], [24575.7070, 24579.7070, 24583.7070, ...], [49151.7070, 49155.7070, 49159.7070, ...], ], ] sum = 221217.359375 ggml_debug: attn_norm-13 = (f32) MUL(norm-13{6144, 3, 1, 1}, blk.13.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0593, 3.9407, 7.9407, ...], [24575.9414, 24579.9414, 24583.9414, ...], [49151.9414, 49155.9414, 49159.9414, ...], ], ] sum = 221219.453125 ggml_debug: wqkv-13 = (f32) MUL_MAT(blk.13.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-13{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.0941, 4.0941, 8.0941, ...], [32768.0938, 32772.0938, 32776.0938, ...], [65536.0938, 65540.0938, 65544.0938, ...], ], ] sum = 294948.843750 ggml_debug: wqkv_clamped-13 = (f32) CLAMP(wqkv-13{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.0941, 4.0941, 8.0941, ...], [32768.0938, 32772.0938, 32776.0938, ...], [65536.0938, 65540.0938, 65544.0938, ...], ], ] sum = 294948.843750 ggml_debug: wqkv_clamped-13 (view) = (f32) VIEW(wqkv_clamped-13{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0941, 4.0941, 8.0941, ...], [32768.0938, 32772.0938, 32776.0938, ...], [65536.0938, 65540.0938, 65544.0938, ...], ], ] sum = 294948.843750 ggml_debug: Qcur-13 = (f32) CONT(wqkv_clamped-13 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0941, 4.0941, 8.0941, ...], [24576.0938, 24580.0938, 24584.0938, ...], [49152.0938, 49156.0938, 49160.0938, ...], ], ] sum = 221220.843750 ggml_debug: Qcur-13 (reshaped) = (f32) RESHAPE(Qcur-13{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0941, 4.0941, 8.0941, ...], [512.0941, 516.0941, 520.0941, ...], [1024.0941, 1028.0941, 1032.0941, ...], ... ], [ [24576.0938, 24580.0938, 24584.0938, ...], [25088.0938, 25092.0938, 25096.0938, ...], [25600.0938, 25604.0938, 25608.0938, ...], ... ], [ [49152.0938, 49156.0938, 49160.0938, ...], [49664.0938, 49668.0938, 49672.0938, ...], [50176.0938, 50180.0938, 50184.0938, ...], ... ], ] sum = 677486.625000 ggml_debug: Qcur-13 = (f32) ROPE(Qcur-13 (reshaped){128, 48, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.0941, 4.0941, 8.0941, ...], [512.0941, 516.0941, 520.0941, ...], [1024.0941, 1028.0941, 1032.0941, ...], ... ], [ [24576.0938, 24580.0938, 24584.0938, ...], [25088.0938, 25092.0938, 25096.0938, ...], [25600.0938, 25604.0938, 25608.0938, ...], ... ], [ [49152.0938, 49156.0938, 49160.0938, ...], [49664.0938, 49668.0938, 49672.0938, ...], [50176.0938, 50180.0938, 50184.0938, ...], ... ], ] sum = 677486.625000 ggml_debug: wqkv_clamped-13 (view) = (f32) VIEW(wqkv_clamped-13{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.9792, 3.0208, 7.0208, ...], [32767.0215, 32771.0195, 32775.0195, ...], [65535.0195, 65539.0234, 65543.0234, ...], ], ] sum = 294939.187500 ggml_debug: Kcur-13 = (f32) CONT(wqkv_clamped-13 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.9792, 3.0208, 7.0208, ...], [4095.0208, 4099.0210, 4103.0210, ...], [8191.0210, 8195.0205, 8199.0205, ...], ], ] sum = 36891.187500 ggml_debug: Kcur-13 (reshaped) = (f32) RESHAPE(Kcur-13{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ -0.9792, 3.0208, 7.0208, ...], [511.0208, 515.0208, 519.0208, ...], [1023.0208, 1027.0208, 1031.0208, ...], ... ], [ [4095.0208, 4099.0210, 4103.0210, ...], [4607.0210, 4611.0210, 4615.0210, ...], [5119.0210, 5123.0210, 5127.0210, ...], ... ], [ [8191.0210, 8195.0205, 8199.0205, ...], [8703.0205, 8707.0205, 8711.0205, ...], [9215.0205, 9219.0205, 9223.0205, ...], ... ], ] sum = 124497.578125 ggml_debug: Kcur-13 = (f32) ROPE(Kcur-13 (reshaped){128, 8, 3, 1}, CUDA0#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ -0.9792, 3.0208, 7.0208, ...], [511.0208, 515.0208, 519.0208, ...], [1023.0208, 1027.0208, 1031.0208, ...], ... ], [ [4095.0208, 4099.0210, 4103.0210, ...], [4607.0210, 4611.0210, 4615.0210, ...], [5119.0210, 5123.0210, 5127.0210, ...], ... ], [ [8191.0210, 8195.0205, 8199.0205, ...], [8703.0205, 8707.0205, 8711.0205, ...], [9215.0205, 9219.0205, 9223.0205, ...], ... ], ] sum = 124497.578125 ggml_debug: wqkv_clamped-13 (view) = (f32) VIEW(wqkv_clamped-13{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.2212, 4.2212, 8.2212, ...], [32768.2227, 32772.2227, 32776.2227, ...], [65536.2188, 65540.2188, 65544.2188, ...], ], ] sum = 294950.000000 ggml_debug: Vcur-13 = (f32) CONT(wqkv_clamped-13 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.2212, 4.2212, 8.2212, ...], [4096.2212, 4100.2212, 4104.2212, ...], [8192.2217, 8196.2217, 8200.2217, ...], ], ] sum = 36901.992188 ggml_debug: k_cache_view-13 = (f16) VIEW(cache_k_l13{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-13 (copy of Kcur-13) = (f16) CPY(Kcur-13{128, 8, 3, 1}, k_cache_view-13{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ -0.9790, -0.9800, -0.9810, ...], ], ] sum = -2.939941 ggml_debug: v_cur_t-13 = (f32) TRANSPOSE(Vcur-13{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.2212, 4096.2212, 8192.2217], [ 4.2212, 4100.2212, 8196.2217], [ 8.2212, 4104.2212, 8200.2217], ... ], ] sum = 36901.992188 ggml_debug: v_cache_view-13 = (f16) VIEW(cache_v_l13{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-13 (copy of v_cur_t-13) = (f16) CPY(v_cur_t-13{3, 1024, 1, 1}, v_cache_view-13{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.2212, 0.2214, 0.2217], [ 0.4424, 0.4429, 0.4434], [ 0.8848, 0.8857, 0.8867], ... ], ] sum = 4.650146 ggml_debug: v-13 = (f16) VIEW(cache_v_l13{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.2212, 0.2214, 0.2217, ...], [ 0.4424, 0.4429, 0.4434, ...], [ 0.8848, 0.8857, 0.8867, ...], ... ], [ [ 0.2212, 0.2214, 0.2217, ...], [ 0.4424, 0.4429, 0.4434, ...], [ 0.8848, 0.8857, 0.8867, ...], ... ], [ [ 0.2212, 0.2214, 0.2217, ...], [ 0.4424, 0.4429, 0.4434, ...], [ 0.8848, 0.8857, 0.8867, ...], ... ], ... ] sum = 13.950439 ggml_debug: k-13 = (f16) VIEW(cache_k_l13{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ -0.9790, -0.9800, -0.9810, ...], [ -3.9160, -3.9199, -3.9238, ...], [-15.6641, -15.6797, -15.6953, ...], ... ], [ [ -1.2080, -1.2100, -1.2119, ...], [ -4.8320, -4.8398, -4.8477, ...], [-19.3281, -19.3594, -19.3906, ...], ... ], [ [ -1.4580, -1.4600, -1.4619, ...], [ -5.8320, -5.8398, -5.8477, ...], [-23.3281, -23.3594, -23.3906, ...], ... ], ... ] sum = -229.943848 ggml_debug: q-13 = (f32) PERMUTE(Qcur-13{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.0941, 4.0941, 8.0941, ...], [24576.0938, 24580.0938, 24584.0938, ...], [49152.0938, 49156.0938, 49160.0938, ...], ], [ [512.0941, 516.0941, 520.0941, ...], [25088.0938, 25092.0938, 25096.0938, ...], [49664.0938, 49668.0938, 49672.0938, ...], ], [ [1024.0941, 1028.0941, 1032.0941, ...], [25600.0938, 25604.0938, 25608.0938, ...], [50176.0938, 50180.0938, 50184.0938, ...], ], ... ] sum = 677486.625000 ggml_debug: kq-13 = (f32) MUL_MAT(k-13{128, 32, 8, 1}, q-13{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 42.3125, 46.3125, 50.3125, ...], [170.3125, 174.3125, 178.3125, ...], [298.3125, 302.3125, 306.3125, ...], ], [ [426.3125, 430.3125, 434.3125, ...], [554.3125, 558.3125, 562.3125, ...], [682.3125, 686.3125, 690.3125, ...], ], [ [810.3125, 814.3125, 818.3125, ...], [938.3125, 942.3125, 946.3125, ...], [1066.3125, 1070.3125, 1074.3125, ...], ], ... ] sum = 15074.437500 ggml_debug: kq_soft_max_ext-13 = (f32) SOFT_MAX(kq-13{32, 3, 48, 1}, CUDA0#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-13 = (f32) MUL_MAT(v-13{32, 128, 8, 1}, kq_soft_max_ext-13{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.2212, 4.2212, 8.2212, ...], [512.2212, 516.2212, 520.2212, ...], [1024.2212, 1028.2212, 1032.2212, ...], ], [ [1536.2212, 1540.2212, 1544.2212, ...], [2048.2212, 2052.2212, 2056.2212, ...], [2560.2212, 2564.2212, 2568.2212, ...], ], [ [3072.2212, 3076.2212, 3080.2212, ...], [3584.2212, 3588.2212, 3592.2212, ...], [4096.2212, 4100.2212, 4104.2212, ...], ], ... ] sum = 55409.976562 ggml_debug: kqv_merged-13 = (f32) PERMUTE(kqv-13{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.2212, 4.2212, 8.2212, ...], [1536.2212, 1540.2212, 1544.2212, ...], [3072.2212, 3076.2212, 3080.2212, ...], ... ], [ [512.2212, 516.2212, 520.2212, ...], [2048.2212, 2052.2212, 2056.2212, ...], [3584.2212, 3588.2212, 3592.2212, ...], ... ], [ [1024.2212, 1028.2212, 1032.2212, ...], [2560.2212, 2564.2212, 2568.2212, ...], [4096.2212, 4100.2212, 4104.2212, ...], ... ], ] sum = 55409.980469 ggml_debug: kqv_merged_cont-13 = (f32) CONT(kqv_merged-13{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.2212, 4.2212, 8.2212, ...], [24576.2207, 24580.2207, 24584.2207, ...], [49152.2227, 49156.2227, 49160.2227, ...], ], ] sum = 221221.984375 ggml_debug: kqv_out-13 = (f32) MUL_MAT(blk.13.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-13{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1086, 4.1086, 8.1086, ...], [24576.1094, 24580.1094, 24584.1094, ...], [49152.1094, 49156.1094, 49160.1094, ...], ], ] sum = 221220.984375 ggml_debug: norm-13 = (f32) NORM(kqv_out-13{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.7829, 4.7829, 8.7829, ...], [24576.7832, 24580.7832, 24584.7832, ...], [49152.7812, 49156.7812, 49160.7812, ...], ], ] sum = 221227.031250 ggml_debug: attn_out_norm-13 = (f32) MUL(norm-13{6144, 3, 1, 1}, blk.13.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2477, 4.2477, 8.2477, ...], [24576.2480, 24580.2480, 24584.2480, ...], [49152.2461, 49156.2461, 49160.2461, ...], ], ] sum = 221222.234375 ggml_debug: ffn_moe_logits-13 = (f32) MUL_MAT(blk.13.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-13{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.0300, 3.9700, 7.9700, ...], [ 63.9700, 67.9700, 71.9700, ...], [127.9700, 131.9700, 135.9700, ...], ], ] sum = 611.729736 ggml_debug: ffn_moe_probs-13 = (f32) SOFT_MAX(ffn_moe_logits-13{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0557, 4.0557, 8.0557, ...], [ 64.0557, 68.0557, 72.0557, ...], [128.0557, 132.0557, 136.0557, ...], ], ] sum = 612.501404 ggml_debug: ffn_moe_argsort-13 = (i32) ARGSORT(ffn_moe_probs-13{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 10.0000, 14.0000, 18.0000, ...], [ 74.0000, 78.0000, 82.0000, ...], [138.0000, 142.0000, 146.0000, ...], ], ] sum = 702.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-13{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 10.0000, 14.0000, 18.0000, ...], [ 74.0000, 78.0000, 82.0000, ...], [138.0000, 142.0000, 146.0000, ...], ], ] sum = 702.000000 ggml_debug: ffn_moe_up-13 = (f32) MUL_MAT_ID(blk.13.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-13{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0426, 3.9574, 7.9574, ...], [43007.9570, 43011.9570, 43015.9570, ...], [86015.9609, 86019.9609, 86023.9609, ...], ], ] sum = 387107.625000 ggml_debug: ffn_moe_gate-13 = (f32) MUL_MAT_ID(blk.13.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-13{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2122, 3.7878, 7.7878, ...], [43007.7891, 43011.7891, 43015.7891, ...], [86015.7891, 86019.7891, 86023.7891, ...], ], ] sum = 387106.093750 ggml_debug: ffn_moe_silu-13 = (f32) UNARY(ffn_moe_gate-13{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0949, 3.9051, 7.9051, ...], [43007.9062, 43011.9062, 43015.9062, ...], [86015.9062, 86019.9062, 86023.9062, ...], ], ] sum = 387107.156250 ggml_debug: ffn_moe_gate_par-13 = (f32) MUL(ffn_moe_up-13{10752, 3, 1, 1}, ffn_moe_silu-13{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0040, 4.0040, 8.0040, ...], [43008.0039, 43012.0039, 43016.0039, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.031250 ggml_debug: ffn_moe_down-13 = (f32) MUL_MAT_ID(blk.13.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-13{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0972, 3.9028, 7.9028, ...], [24575.9023, 24579.9023, 24583.9023, ...], [49151.9023, 49155.9023, 49159.9023, ...], ], ] sum = 221219.140625 ggml_debug: ffn_moe_probs-13 (reshaped) = (f32) RESHAPE(ffn_moe_probs-13{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0557], [ 4.0557], [ 8.0557], ... ], [ [ 64.0557], [ 68.0557], [ 72.0557], ... ], [ [128.0557], [132.0557], [136.0557], ... ], ] sum = 612.501404 ggml_debug: ffn_moe_weights-13 = (f32) GET_ROWS(ffn_moe_probs-13 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1014], [ 4.1014], [ 8.1014], ... ], [ [ 16.1014], [ 20.1014], [ 24.1014], ... ], [ [ 32.1014], [ 36.1014], [ 40.1014], ... ], ] sum = 180.912964 ggml_debug: ffn_moe_weights-13 (reshaped) = (f32) RESHAPE(ffn_moe_weights-13{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1014, 4.1014, 8.1014, ...], [ 16.1014, 20.1014, 24.1014, ...], [ 32.1014, 36.1014, 40.1014, ...], ], ] sum = 180.912964 ggml_debug: ffn_moe_weights_sum-13 = (f32) SUM_ROWS(ffn_moe_weights-13 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3189], [ 4.3189], [ 8.3189], ], ] sum = 12.956714 ggml_debug: ffn_moe_weights_norm-13 = (f32) DIV(ffn_moe_weights-13 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-13{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3181, 4.3181, 8.3181, ...], [ 16.3181, 20.3181, 24.3181, ...], [ 32.3181, 36.3181, 40.3181, ...], ], ] sum = 182.862823 ggml_debug: ffn_moe_weights_norm-13 (view) = (f32) VIEW(ffn_moe_weights_norm-13{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3181], [ 16.3181], [ 32.3181], ], ] sum = 48.954281 ggml_debug: ffn_moe_weighted-13 = (f32) MUL(ffn_moe_down-13{6144, 3, 1, 1}, ffn_moe_weights_norm-13 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0309, 3.9691, 7.9691, ...], [24575.9688, 24579.9688, 24583.9688, ...], [49151.9688, 49155.9688, 49159.9688, ...], ], ] sum = 221219.718750 ggml_debug: ffn_moe_up-13 = (f32) MUL_MAT_ID(blk.13.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-13{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.6032, 4.6032, 8.6032, ...], [43008.6016, 43012.6016, 43016.6016, ...], [86016.6016, 86020.6016, 86024.6016, ...], ], ] sum = 387113.406250 ggml_debug: ffn_moe_gate-13 = (f32) MUL_MAT_ID(blk.13.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-13{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0923, 3.9077, 7.9077, ...], [43007.9062, 43011.9062, 43015.9062, ...], [86015.9062, 86019.9062, 86023.9062, ...], ], ] sum = 387107.156250 ggml_debug: ffn_moe_silu-13 = (f32) UNARY(ffn_moe_gate-13{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0440, 3.9560, 7.9560, ...], [43007.9570, 43011.9570, 43015.9570, ...], [86015.9531, 86019.9531, 86023.9531, ...], ], ] sum = 387107.562500 ggml_debug: ffn_moe_gate_par-13 = (f32) MUL(ffn_moe_up-13{10752, 3, 1, 1}, ffn_moe_silu-13{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0265, 3.9735, 7.9735, ...], [43007.9727, 43011.9727, 43015.9727, ...], [86015.9766, 86019.9766, 86023.9766, ...], ], ] sum = 387107.750000 ggml_debug: ffn_moe_down-13 = (f32) MUL_MAT_ID(blk.13.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-13{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0212, 3.9788, 7.9788, ...], [24575.9785, 24579.9785, 24583.9785, ...], [49151.9805, 49155.9805, 49159.9805, ...], ], ] sum = 221219.812500 ggml_debug: ffn_moe_weights_norm-13 (view) = (f32) VIEW(ffn_moe_weights_norm-13{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2352], [ 16.2352], [ 32.2352], ], ] sum = 48.705708 ggml_debug: ffn_moe_weighted-13 = (f32) MUL(ffn_moe_down-13{6144, 3, 1, 1}, ffn_moe_weights_norm-13 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0050, 3.9950, 7.9950, ...], [24575.9941, 24579.9941, 24583.9941, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.968750 ggml_debug: ffn_moe_out-13 = (f32) ADD(ffn_moe_weighted-13{6144, 3, 1, 1}, ffn_moe_weighted-13{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0359, 3.9641, 7.9641, ...], [24575.9648, 24579.9648, 24583.9648, ...], [49151.9648, 49155.9648, 49159.9648, ...], ], ] sum = 221219.687500 ggml_debug: ffn_moe_up-13 = (f32) MUL_MAT_ID(blk.13.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-13{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.8934, 4.8934, 8.8934, ...], [43008.8945, 43012.8945, 43016.8945, ...], [86016.8906, 86020.8906, 86024.8906, ...], ], ] sum = 387116.000000 ggml_debug: ffn_moe_gate-13 = (f32) MUL_MAT_ID(blk.13.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-13{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2923, 3.7077, 7.7077, ...], [43007.7070, 43011.7070, 43015.7070, ...], [86015.7109, 86019.7109, 86023.7109, ...], ], ] sum = 387105.406250 ggml_debug: ffn_moe_silu-13 = (f32) UNARY(ffn_moe_gate-13{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1249, 3.8751, 7.8751, ...], [43007.8750, 43011.8750, 43015.8750, ...], [86015.8750, 86019.8750, 86023.8750, ...], ], ] sum = 387106.875000 ggml_debug: ffn_moe_gate_par-13 = (f32) MUL(ffn_moe_up-13{10752, 3, 1, 1}, ffn_moe_silu-13{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1116, 3.8884, 7.8884, ...], [43007.8867, 43011.8867, 43015.8867, ...], [86015.8906, 86019.8906, 86023.8906, ...], ], ] sum = 387107.000000 ggml_debug: ffn_moe_down-13 = (f32) MUL_MAT_ID(blk.13.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-13{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1131, 3.8869, 7.8869, ...], [24575.8867, 24579.8867, 24583.8867, ...], [49151.8867, 49155.8867, 49159.8867, ...], ], ] sum = 221218.984375 ggml_debug: ffn_moe_weights_norm-13 (view) = (f32) VIEW(ffn_moe_weights_norm-13{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2320], [ 16.2320], [ 32.2320], ], ] sum = 48.696125 ggml_debug: ffn_moe_weighted-13 = (f32) MUL(ffn_moe_down-13{6144, 3, 1, 1}, ffn_moe_weights_norm-13 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0262, 3.9738, 7.9738, ...], [24575.9746, 24579.9746, 24583.9746, ...], [49151.9727, 49155.9727, 49159.9727, ...], ], ] sum = 221219.750000 ggml_debug: ffn_moe_out-13 = (f32) ADD(ffn_moe_out-13{6144, 3, 1, 1}, ffn_moe_weighted-13{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0621, 3.9379, 7.9379, ...], [24575.9375, 24579.9375, 24583.9375, ...], [49151.9375, 49155.9375, 49159.9375, ...], ], ] sum = 221219.437500 ggml_debug: ffn_moe_up-13 = (f32) MUL_MAT_ID(blk.13.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-13{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.5712, 3.4288, 7.4288, ...], [43007.4297, 43011.4297, 43015.4297, ...], [86015.4297, 86019.4297, 86023.4297, ...], ], ] sum = 387102.875000 ggml_debug: ffn_moe_gate-13 = (f32) MUL_MAT_ID(blk.13.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-13{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.4200, 3.5800, 7.5800, ...], [43007.5781, 43011.5781, 43015.5781, ...], [86015.5781, 86019.5781, 86023.5781, ...], ], ] sum = 387104.187500 ggml_debug: ffn_moe_silu-13 = (f32) UNARY(ffn_moe_gate-13{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1665, 3.8335, 7.8335, ...], [43007.8320, 43011.8320, 43015.8320, ...], [86015.8359, 86019.8359, 86023.8359, ...], ], ] sum = 387106.531250 ggml_debug: ffn_moe_gate_par-13 = (f32) MUL(ffn_moe_up-13{10752, 3, 1, 1}, ffn_moe_silu-13{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0951, 4.0951, 8.0951, ...], [43008.0938, 43012.0938, 43016.0938, ...], [86016.0938, 86020.0938, 86024.0938, ...], ], ] sum = 387108.843750 ggml_debug: ffn_moe_down-13 = (f32) MUL_MAT_ID(blk.13.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-13{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0746, 4.0746, 8.0746, ...], [24576.0742, 24580.0742, 24584.0742, ...], [49152.0742, 49156.0742, 49160.0742, ...], ], ] sum = 221220.687500 ggml_debug: ffn_moe_weights_norm-13 (view) = (f32) VIEW(ffn_moe_weights_norm-13{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2146], [ 16.2146], [ 32.2146], ], ] sum = 48.643890 ggml_debug: ffn_moe_weighted-13 = (f32) MUL(ffn_moe_down-13{6144, 3, 1, 1}, ffn_moe_weights_norm-13 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0160, 4.0160, 8.0160, ...], [24576.0156, 24580.0156, 24584.0156, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: ffn_moe_out-13 = (f32) ADD(ffn_moe_out-13{6144, 3, 1, 1}, ffn_moe_weighted-13{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0461, 3.9539, 7.9539, ...], [24575.9531, 24579.9531, 24583.9531, ...], [49151.9531, 49155.9531, 49159.9531, ...], ], ] sum = 221219.578125 ggml_debug: ffn_inp-13 = (f32) ADD(kqv_out-13{6144, 3, 1, 1}, l_out-12{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0081, 4.0081, 8.0081, ...], [24576.0078, 24580.0078, 24584.0078, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: l_out-13 = (f32) ADD(ffn_moe_out-13{6144, 3, 1, 1}, ffn_inp-13{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0380, 3.9620, 7.9620, ...], [24575.9629, 24579.9629, 24583.9629, ...], [49151.9609, 49155.9609, 49159.9609, ...], ], ] sum = 221219.656250 ggml_debug: norm-14 = (f32) NORM(CUDA1#l_out-13#0{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0952, 3.9048, 7.9048, ...], [24575.9043, 24579.9043, 24583.9043, ...], [49151.9062, 49155.9062, 49159.9062, ...], ], ] sum = 221219.156250 ggml_debug: attn_norm-14 = (f32) MUL(norm-14{6144, 3, 1, 1}, blk.14.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0225, 3.9775, 7.9775, ...], [24575.9766, 24579.9766, 24583.9766, ...], [49151.9766, 49155.9766, 49159.9766, ...], ], ] sum = 221219.781250 ggml_debug: wqkv-14 = (f32) MUL_MAT(blk.14.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-14{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.9626, 4.9626, 8.9626, ...], [32768.9609, 32772.9609, 32776.9609, ...], [65536.9609, 65540.9609, 65544.9609, ...], ], ] sum = 294956.656250 ggml_debug: wqkv_clamped-14 = (f32) CLAMP(wqkv-14{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.9626, 4.9626, 8.9626, ...], [32768.9609, 32772.9609, 32776.9609, ...], [65536.9609, 65540.9609, 65544.9609, ...], ], ] sum = 294956.656250 ggml_debug: wqkv_clamped-14 (view) = (f32) VIEW(wqkv_clamped-14{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.9626, 4.9626, 8.9626, ...], [32768.9609, 32772.9609, 32776.9609, ...], [65536.9609, 65540.9609, 65544.9609, ...], ], ] sum = 294956.656250 ggml_debug: Qcur-14 = (f32) CONT(wqkv_clamped-14 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.9626, 4.9626, 8.9626, ...], [24576.9629, 24580.9629, 24584.9629, ...], [49152.9609, 49156.9609, 49160.9609, ...], ], ] sum = 221228.656250 ggml_debug: Qcur-14 (reshaped) = (f32) RESHAPE(Qcur-14{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.9626, 4.9626, 8.9626, ...], [512.9626, 516.9626, 520.9626, ...], [1024.9625, 1028.9625, 1032.9625, ...], ... ], [ [24576.9629, 24580.9629, 24584.9629, ...], [25088.9629, 25092.9629, 25096.9629, ...], [25600.9629, 25604.9629, 25608.9629, ...], ... ], [ [49152.9609, 49156.9609, 49160.9609, ...], [49664.9609, 49668.9609, 49672.9609, ...], [50176.9609, 50180.9609, 50184.9609, ...], ... ], ] sum = 677509.937500 ggml_debug: Qcur-14 = (f32) ROPE(Qcur-14 (reshaped){128, 48, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.9626, 4.9626, 8.9626, ...], [512.9626, 516.9626, 520.9626, ...], [1024.9625, 1028.9625, 1032.9625, ...], ... ], [ [24576.9629, 24580.9629, 24584.9629, ...], [25088.9629, 25092.9629, 25096.9629, ...], [25600.9629, 25604.9629, 25608.9629, ...], ... ], [ [49152.9609, 49156.9609, 49160.9609, ...], [49664.9609, 49668.9609, 49672.9609, ...], [50176.9609, 50180.9609, 50184.9609, ...], ... ], ] sum = 677509.937500 ggml_debug: wqkv_clamped-14 (view) = (f32) VIEW(wqkv_clamped-14{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.8786, 4.8786, 8.8786, ...], [32768.8789, 32772.8789, 32776.8789, ...], [65536.8750, 65540.8750, 65544.8750, ...], ], ] sum = 294955.875000 ggml_debug: Kcur-14 = (f32) CONT(wqkv_clamped-14 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.8786, 4.8786, 8.8786, ...], [4096.8784, 4100.8784, 4104.8784, ...], [8192.8789, 8196.8789, 8200.8789, ...], ], ] sum = 36907.906250 ggml_debug: Kcur-14 (reshaped) = (f32) RESHAPE(Kcur-14{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 0.8786, 4.8786, 8.8786, ...], [512.8786, 516.8786, 520.8786, ...], [1024.8785, 1028.8785, 1032.8785, ...], ... ], [ [4096.8784, 4100.8784, 4104.8784, ...], [4608.8784, 4612.8784, 4616.8784, ...], [5120.8784, 5124.8784, 5128.8784, ...], ... ], [ [8192.8789, 8196.8789, 8200.8789, ...], [8704.8789, 8708.8789, 8712.8789, ...], [9216.8789, 9220.8789, 9224.8789, ...], ... ], ] sum = 124547.703125 ggml_debug: Kcur-14 = (f32) ROPE(Kcur-14 (reshaped){128, 8, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 0.8786, 4.8786, 8.8786, ...], [512.8786, 516.8786, 520.8786, ...], [1024.8785, 1028.8785, 1032.8785, ...], ... ], [ [4096.8784, 4100.8784, 4104.8784, ...], [4608.8784, 4612.8784, 4616.8784, ...], [5120.8784, 5124.8784, 5128.8784, ...], ... ], [ [8192.8789, 8196.8789, 8200.8789, ...], [8704.8789, 8708.8789, 8712.8789, ...], [9216.8789, 9220.8789, 9224.8789, ...], ... ], ] sum = 124547.703125 ggml_debug: wqkv_clamped-14 (view) = (f32) VIEW(wqkv_clamped-14{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0122, 4.0122, 8.0122, ...], [32768.0117, 32772.0117, 32776.0117, ...], [65536.0156, 65540.0156, 65544.0156, ...], ], ] sum = 294948.125000 ggml_debug: Vcur-14 = (f32) CONT(wqkv_clamped-14 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0122, 4.0122, 8.0122, ...], [4096.0122, 4100.0122, 4104.0122, ...], [8192.0117, 8196.0117, 8200.0117, ...], ], ] sum = 36900.109375 ggml_debug: k_cache_view-14 = (f16) VIEW(cache_k_l14{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-14 (copy of Kcur-14) = (f16) CPY(Kcur-14{128, 8, 3, 1}, k_cache_view-14{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 0.8784, 0.8794, 0.8804, ...], ], ] sum = 2.638184 ggml_debug: v_cur_t-14 = (f32) TRANSPOSE(Vcur-14{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0122, 4096.0122, 8192.0117], [ 4.0122, 4100.0122, 8196.0117], [ 8.0122, 4104.0122, 8200.0117], ... ], ] sum = 36900.109375 ggml_debug: v_cache_view-14 = (f16) VIEW(cache_v_l14{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-14 (copy of v_cur_t-14) = (f16) CPY(v_cur_t-14{3, 1024, 1, 1}, v_cache_view-14{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.0122, 0.0122, 0.0122], [ 0.0244, 0.0244, 0.0245], [ 0.0488, 0.0489, 0.0489], ... ], ] sum = 0.256508 ggml_debug: v-14 = (f16) VIEW(cache_v_l14{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.0122, 0.0122, 0.0122, ...], [ 0.0244, 0.0244, 0.0245, ...], [ 0.0488, 0.0489, 0.0489, ...], ... ], [ [ 0.0122, 0.0122, 0.0122, ...], [ 0.0244, 0.0244, 0.0245, ...], [ 0.0488, 0.0489, 0.0489, ...], ... ], [ [ 0.0122, 0.0122, 0.0122, ...], [ 0.0244, 0.0244, 0.0245, ...], [ 0.0488, 0.0489, 0.0489, ...], ... ], ... ] sum = 0.769524 ggml_debug: k-14 = (f16) VIEW(cache_k_l14{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 0.8784, 0.8794, 0.8804, ...], [ 3.5137, 3.5176, 3.5215, ...], [ 14.0547, 14.0703, 14.0859, ...], ... ], [ [ 1.0068, 1.0088, 1.0107, ...], [ 4.0273, 4.0352, 4.0430, ...], [ 16.1094, 16.1406, 16.1719, ...], ... ], [ [ 1.2568, 1.2588, 1.2607, ...], [ 5.0273, 5.0352, 5.0430, ...], [ 20.1094, 20.1406, 20.1719, ...], ... ], ... ] sum = 198.259277 ggml_debug: q-14 = (f32) PERMUTE(Qcur-14{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.9626, 4.9626, 8.9626, ...], [24576.9629, 24580.9629, 24584.9629, ...], [49152.9609, 49156.9609, 49160.9609, ...], ], [ [512.9626, 516.9626, 520.9626, ...], [25088.9629, 25092.9629, 25096.9629, ...], [49664.9609, 49668.9609, 49672.9609, ...], ], [ [1024.9625, 1028.9625, 1032.9625, ...], [25600.9629, 25604.9629, 25608.9629, ...], [50176.9609, 50180.9609, 50184.9609, ...], ], ... ] sum = 677510.000000 ggml_debug: kq-14 = (f32) MUL_MAT(k-14{128, 32, 8, 1}, q-14{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 36.4688, 40.4688, 44.4688, ...], [164.4688, 168.4688, 172.4688, ...], [292.4688, 296.4688, 300.4688, ...], ], [ [420.4688, 424.4688, 428.4688, ...], [548.4688, 552.4688, 556.4688, ...], [676.4688, 680.4688, 684.4688, ...], ], [ [804.4688, 808.4688, 812.4688, ...], [932.4688, 936.4688, 940.4688, ...], [1060.4688, 1064.4688, 1068.4688, ...], ], ... ] sum = 14916.656250 ggml_debug: kq_soft_max_ext-14 = (f32) SOFT_MAX(kq-14{32, 3, 48, 1}, CUDA1#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-14 = (f32) MUL_MAT(v-14{32, 128, 8, 1}, kq_soft_max_ext-14{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.0122, 4.0122, 8.0122, ...], [512.0122, 516.0122, 520.0122, ...], [1024.0122, 1028.0122, 1032.0122, ...], ], [ [1536.0122, 1540.0122, 1544.0122, ...], [2048.0122, 2052.0122, 2056.0122, ...], [2560.0122, 2564.0122, 2568.0122, ...], ], [ [3072.0122, 3076.0122, 3080.0122, ...], [3584.0122, 3588.0122, 3592.0122, ...], [4096.0122, 4100.0122, 4104.0122, ...], ], ... ] sum = 55404.324219 ggml_debug: kqv_merged-14 = (f32) PERMUTE(kqv-14{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0122, 4.0122, 8.0122, ...], [1536.0122, 1540.0122, 1544.0122, ...], [3072.0122, 3076.0122, 3080.0122, ...], ... ], [ [512.0122, 516.0122, 520.0122, ...], [2048.0122, 2052.0122, 2056.0122, ...], [3584.0122, 3588.0122, 3592.0122, ...], ... ], [ [1024.0122, 1028.0122, 1032.0122, ...], [2560.0122, 2564.0122, 2568.0122, ...], [4096.0122, 4100.0122, 4104.0122, ...], ... ], ] sum = 55404.320312 ggml_debug: kqv_merged_cont-14 = (f32) CONT(kqv_merged-14{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0122, 4.0122, 8.0122, ...], [24576.0117, 24580.0117, 24584.0117, ...], [49152.0117, 49156.0117, 49160.0117, ...], ], ] sum = 221220.125000 ggml_debug: kqv_out-14 = (f32) MUL_MAT(blk.14.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-14{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2228, 4.2228, 8.2228, ...], [24576.2227, 24580.2227, 24584.2227, ...], [49152.2227, 49156.2227, 49160.2227, ...], ], ] sum = 221222.000000 ggml_debug: norm-14 = (f32) NORM(kqv_out-14{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 2.1097, 6.1097, 10.1097, ...], [24578.1094, 24582.1094, 24586.1094, ...], [49154.1094, 49158.1094, 49162.1094, ...], ], ] sum = 221238.984375 ggml_debug: attn_out_norm-14 = (f32) MUL(norm-14{6144, 3, 1, 1}, blk.14.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.6840, 4.6840, 8.6840, ...], [24576.6836, 24580.6836, 24584.6836, ...], [49152.6836, 49156.6836, 49160.6836, ...], ], ] sum = 221226.171875 ggml_debug: ffn_moe_logits-14 = (f32) MUL_MAT(blk.14.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-14{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.0156, 3.9844, 7.9844, ...], [ 63.9844, 67.9844, 71.9844, ...], [127.9844, 131.9844, 135.9844, ...], ], ] sum = 611.859680 ggml_debug: ffn_moe_probs-14 = (f32) SOFT_MAX(ffn_moe_logits-14{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0597, 4.0597, 8.0597, ...], [ 64.0597, 68.0597, 72.0597, ...], [128.0597, 132.0597, 136.0597, ...], ], ] sum = 612.537476 ggml_debug: ffn_moe_argsort-14 = (i32) ARGSORT(ffn_moe_probs-14{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 2.0000, 6.0000, 10.0000, ...], [ 66.0000, 70.0000, 74.0000, ...], [130.0000, 134.0000, 138.0000, ...], ], ] sum = 630.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-14{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 2.0000, 6.0000, 10.0000, ...], [ 66.0000, 70.0000, 74.0000, ...], [130.0000, 134.0000, 138.0000, ...], ], ] sum = 630.000000 ggml_debug: ffn_moe_up-14 = (f32) MUL_MAT_ID(blk.14.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-14{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.2960, 2.7040, 6.7040, ...], [43006.7031, 43010.7031, 43014.7031, ...], [86014.7031, 86018.7031, 86022.7031, ...], ], ] sum = 387096.312500 ggml_debug: ffn_moe_gate-14 = (f32) MUL_MAT_ID(blk.14.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-14{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3803, 4.3803, 8.3803, ...], [43008.3789, 43012.3789, 43016.3789, ...], [86016.3828, 86020.3828, 86024.3828, ...], ], ] sum = 387111.406250 ggml_debug: ffn_moe_silu-14 = (f32) UNARY(ffn_moe_gate-14{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.2259, 4.2259, 8.2259, ...], [43008.2266, 43012.2266, 43016.2266, ...], [86016.2266, 86020.2266, 86024.2266, ...], ], ] sum = 387110.031250 ggml_debug: ffn_moe_gate_par-14 = (f32) MUL(ffn_moe_up-14{10752, 3, 1, 1}, ffn_moe_silu-14{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2928, 3.7072, 7.7072, ...], [43007.7070, 43011.7070, 43015.7070, ...], [86015.7109, 86019.7109, 86023.7109, ...], ], ] sum = 387105.375000 ggml_debug: ffn_moe_down-14 = (f32) MUL_MAT_ID(blk.14.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-14{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.4799, 3.5201, 7.5201, ...], [24575.5195, 24579.5195, 24583.5195, ...], [49151.5195, 49155.5195, 49159.5195, ...], ], ] sum = 221215.671875 ggml_debug: ffn_moe_probs-14 (reshaped) = (f32) RESHAPE(ffn_moe_probs-14{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0597], [ 4.0597], [ 8.0597], ... ], [ [ 64.0597], [ 68.0597], [ 72.0597], ... ], [ [128.0597], [132.0597], [136.0597], ... ], ] sum = 612.537476 ggml_debug: ffn_moe_weights-14 = (f32) GET_ROWS(ffn_moe_probs-14 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.0825], [ 4.0825], [ 8.0825], ... ], [ [ 16.0825], [ 20.0825], [ 24.0825], ... ], [ [ 32.0825], [ 36.0825], [ 40.0825], ... ], ] sum = 180.742889 ggml_debug: ffn_moe_weights-14 (reshaped) = (f32) RESHAPE(ffn_moe_weights-14{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0825, 4.0825, 8.0825, ...], [ 16.0825, 20.0825, 24.0825, ...], [ 32.0825, 36.0825, 40.0825, ...], ], ] sum = 180.742889 ggml_debug: ffn_moe_weights_sum-14 = (f32) SUM_ROWS(ffn_moe_weights-14 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3148], [ 4.3148], [ 8.3148], ], ] sum = 12.944359 ggml_debug: ffn_moe_weights_norm-14 = (f32) DIV(ffn_moe_weights-14 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-14{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2622, 4.2622, 8.2622, ...], [ 16.2622, 20.2622, 24.2622, ...], [ 32.2622, 36.2622, 40.2622, ...], ], ] sum = 182.360001 ggml_debug: ffn_moe_weights_norm-14 (view) = (f32) VIEW(ffn_moe_weights_norm-14{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2622], [ 16.2622], [ 32.2622], ], ] sum = 48.786663 ggml_debug: ffn_moe_weighted-14 = (f32) MUL(ffn_moe_down-14{6144, 3, 1, 1}, ffn_moe_weights_norm-14 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1258, 3.8742, 7.8742, ...], [24575.8750, 24579.8750, 24583.8750, ...], [49151.8750, 49155.8750, 49159.8750, ...], ], ] sum = 221218.875000 ggml_debug: ffn_moe_up-14 = (f32) MUL_MAT_ID(blk.14.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-14{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.6928, 3.3072, 7.3072, ...], [43007.3086, 43011.3086, 43015.3086, ...], [86015.3047, 86019.3047, 86023.3047, ...], ], ] sum = 387101.781250 ggml_debug: ffn_moe_gate-14 = (f32) MUL_MAT_ID(blk.14.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-14{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.5928, 3.4072, 7.4072, ...], [43007.4062, 43011.4062, 43015.4062, ...], [86015.4062, 86019.4062, 86023.4062, ...], ], ] sum = 387102.656250 ggml_debug: ffn_moe_silu-14 = (f32) UNARY(ffn_moe_gate-14{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.2110, 3.7890, 7.7890, ...], [43007.7891, 43011.7891, 43015.7891, ...], [86015.7891, 86019.7891, 86023.7891, ...], ], ] sum = 387106.093750 ggml_debug: ffn_moe_gate_par-14 = (f32) MUL(ffn_moe_up-14{10752, 3, 1, 1}, ffn_moe_silu-14{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1462, 4.1462, 8.1462, ...], [43008.1445, 43012.1445, 43016.1445, ...], [86016.1484, 86020.1484, 86024.1484, ...], ], ] sum = 387109.343750 ggml_debug: ffn_moe_down-14 = (f32) MUL_MAT_ID(blk.14.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-14{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1894, 3.8106, 7.8106, ...], [24575.8105, 24579.8105, 24583.8105, ...], [49151.8125, 49155.8125, 49159.8125, ...], ], ] sum = 221218.312500 ggml_debug: ffn_moe_weights_norm-14 (view) = (f32) VIEW(ffn_moe_weights_norm-14{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2561], [ 16.2561], [ 32.2561], ], ] sum = 48.768253 ggml_debug: ffn_moe_weighted-14 = (f32) MUL(ffn_moe_down-14{6144, 3, 1, 1}, ffn_moe_weights_norm-14 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0485, 3.9515, 7.9515, ...], [24575.9512, 24579.9512, 24583.9512, ...], [49151.9531, 49155.9531, 49159.9531, ...], ], ] sum = 221219.578125 ggml_debug: ffn_moe_out-14 = (f32) ADD(ffn_moe_weighted-14{6144, 3, 1, 1}, ffn_moe_weighted-14{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1743, 3.8257, 7.8257, ...], [24575.8262, 24579.8262, 24583.8262, ...], [49151.8242, 49155.8242, 49159.8242, ...], ], ] sum = 221218.437500 ggml_debug: ffn_moe_up-14 = (f32) MUL_MAT_ID(blk.14.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-14{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3754, 3.6246, 7.6246, ...], [43007.6250, 43011.6250, 43015.6250, ...], [86015.6250, 86019.6250, 86023.6250, ...], ], ] sum = 387104.625000 ggml_debug: ffn_moe_gate-14 = (f32) MUL_MAT_ID(blk.14.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-14{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.6899, 4.6899, 8.6899, ...], [43008.6914, 43012.6914, 43016.6914, ...], [86016.6875, 86020.6875, 86024.6875, ...], ], ] sum = 387114.187500 ggml_debug: ffn_moe_silu-14 = (f32) UNARY(ffn_moe_gate-14{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.4594, 4.4594, 8.4594, ...], [43008.4609, 43012.4609, 43016.4609, ...], [86016.4609, 86020.4609, 86024.4609, ...], ], ] sum = 387112.156250 ggml_debug: ffn_moe_gate_par-14 = (f32) MUL(ffn_moe_up-14{10752, 3, 1, 1}, ffn_moe_silu-14{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1725, 3.8275, 7.8275, ...], [43007.8281, 43011.8281, 43015.8281, ...], [86015.8281, 86019.8281, 86023.8281, ...], ], ] sum = 387106.437500 ggml_debug: ffn_moe_down-14 = (f32) MUL_MAT_ID(blk.14.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-14{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1874, 3.8126, 7.8126, ...], [24575.8125, 24579.8125, 24583.8125, ...], [49151.8125, 49155.8125, 49159.8125, ...], ], ] sum = 221218.312500 ggml_debug: ffn_moe_weights_norm-14 (view) = (f32) VIEW(ffn_moe_weights_norm-14{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2446], [ 16.2446], [ 32.2446], ], ] sum = 48.733700 ggml_debug: ffn_moe_weighted-14 = (f32) MUL(ffn_moe_down-14{6144, 3, 1, 1}, ffn_moe_weights_norm-14 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0458, 3.9542, 7.9542, ...], [24575.9551, 24579.9551, 24583.9551, ...], [49151.9531, 49155.9531, 49159.9531, ...], ], ] sum = 221219.578125 ggml_debug: ffn_moe_out-14 = (f32) ADD(ffn_moe_out-14{6144, 3, 1, 1}, ffn_moe_weighted-14{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2202, 3.7798, 7.7798, ...], [24575.7793, 24579.7793, 24583.7793, ...], [49151.7812, 49155.7812, 49159.7812, ...], ], ] sum = 221218.031250 ggml_debug: ffn_moe_up-14 = (f32) MUL_MAT_ID(blk.14.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-14{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1082, 3.8918, 7.8918, ...], [43007.8906, 43011.8906, 43015.8906, ...], [86015.8906, 86019.8906, 86023.8906, ...], ], ] sum = 387107.000000 ggml_debug: ffn_moe_gate-14 = (f32) MUL_MAT_ID(blk.14.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-14{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1362, 4.1362, 8.1362, ...], [43008.1367, 43012.1367, 43016.1367, ...], [86016.1328, 86020.1328, 86024.1328, ...], ], ] sum = 387109.218750 ggml_debug: ffn_moe_silu-14 = (f32) UNARY(ffn_moe_gate-14{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0727, 4.0727, 8.0727, ...], [43008.0742, 43012.0742, 43016.0742, ...], [86016.0703, 86020.0703, 86024.0703, ...], ], ] sum = 387108.625000 ggml_debug: ffn_moe_gate_par-14 = (f32) MUL(ffn_moe_up-14{10752, 3, 1, 1}, ffn_moe_silu-14{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0079, 3.9921, 7.9921, ...], [43007.9922, 43011.9922, 43015.9922, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.937500 ggml_debug: ffn_moe_down-14 = (f32) MUL_MAT_ID(blk.14.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-14{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1255, 4.1255, 8.1255, ...], [24576.1250, 24580.1250, 24584.1250, ...], [49152.1250, 49156.1250, 49160.1250, ...], ], ] sum = 221221.125000 ggml_debug: ffn_moe_weights_norm-14 (view) = (f32) VIEW(ffn_moe_weights_norm-14{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2371], [ 16.2371], [ 32.2371], ], ] sum = 48.711388 ggml_debug: ffn_moe_weighted-14 = (f32) MUL(ffn_moe_down-14{6144, 3, 1, 1}, ffn_moe_weights_norm-14 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0298, 4.0298, 8.0298, ...], [24576.0293, 24580.0293, 24584.0293, ...], [49152.0312, 49156.0312, 49160.0312, ...], ], ] sum = 221220.281250 ggml_debug: ffn_moe_out-14 = (f32) ADD(ffn_moe_out-14{6144, 3, 1, 1}, ffn_moe_weighted-14{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1904, 3.8096, 7.8096, ...], [24575.8105, 24579.8105, 24583.8105, ...], [49151.8086, 49155.8086, 49159.8086, ...], ], ] sum = 221218.296875 ggml_debug: ffn_inp-14 = (f32) ADD(kqv_out-14{6144, 3, 1, 1}, CUDA1#l_out-13#0{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1848, 4.1848, 8.1848, ...], [24576.1855, 24580.1855, 24584.1855, ...], [49152.1836, 49156.1836, 49160.1836, ...], ], ] sum = 221221.671875 ggml_debug: l_out-14 = (f32) ADD(ffn_moe_out-14{6144, 3, 1, 1}, ffn_inp-14{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0056, 3.9944, 7.9944, ...], [24575.9941, 24579.9941, 24583.9941, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.953125 ggml_debug: norm-15 = (f32) NORM(l_out-14{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0112, 3.9888, 7.9888, ...], [24575.9883, 24579.9883, 24583.9883, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.890625 ggml_debug: attn_norm-15 = (f32) MUL(norm-15{6144, 3, 1, 1}, blk.15.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0028, 3.9972, 7.9972, ...], [24575.9980, 24579.9980, 24583.9980, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.984375 ggml_debug: wqkv-15 = (f32) MUL_MAT(blk.15.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-15{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -0.2448, 3.7552, 7.7552, ...], [32767.7559, 32771.7539, 32775.7539, ...], [65535.7539, 65539.7578, 65543.7578, ...], ], ] sum = 294945.781250 ggml_debug: wqkv_clamped-15 = (f32) CLAMP(wqkv-15{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -0.2448, 3.7552, 7.7552, ...], [32767.7559, 32771.7539, 32775.7539, ...], [65535.7539, 65539.7578, 65543.7578, ...], ], ] sum = 294945.781250 ggml_debug: wqkv_clamped-15 (view) = (f32) VIEW(wqkv_clamped-15{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.2448, 3.7552, 7.7552, ...], [32767.7559, 32771.7539, 32775.7539, ...], [65535.7539, 65539.7578, 65543.7578, ...], ], ] sum = 294945.781250 ggml_debug: Qcur-15 = (f32) CONT(wqkv_clamped-15 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.2448, 3.7552, 7.7552, ...], [24575.7559, 24579.7559, 24583.7559, ...], [49151.7539, 49155.7539, 49159.7539, ...], ], ] sum = 221217.781250 ggml_debug: Qcur-15 (reshaped) = (f32) RESHAPE(Qcur-15{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -0.2448, 3.7552, 7.7552, ...], [511.7552, 515.7552, 519.7552, ...], [1023.7552, 1027.7552, 1031.7552, ...], ... ], [ [24575.7559, 24579.7559, 24583.7559, ...], [25087.7559, 25091.7559, 25095.7559, ...], [25599.7559, 25603.7559, 25607.7559, ...], ... ], [ [49151.7539, 49155.7539, 49159.7539, ...], [49663.7539, 49667.7539, 49671.7539, ...], [50175.7539, 50179.7539, 50183.7539, ...], ... ], ] sum = 677477.375000 ggml_debug: Qcur-15 = (f32) ROPE(Qcur-15 (reshaped){128, 48, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -0.2448, 3.7552, 7.7552, ...], [511.7552, 515.7552, 519.7552, ...], [1023.7552, 1027.7552, 1031.7552, ...], ... ], [ [24575.7559, 24579.7559, 24583.7559, ...], [25087.7559, 25091.7559, 25095.7559, ...], [25599.7559, 25603.7559, 25607.7559, ...], ... ], [ [49151.7539, 49155.7539, 49159.7539, ...], [49663.7539, 49667.7539, 49671.7539, ...], [50175.7539, 50179.7539, 50183.7539, ...], ... ], ] sum = 677477.375000 ggml_debug: wqkv_clamped-15 (view) = (f32) VIEW(wqkv_clamped-15{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -1.6803, 2.3197, 6.3197, ...], [32766.3203, 32770.3203, 32774.3203, ...], [65534.3203, 65538.3203, 65542.3203, ...], ], ] sum = 294932.875000 ggml_debug: Kcur-15 = (f32) CONT(wqkv_clamped-15 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -1.6803, 2.3197, 6.3197, ...], [4094.3198, 4098.3198, 4102.3198, ...], [8190.3198, 8194.3193, 8198.3193, ...], ], ] sum = 36884.878906 ggml_debug: Kcur-15 (reshaped) = (f32) RESHAPE(Kcur-15{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ -1.6803, 2.3197, 6.3197, ...], [510.3197, 514.3197, 518.3197, ...], [1022.3197, 1026.3197, 1030.3197, ...], ... ], [ [4094.3198, 4098.3198, 4102.3198, ...], [4606.3198, 4610.3198, 4614.3198, ...], [5118.3198, 5122.3198, 5126.3198, ...], ... ], [ [8190.3198, 8194.3193, 8198.3193, ...], [8702.3193, 8706.3193, 8710.3193, ...], [9214.3193, 9218.3193, 9222.3193, ...], ... ], ] sum = 124478.640625 ggml_debug: Kcur-15 = (f32) ROPE(Kcur-15 (reshaped){128, 8, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ -1.6803, 2.3197, 6.3197, ...], [510.3197, 514.3197, 518.3197, ...], [1022.3197, 1026.3197, 1030.3197, ...], ... ], [ [4094.3198, 4098.3198, 4102.3198, ...], [4606.3198, 4610.3198, 4614.3198, ...], [5118.3198, 5122.3198, 5126.3198, ...], ... ], [ [8190.3198, 8194.3193, 8198.3193, ...], [8702.3193, 8706.3193, 8710.3193, ...], [9214.3193, 9218.3193, 9222.3193, ...], ... ], ] sum = 124478.640625 ggml_debug: wqkv_clamped-15 (view) = (f32) VIEW(wqkv_clamped-15{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.3643, 4.3643, 8.3644, ...], [32768.3633, 32772.3633, 32776.3633, ...], [65536.3672, 65540.3672, 65544.3672, ...], ], ] sum = 294951.312500 ggml_debug: Vcur-15 = (f32) CONT(wqkv_clamped-15 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.3643, 4.3643, 8.3644, ...], [4096.3643, 4100.3643, 4104.3643, ...], [8192.3643, 8196.3643, 8200.3643, ...], ], ] sum = 36903.277344 ggml_debug: k_cache_view-15 = (f16) VIEW(cache_k_l15{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-15 (copy of Kcur-15) = (f16) CPY(Kcur-15{128, 8, 3, 1}, k_cache_view-15{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ -1.6807, -1.6826, -1.6846, ...], ], ] sum = -5.047852 ggml_debug: v_cur_t-15 = (f32) TRANSPOSE(Vcur-15{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.3643, 4096.3643, 8192.3643], [ 4.3643, 4100.3643, 8196.3643], [ 8.3644, 4104.3643, 8200.3643], ... ], ] sum = 36903.277344 ggml_debug: v_cache_view-15 = (f16) VIEW(cache_v_l15{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-15 (copy of v_cur_t-15) = (f16) CPY(v_cur_t-15{3, 1024, 1, 1}, v_cache_view-15{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.3643, 0.3647, 0.3652], [ 0.7285, 0.7295, 0.7305], [ 1.4570, 1.4590, 1.4609], ... ], ] sum = 7.659668 ggml_debug: v-15 = (f16) VIEW(cache_v_l15{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.3643, 0.3647, 0.3652, ...], [ 0.7285, 0.7295, 0.7305, ...], [ 1.4570, 1.4590, 1.4609, ...], ... ], [ [ 0.3643, 0.3647, 0.3652, ...], [ 0.7285, 0.7295, 0.7305, ...], [ 1.4570, 1.4590, 1.4609, ...], ... ], [ [ 0.3643, 0.3647, 0.3652, ...], [ 0.7285, 0.7295, 0.7305, ...], [ 1.4570, 1.4590, 1.4609, ...], ... ], ... ] sum = 22.979004 ggml_debug: k-15 = (f16) VIEW(cache_k_l15{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ -1.6807, -1.6826, -1.6846, ...], [ -6.7227, -6.7305, -6.7383, ...], [-26.8906, -26.9219, -26.9531, ...], ... ], [ [ -1.9307, -1.9326, -1.9346, ...], [ -7.7227, -7.7305, -7.7383, ...], [-30.8906, -30.9219, -30.9531, ...], ... ], [ [ -2.3613, -2.3652, -2.3691, ...], [ -9.4453, -9.4609, -9.4766, ...], [-37.7812, -37.8438, -37.9062, ...], ... ], ... ] sum = -376.769531 ggml_debug: q-15 = (f32) PERMUTE(Qcur-15{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -0.2448, 3.7552, 7.7552, ...], [24575.7559, 24579.7559, 24583.7559, ...], [49151.7539, 49155.7539, 49159.7539, ...], ], [ [511.7552, 515.7552, 519.7552, ...], [25087.7559, 25091.7559, 25095.7559, ...], [49663.7539, 49667.7539, 49671.7539, ...], ], [ [1023.7552, 1027.7552, 1031.7552, ...], [25599.7559, 25603.7559, 25607.7559, ...], [50175.7539, 50179.7539, 50183.7539, ...], ], ... ] sum = 677477.312500 ggml_debug: kq-15 = (f32) MUL_MAT(k-15{128, 32, 8, 1}, q-15{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 71.6875, 75.6875, 79.6875, ...], [199.6875, 203.6875, 207.6875, ...], [327.6875, 331.6875, 335.6875, ...], ], [ [455.6875, 459.6875, 463.6875, ...], [583.6875, 587.6875, 591.6875, ...], [711.6875, 715.6875, 719.6875, ...], ], [ [839.6875, 843.6875, 847.6875, ...], [967.6875, 971.6875, 975.6875, ...], [1095.6875, 1099.6875, 1103.6875, ...], ], ... ] sum = 15867.562500 ggml_debug: kq_soft_max_ext-15 = (f32) SOFT_MAX(kq-15{32, 3, 48, 1}, CUDA1#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-15 = (f32) MUL_MAT(v-15{32, 128, 8, 1}, kq_soft_max_ext-15{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.3643, 4.3643, 8.3643, ...], [512.3643, 516.3643, 520.3643, ...], [1024.3643, 1028.3643, 1032.3643, ...], ], [ [1536.3643, 1540.3643, 1544.3643, ...], [2048.3643, 2052.3643, 2056.3643, ...], [2560.3643, 2564.3643, 2568.3643, ...], ], [ [3072.3643, 3076.3643, 3080.3643, ...], [3584.3643, 3588.3643, 3592.3643, ...], [4096.3643, 4100.3643, 4104.3643, ...], ], ... ] sum = 55413.824219 ggml_debug: kqv_merged-15 = (f32) PERMUTE(kqv-15{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.3643, 4.3643, 8.3643, ...], [1536.3643, 1540.3643, 1544.3643, ...], [3072.3643, 3076.3643, 3080.3643, ...], ... ], [ [512.3643, 516.3643, 520.3643, ...], [2048.3643, 2052.3643, 2056.3643, ...], [3584.3643, 3588.3643, 3592.3643, ...], ... ], [ [1024.3643, 1028.3643, 1032.3643, ...], [2560.3643, 2564.3643, 2568.3643, ...], [4096.3643, 4100.3643, 4104.3643, ...], ... ], ] sum = 55413.820312 ggml_debug: kqv_merged_cont-15 = (f32) CONT(kqv_merged-15{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.3643, 4.3643, 8.3643, ...], [24576.3633, 24580.3633, 24584.3633, ...], [49152.3633, 49156.3633, 49160.3633, ...], ], ] sum = 221223.265625 ggml_debug: kqv_out-15 = (f32) MUL_MAT(blk.15.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-15{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0127, 4.0127, 8.0127, ...], [24576.0117, 24580.0117, 24584.0117, ...], [49152.0117, 49156.0117, 49160.0117, ...], ], ] sum = 221220.125000 ggml_debug: norm-15 = (f32) NORM(kqv_out-15{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0932, 4.0932, 8.0932, ...], [24576.0938, 24580.0938, 24584.0938, ...], [49152.0938, 49156.0938, 49160.0938, ...], ], ] sum = 221220.843750 ggml_debug: attn_out_norm-15 = (f32) MUL(norm-15{6144, 3, 1, 1}, blk.15.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0335, 4.0335, 8.0335, ...], [24576.0332, 24580.0332, 24584.0332, ...], [49152.0352, 49156.0352, 49160.0352, ...], ], ] sum = 221220.296875 ggml_debug: ffn_moe_logits-15 = (f32) MUL_MAT(blk.15.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-15{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ 0.1376, 4.1376, 8.1376, ...], [ 64.1376, 68.1376, 72.1376, ...], [128.1376, 132.1376, 136.1376, ...], ], ] sum = 613.238159 ggml_debug: ffn_moe_probs-15 = (f32) SOFT_MAX(ffn_moe_logits-15{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0714, 4.0714, 8.0714, ...], [ 64.0714, 68.0714, 72.0714, ...], [128.0714, 132.0714, 136.0714, ...], ], ] sum = 612.642273 ggml_debug: ffn_moe_argsort-15 = (i32) ARGSORT(ffn_moe_probs-15{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 8.0000, 12.0000, 16.0000, ...], [ 72.0000, 76.0000, 80.0000, ...], [136.0000, 140.0000, 144.0000, ...], ], ] sum = 684.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-15{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 8.0000, 12.0000, 16.0000, ...], [ 72.0000, 76.0000, 80.0000, ...], [136.0000, 140.0000, 144.0000, ...], ], ] sum = 684.000000 ggml_debug: ffn_moe_up-15 = (f32) MUL_MAT_ID(blk.15.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-15{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3241, 4.3241, 8.3241, ...], [43008.3242, 43012.3242, 43016.3242, ...], [86016.3203, 86020.3203, 86024.3203, ...], ], ] sum = 387110.906250 ggml_debug: ffn_moe_gate-15 = (f32) MUL_MAT_ID(blk.15.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-15{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2870, 4.2870, 8.2870, ...], [43008.2852, 43012.2852, 43016.2852, ...], [86016.2891, 86020.2891, 86024.2891, ...], ], ] sum = 387110.562500 ggml_debug: ffn_moe_silu-15 = (f32) UNARY(ffn_moe_gate-15{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1640, 4.1640, 8.1640, ...], [43008.1641, 43012.1641, 43016.1641, ...], [86016.1641, 86020.1641, 86024.1641, ...], ], ] sum = 387109.468750 ggml_debug: ffn_moe_gate_par-15 = (f32) MUL(ffn_moe_up-15{10752, 3, 1, 1}, ffn_moe_silu-15{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0532, 4.0532, 8.0532, ...], [43008.0547, 43012.0547, 43016.0547, ...], [86016.0547, 86020.0547, 86024.0547, ...], ], ] sum = 387108.500000 ggml_debug: ffn_moe_down-15 = (f32) MUL_MAT_ID(blk.15.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-15{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2386, 3.7614, 7.7614, ...], [24575.7617, 24579.7617, 24583.7617, ...], [49151.7617, 49155.7617, 49159.7617, ...], ], ] sum = 221217.859375 ggml_debug: ffn_moe_probs-15 (reshaped) = (f32) RESHAPE(ffn_moe_probs-15{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0714], [ 4.0714], [ 8.0714], ... ], [ [ 64.0714], [ 68.0714], [ 72.0714], ... ], [ [128.0714], [132.0714], [136.0714], ... ], ] sum = 612.642273 ggml_debug: ffn_moe_weights-15 = (f32) GET_ROWS(ffn_moe_probs-15 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1148], [ 4.1148], [ 8.1148], ... ], [ [ 16.1148], [ 20.1148], [ 24.1148], ... ], [ [ 32.1148], [ 36.1148], [ 40.1148], ... ], ] sum = 181.033051 ggml_debug: ffn_moe_weights-15 (reshaped) = (f32) RESHAPE(ffn_moe_weights-15{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1148, 4.1148, 8.1148, ...], [ 16.1148, 20.1148, 24.1148, ...], [ 32.1148, 36.1148, 40.1148, ...], ], ] sum = 181.033051 ggml_debug: ffn_moe_weights_sum-15 = (f32) SUM_ROWS(ffn_moe_weights-15 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3593], [ 4.3593], [ 8.3593], ], ] sum = 13.077956 ggml_debug: ffn_moe_weights_norm-15 = (f32) DIV(ffn_moe_weights-15 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-15{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3195, 4.3195, 8.3195, ...], [ 16.3195, 20.3195, 24.3195, ...], [ 32.3195, 36.3195, 40.3195, ...], ], ] sum = 182.875092 ggml_debug: ffn_moe_weights_norm-15 (view) = (f32) VIEW(ffn_moe_weights_norm-15{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3195], [ 16.3195], [ 32.3195], ], ] sum = 48.958363 ggml_debug: ffn_moe_weighted-15 = (f32) MUL(ffn_moe_down-15{6144, 3, 1, 1}, ffn_moe_weights_norm-15 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0762, 3.9238, 7.9238, ...], [24575.9238, 24579.9238, 24583.9238, ...], [49151.9219, 49155.9219, 49159.9219, ...], ], ] sum = 221219.296875 ggml_debug: ffn_moe_up-15 = (f32) MUL_MAT_ID(blk.15.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-15{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.5374, 3.4626, 7.4626, ...], [43007.4609, 43011.4609, 43015.4609, ...], [86015.4609, 86019.4609, 86023.4609, ...], ], ] sum = 387103.156250 ggml_debug: ffn_moe_gate-15 = (f32) MUL_MAT_ID(blk.15.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-15{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.4419, 4.4419, 8.4419, ...], [43008.4414, 43012.4414, 43016.4414, ...], [86016.4453, 86020.4453, 86024.4453, ...], ], ] sum = 387111.968750 ggml_debug: ffn_moe_silu-15 = (f32) UNARY(ffn_moe_gate-15{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.2690, 4.2690, 8.2690, ...], [43008.2695, 43012.2695, 43016.2695, ...], [86016.2656, 86020.2656, 86024.2656, ...], ], ] sum = 387110.375000 ggml_debug: ffn_moe_gate_par-15 = (f32) MUL(ffn_moe_up-15{10752, 3, 1, 1}, ffn_moe_silu-15{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1446, 3.8554, 7.8554, ...], [43007.8555, 43011.8555, 43015.8555, ...], [86015.8516, 86019.8516, 86023.8516, ...], ], ] sum = 387106.687500 ggml_debug: ffn_moe_down-15 = (f32) MUL_MAT_ID(blk.15.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-15{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1295, 3.8705, 7.8705, ...], [24575.8711, 24579.8711, 24583.8711, ...], [49151.8711, 49155.8711, 49159.8711, ...], ], ] sum = 221218.843750 ggml_debug: ffn_moe_weights_norm-15 (view) = (f32) VIEW(ffn_moe_weights_norm-15{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2883], [ 16.2883], [ 32.2883], ], ] sum = 48.864964 ggml_debug: ffn_moe_weighted-15 = (f32) MUL(ffn_moe_down-15{6144, 3, 1, 1}, ffn_moe_weights_norm-15 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0373, 3.9627, 7.9627, ...], [24575.9629, 24579.9629, 24583.9629, ...], [49151.9609, 49155.9609, 49159.9609, ...], ], ] sum = 221219.656250 ggml_debug: ffn_moe_out-15 = (f32) ADD(ffn_moe_weighted-15{6144, 3, 1, 1}, ffn_moe_weighted-15{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1136, 3.8864, 7.8864, ...], [24575.8867, 24579.8867, 24583.8867, ...], [49151.8867, 49155.8867, 49159.8867, ...], ], ] sum = 221218.984375 ggml_debug: ffn_moe_up-15 = (f32) MUL_MAT_ID(blk.15.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-15{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2980, 3.7020, 7.7020, ...], [43007.7031, 43011.7031, 43015.7031, ...], [86015.7031, 86019.7031, 86023.7031, ...], ], ] sum = 387105.312500 ggml_debug: ffn_moe_gate-15 = (f32) MUL_MAT_ID(blk.15.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-15{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0820, 3.9180, 7.9180, ...], [43007.9180, 43011.9180, 43015.9180, ...], [86015.9219, 86019.9219, 86023.9219, ...], ], ] sum = 387107.312500 ggml_debug: ffn_moe_silu-15 = (f32) UNARY(ffn_moe_gate-15{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0393, 3.9607, 7.9607, ...], [43007.9609, 43011.9609, 43015.9609, ...], [86015.9609, 86019.9609, 86023.9609, ...], ], ] sum = 387107.656250 ggml_debug: ffn_moe_gate_par-15 = (f32) MUL(ffn_moe_up-15{10752, 3, 1, 1}, ffn_moe_silu-15{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0117, 4.0117, 8.0117, ...], [43008.0117, 43012.0117, 43016.0117, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.093750 ggml_debug: ffn_moe_down-15 = (f32) MUL_MAT_ID(blk.15.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-15{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1637, 4.1637, 8.1637, ...], [24576.1641, 24580.1641, 24584.1641, ...], [49152.1641, 49156.1641, 49160.1641, ...], ], ] sum = 221221.468750 ggml_debug: ffn_moe_weights_norm-15 (view) = (f32) VIEW(ffn_moe_weights_norm-15{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1986], [ 16.1986], [ 32.1986], ], ] sum = 48.595860 ggml_debug: ffn_moe_weighted-15 = (f32) MUL(ffn_moe_down-15{6144, 3, 1, 1}, ffn_moe_weights_norm-15 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0325, 4.0325, 8.0325, ...], [24576.0332, 24580.0332, 24584.0332, ...], [49152.0312, 49156.0312, 49160.0312, ...], ], ] sum = 221220.281250 ggml_debug: ffn_moe_out-15 = (f32) ADD(ffn_moe_out-15{6144, 3, 1, 1}, ffn_moe_weighted-15{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0811, 3.9189, 7.9189, ...], [24575.9180, 24579.9180, 24583.9180, ...], [49151.9180, 49155.9180, 49159.9180, ...], ], ] sum = 221219.281250 ggml_debug: ffn_moe_up-15 = (f32) MUL_MAT_ID(blk.15.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-15{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.0547, 2.9453, 6.9453, ...], [43006.9453, 43010.9453, 43014.9453, ...], [86014.9453, 86018.9453, 86022.9453, ...], ], ] sum = 387098.500000 ggml_debug: ffn_moe_gate-15 = (f32) MUL_MAT_ID(blk.15.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-15{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2069, 4.2069, 8.2069, ...], [43008.2070, 43012.2070, 43016.2070, ...], [86016.2031, 86020.2031, 86024.2031, ...], ], ] sum = 387109.812500 ggml_debug: ffn_moe_silu-15 = (f32) UNARY(ffn_moe_gate-15{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1141, 4.1141, 8.1141, ...], [43008.1133, 43012.1133, 43016.1133, ...], [86016.1172, 86020.1172, 86024.1172, ...], ], ] sum = 387109.062500 ggml_debug: ffn_moe_gate_par-15 = (f32) MUL(ffn_moe_up-15{10752, 3, 1, 1}, ffn_moe_silu-15{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1203, 3.8797, 7.8797, ...], [43007.8789, 43011.8789, 43015.8789, ...], [86015.8828, 86019.8828, 86023.8828, ...], ], ] sum = 387106.906250 ggml_debug: ffn_moe_down-15 = (f32) MUL_MAT_ID(blk.15.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-15{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0462, 4.0462, 8.0462, ...], [24576.0469, 24580.0469, 24584.0469, ...], [49152.0469, 49156.0469, 49160.0469, ...], ], ] sum = 221220.421875 ggml_debug: ffn_moe_weights_norm-15 (view) = (f32) VIEW(ffn_moe_weights_norm-15{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1936], [ 16.1936], [ 32.1936], ], ] sum = 48.580814 ggml_debug: ffn_moe_weighted-15 = (f32) MUL(ffn_moe_down-15{6144, 3, 1, 1}, ffn_moe_weights_norm-15 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0089, 4.0089, 8.0089, ...], [24576.0098, 24580.0098, 24584.0098, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: ffn_moe_out-15 = (f32) ADD(ffn_moe_out-15{6144, 3, 1, 1}, ffn_moe_weighted-15{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0721, 3.9279, 7.9279, ...], [24575.9277, 24579.9277, 24583.9277, ...], [49151.9297, 49155.9297, 49159.9297, ...], ], ] sum = 221219.375000 ggml_debug: ffn_inp-15 = (f32) ADD(kqv_out-15{6144, 3, 1, 1}, l_out-14{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0071, 4.0071, 8.0071, ...], [24576.0078, 24580.0078, 24584.0078, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: l_out-15 = (f32) ADD(ffn_moe_out-15{6144, 3, 1, 1}, ffn_inp-15{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0650, 3.9350, 7.9350, ...], [24575.9355, 24579.9355, 24583.9355, ...], [49151.9336, 49155.9336, 49159.9336, ...], ], ] sum = 221219.421875 ggml_debug: norm-16 = (f32) NORM(l_out-15{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1228, 3.8772, 7.8772, ...], [24575.8770, 24579.8770, 24583.8770, ...], [49151.8789, 49155.8789, 49159.8789, ...], ], ] sum = 221218.890625 ggml_debug: attn_norm-16 = (f32) MUL(norm-16{6144, 3, 1, 1}, blk.16.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0319, 3.9681, 7.9681, ...], [24575.9688, 24579.9688, 24583.9688, ...], [49151.9688, 49155.9688, 49159.9688, ...], ], ] sum = 221219.718750 ggml_debug: wqkv-16 = (f32) MUL_MAT(blk.16.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-16{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -0.2410, 3.7590, 7.7590, ...], [32767.7598, 32771.7578, 32775.7578, ...], [65535.7578, 65539.7578, 65543.7578, ...], ], ] sum = 294945.812500 ggml_debug: wqkv_clamped-16 = (f32) CLAMP(wqkv-16{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -0.2410, 3.7590, 7.7590, ...], [32767.7598, 32771.7578, 32775.7578, ...], [65535.7578, 65539.7578, 65543.7578, ...], ], ] sum = 294945.812500 ggml_debug: wqkv_clamped-16 (view) = (f32) VIEW(wqkv_clamped-16{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.2410, 3.7590, 7.7590, ...], [32767.7598, 32771.7578, 32775.7578, ...], [65535.7578, 65539.7578, 65543.7578, ...], ], ] sum = 294945.812500 ggml_debug: Qcur-16 = (f32) CONT(wqkv_clamped-16 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.2410, 3.7590, 7.7590, ...], [24575.7598, 24579.7598, 24583.7598, ...], [49151.7578, 49155.7578, 49159.7578, ...], ], ] sum = 221217.812500 ggml_debug: Qcur-16 (reshaped) = (f32) RESHAPE(Qcur-16{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -0.2410, 3.7590, 7.7590, ...], [511.7590, 515.7590, 519.7590, ...], [1023.7590, 1027.7589, 1031.7589, ...], ... ], [ [24575.7598, 24579.7598, 24583.7598, ...], [25087.7598, 25091.7598, 25095.7598, ...], [25599.7598, 25603.7598, 25607.7598, ...], ... ], [ [49151.7578, 49155.7578, 49159.7578, ...], [49663.7578, 49667.7578, 49671.7578, ...], [50175.7578, 50179.7578, 50183.7578, ...], ... ], ] sum = 677477.437500 ggml_debug: Qcur-16 = (f32) ROPE(Qcur-16 (reshaped){128, 48, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -0.2410, 3.7590, 7.7590, ...], [511.7590, 515.7590, 519.7590, ...], [1023.7590, 1027.7589, 1031.7589, ...], ... ], [ [24575.7598, 24579.7598, 24583.7598, ...], [25087.7598, 25091.7598, 25095.7598, ...], [25599.7598, 25603.7598, 25607.7598, ...], ... ], [ [49151.7578, 49155.7578, 49159.7578, ...], [49663.7578, 49667.7578, 49671.7578, ...], [50175.7578, 50179.7578, 50183.7578, ...], ... ], ] sum = 677477.437500 ggml_debug: wqkv_clamped-16 (view) = (f32) VIEW(wqkv_clamped-16{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.5735, 3.4265, 7.4265, ...], [32767.4258, 32771.4258, 32775.4258, ...], [65535.4258, 65539.4297, 65543.4297, ...], ], ] sum = 294942.843750 ggml_debug: Kcur-16 = (f32) CONT(wqkv_clamped-16 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.5735, 3.4265, 7.4265, ...], [4095.4265, 4099.4268, 4103.4268, ...], [8191.4268, 8195.4268, 8199.4268, ...], ], ] sum = 36894.839844 ggml_debug: Kcur-16 (reshaped) = (f32) RESHAPE(Kcur-16{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ -0.5735, 3.4265, 7.4265, ...], [511.4265, 515.4265, 519.4265, ...], [1023.4265, 1027.4265, 1031.4265, ...], ... ], [ [4095.4265, 4099.4268, 4103.4268, ...], [4607.4268, 4611.4268, 4615.4268, ...], [5119.4268, 5123.4268, 5127.4268, ...], ... ], [ [8191.4268, 8195.4268, 8199.4268, ...], [8703.4268, 8707.4268, 8711.4268, ...], [9215.4268, 9219.4268, 9223.4268, ...], ... ], ] sum = 124508.531250 ggml_debug: Kcur-16 = (f32) ROPE(Kcur-16 (reshaped){128, 8, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ -0.5735, 3.4265, 7.4265, ...], [511.4265, 515.4265, 519.4265, ...], [1023.4265, 1027.4265, 1031.4265, ...], ... ], [ [4095.4265, 4099.4268, 4103.4268, ...], [4607.4268, 4611.4268, 4615.4268, ...], [5119.4268, 5123.4268, 5127.4268, ...], ... ], [ [8191.4268, 8195.4268, 8199.4268, ...], [8703.4268, 8707.4268, 8711.4268, ...], [9215.4268, 9219.4268, 9223.4268, ...], ... ], ] sum = 124508.531250 ggml_debug: wqkv_clamped-16 (view) = (f32) VIEW(wqkv_clamped-16{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.4129, 4.4129, 8.4129, ...], [32768.4141, 32772.4141, 32776.4141, ...], [65536.4141, 65540.4141, 65544.4141, ...], ], ] sum = 294951.718750 ggml_debug: Vcur-16 = (f32) CONT(wqkv_clamped-16 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.4129, 4.4129, 8.4129, ...], [4096.4131, 4100.4131, 4104.4131, ...], [8192.4131, 8196.4131, 8200.4131, ...], ], ] sum = 36903.718750 ggml_debug: k_cache_view-16 = (f16) VIEW(cache_k_l16{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-16 (copy of Kcur-16) = (f16) CPY(Kcur-16{128, 8, 3, 1}, k_cache_view-16{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ -0.5732, -0.5742, -0.5752, ...], ], ] sum = -1.722656 ggml_debug: v_cur_t-16 = (f32) TRANSPOSE(Vcur-16{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.4129, 4096.4131, 8192.4131], [ 4.4129, 4100.4131, 8196.4131], [ 8.4129, 4104.4131, 8200.4131], ... ], ] sum = 36903.718750 ggml_debug: v_cache_view-16 = (f16) VIEW(cache_v_l16{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-16 (copy of v_cur_t-16) = (f16) CPY(v_cur_t-16{3, 1024, 1, 1}, v_cache_view-16{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.4128, 0.4133, 0.4138], [ 0.8257, 0.8267, 0.8276], [ 1.6514, 1.6533, 1.6553], ... ], ] sum = 8.679932 ggml_debug: v-16 = (f16) VIEW(cache_v_l16{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.4128, 0.4133, 0.4138, ...], [ 0.8257, 0.8267, 0.8276, ...], [ 1.6514, 1.6533, 1.6553, ...], ... ], [ [ 0.4128, 0.4133, 0.4138, ...], [ 0.8257, 0.8267, 0.8276, ...], [ 1.6514, 1.6533, 1.6553, ...], ... ], [ [ 0.4128, 0.4133, 0.4138, ...], [ 0.8257, 0.8267, 0.8276, ...], [ 1.6514, 1.6533, 1.6553, ...], ... ], ... ] sum = 26.039795 ggml_debug: k-16 = (f16) VIEW(cache_k_l16{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ -0.5732, -0.5742, -0.5752, ...], [ -2.2930, -2.2969, -2.3008, ...], [ -9.1719, -9.1875, -9.2031, ...], ... ], [ [ -0.6982, -0.6992, -0.7002, ...], [ -2.7930, -2.7969, -2.8008, ...], [-11.1719, -11.1875, -11.2031, ...], ... ], [ [ -0.8232, -0.8242, -0.8252, ...], [ -3.2930, -3.2969, -3.3008, ...], [-13.1719, -13.1875, -13.2031, ...], ... ], ... ] sum = -132.152344 ggml_debug: q-16 = (f32) PERMUTE(Qcur-16{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -0.2410, 3.7590, 7.7590, ...], [24575.7598, 24579.7598, 24583.7598, ...], [49151.7578, 49155.7578, 49159.7578, ...], ], [ [511.7590, 515.7590, 519.7590, ...], [25087.7598, 25091.7598, 25095.7598, ...], [49663.7578, 49667.7578, 49671.7578, ...], ], [ [1023.7590, 1027.7589, 1031.7589, ...], [25599.7598, 25603.7598, 25607.7598, ...], [50175.7578, 50179.7578, 50183.7578, ...], ], ... ] sum = 677477.375000 ggml_debug: kq-16 = (f32) MUL_MAT(k-16{128, 32, 8, 1}, q-16{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ -5.9102, -1.9102, 2.0898, ...], [122.0898, 126.0898, 130.0898, ...], [250.0898, 254.0898, 258.0898, ...], ], [ [378.0898, 382.0898, 386.0898, ...], [506.0898, 510.0898, 514.0898, ...], [634.0898, 638.0898, 642.0898, ...], ], [ [762.0898, 766.0898, 770.0898, ...], [890.0898, 894.0898, 898.0898, ...], [1018.0898, 1022.0898, 1026.0898, ...], ], ... ] sum = 13772.425781 ggml_debug: kq_soft_max_ext-16 = (f32) SOFT_MAX(kq-16{32, 3, 48, 1}, CUDA1#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-16 = (f32) MUL_MAT(v-16{32, 128, 8, 1}, kq_soft_max_ext-16{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.4128, 4.4128, 8.4128, ...], [512.4128, 516.4128, 520.4128, ...], [1024.4128, 1028.4128, 1032.4128, ...], ], [ [1536.4128, 1540.4128, 1544.4128, ...], [2048.4128, 2052.4128, 2056.4128, ...], [2560.4128, 2564.4128, 2568.4128, ...], ], [ [3072.4128, 3076.4128, 3080.4128, ...], [3584.4128, 3588.4128, 3592.4128, ...], [4096.4131, 4100.4131, 4104.4131, ...], ], ... ] sum = 55415.152344 ggml_debug: kqv_merged-16 = (f32) PERMUTE(kqv-16{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.4128, 4.4128, 8.4128, ...], [1536.4128, 1540.4128, 1544.4128, ...], [3072.4128, 3076.4128, 3080.4128, ...], ... ], [ [512.4128, 516.4128, 520.4128, ...], [2048.4128, 2052.4128, 2056.4128, ...], [3584.4128, 3588.4128, 3592.4128, ...], ... ], [ [1024.4128, 1028.4128, 1032.4128, ...], [2560.4128, 2564.4128, 2568.4128, ...], [4096.4131, 4100.4131, 4104.4131, ...], ... ], ] sum = 55415.152344 ggml_debug: kqv_merged_cont-16 = (f32) CONT(kqv_merged-16{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.4128, 4.4128, 8.4128, ...], [24576.4121, 24580.4121, 24584.4121, ...], [49152.4141, 49156.4141, 49160.4141, ...], ], ] sum = 221223.718750 ggml_debug: kqv_out-16 = (f32) MUL_MAT(blk.16.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-16{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0588, 4.0588, 8.0588, ...], [24576.0586, 24580.0586, 24584.0586, ...], [49152.0586, 49156.0586, 49160.0586, ...], ], ] sum = 221220.531250 ggml_debug: norm-16 = (f32) NORM(kqv_out-16{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.4854, 4.4854, 8.4854, ...], [24576.4863, 24580.4863, 24584.4863, ...], [49152.4844, 49156.4844, 49160.4844, ...], ], ] sum = 221224.359375 ggml_debug: attn_out_norm-16 = (f32) MUL(norm-16{6144, 3, 1, 1}, blk.16.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1782, 4.1782, 8.1782, ...], [24576.1777, 24580.1777, 24584.1777, ...], [49152.1797, 49156.1797, 49160.1797, ...], ], ] sum = 221221.625000 ggml_debug: ffn_moe_logits-16 = (f32) MUL_MAT(blk.16.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-16{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.5508, 3.4492, 7.4492, ...], [ 63.4492, 67.4492, 71.4492, ...], [127.4492, 131.4492, 135.4492, ...], ], ] sum = 607.042969 ggml_debug: ffn_moe_probs-16 = (f32) SOFT_MAX(ffn_moe_logits-16{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0343, 4.0343, 8.0343, ...], [ 64.0343, 68.0343, 72.0343, ...], [128.0343, 132.0343, 136.0343, ...], ], ] sum = 612.308960 ggml_debug: ffn_moe_argsort-16 = (i32) ARGSORT(ffn_moe_probs-16{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 3.0000, 7.0000, 11.0000, ...], [ 67.0000, 71.0000, 75.0000, ...], [131.0000, 135.0000, 139.0000, ...], ], ] sum = 639.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-16{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 3.0000, 7.0000, 11.0000, ...], [ 67.0000, 71.0000, 75.0000, ...], [131.0000, 135.0000, 139.0000, ...], ], ] sum = 639.000000 ggml_debug: ffn_moe_up-16 = (f32) MUL_MAT_ID(blk.16.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-16{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.5716, 3.4284, 7.4284, ...], [43007.4297, 43011.4297, 43015.4297, ...], [86015.4297, 86019.4297, 86023.4297, ...], ], ] sum = 387102.875000 ggml_debug: ffn_moe_gate-16 = (f32) MUL_MAT_ID(blk.16.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-16{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0206, 4.0206, 8.0206, ...], [43008.0195, 43012.0195, 43016.0195, ...], [86016.0234, 86020.0234, 86024.0234, ...], ], ] sum = 387108.218750 ggml_debug: ffn_moe_silu-16 = (f32) UNARY(ffn_moe_gate-16{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0104, 4.0104, 8.0104, ...], [43008.0117, 43012.0117, 43016.0117, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.062500 ggml_debug: ffn_moe_gate_par-16 = (f32) MUL(ffn_moe_up-16{10752, 3, 1, 1}, ffn_moe_silu-16{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0059, 3.9941, 7.9941, ...], [43007.9922, 43011.9922, 43015.9922, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.937500 ggml_debug: ffn_moe_down-16 = (f32) MUL_MAT_ID(blk.16.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-16{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2630, 3.7370, 7.7370, ...], [24575.7363, 24579.7363, 24583.7363, ...], [49151.7383, 49155.7383, 49159.7383, ...], ], ] sum = 221217.625000 ggml_debug: ffn_moe_probs-16 (reshaped) = (f32) RESHAPE(ffn_moe_probs-16{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0343], [ 4.0343], [ 8.0343], ... ], [ [ 64.0343], [ 68.0343], [ 72.0343], ... ], [ [128.0343], [132.0343], [136.0343], ... ], ] sum = 612.308960 ggml_debug: ffn_moe_weights-16 = (f32) GET_ROWS(ffn_moe_probs-16 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.0994], [ 4.0994], [ 8.0994], ... ], [ [ 16.0994], [ 20.0994], [ 24.0994], ... ], [ [ 32.0994], [ 36.0994], [ 40.0994], ... ], ] sum = 180.894897 ggml_debug: ffn_moe_weights-16 (reshaped) = (f32) RESHAPE(ffn_moe_weights-16{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0994, 4.0994, 8.0994, ...], [ 16.0994, 20.0994, 24.0994, ...], [ 32.0994, 36.0994, 40.0994, ...], ], ] sum = 180.894897 ggml_debug: ffn_moe_weights_sum-16 = (f32) SUM_ROWS(ffn_moe_weights-16 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3558], [ 4.3558], [ 8.3558], ], ] sum = 13.067314 ggml_debug: ffn_moe_weights_norm-16 = (f32) DIV(ffn_moe_weights-16 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-16{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2795, 4.2795, 8.2795, ...], [ 16.2795, 20.2795, 24.2795, ...], [ 32.2795, 36.2795, 40.2795, ...], ], ] sum = 182.515427 ggml_debug: ffn_moe_weights_norm-16 (view) = (f32) VIEW(ffn_moe_weights_norm-16{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2795], [ 16.2795], [ 32.2795], ], ] sum = 48.838474 ggml_debug: ffn_moe_weighted-16 = (f32) MUL(ffn_moe_down-16{6144, 3, 1, 1}, ffn_moe_weights_norm-16 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0735, 3.9265, 7.9265, ...], [24575.9258, 24579.9258, 24583.9258, ...], [49151.9258, 49155.9258, 49159.9258, ...], ], ] sum = 221219.328125 ggml_debug: ffn_moe_up-16 = (f32) MUL_MAT_ID(blk.16.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-16{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3169, 4.3169, 8.3169, ...], [43008.3164, 43012.3164, 43016.3164, ...], [86016.3203, 86020.3203, 86024.3203, ...], ], ] sum = 387110.843750 ggml_debug: ffn_moe_gate-16 = (f32) MUL_MAT_ID(blk.16.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-16{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1443, 3.8557, 7.8557, ...], [43007.8555, 43011.8555, 43015.8555, ...], [86015.8594, 86019.8594, 86023.8594, ...], ], ] sum = 387106.750000 ggml_debug: ffn_moe_silu-16 = (f32) UNARY(ffn_moe_gate-16{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0670, 3.9330, 7.9330, ...], [43007.9336, 43011.9336, 43015.9336, ...], [86015.9297, 86019.9297, 86023.9297, ...], ], ] sum = 387107.406250 ggml_debug: ffn_moe_gate_par-16 = (f32) MUL(ffn_moe_up-16{10752, 3, 1, 1}, ffn_moe_silu-16{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0212, 3.9788, 7.9788, ...], [43007.9805, 43011.9805, 43015.9805, ...], [86015.9766, 86019.9766, 86023.9766, ...], ], ] sum = 387107.781250 ggml_debug: ffn_moe_down-16 = (f32) MUL_MAT_ID(blk.16.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-16{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1431, 3.8569, 7.8569, ...], [24575.8574, 24579.8574, 24583.8574, ...], [49151.8555, 49155.8555, 49159.8555, ...], ], ] sum = 221218.718750 ggml_debug: ffn_moe_weights_norm-16 (view) = (f32) VIEW(ffn_moe_weights_norm-16{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2580], [ 16.2580], [ 32.2580], ], ] sum = 48.774136 ggml_debug: ffn_moe_weighted-16 = (f32) MUL(ffn_moe_down-16{6144, 3, 1, 1}, ffn_moe_weights_norm-16 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0369, 3.9631, 7.9631, ...], [24575.9629, 24579.9629, 24583.9629, ...], [49151.9648, 49155.9648, 49159.9648, ...], ], ] sum = 221219.671875 ggml_debug: ffn_moe_out-16 = (f32) ADD(ffn_moe_weighted-16{6144, 3, 1, 1}, ffn_moe_weighted-16{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1104, 3.8896, 7.8896, ...], [24575.8887, 24579.8887, 24583.8887, ...], [49151.8906, 49155.8906, 49159.8906, ...], ], ] sum = 221219.015625 ggml_debug: ffn_moe_up-16 = (f32) MUL_MAT_ID(blk.16.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-16{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.4680, 4.4680, 8.4680, ...], [43008.4688, 43012.4688, 43016.4688, ...], [86016.4688, 86020.4688, 86024.4688, ...], ], ] sum = 387112.218750 ggml_debug: ffn_moe_gate-16 = (f32) MUL_MAT_ID(blk.16.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-16{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3590, 3.6410, 7.6410, ...], [43007.6406, 43011.6406, 43015.6406, ...], [86015.6406, 86019.6406, 86023.6406, ...], ], ] sum = 387104.750000 ggml_debug: ffn_moe_silu-16 = (f32) UNARY(ffn_moe_gate-16{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1476, 3.8524, 7.8524, ...], [43007.8516, 43011.8516, 43015.8516, ...], [86015.8516, 86019.8516, 86023.8516, ...], ], ] sum = 387106.656250 ggml_debug: ffn_moe_gate_par-16 = (f32) MUL(ffn_moe_up-16{10752, 3, 1, 1}, ffn_moe_silu-16{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0691, 3.9309, 7.9309, ...], [43007.9297, 43011.9297, 43015.9297, ...], [86015.9297, 86019.9297, 86023.9297, ...], ], ] sum = 387107.375000 ggml_debug: ffn_moe_down-16 = (f32) MUL_MAT_ID(blk.16.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-16{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0924, 3.9076, 7.9076, ...], [24575.9082, 24579.9082, 24583.9082, ...], [49151.9062, 49155.9062, 49159.9062, ...], ], ] sum = 221219.156250 ggml_debug: ffn_moe_weights_norm-16 (view) = (f32) VIEW(ffn_moe_weights_norm-16{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2500], [ 16.2500], [ 32.2500], ], ] sum = 48.750137 ggml_debug: ffn_moe_weighted-16 = (f32) MUL(ffn_moe_down-16{6144, 3, 1, 1}, ffn_moe_weights_norm-16 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0231, 3.9769, 7.9769, ...], [24575.9766, 24579.9766, 24583.9766, ...], [49151.9766, 49155.9766, 49159.9766, ...], ], ] sum = 221219.781250 ggml_debug: ffn_moe_out-16 = (f32) ADD(ffn_moe_out-16{6144, 3, 1, 1}, ffn_moe_weighted-16{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1335, 3.8665, 7.8665, ...], [24575.8672, 24579.8672, 24583.8672, ...], [49151.8672, 49155.8672, 49159.8672, ...], ], ] sum = 221218.812500 ggml_debug: ffn_moe_up-16 = (f32) MUL_MAT_ID(blk.16.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-16{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1996, 4.1996, 8.1996, ...], [43008.1992, 43012.1992, 43016.1992, ...], [86016.2031, 86020.2031, 86024.2031, ...], ], ] sum = 387109.812500 ggml_debug: ffn_moe_gate-16 = (f32) MUL_MAT_ID(blk.16.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-16{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0197, 4.0197, 8.0197, ...], [43008.0195, 43012.0195, 43016.0195, ...], [86016.0234, 86020.0234, 86024.0234, ...], ], ] sum = 387108.187500 ggml_debug: ffn_moe_silu-16 = (f32) UNARY(ffn_moe_gate-16{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0099, 4.0099, 8.0099, ...], [43008.0117, 43012.0117, 43016.0117, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.062500 ggml_debug: ffn_moe_gate_par-16 = (f32) MUL(ffn_moe_up-16{10752, 3, 1, 1}, ffn_moe_silu-16{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0020, 4.0020, 8.0020, ...], [43008.0039, 43012.0039, 43016.0039, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-16 = (f32) MUL_MAT_ID(blk.16.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-16{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2704, 3.7296, 7.7296, ...], [24575.7305, 24579.7305, 24583.7305, ...], [49151.7305, 49155.7305, 49159.7305, ...], ], ] sum = 221217.578125 ggml_debug: ffn_moe_weights_norm-16 (view) = (f32) VIEW(ffn_moe_weights_norm-16{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2124], [ 16.2124], [ 32.2124], ], ] sum = 48.637253 ggml_debug: ffn_moe_weighted-16 = (f32) MUL(ffn_moe_down-16{6144, 3, 1, 1}, ffn_moe_weights_norm-16 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0574, 3.9426, 7.9426, ...], [24575.9434, 24579.9434, 24583.9434, ...], [49151.9414, 49155.9414, 49159.9414, ...], ], ] sum = 221219.468750 ggml_debug: ffn_moe_out-16 = (f32) ADD(ffn_moe_out-16{6144, 3, 1, 1}, ffn_moe_weighted-16{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1910, 3.8090, 7.8090, ...], [24575.8086, 24579.8086, 24583.8086, ...], [49151.8086, 49155.8086, 49159.8086, ...], ], ] sum = 221218.296875 ggml_debug: ffn_inp-16 = (f32) ADD(kqv_out-16{6144, 3, 1, 1}, l_out-15{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0062, 3.9938, 7.9938, ...], [24575.9941, 24579.9941, 24583.9941, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: l_out-16 = (f32) ADD(ffn_moe_out-16{6144, 3, 1, 1}, ffn_inp-16{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1972, 3.8028, 7.8028, ...], [24575.8027, 24579.8027, 24583.8027, ...], [49151.8047, 49155.8047, 49159.8047, ...], ], ] sum = 221218.250000 ggml_debug: norm-17 = (f32) NORM(l_out-16{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.3570, 3.6430, 7.6430, ...], [24575.6426, 24579.6426, 24583.6426, ...], [49151.6445, 49155.6445, 49159.6445, ...], ], ] sum = 221216.781250 ggml_debug: attn_norm-17 = (f32) MUL(norm-17{6144, 3, 1, 1}, blk.17.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0899, 3.9101, 7.9101, ...], [24575.9102, 24579.9102, 24583.9102, ...], [49151.9102, 49155.9102, 49159.9102, ...], ], ] sum = 221219.187500 ggml_debug: wqkv-17 = (f32) MUL_MAT(blk.17.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-17{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.2395, 4.2395, 8.2395, ...], [32768.2383, 32772.2383, 32776.2383, ...], [65536.2422, 65540.2422, 65544.2422, ...], ], ] sum = 294950.187500 ggml_debug: wqkv_clamped-17 = (f32) CLAMP(wqkv-17{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.2395, 4.2395, 8.2395, ...], [32768.2383, 32772.2383, 32776.2383, ...], [65536.2422, 65540.2422, 65544.2422, ...], ], ] sum = 294950.187500 ggml_debug: wqkv_clamped-17 (view) = (f32) VIEW(wqkv_clamped-17{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.2395, 4.2395, 8.2395, ...], [32768.2383, 32772.2383, 32776.2383, ...], [65536.2422, 65540.2422, 65544.2422, ...], ], ] sum = 294950.187500 ggml_debug: Qcur-17 = (f32) CONT(wqkv_clamped-17 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.2395, 4.2395, 8.2395, ...], [24576.2402, 24580.2402, 24584.2402, ...], [49152.2383, 49156.2383, 49160.2383, ...], ], ] sum = 221222.140625 ggml_debug: Qcur-17 (reshaped) = (f32) RESHAPE(Qcur-17{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.2395, 4.2395, 8.2395, ...], [512.2394, 516.2394, 520.2394, ...], [1024.2395, 1028.2395, 1032.2395, ...], ... ], [ [24576.2402, 24580.2402, 24584.2402, ...], [25088.2402, 25092.2402, 25096.2402, ...], [25600.2402, 25604.2402, 25608.2402, ...], ... ], [ [49152.2383, 49156.2383, 49160.2383, ...], [49664.2383, 49668.2383, 49672.2383, ...], [50176.2383, 50180.2383, 50184.2383, ...], ... ], ] sum = 677490.500000 ggml_debug: Qcur-17 = (f32) ROPE(Qcur-17 (reshaped){128, 48, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.2395, 4.2395, 8.2395, ...], [512.2394, 516.2394, 520.2394, ...], [1024.2395, 1028.2395, 1032.2395, ...], ... ], [ [24576.2402, 24580.2402, 24584.2402, ...], [25088.2402, 25092.2402, 25096.2402, ...], [25600.2402, 25604.2402, 25608.2402, ...], ... ], [ [49152.2383, 49156.2383, 49160.2383, ...], [49664.2383, 49668.2383, 49672.2383, ...], [50176.2383, 50180.2383, 50184.2383, ...], ... ], ] sum = 677490.500000 ggml_debug: wqkv_clamped-17 (view) = (f32) VIEW(wqkv_clamped-17{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.1124, 4.1124, 8.1124, ...], [32768.1133, 32772.1133, 32776.1133, ...], [65536.1094, 65540.1094, 65544.1094, ...], ], ] sum = 294949.000000 ggml_debug: Kcur-17 = (f32) CONT(wqkv_clamped-17 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.1124, 4.1124, 8.1124, ...], [4096.1123, 4100.1123, 4104.1123, ...], [8192.1123, 8196.1123, 8200.1123, ...], ], ] sum = 36901.011719 ggml_debug: Kcur-17 (reshaped) = (f32) RESHAPE(Kcur-17{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 0.1124, 4.1124, 8.1124, ...], [512.1124, 516.1124, 520.1124, ...], [1024.1124, 1028.1124, 1032.1124, ...], ... ], [ [4096.1123, 4100.1123, 4104.1123, ...], [4608.1123, 4612.1123, 4616.1123, ...], [5120.1123, 5124.1123, 5128.1123, ...], ... ], [ [8192.1123, 8196.1123, 8200.1123, ...], [8704.1123, 8708.1123, 8712.1123, ...], [9216.1123, 9220.1123, 9224.1123, ...], ... ], ] sum = 124527.023438 ggml_debug: Kcur-17 = (f32) ROPE(Kcur-17 (reshaped){128, 8, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 0.1124, 4.1124, 8.1124, ...], [512.1124, 516.1124, 520.1124, ...], [1024.1124, 1028.1124, 1032.1124, ...], ... ], [ [4096.1123, 4100.1123, 4104.1123, ...], [4608.1123, 4612.1123, 4616.1123, ...], [5120.1123, 5124.1123, 5128.1123, ...], ... ], [ [8192.1123, 8196.1123, 8200.1123, ...], [8704.1123, 8708.1123, 8712.1123, ...], [9216.1123, 9220.1123, 9224.1123, ...], ... ], ] sum = 124527.023438 ggml_debug: wqkv_clamped-17 (view) = (f32) VIEW(wqkv_clamped-17{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0378, 3.9622, 7.9622, ...], [32767.9629, 32771.9609, 32775.9609, ...], [65535.9609, 65539.9609, 65543.9609, ...], ], ] sum = 294947.656250 ggml_debug: Vcur-17 = (f32) CONT(wqkv_clamped-17 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0378, 3.9622, 7.9622, ...], [4095.9622, 4099.9624, 4103.9624, ...], [8191.9624, 8195.9619, 8199.9619, ...], ], ] sum = 36899.660156 ggml_debug: k_cache_view-17 = (f16) VIEW(cache_k_l17{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-17 (copy of Kcur-17) = (f16) CPY(Kcur-17{128, 8, 3, 1}, k_cache_view-17{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 0.1124, 0.1125, 0.1127, ...], ], ] sum = 0.337646 ggml_debug: v_cur_t-17 = (f32) TRANSPOSE(Vcur-17{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.0378, 4095.9622, 8191.9624], [ 3.9622, 4099.9624, 8195.9619], [ 7.9622, 4103.9624, 8199.9619], ... ], ] sum = 36899.660156 ggml_debug: v_cache_view-17 = (f16) VIEW(cache_v_l17{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-17 (copy of v_cur_t-17) = (f16) CPY(v_cur_t-17{3, 1024, 1, 1}, v_cache_view-17{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.0378, -0.0378, -0.0379], [ -0.0755, -0.0756, -0.0757], [ -0.1510, -0.1512, -0.1515], ... ], ] sum = -0.794037 ggml_debug: v-17 = (f16) VIEW(cache_v_l17{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.0378, -0.0378, -0.0379, ...], [ -0.0755, -0.0756, -0.0757, ...], [ -0.1510, -0.1512, -0.1515, ...], ... ], [ [ -0.0378, -0.0378, -0.0379, ...], [ -0.0755, -0.0756, -0.0757, ...], [ -0.1510, -0.1512, -0.1515, ...], ... ], [ [ -0.0378, -0.0378, -0.0379, ...], [ -0.0755, -0.0756, -0.0757, ...], [ -0.1510, -0.1512, -0.1515, ...], ... ], ... ] sum = -2.382111 ggml_debug: k-17 = (f16) VIEW(cache_k_l17{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 0.1124, 0.1125, 0.1127, ...], [ 0.4497, 0.4502, 0.4507, ...], [ 1.7988, 1.8008, 1.8027, ...], ... ], [ [ 0.1311, 0.1313, 0.1316, ...], [ 0.5244, 0.5254, 0.5264, ...], [ 2.0977, 2.1016, 2.1055, ...], ... ], [ [ 0.1624, 0.1626, 0.1628, ...], [ 0.6494, 0.6504, 0.6514, ...], [ 2.5977, 2.6016, 2.6055, ...], ... ], ... ] sum = 25.609131 ggml_debug: q-17 = (f32) PERMUTE(Qcur-17{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.2395, 4.2395, 8.2395, ...], [24576.2402, 24580.2402, 24584.2402, ...], [49152.2383, 49156.2383, 49160.2383, ...], ], [ [512.2394, 516.2394, 520.2394, ...], [25088.2402, 25092.2402, 25096.2402, ...], [49664.2383, 49668.2383, 49672.2383, ...], ], [ [1024.2395, 1028.2395, 1032.2395, ...], [25600.2402, 25604.2402, 25608.2402, ...], [50176.2383, 50180.2383, 50184.2383, ...], ], ... ] sum = 677490.562500 ggml_debug: kq-17 = (f32) MUL_MAT(k-17{128, 32, 8, 1}, q-17{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 27.2031, 31.2031, 35.2031, ...], [155.2031, 159.2031, 163.2031, ...], [283.2031, 287.2031, 291.2031, ...], ], [ [411.2031, 415.2031, 419.2031, ...], [539.2031, 543.2031, 547.2031, ...], [667.2031, 671.2031, 675.2031, ...], ], [ [795.2031, 799.2031, 803.2031, ...], [923.2031, 927.2031, 931.2031, ...], [1051.2031, 1055.2031, 1059.2031, ...], ], ... ] sum = 14666.484375 ggml_debug: kq_soft_max_ext-17 = (f32) SOFT_MAX(kq-17{32, 3, 48, 1}, CUDA1#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-17 = (f32) MUL_MAT(v-17{32, 128, 8, 1}, kq_soft_max_ext-17{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.0378, 3.9622, 7.9622, ...], [511.9622, 515.9623, 519.9623, ...], [1023.9623, 1027.9623, 1031.9623, ...], ], [ [1535.9623, 1539.9623, 1543.9623, ...], [2047.9623, 2051.9622, 2055.9622, ...], [2559.9622, 2563.9622, 2567.9622, ...], ], [ [3071.9622, 3075.9622, 3079.9622, ...], [3583.9622, 3587.9622, 3591.9622, ...], [4095.9622, 4099.9624, 4103.9624, ...], ], ... ] sum = 55402.976562 ggml_debug: kqv_merged-17 = (f32) PERMUTE(kqv-17{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.0378, 3.9622, 7.9622, ...], [1535.9623, 1539.9623, 1543.9623, ...], [3071.9622, 3075.9622, 3079.9622, ...], ... ], [ [511.9622, 515.9623, 519.9623, ...], [2047.9623, 2051.9622, 2055.9622, ...], [3583.9622, 3587.9622, 3591.9622, ...], ... ], [ [1023.9623, 1027.9623, 1031.9623, ...], [2559.9622, 2563.9622, 2567.9622, ...], [4095.9622, 4099.9624, 4103.9624, ...], ... ], ] sum = 55402.972656 ggml_debug: kqv_merged_cont-17 = (f32) CONT(kqv_merged-17{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0378, 3.9622, 7.9622, ...], [24575.9629, 24579.9629, 24583.9629, ...], [49151.9609, 49155.9609, 49159.9609, ...], ], ] sum = 221219.656250 ggml_debug: kqv_out-17 = (f32) MUL_MAT(blk.17.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-17{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0513, 3.9487, 7.9487, ...], [24575.9492, 24579.9492, 24583.9492, ...], [49151.9492, 49155.9492, 49159.9492, ...], ], ] sum = 221219.546875 ggml_debug: norm-17 = (f32) NORM(kqv_out-17{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.3423, 3.6577, 7.6577, ...], [24575.6582, 24579.6582, 24583.6582, ...], [49151.6562, 49155.6562, 49159.6562, ...], ], ] sum = 221216.906250 ggml_debug: attn_out_norm-17 = (f32) MUL(norm-17{6144, 3, 1, 1}, blk.17.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1277, 3.8723, 7.8723, ...], [24575.8730, 24579.8730, 24583.8730, ...], [49151.8711, 49155.8711, 49159.8711, ...], ], ] sum = 221218.859375 ggml_debug: ffn_moe_logits-17 = (f32) MUL_MAT(blk.17.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-17{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ 0.3079, 4.3079, 8.3079, ...], [ 64.3079, 68.3079, 72.3079, ...], [128.3079, 132.3079, 136.3079, ...], ], ] sum = 614.770752 ggml_debug: ffn_moe_probs-17 = (f32) SOFT_MAX(ffn_moe_logits-17{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0816, 4.0816, 8.0816, ...], [ 64.0816, 68.0816, 72.0816, ...], [128.0816, 132.0816, 136.0816, ...], ], ] sum = 612.734314 ggml_debug: ffn_moe_argsort-17 = (i32) ARGSORT(ffn_moe_probs-17{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0000, 4.0000, 8.0000, ...], [ 64.0000, 68.0000, 72.0000, ...], [128.0000, 132.0000, 136.0000, ...], ], ] sum = 612.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-17{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0000, 4.0000, 8.0000, ...], [ 64.0000, 68.0000, 72.0000, ...], [128.0000, 132.0000, 136.0000, ...], ], ] sum = 612.000000 ggml_debug: ffn_moe_up-17 = (f32) MUL_MAT_ID(blk.17.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-17{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.7842, 3.2158, 7.2158, ...], [43007.2148, 43011.2148, 43015.2148, ...], [86015.2188, 86019.2188, 86023.2188, ...], ], ] sum = 387100.968750 ggml_debug: ffn_moe_gate-17 = (f32) MUL_MAT_ID(blk.17.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-17{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3245, 3.6755, 7.6755, ...], [43007.6758, 43011.6758, 43015.6758, ...], [86015.6719, 86019.6719, 86023.6719, ...], ], ] sum = 387105.062500 ggml_debug: ffn_moe_silu-17 = (f32) UNARY(ffn_moe_gate-17{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1362, 3.8638, 7.8638, ...], [43007.8633, 43011.8633, 43015.8633, ...], [86015.8672, 86019.8672, 86023.8672, ...], ], ] sum = 387106.781250 ggml_debug: ffn_moe_gate_par-17 = (f32) MUL(ffn_moe_up-17{10752, 3, 1, 1}, ffn_moe_silu-17{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1068, 4.1068, 8.1068, ...], [43008.1055, 43012.1055, 43016.1055, ...], [86016.1094, 86020.1094, 86024.1094, ...], ], ] sum = 387109.000000 ggml_debug: ffn_moe_down-17 = (f32) MUL_MAT_ID(blk.17.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-17{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2291, 3.7709, 7.7709, ...], [24575.7715, 24579.7715, 24583.7715, ...], [49151.7695, 49155.7695, 49159.7695, ...], ], ] sum = 221217.921875 ggml_debug: ffn_moe_probs-17 (reshaped) = (f32) RESHAPE(ffn_moe_probs-17{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0816], [ 4.0816], [ 8.0816], ... ], [ [ 64.0816], [ 68.0816], [ 72.0816], ... ], [ [128.0816], [132.0816], [136.0816], ... ], ] sum = 612.734314 ggml_debug: ffn_moe_weights-17 = (f32) GET_ROWS(ffn_moe_probs-17 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.0816], [ 4.0816], [ 8.0816], ... ], [ [ 16.0816], [ 20.0816], [ 24.0816], ... ], [ [ 32.0816], [ 36.0816], [ 40.0816], ... ], ] sum = 180.734299 ggml_debug: ffn_moe_weights-17 (reshaped) = (f32) RESHAPE(ffn_moe_weights-17{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0816, 4.0816, 8.0816, ...], [ 16.0816, 20.0816, 24.0816, ...], [ 32.0816, 36.0816, 40.0816, ...], ], ] sum = 180.734299 ggml_debug: ffn_moe_weights_sum-17 = (f32) SUM_ROWS(ffn_moe_weights-17 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2986], [ 4.2986], [ 8.2986], ], ] sum = 12.895732 ggml_debug: ffn_moe_weights_norm-17 = (f32) DIV(ffn_moe_weights-17 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-17{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2733, 4.2733, 8.2733, ...], [ 16.2733, 20.2733, 24.2733, ...], [ 32.2733, 36.2733, 40.2733, ...], ], ] sum = 182.459351 ggml_debug: ffn_moe_weights_norm-17 (view) = (f32) VIEW(ffn_moe_weights_norm-17{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2733], [ 16.2733], [ 32.2733], ], ] sum = 48.819782 ggml_debug: ffn_moe_weighted-17 = (f32) MUL(ffn_moe_down-17{6144, 3, 1, 1}, ffn_moe_weights_norm-17 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0626, 3.9374, 7.9374, ...], [24575.9375, 24579.9375, 24583.9375, ...], [49151.9375, 49155.9375, 49159.9375, ...], ], ] sum = 221219.437500 ggml_debug: ffn_moe_up-17 = (f32) MUL_MAT_ID(blk.17.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-17{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.7815, 3.2185, 7.2185, ...], [43007.2188, 43011.2188, 43015.2188, ...], [86015.2188, 86019.2188, 86023.2188, ...], ], ] sum = 387100.968750 ggml_debug: ffn_moe_gate-17 = (f32) MUL_MAT_ID(blk.17.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-17{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1113, 4.1113, 8.1113, ...], [43008.1094, 43012.1094, 43016.1094, ...], [86016.1094, 86020.1094, 86024.1094, ...], ], ] sum = 387109.000000 ggml_debug: ffn_moe_silu-17 = (f32) UNARY(ffn_moe_gate-17{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0587, 4.0587, 8.0587, ...], [43008.0586, 43012.0586, 43016.0586, ...], [86016.0625, 86020.0625, 86024.0625, ...], ], ] sum = 387108.562500 ggml_debug: ffn_moe_gate_par-17 = (f32) MUL(ffn_moe_up-17{10752, 3, 1, 1}, ffn_moe_silu-17{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0459, 3.9541, 7.9541, ...], [43007.9531, 43011.9531, 43015.9531, ...], [86015.9531, 86019.9531, 86023.9531, ...], ], ] sum = 387107.562500 ggml_debug: ffn_moe_down-17 = (f32) MUL_MAT_ID(blk.17.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-17{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.3723, 4.3723, 8.3723, ...], [24576.3730, 24580.3730, 24584.3730, ...], [49152.3711, 49156.3711, 49160.3711, ...], ], ] sum = 221223.359375 ggml_debug: ffn_moe_weights_norm-17 (view) = (f32) VIEW(ffn_moe_weights_norm-17{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2544], [ 16.2544], [ 32.2544], ], ] sum = 48.763092 ggml_debug: ffn_moe_weighted-17 = (f32) MUL(ffn_moe_down-17{6144, 3, 1, 1}, ffn_moe_weights_norm-17 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0947, 4.0947, 8.0947, ...], [24576.0938, 24580.0938, 24584.0938, ...], [49152.0938, 49156.0938, 49160.0938, ...], ], ] sum = 221220.843750 ggml_debug: ffn_moe_out-17 = (f32) ADD(ffn_moe_weighted-17{6144, 3, 1, 1}, ffn_moe_weighted-17{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0321, 4.0321, 8.0321, ...], [24576.0312, 24580.0312, 24584.0312, ...], [49152.0312, 49156.0312, 49160.0312, ...], ], ] sum = 221220.281250 ggml_debug: ffn_moe_up-17 = (f32) MUL_MAT_ID(blk.17.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-17{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.6751, 4.6751, 8.6751, ...], [43008.6758, 43012.6758, 43016.6758, ...], [86016.6719, 86020.6719, 86024.6719, ...], ], ] sum = 387114.062500 ggml_debug: ffn_moe_gate-17 = (f32) MUL_MAT_ID(blk.17.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-17{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2555, 4.2555, 8.2555, ...], [43008.2539, 43012.2539, 43016.2539, ...], [86016.2578, 86020.2578, 86024.2578, ...], ], ] sum = 387110.281250 ggml_debug: ffn_moe_silu-17 = (f32) UNARY(ffn_moe_gate-17{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1440, 4.1440, 8.1440, ...], [43008.1445, 43012.1445, 43016.1445, ...], [86016.1406, 86020.1406, 86024.1406, ...], ], ] sum = 387109.250000 ggml_debug: ffn_moe_gate_par-17 = (f32) MUL(ffn_moe_up-17{10752, 3, 1, 1}, ffn_moe_silu-17{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0972, 4.0972, 8.0972, ...], [43008.0977, 43012.0977, 43016.0977, ...], [86016.0938, 86020.0938, 86024.0938, ...], ], ] sum = 387108.843750 ggml_debug: ffn_moe_down-17 = (f32) MUL_MAT_ID(blk.17.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-17{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1410, 4.1410, 8.1410, ...], [24576.1406, 24580.1406, 24584.1406, ...], [49152.1406, 49156.1406, 49160.1406, ...], ], ] sum = 221221.265625 ggml_debug: ffn_moe_weights_norm-17 (view) = (f32) VIEW(ffn_moe_weights_norm-17{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2374], [ 16.2374], [ 32.2374], ], ] sum = 48.712151 ggml_debug: ffn_moe_weighted-17 = (f32) MUL(ffn_moe_down-17{6144, 3, 1, 1}, ffn_moe_weights_norm-17 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0335, 4.0335, 8.0335, ...], [24576.0332, 24580.0332, 24584.0332, ...], [49152.0352, 49156.0352, 49160.0352, ...], ], ] sum = 221220.296875 ggml_debug: ffn_moe_out-17 = (f32) ADD(ffn_moe_out-17{6144, 3, 1, 1}, ffn_moe_weighted-17{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0656, 4.0656, 8.0656, ...], [24576.0664, 24580.0664, 24584.0664, ...], [49152.0664, 49156.0664, 49160.0664, ...], ], ] sum = 221220.578125 ggml_debug: ffn_moe_up-17 = (f32) MUL_MAT_ID(blk.17.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-17{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2733, 3.7267, 7.7267, ...], [43007.7266, 43011.7266, 43015.7266, ...], [86015.7266, 86019.7266, 86023.7266, ...], ], ] sum = 387105.531250 ggml_debug: ffn_moe_gate-17 = (f32) MUL_MAT_ID(blk.17.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-17{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3696, 3.6304, 7.6304, ...], [43007.6289, 43011.6289, 43015.6289, ...], [86015.6328, 86019.6328, 86023.6328, ...], ], ] sum = 387104.656250 ggml_debug: ffn_moe_silu-17 = (f32) UNARY(ffn_moe_gate-17{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1510, 3.8490, 7.8490, ...], [43007.8477, 43011.8477, 43015.8477, ...], [86015.8516, 86019.8516, 86023.8516, ...], ], ] sum = 387106.625000 ggml_debug: ffn_moe_gate_par-17 = (f32) MUL(ffn_moe_up-17{10752, 3, 1, 1}, ffn_moe_silu-17{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0413, 4.0413, 8.0413, ...], [43008.0430, 43012.0430, 43016.0430, ...], [86016.0391, 86020.0391, 86024.0391, ...], ], ] sum = 387108.343750 ggml_debug: ffn_moe_down-17 = (f32) MUL_MAT_ID(blk.17.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-17{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1951, 3.8049, 7.8049, ...], [24575.8047, 24579.8047, 24583.8047, ...], [49151.8047, 49155.8047, 49159.8047, ...], ], ] sum = 221218.250000 ggml_debug: ffn_moe_weights_norm-17 (view) = (f32) VIEW(ffn_moe_weights_norm-17{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2350], [ 16.2350], [ 32.2350], ], ] sum = 48.704971 ggml_debug: ffn_moe_weighted-17 = (f32) MUL(ffn_moe_down-17{6144, 3, 1, 1}, ffn_moe_weights_norm-17 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0458, 3.9542, 7.9542, ...], [24575.9551, 24579.9551, 24583.9551, ...], [49151.9531, 49155.9531, 49159.9531, ...], ], ] sum = 221219.578125 ggml_debug: ffn_moe_out-17 = (f32) ADD(ffn_moe_out-17{6144, 3, 1, 1}, ffn_moe_weighted-17{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0197, 4.0197, 8.0197, ...], [24576.0195, 24580.0195, 24584.0195, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.171875 ggml_debug: ffn_inp-17 = (f32) ADD(kqv_out-17{6144, 3, 1, 1}, l_out-16{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2485, 3.7515, 7.7515, ...], [24575.7520, 24579.7520, 24583.7520, ...], [49151.7500, 49155.7500, 49159.7500, ...], ], ] sum = 221217.750000 ggml_debug: l_out-17 = (f32) ADD(ffn_moe_out-17{6144, 3, 1, 1}, ffn_inp-17{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2287, 3.7713, 7.7713, ...], [24575.7715, 24579.7715, 24583.7715, ...], [49151.7695, 49155.7695, 49159.7695, ...], ], ] sum = 221217.937500 ggml_debug: norm-18 = (f32) NORM(l_out-17{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.3898, 3.6102, 7.6102, ...], [24575.6094, 24579.6094, 24583.6094, ...], [49151.6094, 49155.6094, 49159.6094, ...], ], ] sum = 221216.484375 ggml_debug: attn_norm-18 = (f32) MUL(norm-18{6144, 3, 1, 1}, blk.18.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1058, 3.8942, 7.8942, ...], [24575.8945, 24579.8945, 24583.8945, ...], [49151.8945, 49155.8945, 49159.8945, ...], ], ] sum = 221219.031250 ggml_debug: wqkv-18 = (f32) MUL_MAT(blk.18.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-18{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.8332, 4.8332, 8.8332, ...], [32768.8320, 32772.8320, 32776.8320, ...], [65536.8359, 65540.8359, 65544.8359, ...], ], ] sum = 294955.531250 ggml_debug: wqkv_clamped-18 = (f32) CLAMP(wqkv-18{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.8332, 4.8332, 8.8332, ...], [32768.8320, 32772.8320, 32776.8320, ...], [65536.8359, 65540.8359, 65544.8359, ...], ], ] sum = 294955.531250 ggml_debug: wqkv_clamped-18 (view) = (f32) VIEW(wqkv_clamped-18{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.8332, 4.8332, 8.8332, ...], [32768.8320, 32772.8320, 32776.8320, ...], [65536.8359, 65540.8359, 65544.8359, ...], ], ] sum = 294955.531250 ggml_debug: Qcur-18 = (f32) CONT(wqkv_clamped-18 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.8332, 4.8332, 8.8332, ...], [24576.8340, 24580.8340, 24584.8340, ...], [49152.8320, 49156.8320, 49160.8320, ...], ], ] sum = 221227.484375 ggml_debug: Qcur-18 (reshaped) = (f32) RESHAPE(Qcur-18{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.8332, 4.8332, 8.8332, ...], [512.8332, 516.8332, 520.8332, ...], [1024.8331, 1028.8331, 1032.8331, ...], ... ], [ [24576.8340, 24580.8340, 24584.8340, ...], [25088.8340, 25092.8340, 25096.8340, ...], [25600.8340, 25604.8340, 25608.8340, ...], ... ], [ [49152.8320, 49156.8320, 49160.8320, ...], [49664.8320, 49668.8320, 49672.8320, ...], [50176.8320, 50180.8320, 50184.8320, ...], ... ], ] sum = 677506.437500 ggml_debug: Qcur-18 = (f32) ROPE(Qcur-18 (reshaped){128, 48, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.8332, 4.8332, 8.8332, ...], [512.8332, 516.8332, 520.8332, ...], [1024.8331, 1028.8331, 1032.8331, ...], ... ], [ [24576.8340, 24580.8340, 24584.8340, ...], [25088.8340, 25092.8340, 25096.8340, ...], [25600.8340, 25604.8340, 25608.8340, ...], ... ], [ [49152.8320, 49156.8320, 49160.8320, ...], [49664.8320, 49668.8320, 49672.8320, ...], [50176.8320, 50180.8320, 50184.8320, ...], ... ], ] sum = 677506.437500 ggml_debug: wqkv_clamped-18 (view) = (f32) VIEW(wqkv_clamped-18{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.4818, 4.4818, 8.4818, ...], [32768.4805, 32772.4805, 32776.4805, ...], [65536.4844, 65540.4844, 65544.4844, ...], ], ] sum = 294952.343750 ggml_debug: Kcur-18 = (f32) CONT(wqkv_clamped-18 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.4818, 4.4818, 8.4818, ...], [4096.4819, 4100.4819, 4104.4819, ...], [8192.4814, 8196.4814, 8200.4814, ...], ], ] sum = 36904.332031 ggml_debug: Kcur-18 (reshaped) = (f32) RESHAPE(Kcur-18{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 0.4818, 4.4818, 8.4818, ...], [512.4818, 516.4818, 520.4818, ...], [1024.4818, 1028.4818, 1032.4818, ...], ... ], [ [4096.4819, 4100.4819, 4104.4819, ...], [4608.4819, 4612.4819, 4616.4819, ...], [5120.4819, 5124.4819, 5128.4819, ...], ... ], [ [8192.4814, 8196.4814, 8200.4814, ...], [8704.4814, 8708.4814, 8712.4814, ...], [9216.4814, 9220.4814, 9224.4814, ...], ... ], ] sum = 124537.023438 ggml_debug: Kcur-18 = (f32) ROPE(Kcur-18 (reshaped){128, 8, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 0.4818, 4.4818, 8.4818, ...], [512.4818, 516.4818, 520.4818, ...], [1024.4818, 1028.4818, 1032.4818, ...], ... ], [ [4096.4819, 4100.4819, 4104.4819, ...], [4608.4819, 4612.4819, 4616.4819, ...], [5120.4819, 5124.4819, 5128.4819, ...], ... ], [ [8192.4814, 8196.4814, 8200.4814, ...], [8704.4814, 8708.4814, 8712.4814, ...], [9216.4814, 9220.4814, 9224.4814, ...], ... ], ] sum = 124537.023438 ggml_debug: wqkv_clamped-18 (view) = (f32) VIEW(wqkv_clamped-18{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.1668, 3.8332, 7.8332, ...], [32767.8340, 32771.8320, 32775.8320, ...], [65535.8320, 65539.8359, 65543.8359, ...], ], ] sum = 294946.500000 ggml_debug: Vcur-18 = (f32) CONT(wqkv_clamped-18 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.1668, 3.8332, 7.8332, ...], [4095.8333, 4099.8330, 4103.8330, ...], [8191.8330, 8195.8330, 8199.8330, ...], ], ] sum = 36898.496094 ggml_debug: k_cache_view-18 = (f16) VIEW(cache_k_l18{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-18 (copy of Kcur-18) = (f16) CPY(Kcur-18{128, 8, 3, 1}, k_cache_view-18{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 0.4819, 0.4824, 0.4829, ...], ], ] sum = 1.447266 ggml_debug: v_cur_t-18 = (f32) TRANSPOSE(Vcur-18{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.1668, 4095.8333, 8191.8330], [ 3.8332, 4099.8330, 8195.8330], [ 7.8332, 4103.8330, 8199.8330], ... ], ] sum = 36898.500000 ggml_debug: v_cache_view-18 = (f16) VIEW(cache_v_l18{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-18 (copy of v_cur_t-18) = (f16) CPY(v_cur_t-18{3, 1024, 1, 1}, v_cache_view-18{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.1667, -0.1670, -0.1672], [ -0.3335, -0.3340, -0.3345], [ -0.6670, -0.6680, -0.6689], ... ], ] sum = -3.506836 ggml_debug: v-18 = (f16) VIEW(cache_v_l18{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.1667, -0.1670, -0.1672, ...], [ -0.3335, -0.3340, -0.3345, ...], [ -0.6670, -0.6680, -0.6689, ...], ... ], [ [ -0.1667, -0.1670, -0.1672, ...], [ -0.3335, -0.3340, -0.3345, ...], [ -0.6670, -0.6680, -0.6689, ...], ... ], [ [ -0.1667, -0.1670, -0.1672, ...], [ -0.3335, -0.3340, -0.3345, ...], [ -0.6670, -0.6680, -0.6689, ...], ... ], ... ] sum = -10.520508 ggml_debug: k-18 = (f16) VIEW(cache_k_l18{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 0.4819, 0.4824, 0.4829, ...], [ 1.9277, 1.9297, 1.9316, ...], [ 7.7109, 7.7188, 7.7266, ...], ... ], [ [ 0.5889, 0.5898, 0.5908, ...], [ 2.3555, 2.3594, 2.3633, ...], [ 9.4219, 9.4375, 9.4531, ...], ... ], [ [ 0.7139, 0.7148, 0.7158, ...], [ 2.8555, 2.8594, 2.8633, ...], [ 11.4219, 11.4375, 11.4531, ...], ... ], ... ] sum = 112.587891 ggml_debug: q-18 = (f32) PERMUTE(Qcur-18{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.8332, 4.8332, 8.8332, ...], [24576.8340, 24580.8340, 24584.8340, ...], [49152.8320, 49156.8320, 49160.8320, ...], ], [ [512.8332, 516.8332, 520.8332, ...], [25088.8340, 25092.8340, 25096.8340, ...], [49664.8320, 49668.8320, 49672.8320, ...], ], [ [1024.8331, 1028.8331, 1032.8331, ...], [25600.8340, 25604.8340, 25608.8340, ...], [50176.8320, 50180.8320, 50184.8320, ...], ], ... ] sum = 677506.500000 ggml_debug: kq-18 = (f32) MUL_MAT(k-18{128, 32, 8, 1}, q-18{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 15.5781, 19.5781, 23.5781, ...], [143.5781, 147.5781, 151.5781, ...], [271.5781, 275.5781, 279.5781, ...], ], [ [399.5781, 403.5781, 407.5781, ...], [527.5781, 531.5781, 535.5781, ...], [655.5781, 659.5781, 663.5781, ...], ], [ [783.5781, 787.5781, 791.5781, ...], [911.5781, 915.5781, 919.5781, ...], [1039.5781, 1043.5781, 1047.5781, ...], ], ... ] sum = 14352.609375 ggml_debug: kq_soft_max_ext-18 = (f32) SOFT_MAX(kq-18{32, 3, 48, 1}, CUDA1#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-18 = (f32) MUL_MAT(v-18{32, 128, 8, 1}, kq_soft_max_ext-18{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.1667, 3.8333, 7.8333, ...], [511.8333, 515.8333, 519.8333, ...], [1023.8333, 1027.8333, 1031.8333, ...], ], [ [1535.8333, 1539.8333, 1543.8333, ...], [2047.8333, 2051.8333, 2055.8333, ...], [2559.8333, 2563.8333, 2567.8333, ...], ], [ [3071.8333, 3075.8333, 3079.8333, ...], [3583.8333, 3587.8333, 3591.8333, ...], [4095.8333, 4099.8330, 4103.8330, ...], ], ... ] sum = 55399.492188 ggml_debug: kqv_merged-18 = (f32) PERMUTE(kqv-18{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.1667, 3.8333, 7.8333, ...], [1535.8333, 1539.8333, 1543.8333, ...], [3071.8333, 3075.8333, 3079.8333, ...], ... ], [ [511.8333, 515.8333, 519.8333, ...], [2047.8333, 2051.8333, 2055.8333, ...], [3583.8333, 3587.8333, 3591.8333, ...], ... ], [ [1023.8333, 1027.8333, 1031.8333, ...], [2559.8333, 2563.8333, 2567.8333, ...], [4095.8333, 4099.8330, 4103.8330, ...], ... ], ] sum = 55399.492188 ggml_debug: kqv_merged_cont-18 = (f32) CONT(kqv_merged-18{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1667, 3.8333, 7.8333, ...], [24575.8340, 24579.8340, 24583.8340, ...], [49151.8320, 49155.8320, 49159.8320, ...], ], ] sum = 221218.484375 ggml_debug: kqv_out-18 = (f32) MUL_MAT(blk.18.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-18{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1213, 3.8787, 7.8787, ...], [24575.8789, 24579.8789, 24583.8789, ...], [49151.8789, 49155.8789, 49159.8789, ...], ], ] sum = 221218.906250 ggml_debug: norm-18 = (f32) NORM(kqv_out-18{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.9279, 3.0721, 7.0721, ...], [24575.0723, 24579.0723, 24583.0723, ...], [49151.0703, 49155.0703, 49159.0703, ...], ], ] sum = 221211.625000 ggml_debug: attn_out_norm-18 = (f32) MUL(norm-18{6144, 3, 1, 1}, blk.18.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.3479, 3.6521, 7.6521, ...], [24575.6523, 24579.6523, 24583.6523, ...], [49151.6523, 49155.6523, 49159.6523, ...], ], ] sum = 221216.875000 ggml_debug: ffn_moe_logits-18 = (f32) MUL_MAT(blk.18.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-18{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ 0.0017, 4.0017, 8.0017, ...], [ 64.0017, 68.0017, 72.0017, ...], [128.0017, 132.0017, 136.0017, ...], ], ] sum = 612.015381 ggml_debug: ffn_moe_probs-18 = (f32) SOFT_MAX(ffn_moe_logits-18{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0616, 4.0616, 8.0616, ...], [ 64.0616, 68.0616, 72.0616, ...], [128.0616, 132.0616, 136.0616, ...], ], ] sum = 612.554260 ggml_debug: ffn_moe_argsort-18 = (i32) ARGSORT(ffn_moe_probs-18{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 7.0000, 11.0000, 15.0000, ...], [ 71.0000, 75.0000, 79.0000, ...], [135.0000, 139.0000, 143.0000, ...], ], ] sum = 675.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-18{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 7.0000, 11.0000, 15.0000, ...], [ 71.0000, 75.0000, 79.0000, ...], [135.0000, 139.0000, 143.0000, ...], ], ] sum = 675.000000 ggml_debug: ffn_moe_up-18 = (f32) MUL_MAT_ID(blk.18.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-18{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.0088, 2.9912, 6.9912, ...], [43006.9922, 43010.9922, 43014.9922, ...], [86014.9922, 86018.9922, 86022.9922, ...], ], ] sum = 387098.937500 ggml_debug: ffn_moe_gate-18 = (f32) MUL_MAT_ID(blk.18.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-18{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.4052, 4.4052, 8.4052, ...], [43008.4062, 43012.4062, 43016.4062, ...], [86016.4062, 86020.4062, 86024.4062, ...], ], ] sum = 387111.656250 ggml_debug: ffn_moe_silu-18 = (f32) UNARY(ffn_moe_gate-18{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.2431, 4.2431, 8.2431, ...], [43008.2422, 43012.2422, 43016.2422, ...], [86016.2422, 86020.2422, 86024.2422, ...], ], ] sum = 387110.187500 ggml_debug: ffn_moe_gate_par-18 = (f32) MUL(ffn_moe_up-18{10752, 3, 1, 1}, ffn_moe_silu-18{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2452, 3.7548, 7.7548, ...], [43007.7539, 43011.7539, 43015.7539, ...], [86015.7578, 86019.7578, 86023.7578, ...], ], ] sum = 387105.781250 ggml_debug: ffn_moe_down-18 = (f32) MUL_MAT_ID(blk.18.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-18{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1196, 3.8804, 7.8804, ...], [24575.8809, 24579.8809, 24583.8809, ...], [49151.8789, 49155.8789, 49159.8789, ...], ], ] sum = 221218.906250 ggml_debug: ffn_moe_probs-18 (reshaped) = (f32) RESHAPE(ffn_moe_probs-18{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0616], [ 4.0616], [ 8.0616], ... ], [ [ 64.0616], [ 68.0616], [ 72.0616], ... ], [ [128.0616], [132.0616], [136.0616], ... ], ] sum = 612.554260 ggml_debug: ffn_moe_weights-18 = (f32) GET_ROWS(ffn_moe_probs-18 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.0869], [ 4.0869], [ 8.0869], ... ], [ [ 16.0869], [ 20.0869], [ 24.0869], ... ], [ [ 32.0869], [ 36.0869], [ 40.0869], ... ], ] sum = 180.782242 ggml_debug: ffn_moe_weights-18 (reshaped) = (f32) RESHAPE(ffn_moe_weights-18{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0869, 4.0869, 8.0869, ...], [ 16.0869, 20.0869, 24.0869, ...], [ 32.0869, 36.0869, 40.0869, ...], ], ] sum = 180.782242 ggml_debug: ffn_moe_weights_sum-18 = (f32) SUM_ROWS(ffn_moe_weights-18 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2995], [ 4.2995], [ 8.2995], ], ] sum = 12.898492 ggml_debug: ffn_moe_weights_norm-18 = (f32) DIV(ffn_moe_weights-18 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-18{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2902, 4.2902, 8.2902, ...], [ 16.2902, 20.2902, 24.2902, ...], [ 32.2902, 36.2902, 40.2902, ...], ], ] sum = 182.611862 ggml_debug: ffn_moe_weights_norm-18 (view) = (f32) VIEW(ffn_moe_weights_norm-18{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2902], [ 16.2902], [ 32.2902], ], ] sum = 48.870625 ggml_debug: ffn_moe_weighted-18 = (f32) MUL(ffn_moe_down-18{6144, 3, 1, 1}, ffn_moe_weights_norm-18 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0347, 3.9653, 7.9653, ...], [24575.9648, 24579.9648, 24583.9648, ...], [49151.9648, 49155.9648, 49159.9648, ...], ], ] sum = 221219.703125 ggml_debug: ffn_moe_up-18 = (f32) MUL_MAT_ID(blk.18.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-18{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1483, 3.8517, 7.8517, ...], [43007.8516, 43011.8516, 43015.8516, ...], [86015.8516, 86019.8516, 86023.8516, ...], ], ] sum = 387106.656250 ggml_debug: ffn_moe_gate-18 = (f32) MUL_MAT_ID(blk.18.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-18{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1845, 3.8155, 7.8155, ...], [43007.8164, 43011.8164, 43015.8164, ...], [86015.8125, 86019.8125, 86023.8125, ...], ], ] sum = 387106.312500 ggml_debug: ffn_moe_silu-18 = (f32) UNARY(ffn_moe_gate-18{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0837, 3.9163, 7.9163, ...], [43007.9180, 43011.9180, 43015.9180, ...], [86015.9141, 86019.9141, 86023.9141, ...], ], ] sum = 387107.218750 ggml_debug: ffn_moe_gate_par-18 = (f32) MUL(ffn_moe_up-18{10752, 3, 1, 1}, ffn_moe_silu-18{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0124, 4.0124, 8.0124, ...], [43008.0117, 43012.0117, 43016.0117, ...], [86016.0156, 86020.0156, 86024.0156, ...], ], ] sum = 387108.125000 ggml_debug: ffn_moe_down-18 = (f32) MUL_MAT_ID(blk.18.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-18{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0968, 4.0968, 8.0968, ...], [24576.0977, 24580.0977, 24584.0977, ...], [49152.0977, 49156.0977, 49160.0977, ...], ], ] sum = 221220.859375 ggml_debug: ffn_moe_weights_norm-18 (view) = (f32) VIEW(ffn_moe_weights_norm-18{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2476], [ 16.2476], [ 32.2476], ], ] sum = 48.742779 ggml_debug: ffn_moe_weighted-18 = (f32) MUL(ffn_moe_down-18{6144, 3, 1, 1}, ffn_moe_weights_norm-18 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0240, 4.0240, 8.0240, ...], [24576.0234, 24580.0234, 24584.0234, ...], [49152.0234, 49156.0234, 49160.0234, ...], ], ] sum = 221220.218750 ggml_debug: ffn_moe_out-18 = (f32) ADD(ffn_moe_weighted-18{6144, 3, 1, 1}, ffn_moe_weighted-18{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0108, 3.9892, 7.9892, ...], [24575.9883, 24579.9883, 24583.9883, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.890625 ggml_debug: ffn_moe_up-18 = (f32) MUL_MAT_ID(blk.18.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-18{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0021, 3.9979, 7.9979, ...], [43007.9961, 43011.9961, 43015.9961, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_gate-18 = (f32) MUL_MAT_ID(blk.18.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-18{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2208, 4.2208, 8.2208, ...], [43008.2227, 43012.2227, 43016.2227, ...], [86016.2188, 86020.2188, 86024.2188, ...], ], ] sum = 387109.968750 ggml_debug: ffn_moe_silu-18 = (f32) UNARY(ffn_moe_gate-18{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1225, 4.1225, 8.1225, ...], [43008.1211, 43012.1211, 43016.1211, ...], [86016.1250, 86020.1250, 86024.1250, ...], ], ] sum = 387109.125000 ggml_debug: ffn_moe_gate_par-18 = (f32) MUL(ffn_moe_up-18{10752, 3, 1, 1}, ffn_moe_silu-18{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0003, 3.9997, 7.9997, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-18 = (f32) MUL_MAT_ID(blk.18.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-18{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2005, 4.2005, 8.2005, ...], [24576.2012, 24580.2012, 24584.2012, ...], [49152.1992, 49156.1992, 49160.1992, ...], ], ] sum = 221221.812500 ggml_debug: ffn_moe_weights_norm-18 (view) = (f32) VIEW(ffn_moe_weights_norm-18{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2363], [ 16.2363], [ 32.2363], ], ] sum = 48.708763 ggml_debug: ffn_moe_weighted-18 = (f32) MUL(ffn_moe_down-18{6144, 3, 1, 1}, ffn_moe_weights_norm-18 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0474, 4.0474, 8.0474, ...], [24576.0469, 24580.0469, 24584.0469, ...], [49152.0469, 49156.0469, 49160.0469, ...], ], ] sum = 221220.421875 ggml_debug: ffn_moe_out-18 = (f32) ADD(ffn_moe_out-18{6144, 3, 1, 1}, ffn_moe_weighted-18{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0366, 4.0366, 8.0366, ...], [24576.0371, 24580.0371, 24584.0371, ...], [49152.0352, 49156.0352, 49160.0352, ...], ], ] sum = 221220.312500 ggml_debug: ffn_moe_up-18 = (f32) MUL_MAT_ID(blk.18.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-18{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.9160, 3.0840, 7.0840, ...], [43007.0820, 43011.0820, 43015.0820, ...], [86015.0859, 86019.0859, 86023.0859, ...], ], ] sum = 387099.781250 ggml_debug: ffn_moe_gate-18 = (f32) MUL_MAT_ID(blk.18.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-18{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1907, 3.8093, 7.8093, ...], [43007.8086, 43011.8086, 43015.8086, ...], [86015.8125, 86019.8125, 86023.8125, ...], ], ] sum = 387106.312500 ggml_debug: ffn_moe_silu-18 = (f32) UNARY(ffn_moe_gate-18{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0863, 3.9137, 7.9137, ...], [43007.9141, 43011.9141, 43015.9141, ...], [86015.9141, 86019.9141, 86023.9141, ...], ], ] sum = 387107.218750 ggml_debug: ffn_moe_gate_par-18 = (f32) MUL(ffn_moe_up-18{10752, 3, 1, 1}, ffn_moe_silu-18{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0790, 4.0790, 8.0790, ...], [43008.0781, 43012.0781, 43016.0781, ...], [86016.0781, 86020.0781, 86024.0781, ...], ], ] sum = 387108.687500 ggml_debug: ffn_moe_down-18 = (f32) MUL_MAT_ID(blk.18.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-18{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1874, 4.1874, 8.1874, ...], [24576.1875, 24580.1875, 24584.1875, ...], [49152.1875, 49156.1875, 49160.1875, ...], ], ] sum = 221221.687500 ggml_debug: ffn_moe_weights_norm-18 (view) = (f32) VIEW(ffn_moe_weights_norm-18{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2259], [ 16.2259], [ 32.2259], ], ] sum = 48.677834 ggml_debug: ffn_moe_weighted-18 = (f32) MUL(ffn_moe_down-18{6144, 3, 1, 1}, ffn_moe_weights_norm-18 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0423, 4.0423, 8.0423, ...], [24576.0430, 24580.0430, 24584.0430, ...], [49152.0430, 49156.0430, 49160.0430, ...], ], ] sum = 221220.390625 ggml_debug: ffn_moe_out-18 = (f32) ADD(ffn_moe_out-18{6144, 3, 1, 1}, ffn_moe_weighted-18{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0790, 4.0790, 8.0790, ...], [24576.0781, 24580.0781, 24584.0781, ...], [49152.0781, 49156.0781, 49160.0781, ...], ], ] sum = 221220.703125 ggml_debug: ffn_inp-18 = (f32) ADD(kqv_out-18{6144, 3, 1, 1}, l_out-17{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.3500, 3.6500, 7.6500, ...], [24575.6504, 24579.6504, 24583.6504, ...], [49151.6484, 49155.6484, 49159.6484, ...], ], ] sum = 221216.843750 ggml_debug: l_out-18 = (f32) ADD(ffn_moe_out-18{6144, 3, 1, 1}, ffn_inp-18{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2711, 3.7289, 7.7289, ...], [24575.7285, 24579.7285, 24583.7285, ...], [49151.7305, 49155.7305, 49159.7305, ...], ], ] sum = 221217.578125 ggml_debug: norm-19 = (f32) NORM(l_out-18{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.4180, 3.5820, 7.5820, ...], [24575.5820, 24579.5820, 24583.5820, ...], [49151.5820, 49155.5820, 49159.5820, ...], ], ] sum = 221216.234375 ggml_debug: attn_norm-19 = (f32) MUL(norm-19{6144, 3, 1, 1}, blk.19.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1184, 3.8816, 7.8816, ...], [24575.8809, 24579.8809, 24583.8809, ...], [49151.8828, 49155.8828, 49159.8828, ...], ], ] sum = 221218.937500 ggml_debug: wqkv-19 = (f32) MUL_MAT(blk.19.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-19{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -0.1192, 3.8808, 7.8808, ...], [32767.8809, 32771.8789, 32775.8789, ...], [65535.8789, 65539.8828, 65543.8828, ...], ], ] sum = 294946.906250 ggml_debug: wqkv_clamped-19 = (f32) CLAMP(wqkv-19{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -0.1192, 3.8808, 7.8808, ...], [32767.8809, 32771.8789, 32775.8789, ...], [65535.8789, 65539.8828, 65543.8828, ...], ], ] sum = 294946.906250 ggml_debug: wqkv_clamped-19 (view) = (f32) VIEW(wqkv_clamped-19{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1192, 3.8808, 7.8808, ...], [32767.8809, 32771.8789, 32775.8789, ...], [65535.8789, 65539.8828, 65543.8828, ...], ], ] sum = 294946.906250 ggml_debug: Qcur-19 = (f32) CONT(wqkv_clamped-19 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1192, 3.8808, 7.8808, ...], [24575.8809, 24579.8809, 24583.8809, ...], [49151.8789, 49155.8789, 49159.8789, ...], ], ] sum = 221218.921875 ggml_debug: Qcur-19 (reshaped) = (f32) RESHAPE(Qcur-19{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -0.1192, 3.8808, 7.8808, ...], [511.8808, 515.8808, 519.8808, ...], [1023.8808, 1027.8809, 1031.8809, ...], ... ], [ [24575.8809, 24579.8809, 24583.8809, ...], [25087.8809, 25091.8809, 25095.8809, ...], [25599.8809, 25603.8809, 25607.8809, ...], ... ], [ [49151.8789, 49155.8789, 49159.8789, ...], [49663.8789, 49667.8789, 49671.8789, ...], [50175.8789, 50179.8789, 50183.8789, ...], ... ], ] sum = 677480.750000 ggml_debug: Qcur-19 = (f32) ROPE(Qcur-19 (reshaped){128, 48, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -0.1192, 3.8808, 7.8808, ...], [511.8808, 515.8808, 519.8808, ...], [1023.8808, 1027.8809, 1031.8809, ...], ... ], [ [24575.8809, 24579.8809, 24583.8809, ...], [25087.8809, 25091.8809, 25095.8809, ...], [25599.8809, 25603.8809, 25607.8809, ...], ... ], [ [49151.8789, 49155.8789, 49159.8789, ...], [49663.8789, 49667.8789, 49671.8789, ...], [50175.8789, 50179.8789, 50183.8789, ...], ... ], ] sum = 677480.750000 ggml_debug: wqkv_clamped-19 (view) = (f32) VIEW(wqkv_clamped-19{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -1.4818, 2.5182, 6.5182, ...], [32766.5176, 32770.5195, 32774.5195, ...], [65534.5195, 65538.5156, 65542.5156, ...], ], ] sum = 294934.656250 ggml_debug: Kcur-19 = (f32) CONT(wqkv_clamped-19 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -1.4818, 2.5182, 6.5182, ...], [4094.5181, 4098.5181, 4102.5181, ...], [8190.5181, 8194.5186, 8198.5186, ...], ], ] sum = 36886.664062 ggml_debug: Kcur-19 (reshaped) = (f32) RESHAPE(Kcur-19{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ -1.4818, 2.5182, 6.5182, ...], [510.5182, 514.5182, 518.5182, ...], [1022.5182, 1026.5182, 1030.5182, ...], ... ], [ [4094.5181, 4098.5181, 4102.5181, ...], [4606.5181, 4610.5181, 4614.5181, ...], [5118.5181, 5122.5181, 5126.5181, ...], ... ], [ [8190.5181, 8194.5186, 8198.5186, ...], [8702.5186, 8706.5186, 8710.5186, ...], [9214.5186, 9218.5186, 9222.5186, ...], ... ], ] sum = 124483.976562 ggml_debug: Kcur-19 = (f32) ROPE(Kcur-19 (reshaped){128, 8, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ -1.4818, 2.5182, 6.5182, ...], [510.5182, 514.5182, 518.5182, ...], [1022.5182, 1026.5182, 1030.5182, ...], ... ], [ [4094.5181, 4098.5181, 4102.5181, ...], [4606.5181, 4610.5181, 4614.5181, ...], [5118.5181, 5122.5181, 5126.5181, ...], ... ], [ [8190.5181, 8194.5186, 8198.5186, ...], [8702.5186, 8706.5186, 8710.5186, ...], [9214.5186, 9218.5186, 9222.5186, ...], ... ], ] sum = 124483.976562 ggml_debug: wqkv_clamped-19 (view) = (f32) VIEW(wqkv_clamped-19{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0213, 3.9787, 7.9787, ...], [32767.9785, 32771.9805, 32775.9805, ...], [65535.9805, 65539.9766, 65543.9766, ...], ], ] sum = 294947.812500 ggml_debug: Vcur-19 = (f32) CONT(wqkv_clamped-19 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0213, 3.9787, 7.9787, ...], [4095.9788, 4099.9785, 4103.9785, ...], [8191.9785, 8195.9785, 8199.9785, ...], ], ] sum = 36899.804688 ggml_debug: k_cache_view-19 = (f16) VIEW(cache_k_l19{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-19 (copy of Kcur-19) = (f16) CPY(Kcur-19{128, 8, 3, 1}, k_cache_view-19{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ -1.4814, -1.4834, -1.4854, ...], ], ] sum = -4.450195 ggml_debug: v_cur_t-19 = (f32) TRANSPOSE(Vcur-19{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.0213, 4095.9788, 8191.9785], [ 3.9787, 4099.9785, 8195.9785], [ 7.9787, 4103.9785, 8199.9785], ... ], ] sum = 36899.804688 ggml_debug: v_cache_view-19 = (f16) VIEW(cache_v_l19{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-19 (copy of v_cur_t-19) = (f16) CPY(v_cur_t-19{3, 1024, 1, 1}, v_cache_view-19{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.0213, -0.0214, -0.0214], [ -0.0427, -0.0428, -0.0428], [ -0.0854, -0.0855, -0.0856], ... ], ] sum = -0.448929 ggml_debug: v-19 = (f16) VIEW(cache_v_l19{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.0213, -0.0214, -0.0214, ...], [ -0.0427, -0.0428, -0.0428, ...], [ -0.0854, -0.0855, -0.0856, ...], ... ], [ [ -0.0213, -0.0214, -0.0214, ...], [ -0.0427, -0.0428, -0.0428, ...], [ -0.0854, -0.0855, -0.0856, ...], ... ], [ [ -0.0213, -0.0214, -0.0214, ...], [ -0.0427, -0.0428, -0.0428, ...], [ -0.0854, -0.0855, -0.0856, ...], ... ], ... ] sum = -1.346786 ggml_debug: k-19 = (f16) VIEW(cache_k_l19{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ -1.4814, -1.4834, -1.4854, ...], [ -5.9258, -5.9336, -5.9414, ...], [-23.7031, -23.7344, -23.7656, ...], ... ], [ [ -1.7314, -1.7334, -1.7354, ...], [ -6.9258, -6.9336, -6.9414, ...], [-27.7031, -27.7344, -27.7656, ...], ... ], [ [ -1.9814, -1.9834, -1.9854, ...], [ -7.9258, -7.9336, -7.9414, ...], [-31.7031, -31.7344, -31.7656, ...], ... ], ... ] sum = -327.612305 ggml_debug: q-19 = (f32) PERMUTE(Qcur-19{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -0.1192, 3.8808, 7.8808, ...], [24575.8809, 24579.8809, 24583.8809, ...], [49151.8789, 49155.8789, 49159.8789, ...], ], [ [511.8808, 515.8808, 519.8808, ...], [25087.8809, 25091.8809, 25095.8809, ...], [49663.8789, 49667.8789, 49671.8789, ...], ], [ [1023.8808, 1027.8809, 1031.8809, ...], [25599.8809, 25603.8809, 25607.8809, ...], [50175.8789, 50179.8789, 50183.8789, ...], ], ... ] sum = 677480.687500 ggml_debug: kq-19 = (f32) MUL_MAT(k-19{128, 32, 8, 1}, q-19{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [-11.7031, -7.7031, -3.7031, ...], [116.2969, 120.2969, 124.2969, ...], [244.2969, 248.2969, 252.2969, ...], ], [ [372.2969, 376.2969, 380.2969, ...], [500.2969, 504.2969, 508.2969, ...], [628.2969, 632.2969, 636.2969, ...], ], [ [756.2969, 760.2969, 764.2969, ...], [884.2969, 888.2969, 892.2969, ...], [1012.2969, 1016.2969, 1020.2969, ...], ], ... ] sum = 13616.015625 ggml_debug: kq_soft_max_ext-19 = (f32) SOFT_MAX(kq-19{32, 3, 48, 1}, CUDA1#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-19 = (f32) MUL_MAT(v-19{32, 128, 8, 1}, kq_soft_max_ext-19{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.0213, 3.9787, 7.9787, ...], [511.9786, 515.9786, 519.9786, ...], [1023.9786, 1027.9786, 1031.9786, ...], ], [ [1535.9786, 1539.9786, 1543.9786, ...], [2047.9786, 2051.9788, 2055.9788, ...], [2559.9788, 2563.9788, 2567.9788, ...], ], [ [3071.9788, 3075.9788, 3079.9788, ...], [3583.9788, 3587.9788, 3591.9788, ...], [4095.9788, 4099.9785, 4103.9785, ...], ], ... ] sum = 55403.429688 ggml_debug: kqv_merged-19 = (f32) PERMUTE(kqv-19{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.0213, 3.9787, 7.9787, ...], [1535.9786, 1539.9786, 1543.9786, ...], [3071.9788, 3075.9788, 3079.9788, ...], ... ], [ [511.9786, 515.9786, 519.9786, ...], [2047.9786, 2051.9788, 2055.9788, ...], [3583.9788, 3587.9788, 3591.9788, ...], ... ], [ [1023.9786, 1027.9786, 1031.9786, ...], [2559.9788, 2563.9788, 2567.9788, ...], [4095.9788, 4099.9785, 4103.9785, ...], ... ], ] sum = 55403.429688 ggml_debug: kqv_merged_cont-19 = (f32) CONT(kqv_merged-19{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0213, 3.9787, 7.9787, ...], [24575.9785, 24579.9785, 24583.9785, ...], [49151.9805, 49155.9805, 49159.9805, ...], ], ] sum = 221219.812500 ggml_debug: kqv_out-19 = (f32) MUL_MAT(blk.19.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-19{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0815, 3.9185, 7.9185, ...], [24575.9180, 24579.9180, 24583.9180, ...], [49151.9180, 49155.9180, 49159.9180, ...], ], ] sum = 221219.281250 ggml_debug: norm-19 = (f32) NORM(kqv_out-19{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.4092, 3.5908, 7.5908, ...], [24575.5918, 24579.5918, 24583.5918, ...], [49151.5898, 49155.5898, 49159.5898, ...], ], ] sum = 221216.328125 ggml_debug: attn_out_norm-19 = (f32) MUL(norm-19{6144, 3, 1, 1}, blk.19.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1598, 3.8402, 7.8402, ...], [24575.8398, 24579.8398, 24583.8398, ...], [49151.8398, 49155.8398, 49159.8398, ...], ], ] sum = 221218.578125 ggml_debug: ffn_moe_logits-19 = (f32) MUL_MAT(blk.19.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-19{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.0137, 3.9863, 7.9863, ...], [ 63.9863, 67.9863, 71.9863, ...], [127.9863, 131.9863, 135.9863, ...], ], ] sum = 611.876709 ggml_debug: ffn_moe_probs-19 = (f32) SOFT_MAX(ffn_moe_logits-19{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0604, 4.0604, 8.0604, ...], [ 64.0604, 68.0604, 72.0604, ...], [128.0604, 132.0604, 136.0604, ...], ], ] sum = 612.543884 ggml_debug: ffn_moe_argsort-19 = (i32) ARGSORT(ffn_moe_probs-19{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 15.0000, 19.0000, 23.0000, ...], [ 79.0000, 83.0000, 87.0000, ...], [143.0000, 147.0000, 151.0000, ...], ], ] sum = 747.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-19{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 15.0000, 19.0000, 23.0000, ...], [ 79.0000, 83.0000, 87.0000, ...], [143.0000, 147.0000, 151.0000, ...], ], ] sum = 747.000000 ggml_debug: ffn_moe_up-19 = (f32) MUL_MAT_ID(blk.19.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-19{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.7111, 3.2889, 7.2889, ...], [43007.2891, 43011.2891, 43015.2891, ...], [86015.2891, 86019.2891, 86023.2891, ...], ], ] sum = 387101.593750 ggml_debug: ffn_moe_gate-19 = (f32) MUL_MAT_ID(blk.19.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-19{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.6029, 4.6029, 8.6029, ...], [43008.6016, 43012.6016, 43016.6016, ...], [86016.6016, 86020.6016, 86024.6016, ...], ], ] sum = 387113.406250 ggml_debug: ffn_moe_silu-19 = (f32) UNARY(ffn_moe_gate-19{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.3897, 4.3897, 8.3897, ...], [43008.3906, 43012.3906, 43016.3906, ...], [86016.3906, 86020.3906, 86024.3906, ...], ], ] sum = 387111.500000 ggml_debug: ffn_moe_gate_par-19 = (f32) MUL(ffn_moe_up-19{10752, 3, 1, 1}, ffn_moe_silu-19{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2771, 3.7229, 7.7229, ...], [43007.7227, 43011.7227, 43015.7227, ...], [86015.7266, 86019.7266, 86023.7266, ...], ], ] sum = 387105.500000 ggml_debug: ffn_moe_down-19 = (f32) MUL_MAT_ID(blk.19.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-19{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0027, 4.0027, 8.0027, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.015625 ggml_debug: ffn_moe_probs-19 (reshaped) = (f32) RESHAPE(ffn_moe_probs-19{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0604], [ 4.0604], [ 8.0604], ... ], [ [ 64.0604], [ 68.0604], [ 72.0604], ... ], [ [128.0604], [132.0604], [136.0604], ... ], ] sum = 612.543884 ggml_debug: ffn_moe_weights-19 = (f32) GET_ROWS(ffn_moe_probs-19 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.0772], [ 4.0772], [ 8.0772], ... ], [ [ 16.0772], [ 20.0772], [ 24.0772], ... ], [ [ 32.0772], [ 36.0772], [ 40.0772], ... ], ] sum = 180.695160 ggml_debug: ffn_moe_weights-19 (reshaped) = (f32) RESHAPE(ffn_moe_weights-19{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0772, 4.0772, 8.0772, ...], [ 16.0772, 20.0772, 24.0772, ...], [ 32.0772, 36.0772, 40.0772, ...], ], ] sum = 180.695160 ggml_debug: ffn_moe_weights_sum-19 = (f32) SUM_ROWS(ffn_moe_weights-19 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2897], [ 4.2897], [ 8.2897], ], ] sum = 12.869178 ggml_debug: ffn_moe_weights_norm-19 = (f32) DIV(ffn_moe_weights-19 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-19{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2666, 4.2666, 8.2666, ...], [ 16.2666, 20.2666, 24.2666, ...], [ 32.2666, 36.2666, 40.2666, ...], ], ] sum = 182.399384 ggml_debug: ffn_moe_weights_norm-19 (view) = (f32) VIEW(ffn_moe_weights_norm-19{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2666], [ 16.2666], [ 32.2666], ], ] sum = 48.799793 ggml_debug: ffn_moe_weighted-19 = (f32) MUL(ffn_moe_down-19{6144, 3, 1, 1}, ffn_moe_weights_norm-19 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0007, 4.0007, 8.0007, ...], [24576.0000, 24580.0000, 24584.0000, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_up-19 = (f32) MUL_MAT_ID(blk.19.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-19{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2996, 4.2996, 8.2996, ...], [43008.3008, 43012.3008, 43016.3008, ...], [86016.2969, 86020.2969, 86024.2969, ...], ], ] sum = 387110.687500 ggml_debug: ffn_moe_gate-19 = (f32) MUL_MAT_ID(blk.19.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-19{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0498, 3.9502, 7.9502, ...], [43007.9492, 43011.9492, 43015.9492, ...], [86015.9531, 86019.9531, 86023.9531, ...], ], ] sum = 387107.562500 ggml_debug: ffn_moe_silu-19 = (f32) UNARY(ffn_moe_gate-19{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0243, 3.9757, 7.9757, ...], [43007.9766, 43011.9766, 43015.9766, ...], [86015.9766, 86019.9766, 86023.9766, ...], ], ] sum = 387107.781250 ggml_debug: ffn_moe_gate_par-19 = (f32) MUL(ffn_moe_up-19{10752, 3, 1, 1}, ffn_moe_silu-19{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0073, 3.9927, 7.9927, ...], [43007.9922, 43011.9922, 43015.9922, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.937500 ggml_debug: ffn_moe_down-19 = (f32) MUL_MAT_ID(blk.19.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-19{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0634, 4.0634, 8.0634, ...], [24576.0625, 24580.0625, 24584.0625, ...], [49152.0625, 49156.0625, 49160.0625, ...], ], ] sum = 221220.562500 ggml_debug: ffn_moe_weights_norm-19 (view) = (f32) VIEW(ffn_moe_weights_norm-19{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2553], [ 16.2553], [ 32.2553], ], ] sum = 48.765873 ggml_debug: ffn_moe_weighted-19 = (f32) MUL(ffn_moe_down-19{6144, 3, 1, 1}, ffn_moe_weights_norm-19 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0162, 4.0162, 8.0162, ...], [24576.0156, 24580.0156, 24584.0156, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: ffn_moe_out-19 = (f32) ADD(ffn_moe_weighted-19{6144, 3, 1, 1}, ffn_moe_weighted-19{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0169, 4.0169, 8.0169, ...], [24576.0176, 24580.0176, 24584.0176, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: ffn_moe_up-19 = (f32) MUL_MAT_ID(blk.19.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-19{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2106, 3.7894, 7.7894, ...], [43007.7891, 43011.7891, 43015.7891, ...], [86015.7891, 86019.7891, 86023.7891, ...], ], ] sum = 387106.093750 ggml_debug: ffn_moe_gate-19 = (f32) MUL_MAT_ID(blk.19.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-19{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3050, 3.6950, 7.6950, ...], [43007.6953, 43011.6953, 43015.6953, ...], [86015.6953, 86019.6953, 86023.6953, ...], ], ] sum = 387105.250000 ggml_debug: ffn_moe_silu-19 = (f32) UNARY(ffn_moe_gate-19{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1294, 3.8706, 7.8706, ...], [43007.8711, 43011.8711, 43015.8711, ...], [86015.8672, 86019.8672, 86023.8672, ...], ], ] sum = 387106.843750 ggml_debug: ffn_moe_gate_par-19 = (f32) MUL(ffn_moe_up-19{10752, 3, 1, 1}, ffn_moe_silu-19{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0273, 4.0273, 8.0273, ...], [43008.0273, 43012.0273, 43016.0273, ...], [86016.0234, 86020.0234, 86024.0234, ...], ], ] sum = 387108.250000 ggml_debug: ffn_moe_down-19 = (f32) MUL_MAT_ID(blk.19.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-19{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2301, 3.7699, 7.7699, ...], [24575.7695, 24579.7695, 24583.7695, ...], [49151.7695, 49155.7695, 49159.7695, ...], ], ] sum = 221217.921875 ggml_debug: ffn_moe_weights_norm-19 (view) = (f32) VIEW(ffn_moe_weights_norm-19{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2435], [ 16.2435], [ 32.2435], ], ] sum = 48.730534 ggml_debug: ffn_moe_weighted-19 = (f32) MUL(ffn_moe_down-19{6144, 3, 1, 1}, ffn_moe_weights_norm-19 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0560, 3.9440, 7.9440, ...], [24575.9434, 24579.9434, 24583.9434, ...], [49151.9453, 49155.9453, 49159.9453, ...], ], ] sum = 221219.500000 ggml_debug: ffn_moe_out-19 = (f32) ADD(ffn_moe_out-19{6144, 3, 1, 1}, ffn_moe_weighted-19{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0391, 3.9609, 7.9609, ...], [24575.9609, 24579.9609, 24583.9609, ...], [49151.9609, 49155.9609, 49159.9609, ...], ], ] sum = 221219.656250 ggml_debug: ffn_moe_up-19 = (f32) MUL_MAT_ID(blk.19.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-19{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.5023, 3.4977, 7.4977, ...], [43007.4961, 43011.4961, 43015.4961, ...], [86015.5000, 86019.5000, 86023.5000, ...], ], ] sum = 387103.500000 ggml_debug: ffn_moe_gate-19 = (f32) MUL_MAT_ID(blk.19.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-19{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.8942, 3.1058, 7.1058, ...], [43007.1055, 43011.1055, 43015.1055, ...], [86015.1094, 86019.1094, 86023.1094, ...], ], ] sum = 387100.000000 ggml_debug: ffn_moe_silu-19 = (f32) UNARY(ffn_moe_gate-19{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.2595, 3.7405, 7.7405, ...], [43007.7422, 43011.7422, 43015.7422, ...], [86015.7422, 86019.7422, 86023.7422, ...], ], ] sum = 387105.687500 ggml_debug: ffn_moe_gate_par-19 = (f32) MUL(ffn_moe_up-19{10752, 3, 1, 1}, ffn_moe_silu-19{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1304, 4.1304, 8.1304, ...], [43008.1289, 43012.1289, 43016.1289, ...], [86016.1328, 86020.1328, 86024.1328, ...], ], ] sum = 387109.156250 ggml_debug: ffn_moe_down-19 = (f32) MUL_MAT_ID(blk.19.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-19{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.3829, 3.6171, 7.6171, ...], [24575.6172, 24579.6172, 24583.6172, ...], [49151.6172, 49155.6172, 49159.6172, ...], ], ] sum = 221216.562500 ggml_debug: ffn_moe_weights_norm-19 (view) = (f32) VIEW(ffn_moe_weights_norm-19{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2346], [ 16.2346], [ 32.2346], ], ] sum = 48.703793 ggml_debug: ffn_moe_weighted-19 = (f32) MUL(ffn_moe_down-19{6144, 3, 1, 1}, ffn_moe_weights_norm-19 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0898, 3.9102, 7.9102, ...], [24575.9102, 24579.9102, 24583.9102, ...], [49151.9102, 49155.9102, 49159.9102, ...], ], ] sum = 221219.187500 ggml_debug: ffn_moe_out-19 = (f32) ADD(ffn_moe_out-19{6144, 3, 1, 1}, ffn_moe_weighted-19{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1290, 3.8710, 7.8710, ...], [24575.8711, 24579.8711, 24583.8711, ...], [49151.8711, 49155.8711, 49159.8711, ...], ], ] sum = 221218.843750 ggml_debug: ffn_inp-19 = (f32) ADD(kqv_out-19{6144, 3, 1, 1}, l_out-18{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.3525, 3.6475, 7.6475, ...], [24575.6465, 24579.6465, 24583.6465, ...], [49151.6484, 49155.6484, 49159.6484, ...], ], ] sum = 221216.843750 ggml_debug: l_out-19 = (f32) ADD(ffn_moe_out-19{6144, 3, 1, 1}, ffn_inp-19{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.4815, 3.5185, 7.5185, ...], [24575.5176, 24579.5176, 24583.5176, ...], [49151.5195, 49155.5195, 49159.5195, ...], ], ] sum = 221215.656250 ggml_debug: norm-20 = (f32) NORM(l_out-19{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.6204, 3.3796, 7.3796, ...], [24575.3789, 24579.3789, 24583.3789, ...], [49151.3789, 49155.3789, 49159.3789, ...], ], ] sum = 221214.406250 ggml_debug: attn_norm-20 = (f32) MUL(norm-20{6144, 3, 1, 1}, blk.20.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1866, 3.8134, 7.8134, ...], [24575.8125, 24579.8125, 24583.8125, ...], [49151.8125, 49155.8125, 49159.8125, ...], ], ] sum = 221218.312500 ggml_debug: wqkv-20 = (f32) MUL_MAT(blk.20.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-20{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.5003, 4.5003, 8.5003, ...], [32768.5000, 32772.5000, 32776.5000, ...], [65536.5000, 65540.5000, 65544.5000, ...], ], ] sum = 294952.500000 ggml_debug: wqkv_clamped-20 = (f32) CLAMP(wqkv-20{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.5003, 4.5003, 8.5003, ...], [32768.5000, 32772.5000, 32776.5000, ...], [65536.5000, 65540.5000, 65544.5000, ...], ], ] sum = 294952.500000 ggml_debug: wqkv_clamped-20 (view) = (f32) VIEW(wqkv_clamped-20{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.5003, 4.5003, 8.5003, ...], [32768.5000, 32772.5000, 32776.5000, ...], [65536.5000, 65540.5000, 65544.5000, ...], ], ] sum = 294952.500000 ggml_debug: Qcur-20 = (f32) CONT(wqkv_clamped-20 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.5003, 4.5003, 8.5003, ...], [24576.5000, 24580.5000, 24584.5000, ...], [49152.5000, 49156.5000, 49160.5000, ...], ], ] sum = 221224.500000 ggml_debug: Qcur-20 (reshaped) = (f32) RESHAPE(Qcur-20{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.5003, 4.5003, 8.5003, ...], [512.5002, 516.5002, 520.5002, ...], [1024.5002, 1028.5002, 1032.5002, ...], ... ], [ [24576.5000, 24580.5000, 24584.5000, ...], [25088.5000, 25092.5000, 25096.5000, ...], [25600.5000, 25604.5000, 25608.5000, ...], ... ], [ [49152.5000, 49156.5000, 49160.5000, ...], [49664.5000, 49668.5000, 49672.5000, ...], [50176.5000, 50180.5000, 50184.5000, ...], ... ], ] sum = 677497.500000 ggml_debug: Qcur-20 = (f32) ROPE(Qcur-20 (reshaped){128, 48, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.5003, 4.5003, 8.5003, ...], [512.5002, 516.5002, 520.5002, ...], [1024.5002, 1028.5002, 1032.5002, ...], ... ], [ [24576.5000, 24580.5000, 24584.5000, ...], [25088.5000, 25092.5000, 25096.5000, ...], [25600.5000, 25604.5000, 25608.5000, ...], ... ], [ [49152.5000, 49156.5000, 49160.5000, ...], [49664.5000, 49668.5000, 49672.5000, ...], [50176.5000, 50180.5000, 50184.5000, ...], ... ], ] sum = 677497.500000 ggml_debug: wqkv_clamped-20 (view) = (f32) VIEW(wqkv_clamped-20{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.4525, 4.4525, 8.4525, ...], [32768.4531, 32772.4531, 32776.4531, ...], [65536.4531, 65540.4531, 65544.4531, ...], ], ] sum = 294952.062500 ggml_debug: Kcur-20 = (f32) CONT(wqkv_clamped-20 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.4525, 4.4525, 8.4525, ...], [4096.4526, 4100.4526, 4104.4526, ...], [8192.4521, 8196.4521, 8200.4521, ...], ], ] sum = 36904.074219 ggml_debug: Kcur-20 (reshaped) = (f32) RESHAPE(Kcur-20{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 0.4525, 4.4525, 8.4525, ...], [512.4525, 516.4525, 520.4525, ...], [1024.4525, 1028.4525, 1032.4525, ...], ... ], [ [4096.4526, 4100.4526, 4104.4526, ...], [4608.4526, 4612.4526, 4616.4526, ...], [5120.4526, 5124.4526, 5128.4526, ...], ... ], [ [8192.4521, 8196.4521, 8200.4521, ...], [8704.4521, 8708.4521, 8712.4521, ...], [9216.4521, 9220.4521, 9224.4521, ...], ... ], ] sum = 124536.226562 ggml_debug: Kcur-20 = (f32) ROPE(Kcur-20 (reshaped){128, 8, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 0.4525, 4.4525, 8.4525, ...], [512.4525, 516.4525, 520.4525, ...], [1024.4525, 1028.4525, 1032.4525, ...], ... ], [ [4096.4526, 4100.4526, 4104.4526, ...], [4608.4526, 4612.4526, 4616.4526, ...], [5120.4526, 5124.4526, 5128.4526, ...], ... ], [ [8192.4521, 8196.4521, 8200.4521, ...], [8704.4521, 8708.4521, 8712.4521, ...], [9216.4521, 9220.4521, 9224.4521, ...], ... ], ] sum = 124536.226562 ggml_debug: wqkv_clamped-20 (view) = (f32) VIEW(wqkv_clamped-20{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.1454, 3.8546, 7.8546, ...], [32767.8555, 32771.8555, 32775.8555, ...], [65535.8555, 65539.8516, 65543.8516, ...], ], ] sum = 294946.687500 ggml_debug: Vcur-20 = (f32) CONT(wqkv_clamped-20 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.1454, 3.8546, 7.8546, ...], [4095.8545, 4099.8545, 4103.8545, ...], [8191.8545, 8195.8545, 8199.8545, ...], ], ] sum = 36898.691406 ggml_debug: k_cache_view-20 = (f16) VIEW(cache_k_l20{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-20 (copy of Kcur-20) = (f16) CPY(Kcur-20{128, 8, 3, 1}, k_cache_view-20{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 0.4524, 0.4529, 0.4534, ...], ], ] sum = 1.358643 ggml_debug: v_cur_t-20 = (f32) TRANSPOSE(Vcur-20{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.1454, 4095.8545, 8191.8545], [ 3.8546, 4099.8545, 8195.8545], [ 7.8546, 4103.8545, 8199.8545], ... ], ] sum = 36898.695312 ggml_debug: v_cache_view-20 = (f16) VIEW(cache_v_l20{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-20 (copy of v_cur_t-20) = (f16) CPY(v_cur_t-20{3, 1024, 1, 1}, v_cache_view-20{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.1454, -0.1456, -0.1459], [ -0.2908, -0.2913, -0.2917], [ -0.5815, -0.5825, -0.5835], ... ], ] sum = -3.058228 ggml_debug: v-20 = (f16) VIEW(cache_v_l20{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.1454, -0.1456, -0.1459, ...], [ -0.2908, -0.2913, -0.2917, ...], [ -0.5815, -0.5825, -0.5835, ...], ... ], [ [ -0.1454, -0.1456, -0.1459, ...], [ -0.2908, -0.2913, -0.2917, ...], [ -0.5815, -0.5825, -0.5835, ...], ... ], [ [ -0.1454, -0.1456, -0.1459, ...], [ -0.2908, -0.2913, -0.2917, ...], [ -0.5815, -0.5825, -0.5835, ...], ... ], ... ] sum = -9.174683 ggml_debug: k-20 = (f16) VIEW(cache_k_l20{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 0.4524, 0.4529, 0.4534, ...], [ 1.8096, 1.8115, 1.8135, ...], [ 7.2383, 7.2461, 7.2539, ...], ... ], [ [ 0.5298, 0.5308, 0.5317, ...], [ 2.1191, 2.1230, 2.1270, ...], [ 8.4766, 8.4922, 8.5078, ...], ... ], [ [ 0.6548, 0.6558, 0.6567, ...], [ 2.6191, 2.6230, 2.6270, ...], [ 10.4766, 10.4922, 10.5078, ...], ... ], ... ] sum = 103.282471 ggml_debug: q-20 = (f32) PERMUTE(Qcur-20{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.5003, 4.5003, 8.5003, ...], [24576.5000, 24580.5000, 24584.5000, ...], [49152.5000, 49156.5000, 49160.5000, ...], ], [ [512.5002, 516.5002, 520.5002, ...], [25088.5000, 25092.5000, 25096.5000, ...], [49664.5000, 49668.5000, 49672.5000, ...], ], [ [1024.5002, 1028.5002, 1032.5002, ...], [25600.5000, 25604.5000, 25608.5000, ...], [50176.5000, 50180.5000, 50184.5000, ...], ], ... ] sum = 677497.500000 ggml_debug: kq-20 = (f32) MUL_MAT(k-20{128, 32, 8, 1}, q-20{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 19.7656, 23.7656, 27.7656, ...], [147.7656, 151.7656, 155.7656, ...], [275.7656, 279.7656, 283.7656, ...], ], [ [403.7656, 407.7656, 411.7656, ...], [531.7656, 535.7656, 539.7656, ...], [659.7656, 663.7656, 667.7656, ...], ], [ [787.7656, 791.7656, 795.7656, ...], [915.7656, 919.7656, 923.7656, ...], [1043.7656, 1047.7656, 1051.7656, ...], ], ... ] sum = 14465.671875 ggml_debug: kq_soft_max_ext-20 = (f32) SOFT_MAX(kq-20{32, 3, 48, 1}, CUDA1#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-20 = (f32) MUL_MAT(v-20{32, 128, 8, 1}, kq_soft_max_ext-20{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.1454, 3.8546, 7.8546, ...], [511.8546, 515.8546, 519.8546, ...], [1023.8546, 1027.8546, 1031.8546, ...], ], [ [1535.8546, 1539.8546, 1543.8546, ...], [2047.8546, 2051.8545, 2055.8545, ...], [2559.8545, 2563.8545, 2567.8545, ...], ], [ [3071.8545, 3075.8545, 3079.8545, ...], [3583.8545, 3587.8545, 3591.8545, ...], [4095.8545, 4099.8545, 4103.8545, ...], ], ... ] sum = 55400.082031 ggml_debug: kqv_merged-20 = (f32) PERMUTE(kqv-20{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.1454, 3.8546, 7.8546, ...], [1535.8546, 1539.8546, 1543.8546, ...], [3071.8545, 3075.8545, 3079.8545, ...], ... ], [ [511.8546, 515.8546, 519.8546, ...], [2047.8546, 2051.8545, 2055.8545, ...], [3583.8545, 3587.8545, 3591.8545, ...], ... ], [ [1023.8546, 1027.8546, 1031.8546, ...], [2559.8545, 2563.8545, 2567.8545, ...], [4095.8545, 4099.8545, 4103.8545, ...], ... ], ] sum = 55400.085938 ggml_debug: kqv_merged_cont-20 = (f32) CONT(kqv_merged-20{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1454, 3.8546, 7.8546, ...], [24575.8555, 24579.8555, 24583.8555, ...], [49151.8555, 49155.8555, 49159.8555, ...], ], ] sum = 221218.703125 ggml_debug: kqv_out-20 = (f32) MUL_MAT(blk.20.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-20{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1731, 3.8269, 7.8269, ...], [24575.8262, 24579.8262, 24583.8262, ...], [49151.8281, 49155.8281, 49159.8281, ...], ], ] sum = 221218.453125 ggml_debug: norm-20 = (f32) NORM(kqv_out-20{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.9083, 3.0917, 7.0917, ...], [24575.0918, 24579.0918, 24583.0918, ...], [49151.0898, 49155.0898, 49159.0898, ...], ], ] sum = 221211.828125 ggml_debug: attn_out_norm-20 = (f32) MUL(norm-20{6144, 3, 1, 1}, blk.20.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.3690, 3.6310, 7.6310, ...], [24575.6309, 24579.6309, 24583.6309, ...], [49151.6328, 49155.6328, 49159.6328, ...], ], ] sum = 221216.687500 ggml_debug: ffn_moe_logits-20 = (f32) MUL_MAT(blk.20.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-20{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.4016, 3.5984, 7.5984, ...], [ 63.5984, 67.5984, 71.5984, ...], [127.5984, 131.5984, 135.5984, ...], ], ] sum = 608.385498 ggml_debug: ffn_moe_probs-20 = (f32) SOFT_MAX(ffn_moe_logits-20{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0370, 4.0370, 8.0370, ...], [ 64.0370, 68.0370, 72.0370, ...], [128.0370, 132.0370, 136.0370, ...], ], ] sum = 612.332764 ggml_debug: ffn_moe_argsort-20 = (i32) ARGSORT(ffn_moe_probs-20{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 3.0000, 7.0000, 11.0000, ...], [ 67.0000, 71.0000, 75.0000, ...], [131.0000, 135.0000, 139.0000, ...], ], ] sum = 639.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-20{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 3.0000, 7.0000, 11.0000, ...], [ 67.0000, 71.0000, 75.0000, ...], [131.0000, 135.0000, 139.0000, ...], ], ] sum = 639.000000 ggml_debug: ffn_moe_up-20 = (f32) MUL_MAT_ID(blk.20.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-20{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.7285, 3.2715, 7.2715, ...], [43007.2734, 43011.2734, 43015.2734, ...], [86015.2734, 86019.2734, 86023.2734, ...], ], ] sum = 387101.468750 ggml_debug: ffn_moe_gate-20 = (f32) MUL_MAT_ID(blk.20.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-20{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.9045, 4.9045, 8.9045, ...], [43008.9062, 43012.9062, 43016.9062, ...], [86016.9062, 86020.9062, 86024.9062, ...], ], ] sum = 387116.156250 ggml_debug: ffn_moe_silu-20 = (f32) UNARY(ffn_moe_gate-20{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.6439, 4.6439, 8.6439, ...], [43008.6445, 43012.6445, 43016.6445, ...], [86016.6406, 86020.6406, 86024.6406, ...], ], ] sum = 387113.750000 ggml_debug: ffn_moe_gate_par-20 = (f32) MUL(ffn_moe_up-20{10752, 3, 1, 1}, ffn_moe_silu-20{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.4691, 3.5309, 7.5309, ...], [43007.5312, 43011.5312, 43015.5312, ...], [86015.5312, 86019.5312, 86023.5312, ...], ], ] sum = 387103.781250 ggml_debug: ffn_moe_down-20 = (f32) MUL_MAT_ID(blk.20.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-20{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1228, 3.8772, 7.8772, ...], [24575.8770, 24579.8770, 24583.8770, ...], [49151.8789, 49155.8789, 49159.8789, ...], ], ] sum = 221218.890625 ggml_debug: ffn_moe_probs-20 (reshaped) = (f32) RESHAPE(ffn_moe_probs-20{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0370], [ 4.0370], [ 8.0370], ... ], [ [ 64.0370], [ 68.0370], [ 72.0370], ... ], [ [128.0370], [132.0370], [136.0370], ... ], ] sum = 612.332764 ggml_debug: ffn_moe_weights-20 = (f32) GET_ROWS(ffn_moe_probs-20 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.0912], [ 4.0912], [ 8.0912], ... ], [ [ 16.0912], [ 20.0912], [ 24.0912], ... ], [ [ 32.0912], [ 36.0912], [ 40.0912], ... ], ] sum = 180.820923 ggml_debug: ffn_moe_weights-20 (reshaped) = (f32) RESHAPE(ffn_moe_weights-20{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0912, 4.0912, 8.0912, ...], [ 16.0912, 20.0912, 24.0912, ...], [ 32.0912, 36.0912, 40.0912, ...], ], ] sum = 180.820923 ggml_debug: ffn_moe_weights_sum-20 = (f32) SUM_ROWS(ffn_moe_weights-20 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3210], [ 4.3210], [ 8.3210], ], ] sum = 12.963135 ggml_debug: ffn_moe_weights_norm-20 = (f32) DIV(ffn_moe_weights-20 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-20{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2841, 4.2841, 8.2841, ...], [ 16.2841, 20.2841, 24.2841, ...], [ 32.2841, 36.2841, 40.2841, ...], ], ] sum = 182.557037 ggml_debug: ffn_moe_weights_norm-20 (view) = (f32) VIEW(ffn_moe_weights_norm-20{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2841], [ 16.2841], [ 32.2841], ], ] sum = 48.852341 ggml_debug: ffn_moe_weighted-20 = (f32) MUL(ffn_moe_down-20{6144, 3, 1, 1}, ffn_moe_weights_norm-20 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0349, 3.9651, 7.9651, ...], [24575.9648, 24579.9648, 24583.9648, ...], [49151.9648, 49155.9648, 49159.9648, ...], ], ] sum = 221219.687500 ggml_debug: ffn_moe_up-20 = (f32) MUL_MAT_ID(blk.20.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-20{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.0137, 2.9863, 6.9863, ...], [43006.9844, 43010.9844, 43014.9844, ...], [86014.9844, 86018.9844, 86022.9844, ...], ], ] sum = 387098.875000 ggml_debug: ffn_moe_gate-20 = (f32) MUL_MAT_ID(blk.20.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-20{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.7861, 3.2139, 7.2139, ...], [43007.2148, 43011.2148, 43015.2148, ...], [86015.2109, 86019.2109, 86023.2109, ...], ], ] sum = 387100.937500 ggml_debug: ffn_moe_silu-20 = (f32) UNARY(ffn_moe_gate-20{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.2461, 3.7539, 7.7539, ...], [43007.7539, 43011.7539, 43015.7539, ...], [86015.7578, 86019.7578, 86023.7578, ...], ], ] sum = 387105.781250 ggml_debug: ffn_moe_gate_par-20 = (f32) MUL(ffn_moe_up-20{10752, 3, 1, 1}, ffn_moe_silu-20{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2494, 4.2494, 8.2494, ...], [43008.2500, 43012.2500, 43016.2500, ...], [86016.2500, 86020.2500, 86024.2500, ...], ], ] sum = 387110.250000 ggml_debug: ffn_moe_down-20 = (f32) MUL_MAT_ID(blk.20.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-20{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0804, 3.9196, 7.9196, ...], [24575.9199, 24579.9199, 24583.9199, ...], [49151.9180, 49155.9180, 49159.9180, ...], ], ] sum = 221219.281250 ggml_debug: ffn_moe_weights_norm-20 (view) = (f32) VIEW(ffn_moe_weights_norm-20{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2485], [ 16.2485], [ 32.2485], ], ] sum = 48.745422 ggml_debug: ffn_moe_weighted-20 = (f32) MUL(ffn_moe_down-20{6144, 3, 1, 1}, ffn_moe_weights_norm-20 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0200, 3.9800, 7.9800, ...], [24575.9805, 24579.9805, 24583.9805, ...], [49151.9805, 49155.9805, 49159.9805, ...], ], ] sum = 221219.828125 ggml_debug: ffn_moe_out-20 = (f32) ADD(ffn_moe_weighted-20{6144, 3, 1, 1}, ffn_moe_weighted-20{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0549, 3.9451, 7.9451, ...], [24575.9453, 24579.9453, 24583.9453, ...], [49151.9453, 49155.9453, 49159.9453, ...], ], ] sum = 221219.500000 ggml_debug: ffn_moe_up-20 = (f32) MUL_MAT_ID(blk.20.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-20{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.6689, 3.3311, 7.3311, ...], [43007.3320, 43011.3320, 43015.3320, ...], [86015.3281, 86019.3281, 86023.3281, ...], ], ] sum = 387101.937500 ggml_debug: ffn_moe_gate-20 = (f32) MUL_MAT_ID(blk.20.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-20{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0305, 4.0306, 8.0306, ...], [43008.0312, 43012.0312, 43016.0312, ...], [86016.0312, 86020.0312, 86024.0312, ...], ], ] sum = 387108.281250 ggml_debug: ffn_moe_silu-20 = (f32) UNARY(ffn_moe_gate-20{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0155, 4.0155, 8.0155, ...], [43008.0156, 43012.0156, 43016.0156, ...], [86016.0156, 86020.0156, 86024.0156, ...], ], ] sum = 387108.125000 ggml_debug: ffn_moe_gate_par-20 = (f32) MUL(ffn_moe_up-20{10752, 3, 1, 1}, ffn_moe_silu-20{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0104, 3.9896, 7.9896, ...], [43007.9883, 43011.9883, 43015.9883, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.937500 ggml_debug: ffn_moe_down-20 = (f32) MUL_MAT_ID(blk.20.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-20{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0054, 4.0054, 8.0054, ...], [24576.0059, 24580.0059, 24584.0059, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.031250 ggml_debug: ffn_moe_weights_norm-20 (view) = (f32) VIEW(ffn_moe_weights_norm-20{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2410], [ 16.2410], [ 32.2410], ], ] sum = 48.723019 ggml_debug: ffn_moe_weighted-20 = (f32) MUL(ffn_moe_down-20{6144, 3, 1, 1}, ffn_moe_weights_norm-20 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0013, 4.0013, 8.0013, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_out-20 = (f32) ADD(ffn_moe_out-20{6144, 3, 1, 1}, ffn_moe_weighted-20{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0536, 3.9464, 7.9464, ...], [24575.9473, 24579.9473, 24583.9473, ...], [49151.9453, 49155.9453, 49159.9453, ...], ], ] sum = 221219.500000 ggml_debug: ffn_moe_up-20 = (f32) MUL_MAT_ID(blk.20.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-20{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.7171, 2.2829, 6.2829, ...], [43006.2812, 43010.2812, 43014.2812, ...], [86014.2812, 86018.2812, 86022.2812, ...], ], ] sum = 387092.531250 ggml_debug: ffn_moe_gate-20 = (f32) MUL_MAT_ID(blk.20.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-20{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.6199, 3.3801, 7.3801, ...], [43007.3789, 43011.3789, 43015.3789, ...], [86015.3828, 86019.3828, 86023.3828, ...], ], ] sum = 387102.406250 ggml_debug: ffn_moe_silu-20 = (f32) UNARY(ffn_moe_gate-20{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.2168, 3.7832, 7.7832, ...], [43007.7812, 43011.7812, 43015.7812, ...], [86015.7812, 86019.7812, 86023.7812, ...], ], ] sum = 387106.031250 ggml_debug: ffn_moe_gate_par-20 = (f32) MUL(ffn_moe_up-20{10752, 3, 1, 1}, ffn_moe_silu-20{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3723, 4.3723, 8.3723, ...], [43008.3711, 43012.3711, 43016.3711, ...], [86016.3750, 86020.3750, 86024.3750, ...], ], ] sum = 387111.375000 ggml_debug: ffn_moe_down-20 = (f32) MUL_MAT_ID(blk.20.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-20{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1828, 3.8172, 7.8172, ...], [24575.8164, 24579.8164, 24583.8164, ...], [49151.8164, 49155.8164, 49159.8164, ...], ], ] sum = 221218.343750 ggml_debug: ffn_moe_weights_norm-20 (view) = (f32) VIEW(ffn_moe_weights_norm-20{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2264], [ 16.2264], [ 32.2264], ], ] sum = 48.679214 ggml_debug: ffn_moe_weighted-20 = (f32) MUL(ffn_moe_down-20{6144, 3, 1, 1}, ffn_moe_weights_norm-20 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0414, 3.9586, 7.9586, ...], [24575.9590, 24579.9590, 24583.9590, ...], [49151.9570, 49155.9570, 49159.9570, ...], ], ] sum = 221219.609375 ggml_debug: ffn_moe_out-20 = (f32) ADD(ffn_moe_out-20{6144, 3, 1, 1}, ffn_moe_weighted-20{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0950, 3.9050, 7.9050, ...], [24575.9043, 24579.9043, 24583.9043, ...], [49151.9062, 49155.9062, 49159.9062, ...], ], ] sum = 221219.156250 ggml_debug: ffn_inp-20 = (f32) ADD(kqv_out-20{6144, 3, 1, 1}, l_out-19{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.6546, 3.3454, 7.3454, ...], [24575.3457, 24579.3457, 24583.3457, ...], [49151.3438, 49155.3438, 49159.3438, ...], ], ] sum = 221214.093750 ggml_debug: l_out-20 = (f32) ADD(ffn_moe_out-20{6144, 3, 1, 1}, ffn_inp-20{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.7495, 3.2505, 7.2505, ...], [24575.2500, 24579.2500, 24583.2500, ...], [49151.2500, 49155.2500, 49159.2500, ...], ], ] sum = 221213.250000 ggml_debug: norm-21 = (f32) NORM(l_out-20{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.8103, 3.1897, 7.1897, ...], [24575.1895, 24579.1895, 24583.1895, ...], [49151.1914, 49155.1914, 49159.1914, ...], ], ] sum = 221212.703125 ggml_debug: attn_norm-21 = (f32) MUL(norm-21{6144, 3, 1, 1}, blk.21.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2168, 3.7832, 7.7832, ...], [24575.7832, 24579.7832, 24583.7832, ...], [49151.7812, 49155.7812, 49159.7812, ...], ], ] sum = 221218.031250 ggml_debug: wqkv-21 = (f32) MUL_MAT(blk.21.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-21{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.7427, 4.7427, 8.7427, ...], [32768.7422, 32772.7422, 32776.7422, ...], [65536.7422, 65540.7422, 65544.7422, ...], ], ] sum = 294954.687500 ggml_debug: wqkv_clamped-21 = (f32) CLAMP(wqkv-21{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.7427, 4.7427, 8.7427, ...], [32768.7422, 32772.7422, 32776.7422, ...], [65536.7422, 65540.7422, 65544.7422, ...], ], ] sum = 294954.687500 ggml_debug: wqkv_clamped-21 (view) = (f32) VIEW(wqkv_clamped-21{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.7427, 4.7427, 8.7427, ...], [32768.7422, 32772.7422, 32776.7422, ...], [65536.7422, 65540.7422, 65544.7422, ...], ], ] sum = 294954.687500 ggml_debug: Qcur-21 = (f32) CONT(wqkv_clamped-21 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.7427, 4.7427, 8.7427, ...], [24576.7422, 24580.7422, 24584.7422, ...], [49152.7422, 49156.7422, 49160.7422, ...], ], ] sum = 221226.687500 ggml_debug: Qcur-21 (reshaped) = (f32) RESHAPE(Qcur-21{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.7427, 4.7427, 8.7427, ...], [512.7427, 516.7427, 520.7427, ...], [1024.7427, 1028.7427, 1032.7427, ...], ... ], [ [24576.7422, 24580.7422, 24584.7422, ...], [25088.7422, 25092.7422, 25096.7422, ...], [25600.7422, 25604.7422, 25608.7422, ...], ... ], [ [49152.7422, 49156.7422, 49160.7422, ...], [49664.7422, 49668.7422, 49672.7422, ...], [50176.7422, 50180.7422, 50184.7422, ...], ... ], ] sum = 677504.125000 ggml_debug: Qcur-21 = (f32) ROPE(Qcur-21 (reshaped){128, 48, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.7427, 4.7427, 8.7427, ...], [512.7427, 516.7427, 520.7427, ...], [1024.7427, 1028.7427, 1032.7427, ...], ... ], [ [24576.7422, 24580.7422, 24584.7422, ...], [25088.7422, 25092.7422, 25096.7422, ...], [25600.7422, 25604.7422, 25608.7422, ...], ... ], [ [49152.7422, 49156.7422, 49160.7422, ...], [49664.7422, 49668.7422, 49672.7422, ...], [50176.7422, 50180.7422, 50184.7422, ...], ... ], ] sum = 677504.125000 ggml_debug: wqkv_clamped-21 (view) = (f32) VIEW(wqkv_clamped-21{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 1.2868, 5.2868, 9.2868, ...], [32769.2852, 32773.2852, 32777.2852, ...], [65537.2891, 65541.2891, 65545.2891, ...], ], ] sum = 294959.562500 ggml_debug: Kcur-21 = (f32) CONT(wqkv_clamped-21 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 1.2868, 5.2868, 9.2868, ...], [4097.2866, 4101.2866, 4105.2866, ...], [8193.2871, 8197.2871, 8201.2871, ...], ], ] sum = 36911.582031 ggml_debug: Kcur-21 (reshaped) = (f32) RESHAPE(Kcur-21{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 1.2868, 5.2868, 9.2868, ...], [513.2868, 517.2868, 521.2868, ...], [1025.2867, 1029.2867, 1033.2867, ...], ... ], [ [4097.2866, 4101.2866, 4105.2866, ...], [4609.2866, 4613.2866, 4617.2866, ...], [5121.2866, 5125.2866, 5129.2866, ...], ... ], [ [8193.2871, 8197.2871, 8201.2871, ...], [8705.2871, 8709.2871, 8713.2871, ...], [9217.2871, 9221.2871, 9225.2871, ...], ... ], ] sum = 124558.757812 ggml_debug: Kcur-21 = (f32) ROPE(Kcur-21 (reshaped){128, 8, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 1.2868, 5.2868, 9.2868, ...], [513.2868, 517.2868, 521.2868, ...], [1025.2867, 1029.2867, 1033.2867, ...], ... ], [ [4097.2866, 4101.2866, 4105.2866, ...], [4609.2866, 4613.2866, 4617.2866, ...], [5121.2866, 5125.2866, 5129.2866, ...], ... ], [ [8193.2871, 8197.2871, 8201.2871, ...], [8705.2871, 8709.2871, 8713.2871, ...], [9217.2871, 9221.2871, 9225.2871, ...], ... ], ] sum = 124558.757812 ggml_debug: wqkv_clamped-21 (view) = (f32) VIEW(wqkv_clamped-21{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.1267, 3.8733, 7.8733, ...], [32767.8730, 32771.8750, 32775.8750, ...], [65535.8750, 65539.8750, 65543.8750, ...], ], ] sum = 294946.875000 ggml_debug: Vcur-21 = (f32) CONT(wqkv_clamped-21 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.1267, 3.8733, 7.8733, ...], [4095.8733, 4099.8730, 4103.8730, ...], [8191.8730, 8195.8730, 8199.8730, ...], ], ] sum = 36898.859375 ggml_debug: k_cache_view-21 = (f16) VIEW(cache_k_l21{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-21 (copy of Kcur-21) = (f16) CPY(Kcur-21{128, 8, 3, 1}, k_cache_view-21{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 1.2871, 1.2891, 1.2910, ...], ], ] sum = 3.867188 ggml_debug: v_cur_t-21 = (f32) TRANSPOSE(Vcur-21{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.1267, 4095.8733, 8191.8730], [ 3.8733, 4099.8730, 8195.8730], [ 7.8733, 4103.8730, 8199.8730], ... ], ] sum = 36898.859375 ggml_debug: v_cache_view-21 = (f16) VIEW(cache_v_l21{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-21 (copy of v_cur_t-21) = (f16) CPY(v_cur_t-21{3, 1024, 1, 1}, v_cache_view-21{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.1267, -0.1270, -0.1272], [ -0.2534, -0.2539, -0.2544], [ -0.5068, -0.5078, -0.5088], ... ], ] sum = -2.666016 ggml_debug: v-21 = (f16) VIEW(cache_v_l21{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.1267, -0.1270, -0.1272, ...], [ -0.2534, -0.2539, -0.2544, ...], [ -0.5068, -0.5078, -0.5088, ...], ... ], [ [ -0.1267, -0.1270, -0.1272, ...], [ -0.2534, -0.2539, -0.2544, ...], [ -0.5068, -0.5078, -0.5088, ...], ... ], [ [ -0.1267, -0.1270, -0.1272, ...], [ -0.2534, -0.2539, -0.2544, ...], [ -0.5068, -0.5078, -0.5088, ...], ... ], ... ] sum = -7.998047 ggml_debug: k-21 = (f16) VIEW(cache_k_l21{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 1.2871, 1.2891, 1.2910, ...], [ 5.1484, 5.1562, 5.1641, ...], [ 20.5938, 20.6250, 20.6562, ...], ... ], [ [ 1.5371, 1.5391, 1.5410, ...], [ 6.1484, 6.1562, 6.1641, ...], [ 24.5938, 24.6250, 24.6562, ...], ... ], [ [ 1.7871, 1.7891, 1.7910, ...], [ 7.1484, 7.1562, 7.1641, ...], [ 28.5938, 28.6250, 28.6562, ...], ... ], ... ] sum = 290.882812 ggml_debug: q-21 = (f32) PERMUTE(Qcur-21{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.7427, 4.7427, 8.7427, ...], [24576.7422, 24580.7422, 24584.7422, ...], [49152.7422, 49156.7422, 49160.7422, ...], ], [ [512.7427, 516.7427, 520.7427, ...], [25088.7422, 25092.7422, 25096.7422, ...], [49664.7422, 49668.7422, 49672.7422, ...], ], [ [1024.7427, 1028.7427, 1032.7427, ...], [25600.7422, 25604.7422, 25608.7422, ...], [50176.7422, 50180.7422, 50184.7422, ...], ], ... ] sum = 677504.187500 ggml_debug: kq-21 = (f32) MUL_MAT(k-21{128, 32, 8, 1}, q-21{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [-25.9375, -21.9375, -17.9375, ...], [102.0625, 106.0625, 110.0625, ...], [230.0625, 234.0625, 238.0625, ...], ], [ [358.0625, 362.0625, 366.0625, ...], [486.0625, 490.0625, 494.0625, ...], [614.0625, 618.0625, 622.0625, ...], ], [ [742.0625, 746.0625, 750.0625, ...], [870.0625, 874.0625, 878.0625, ...], [998.0625, 1002.0625, 1006.0625, ...], ], ... ] sum = 13231.687500 ggml_debug: kq_soft_max_ext-21 = (f32) SOFT_MAX(kq-21{32, 3, 48, 1}, CUDA1#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-21 = (f32) MUL_MAT(v-21{32, 128, 8, 1}, kq_soft_max_ext-21{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.1267, 3.8733, 7.8733, ...], [511.8733, 515.8733, 519.8733, ...], [1023.8733, 1027.8733, 1031.8733, ...], ], [ [1535.8733, 1539.8733, 1543.8733, ...], [2047.8733, 2051.8733, 2055.8733, ...], [2559.8733, 2563.8733, 2567.8733, ...], ], [ [3071.8733, 3075.8733, 3079.8733, ...], [3583.8733, 3587.8733, 3591.8733, ...], [4095.8733, 4099.8730, 4103.8730, ...], ], ... ] sum = 55400.585938 ggml_debug: kqv_merged-21 = (f32) PERMUTE(kqv-21{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.1267, 3.8733, 7.8733, ...], [1535.8733, 1539.8733, 1543.8733, ...], [3071.8733, 3075.8733, 3079.8733, ...], ... ], [ [511.8733, 515.8733, 519.8733, ...], [2047.8733, 2051.8733, 2055.8733, ...], [3583.8733, 3587.8733, 3591.8733, ...], ... ], [ [1023.8733, 1027.8733, 1031.8733, ...], [2559.8733, 2563.8733, 2567.8733, ...], [4095.8733, 4099.8730, 4103.8730, ...], ... ], ] sum = 55400.585938 ggml_debug: kqv_merged_cont-21 = (f32) CONT(kqv_merged-21{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1267, 3.8733, 7.8733, ...], [24575.8730, 24579.8730, 24583.8730, ...], [49151.8750, 49155.8750, 49159.8750, ...], ], ] sum = 221218.875000 ggml_debug: kqv_out-21 = (f32) MUL_MAT(blk.21.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-21{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1699, 3.8301, 7.8301, ...], [24575.8301, 24579.8301, 24583.8301, ...], [49151.8320, 49155.8320, 49159.8320, ...], ], ] sum = 221218.468750 ggml_debug: norm-21 = (f32) NORM(kqv_out-21{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.7876, 3.2124, 7.2124, ...], [24575.2129, 24579.2129, 24583.2129, ...], [49151.2109, 49155.2109, 49159.2109, ...], ], ] sum = 221212.906250 ggml_debug: attn_out_norm-21 = (f32) MUL(norm-21{6144, 3, 1, 1}, blk.21.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.3307, 3.6693, 7.6693, ...], [24575.6699, 24579.6699, 24583.6699, ...], [49151.6680, 49155.6680, 49159.6680, ...], ], ] sum = 221217.031250 ggml_debug: ffn_moe_logits-21 = (f32) MUL_MAT(blk.21.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-21{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ 0.0970, 4.0970, 8.0970, ...], [ 64.0970, 68.0970, 72.0970, ...], [128.0970, 132.0970, 136.0970, ...], ], ] sum = 612.873413 ggml_debug: ffn_moe_probs-21 = (f32) SOFT_MAX(ffn_moe_logits-21{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0681, 4.0681, 8.0681, ...], [ 64.0681, 68.0681, 72.0681, ...], [128.0681, 132.0681, 136.0681, ...], ], ] sum = 612.612549 ggml_debug: ffn_moe_argsort-21 = (i32) ARGSORT(ffn_moe_probs-21{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 4.0000, 8.0000, 12.0000, ...], [ 68.0000, 72.0000, 76.0000, ...], [132.0000, 136.0000, 140.0000, ...], ], ] sum = 648.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-21{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 4.0000, 8.0000, 12.0000, ...], [ 68.0000, 72.0000, 76.0000, ...], [132.0000, 136.0000, 140.0000, ...], ], ] sum = 648.000000 ggml_debug: ffn_moe_up-21 = (f32) MUL_MAT_ID(blk.21.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-21{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.4997, 3.5003, 7.5003, ...], [43007.5000, 43011.5000, 43015.5000, ...], [86015.5000, 86019.5000, 86023.5000, ...], ], ] sum = 387103.500000 ggml_debug: ffn_moe_gate-21 = (f32) MUL_MAT_ID(blk.21.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-21{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.8199, 3.1801, 7.1801, ...], [43007.1797, 43011.1797, 43015.1797, ...], [86015.1797, 86019.1797, 86023.1797, ...], ], ] sum = 387100.625000 ggml_debug: ffn_moe_silu-21 = (f32) UNARY(ffn_moe_gate-21{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.2507, 3.7493, 7.7493, ...], [43007.7500, 43011.7500, 43015.7500, ...], [86015.7500, 86019.7500, 86023.7500, ...], ], ] sum = 387105.750000 ggml_debug: ffn_moe_gate_par-21 = (f32) MUL(ffn_moe_up-21{10752, 3, 1, 1}, ffn_moe_silu-21{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1253, 4.1253, 8.1253, ...], [43008.1250, 43012.1250, 43016.1250, ...], [86016.1250, 86020.1250, 86024.1250, ...], ], ] sum = 387109.125000 ggml_debug: ffn_moe_down-21 = (f32) MUL_MAT_ID(blk.21.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-21{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 1.5465, 5.5465, 9.5465, ...], [24577.5469, 24581.5469, 24585.5469, ...], [49153.5469, 49157.5469, 49161.5469, ...], ], ] sum = 221233.921875 ggml_debug: ffn_moe_probs-21 (reshaped) = (f32) RESHAPE(ffn_moe_probs-21{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0681], [ 4.0681], [ 8.0681], ... ], [ [ 64.0681], [ 68.0681], [ 72.0681], ... ], [ [128.0681], [132.0681], [136.0681], ... ], ] sum = 612.612549 ggml_debug: ffn_moe_weights-21 = (f32) GET_ROWS(ffn_moe_probs-21 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1112], [ 4.1112], [ 8.1112], ... ], [ [ 16.1112], [ 20.1112], [ 24.1112], ... ], [ [ 32.1112], [ 36.1112], [ 40.1112], ... ], ] sum = 181.000717 ggml_debug: ffn_moe_weights-21 (reshaped) = (f32) RESHAPE(ffn_moe_weights-21{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1112, 4.1112, 8.1112, ...], [ 16.1112, 20.1112, 24.1112, ...], [ 32.1112, 36.1112, 40.1112, ...], ], ] sum = 181.000717 ggml_debug: ffn_moe_weights_sum-21 = (f32) SUM_ROWS(ffn_moe_weights-21 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3499], [ 4.3499], [ 8.3499], ], ] sum = 13.049602 ggml_debug: ffn_moe_weights_norm-21 = (f32) DIV(ffn_moe_weights-21 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-21{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3178, 4.3178, 8.3178, ...], [ 16.3178, 20.3178, 24.3178, ...], [ 32.3178, 36.3178, 40.3178, ...], ], ] sum = 182.860291 ggml_debug: ffn_moe_weights_norm-21 (view) = (f32) VIEW(ffn_moe_weights_norm-21{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3178], [ 16.3178], [ 32.3178], ], ] sum = 48.953426 ggml_debug: ffn_moe_weighted-21 = (f32) MUL(ffn_moe_down-21{6144, 3, 1, 1}, ffn_moe_weights_norm-21 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.4915, 4.4915, 8.4915, ...], [24576.4922, 24580.4922, 24584.4922, ...], [49152.4922, 49156.4922, 49160.4922, ...], ], ] sum = 221224.437500 ggml_debug: ffn_moe_up-21 = (f32) MUL_MAT_ID(blk.21.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-21{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.2907, 2.7093, 6.7093, ...], [43006.7109, 43010.7109, 43014.7109, ...], [86014.7109, 86018.7109, 86022.7109, ...], ], ] sum = 387096.406250 ggml_debug: ffn_moe_gate-21 = (f32) MUL_MAT_ID(blk.21.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-21{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.6939, 4.6939, 8.6939, ...], [43008.6953, 43012.6953, 43016.6953, ...], [86016.6953, 86020.6953, 86024.6953, ...], ], ] sum = 387114.250000 ggml_debug: ffn_moe_silu-21 = (f32) UNARY(ffn_moe_gate-21{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.4627, 4.4627, 8.4627, ...], [43008.4609, 43012.4609, 43016.4609, ...], [86016.4609, 86020.4609, 86024.4609, ...], ], ] sum = 387112.156250 ggml_debug: ffn_moe_gate_par-21 = (f32) MUL(ffn_moe_up-21{10752, 3, 1, 1}, ffn_moe_silu-21{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.5973, 3.4027, 7.4027, ...], [43007.4023, 43011.4023, 43015.4023, ...], [86015.4062, 86019.4062, 86023.4062, ...], ], ] sum = 387102.656250 ggml_debug: ffn_moe_down-21 = (f32) MUL_MAT_ID(blk.21.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-21{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2419, 3.7581, 7.7581, ...], [24575.7578, 24579.7578, 24583.7578, ...], [49151.7578, 49155.7578, 49159.7578, ...], ], ] sum = 221217.812500 ggml_debug: ffn_moe_weights_norm-21 (view) = (f32) VIEW(ffn_moe_weights_norm-21{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2818], [ 16.2818], [ 32.2818], ], ] sum = 48.845512 ggml_debug: ffn_moe_weighted-21 = (f32) MUL(ffn_moe_down-21{6144, 3, 1, 1}, ffn_moe_weights_norm-21 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0682, 3.9318, 7.9318, ...], [24575.9316, 24579.9316, 24583.9316, ...], [49151.9336, 49155.9336, 49159.9336, ...], ], ] sum = 221219.390625 ggml_debug: ffn_moe_out-21 = (f32) ADD(ffn_moe_weighted-21{6144, 3, 1, 1}, ffn_moe_weighted-21{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.4233, 4.4233, 8.4233, ...], [24576.4238, 24580.4238, 24584.4238, ...], [49152.4219, 49156.4219, 49160.4219, ...], ], ] sum = 221223.796875 ggml_debug: ffn_moe_up-21 = (f32) MUL_MAT_ID(blk.21.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-21{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0861, 4.0861, 8.0861, ...], [43008.0859, 43012.0859, 43016.0859, ...], [86016.0859, 86020.0859, 86024.0859, ...], ], ] sum = 387108.781250 ggml_debug: ffn_moe_gate-21 = (f32) MUL_MAT_ID(blk.21.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-21{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0112, 3.9888, 7.9888, ...], [43007.9883, 43011.9883, 43015.9883, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.906250 ggml_debug: ffn_moe_silu-21 = (f32) UNARY(ffn_moe_gate-21{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0055, 3.9945, 7.9945, ...], [43007.9961, 43011.9961, 43015.9961, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.968750 ggml_debug: ffn_moe_gate_par-21 = (f32) MUL(ffn_moe_up-21{10752, 3, 1, 1}, ffn_moe_silu-21{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0005, 3.9995, 7.9995, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-21 = (f32) MUL_MAT_ID(blk.21.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-21{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2477, 3.7523, 7.7523, ...], [24575.7520, 24579.7520, 24583.7520, ...], [49151.7539, 49155.7539, 49159.7539, ...], ], ] sum = 221217.765625 ggml_debug: ffn_moe_weights_norm-21 (view) = (f32) VIEW(ffn_moe_weights_norm-21{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2058], [ 16.2058], [ 32.2058], ], ] sum = 48.617458 ggml_debug: ffn_moe_weighted-21 = (f32) MUL(ffn_moe_down-21{6144, 3, 1, 1}, ffn_moe_weights_norm-21 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0510, 3.9490, 7.9490, ...], [24575.9492, 24579.9492, 24583.9492, ...], [49151.9492, 49155.9492, 49159.9492, ...], ], ] sum = 221219.546875 ggml_debug: ffn_moe_out-21 = (f32) ADD(ffn_moe_out-21{6144, 3, 1, 1}, ffn_moe_weighted-21{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.3723, 4.3723, 8.3723, ...], [24576.3730, 24580.3730, 24584.3730, ...], [49152.3711, 49156.3711, 49160.3711, ...], ], ] sum = 221223.359375 ggml_debug: ffn_moe_up-21 = (f32) MUL_MAT_ID(blk.21.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-21{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1513, 4.1513, 8.1513, ...], [43008.1523, 43012.1523, 43016.1523, ...], [86016.1484, 86020.1484, 86024.1484, ...], ], ] sum = 387109.375000 ggml_debug: ffn_moe_gate-21 = (f32) MUL_MAT_ID(blk.21.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-21{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3770, 3.6230, 7.6230, ...], [43007.6211, 43011.6211, 43015.6211, ...], [86015.6250, 86019.6250, 86023.6250, ...], ], ] sum = 387104.625000 ggml_debug: ffn_moe_silu-21 = (f32) UNARY(ffn_moe_gate-21{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1534, 3.8466, 7.8466, ...], [43007.8477, 43011.8477, 43015.8477, ...], [86015.8438, 86019.8438, 86023.8438, ...], ], ] sum = 387106.593750 ggml_debug: ffn_moe_gate_par-21 = (f32) MUL(ffn_moe_up-21{10752, 3, 1, 1}, ffn_moe_silu-21{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0232, 3.9768, 7.9768, ...], [43007.9766, 43011.9766, 43015.9766, ...], [86015.9766, 86019.9766, 86023.9766, ...], ], ] sum = 387107.781250 ggml_debug: ffn_moe_down-21 = (f32) MUL_MAT_ID(blk.21.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-21{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.4262, 3.5738, 7.5738, ...], [24575.5742, 24579.5742, 24583.5742, ...], [49151.5742, 49155.5742, 49159.5742, ...], ], ] sum = 221216.171875 ggml_debug: ffn_moe_weights_norm-21 (view) = (f32) VIEW(ffn_moe_weights_norm-21{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1945], [ 16.1945], [ 32.1945], ], ] sum = 48.583603 ggml_debug: ffn_moe_weighted-21 = (f32) MUL(ffn_moe_down-21{6144, 3, 1, 1}, ffn_moe_weights_norm-21 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0829, 3.9171, 7.9171, ...], [24575.9180, 24579.9180, 24583.9180, ...], [49151.9180, 49155.9180, 49159.9180, ...], ], ] sum = 221219.265625 ggml_debug: ffn_moe_out-21 = (f32) ADD(ffn_moe_out-21{6144, 3, 1, 1}, ffn_moe_weighted-21{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2894, 4.2894, 8.2894, ...], [24576.2891, 24580.2891, 24584.2891, ...], [49152.2891, 49156.2891, 49160.2891, ...], ], ] sum = 221222.593750 ggml_debug: ffn_inp-21 = (f32) ADD(kqv_out-21{6144, 3, 1, 1}, l_out-20{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.9194, 3.0806, 7.0806, ...], [24575.0801, 24579.0801, 24583.0801, ...], [49151.0820, 49155.0820, 49159.0820, ...], ], ] sum = 221211.718750 ggml_debug: l_out-21 = (f32) ADD(ffn_moe_out-21{6144, 3, 1, 1}, ffn_inp-21{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.6300, 3.3700, 7.3700, ...], [24575.3691, 24579.3691, 24583.3691, ...], [49151.3711, 49155.3711, 49159.3711, ...], ], ] sum = 221214.343750 ggml_debug: norm-22 = (f32) NORM(l_out-21{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.5927, 3.4073, 7.4073, ...], [24575.4082, 24579.4082, 24583.4082, ...], [49151.4062, 49155.4062, 49159.4062, ...], ], ] sum = 221214.656250 ggml_debug: attn_norm-22 = (f32) MUL(norm-22{6144, 3, 1, 1}, blk.22.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1713, 3.8287, 7.8287, ...], [24575.8281, 24579.8281, 24583.8281, ...], [49151.8281, 49155.8281, 49159.8281, ...], ], ] sum = 221218.453125 ggml_debug: wqkv-22 = (f32) MUL_MAT(blk.22.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-22{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -0.1330, 3.8670, 7.8670, ...], [32767.8672, 32771.8672, 32775.8672, ...], [65535.8672, 65539.8672, 65543.8672, ...], ], ] sum = 294946.812500 ggml_debug: wqkv_clamped-22 = (f32) CLAMP(wqkv-22{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -0.1330, 3.8670, 7.8670, ...], [32767.8672, 32771.8672, 32775.8672, ...], [65535.8672, 65539.8672, 65543.8672, ...], ], ] sum = 294946.812500 ggml_debug: wqkv_clamped-22 (view) = (f32) VIEW(wqkv_clamped-22{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1330, 3.8670, 7.8670, ...], [32767.8672, 32771.8672, 32775.8672, ...], [65535.8672, 65539.8672, 65543.8672, ...], ], ] sum = 294946.812500 ggml_debug: Qcur-22 = (f32) CONT(wqkv_clamped-22 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1330, 3.8670, 7.8670, ...], [24575.8672, 24579.8672, 24583.8672, ...], [49151.8672, 49155.8672, 49159.8672, ...], ], ] sum = 221218.812500 ggml_debug: Qcur-22 (reshaped) = (f32) RESHAPE(Qcur-22{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -0.1330, 3.8670, 7.8670, ...], [511.8670, 515.8670, 519.8670, ...], [1023.8670, 1027.8671, 1031.8671, ...], ... ], [ [24575.8672, 24579.8672, 24583.8672, ...], [25087.8672, 25091.8672, 25095.8672, ...], [25599.8672, 25603.8672, 25607.8672, ...], ... ], [ [49151.8672, 49155.8672, 49159.8672, ...], [49663.8672, 49667.8672, 49671.8672, ...], [50175.8672, 50179.8672, 50183.8672, ...], ... ], ] sum = 677480.500000 ggml_debug: Qcur-22 = (f32) ROPE(Qcur-22 (reshaped){128, 48, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -0.1330, 3.8670, 7.8670, ...], [511.8670, 515.8670, 519.8670, ...], [1023.8670, 1027.8671, 1031.8671, ...], ... ], [ [24575.8672, 24579.8672, 24583.8672, ...], [25087.8672, 25091.8672, 25095.8672, ...], [25599.8672, 25603.8672, 25607.8672, ...], ... ], [ [49151.8672, 49155.8672, 49159.8672, ...], [49663.8672, 49667.8672, 49671.8672, ...], [50175.8672, 50179.8672, 50183.8672, ...], ... ], ] sum = 677480.500000 ggml_debug: wqkv_clamped-22 (view) = (f32) VIEW(wqkv_clamped-22{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 2.5090, 6.5090, 10.5090, ...], [32770.5078, 32774.5078, 32778.5078, ...], [65538.5078, 65542.5078, 65546.5078, ...], ], ] sum = 294970.562500 ggml_debug: Kcur-22 = (f32) CONT(wqkv_clamped-22 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 2.5090, 6.5090, 10.5090, ...], [4098.5088, 4102.5088, 4106.5088, ...], [8194.5088, 8198.5088, 8202.5088, ...], ], ] sum = 36922.578125 ggml_debug: Kcur-22 (reshaped) = (f32) RESHAPE(Kcur-22{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 2.5090, 6.5090, 10.5090, ...], [514.5090, 518.5090, 522.5090, ...], [1026.5090, 1030.5090, 1034.5090, ...], ... ], [ [4098.5088, 4102.5088, 4106.5088, ...], [4610.5088, 4614.5088, 4618.5088, ...], [5122.5088, 5126.5088, 5130.5088, ...], ... ], [ [8194.5088, 8198.5088, 8202.5088, ...], [8706.5088, 8710.5088, 8714.5088, ...], [9218.5088, 9222.5088, 9226.5088, ...], ... ], ] sum = 124591.726562 ggml_debug: Kcur-22 = (f32) ROPE(Kcur-22 (reshaped){128, 8, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 2.5090, 6.5090, 10.5090, ...], [514.5090, 518.5090, 522.5090, ...], [1026.5090, 1030.5090, 1034.5090, ...], ... ], [ [4098.5088, 4102.5088, 4106.5088, ...], [4610.5088, 4614.5088, 4618.5088, ...], [5122.5088, 5126.5088, 5130.5088, ...], ... ], [ [8194.5088, 8198.5088, 8202.5088, ...], [8706.5088, 8710.5088, 8714.5088, ...], [9218.5088, 9222.5088, 9226.5088, ...], ... ], ] sum = 124591.726562 ggml_debug: wqkv_clamped-22 (view) = (f32) VIEW(wqkv_clamped-22{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.1872, 4.1872, 8.1872, ...], [32768.1875, 32772.1875, 32776.1875, ...], [65536.1875, 65540.1875, 65544.1875, ...], ], ] sum = 294949.687500 ggml_debug: Vcur-22 = (f32) CONT(wqkv_clamped-22 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.1872, 4.1872, 8.1872, ...], [4096.1870, 4100.1870, 4104.1870, ...], [8192.1875, 8196.1875, 8200.1875, ...], ], ] sum = 36901.687500 ggml_debug: k_cache_view-22 = (f16) VIEW(cache_k_l22{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-22 (copy of Kcur-22) = (f16) CPY(Kcur-22{128, 8, 3, 1}, k_cache_view-22{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 2.5098, 2.5137, 2.5176, ...], ], ] sum = 7.541016 ggml_debug: v_cur_t-22 = (f32) TRANSPOSE(Vcur-22{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.1872, 4096.1870, 8192.1875], [ 4.1872, 4100.1870, 8196.1875], [ 8.1872, 4104.1870, 8200.1875], ... ], ] sum = 36901.687500 ggml_debug: v_cache_view-22 = (f16) VIEW(cache_v_l22{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-22 (copy of v_cur_t-22) = (f16) CPY(v_cur_t-22{3, 1024, 1, 1}, v_cache_view-22{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.1871, 0.1874, 0.1876], [ 0.3743, 0.3748, 0.3752], [ 0.7485, 0.7495, 0.7505], ... ], ] sum = 3.934937 ggml_debug: v-22 = (f16) VIEW(cache_v_l22{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.1871, 0.1874, 0.1876, ...], [ 0.3743, 0.3748, 0.3752, ...], [ 0.7485, 0.7495, 0.7505, ...], ... ], [ [ 0.1871, 0.1874, 0.1876, ...], [ 0.3743, 0.3748, 0.3752, ...], [ 0.7485, 0.7495, 0.7505, ...], ... ], [ [ 0.1871, 0.1874, 0.1876, ...], [ 0.3743, 0.3748, 0.3752, ...], [ 0.7485, 0.7495, 0.7505, ...], ... ], ... ] sum = 11.804810 ggml_debug: k-22 = (f16) VIEW(cache_k_l22{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 2.5098, 2.5137, 2.5176, ...], [ 10.0391, 10.0547, 10.0703, ...], [ 40.1562, 40.2188, 40.2812, ...], ... ], [ [ 3.0098, 3.0137, 3.0176, ...], [ 12.0391, 12.0547, 12.0703, ...], [ 48.1562, 48.2188, 48.2812, ...], ... ], [ [ 3.5098, 3.5137, 3.5176, ...], [ 14.0391, 14.0547, 14.0703, ...], [ 56.1562, 56.2188, 56.2812, ...], ... ], ... ] sum = 569.583984 ggml_debug: q-22 = (f32) PERMUTE(Qcur-22{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -0.1330, 3.8670, 7.8670, ...], [24575.8672, 24579.8672, 24583.8672, ...], [49151.8672, 49155.8672, 49159.8672, ...], ], [ [511.8670, 515.8670, 519.8670, ...], [25087.8672, 25091.8672, 25095.8672, ...], [49663.8672, 49667.8672, 49671.8672, ...], ], [ [1023.8670, 1027.8671, 1031.8671, ...], [25599.8672, 25603.8672, 25607.8672, ...], [50175.8672, 50179.8672, 50183.8672, ...], ], ... ] sum = 677480.500000 ggml_debug: kq-22 = (f32) MUL_MAT(k-22{128, 32, 8, 1}, q-22{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 0.6353, 4.6353, 8.6353, ...], [128.6353, 132.6353, 136.6353, ...], [256.6353, 260.6353, 264.6353, ...], ], [ [384.6353, 388.6353, 392.6353, ...], [512.6353, 516.6353, 520.6353, ...], [640.6353, 644.6353, 648.6353, ...], ], [ [768.6353, 772.6353, 776.6353, ...], [896.6353, 900.6353, 904.6353, ...], [1024.6353, 1028.6353, 1032.6353, ...], ], ... ] sum = 13949.150391 ggml_debug: kq_soft_max_ext-22 = (f32) SOFT_MAX(kq-22{32, 3, 48, 1}, CUDA1#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-22 = (f32) MUL_MAT(v-22{32, 128, 8, 1}, kq_soft_max_ext-22{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.1871, 4.1871, 8.1871, ...], [512.1871, 516.1871, 520.1871, ...], [1024.1871, 1028.1871, 1032.1871, ...], ], [ [1536.1871, 1540.1871, 1544.1871, ...], [2048.1870, 2052.1870, 2056.1870, ...], [2560.1870, 2564.1870, 2568.1870, ...], ], [ [3072.1870, 3076.1870, 3080.1870, ...], [3584.1870, 3588.1870, 3592.1870, ...], [4096.1870, 4100.1870, 4104.1870, ...], ], ... ] sum = 55409.054688 ggml_debug: kqv_merged-22 = (f32) PERMUTE(kqv-22{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.1871, 4.1871, 8.1871, ...], [1536.1871, 1540.1871, 1544.1871, ...], [3072.1870, 3076.1870, 3080.1870, ...], ... ], [ [512.1871, 516.1871, 520.1871, ...], [2048.1870, 2052.1870, 2056.1870, ...], [3584.1870, 3588.1870, 3592.1870, ...], ... ], [ [1024.1871, 1028.1871, 1032.1871, ...], [2560.1870, 2564.1870, 2568.1870, ...], [4096.1870, 4100.1870, 4104.1870, ...], ... ], ] sum = 55409.058594 ggml_debug: kqv_merged_cont-22 = (f32) CONT(kqv_merged-22{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.1871, 4.1871, 8.1871, ...], [24576.1875, 24580.1875, 24584.1875, ...], [49152.1875, 49156.1875, 49160.1875, ...], ], ] sum = 221221.687500 ggml_debug: kqv_out-22 = (f32) MUL_MAT(blk.22.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-22{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1765, 3.8235, 7.8235, ...], [24575.8242, 24579.8242, 24583.8242, ...], [49151.8242, 49155.8242, 49159.8242, ...], ], ] sum = 221218.421875 ggml_debug: norm-22 = (f32) NORM(kqv_out-22{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.6129, 3.3871, 7.3871, ...], [24575.3867, 24579.3867, 24583.3867, ...], [49151.3867, 49155.3867, 49159.3867, ...], ], ] sum = 221214.500000 ggml_debug: attn_out_norm-22 = (f32) MUL(norm-22{6144, 3, 1, 1}, blk.22.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2669, 3.7331, 7.7331, ...], [24575.7324, 24579.7324, 24583.7324, ...], [49151.7344, 49155.7344, 49159.7344, ...], ], ] sum = 221217.609375 ggml_debug: ffn_moe_logits-22 = (f32) MUL_MAT(blk.22.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-22{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ 0.4575, 4.4575, 8.4575, ...], [ 64.4575, 68.4575, 72.4575, ...], [128.4575, 132.4575, 136.4575, ...], ], ] sum = 616.117676 ggml_debug: ffn_moe_probs-22 = (f32) SOFT_MAX(ffn_moe_logits-22{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.1021, 4.1021, 8.1021, ...], [ 64.1021, 68.1021, 72.1021, ...], [128.1021, 132.1021, 136.1021, ...], ], ] sum = 612.918457 ggml_debug: ffn_moe_argsort-22 = (i32) ARGSORT(ffn_moe_probs-22{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0000, 4.0000, 8.0000, ...], [ 64.0000, 68.0000, 72.0000, ...], [128.0000, 132.0000, 136.0000, ...], ], ] sum = 612.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-22{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0000, 4.0000, 8.0000, ...], [ 64.0000, 68.0000, 72.0000, ...], [128.0000, 132.0000, 136.0000, ...], ], ] sum = 612.000000 ggml_debug: ffn_moe_up-22 = (f32) MUL_MAT_ID(blk.22.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-22{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0185, 3.9815, 7.9815, ...], [43007.9805, 43011.9805, 43015.9805, ...], [86015.9844, 86019.9844, 86023.9844, ...], ], ] sum = 387107.875000 ggml_debug: ffn_moe_gate-22 = (f32) MUL_MAT_ID(blk.22.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-22{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.4787, 3.5213, 7.5213, ...], [43007.5195, 43011.5195, 43015.5195, ...], [86015.5234, 86019.5234, 86023.5234, ...], ], ] sum = 387103.718750 ggml_debug: ffn_moe_silu-22 = (f32) UNARY(ffn_moe_gate-22{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1831, 3.8169, 7.8169, ...], [43007.8164, 43011.8164, 43015.8164, ...], [86015.8203, 86019.8203, 86023.8203, ...], ], ] sum = 387106.343750 ggml_debug: ffn_moe_gate_par-22 = (f32) MUL(ffn_moe_up-22{10752, 3, 1, 1}, ffn_moe_silu-22{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0034, 4.0034, 8.0034, ...], [43008.0039, 43012.0039, 43016.0039, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-22 = (f32) MUL_MAT_ID(blk.22.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-22{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0706, 4.0706, 8.0706, ...], [24576.0703, 24580.0703, 24584.0703, ...], [49152.0703, 49156.0703, 49160.0703, ...], ], ] sum = 221220.625000 ggml_debug: ffn_moe_probs-22 (reshaped) = (f32) RESHAPE(ffn_moe_probs-22{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.1021], [ 4.1021], [ 8.1021], ... ], [ [ 64.1021], [ 68.1021], [ 72.1021], ... ], [ [128.1021], [132.1021], [136.1021], ... ], ] sum = 612.918457 ggml_debug: ffn_moe_weights-22 = (f32) GET_ROWS(ffn_moe_probs-22 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1021], [ 4.1021], [ 8.1021], ... ], [ [ 16.1021], [ 20.1021], [ 24.1021], ... ], [ [ 32.1021], [ 36.1021], [ 40.1021], ... ], ] sum = 180.918472 ggml_debug: ffn_moe_weights-22 (reshaped) = (f32) RESHAPE(ffn_moe_weights-22{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1021, 4.1021, 8.1021, ...], [ 16.1021, 20.1021, 24.1021, ...], [ 32.1021, 36.1021, 40.1021, ...], ], ] sum = 180.918472 ggml_debug: ffn_moe_weights_sum-22 = (f32) SUM_ROWS(ffn_moe_weights-22 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3106], [ 4.3106], [ 8.3106], ], ] sum = 12.931656 ggml_debug: ffn_moe_weights_norm-22 = (f32) DIV(ffn_moe_weights-22 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-22{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3286, 4.3286, 8.3286, ...], [ 16.3286, 20.3286, 24.3286, ...], [ 32.3286, 36.3286, 40.3286, ...], ], ] sum = 182.957550 ggml_debug: ffn_moe_weights_norm-22 (view) = (f32) VIEW(ffn_moe_weights_norm-22{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3286], [ 16.3286], [ 32.3286], ], ] sum = 48.985851 ggml_debug: ffn_moe_weighted-22 = (f32) MUL(ffn_moe_down-22{6144, 3, 1, 1}, ffn_moe_weights_norm-22 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0232, 4.0232, 8.0232, ...], [24576.0234, 24580.0234, 24584.0234, ...], [49152.0234, 49156.0234, 49160.0234, ...], ], ] sum = 221220.218750 ggml_debug: ffn_moe_up-22 = (f32) MUL_MAT_ID(blk.22.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-22{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.5618, 2.4382, 6.4382, ...], [43006.4375, 43010.4375, 43014.4375, ...], [86014.4375, 86018.4375, 86022.4375, ...], ], ] sum = 387093.937500 ggml_debug: ffn_moe_gate-22 = (f32) MUL_MAT_ID(blk.22.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-22{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0562, 3.9438, 7.9438, ...], [43007.9453, 43011.9453, 43015.9453, ...], [86015.9453, 86019.9453, 86023.9453, ...], ], ] sum = 387107.500000 ggml_debug: ffn_moe_silu-22 = (f32) UNARY(ffn_moe_gate-22{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0273, 3.9727, 7.9727, ...], [43007.9727, 43011.9727, 43015.9727, ...], [86015.9766, 86019.9766, 86023.9766, ...], ], ] sum = 387107.750000 ggml_debug: ffn_moe_gate_par-22 = (f32) MUL(ffn_moe_up-22{10752, 3, 1, 1}, ffn_moe_silu-22{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0426, 4.0426, 8.0426, ...], [43008.0430, 43012.0430, 43016.0430, ...], [86016.0391, 86020.0391, 86024.0391, ...], ], ] sum = 387108.375000 ggml_debug: ffn_moe_down-22 = (f32) MUL_MAT_ID(blk.22.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-22{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.4309, 4.4309, 8.4309, ...], [24576.4316, 24580.4316, 24584.4316, ...], [49152.4297, 49156.4297, 49160.4297, ...], ], ] sum = 221223.875000 ggml_debug: ffn_moe_weights_norm-22 (view) = (f32) VIEW(ffn_moe_weights_norm-22{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2301], [ 16.2301], [ 32.2301], ], ] sum = 48.690254 ggml_debug: ffn_moe_weighted-22 = (f32) MUL(ffn_moe_down-22{6144, 3, 1, 1}, ffn_moe_weights_norm-22 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0991, 4.0991, 8.0991, ...], [24576.0996, 24580.0996, 24584.0996, ...], [49152.0977, 49156.0977, 49160.0977, ...], ], ] sum = 221220.875000 ggml_debug: ffn_moe_out-22 = (f32) ADD(ffn_moe_weighted-22{6144, 3, 1, 1}, ffn_moe_weighted-22{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1223, 4.1223, 8.1223, ...], [24576.1230, 24580.1230, 24584.1230, ...], [49152.1211, 49156.1211, 49160.1211, ...], ], ] sum = 221221.109375 ggml_debug: ffn_moe_up-22 = (f32) MUL_MAT_ID(blk.22.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-22{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3518, 4.3518, 8.3518, ...], [43008.3516, 43012.3516, 43016.3516, ...], [86016.3516, 86020.3516, 86024.3516, ...], ], ] sum = 387111.156250 ggml_debug: ffn_moe_gate-22 = (f32) MUL_MAT_ID(blk.22.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-22{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.5205, 4.5205, 8.5205, ...], [43008.5195, 43012.5195, 43016.5195, ...], [86016.5234, 86020.5234, 86024.5234, ...], ], ] sum = 387112.718750 ggml_debug: ffn_moe_silu-22 = (f32) UNARY(ffn_moe_gate-22{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.3265, 4.3265, 8.3265, ...], [43008.3281, 43012.3281, 43016.3281, ...], [86016.3281, 86020.3281, 86024.3281, ...], ], ] sum = 387110.937500 ggml_debug: ffn_moe_gate_par-22 = (f32) MUL(ffn_moe_up-22{10752, 3, 1, 1}, ffn_moe_silu-22{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1149, 4.1149, 8.1149, ...], [43008.1133, 43012.1133, 43016.1133, ...], [86016.1172, 86020.1172, 86024.1172, ...], ], ] sum = 387109.062500 ggml_debug: ffn_moe_down-22 = (f32) MUL_MAT_ID(blk.22.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-22{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1443, 4.1443, 8.1443, ...], [24576.1445, 24580.1445, 24584.1445, ...], [49152.1445, 49156.1445, 49160.1445, ...], ], ] sum = 221221.296875 ggml_debug: ffn_moe_weights_norm-22 (view) = (f32) VIEW(ffn_moe_weights_norm-22{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2250], [ 16.2250], [ 32.2250], ], ] sum = 48.674881 ggml_debug: ffn_moe_weighted-22 = (f32) MUL(ffn_moe_down-22{6144, 3, 1, 1}, ffn_moe_weights_norm-22 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0325, 4.0325, 8.0325, ...], [24576.0332, 24580.0332, 24584.0332, ...], [49152.0312, 49156.0312, 49160.0312, ...], ], ] sum = 221220.281250 ggml_debug: ffn_moe_out-22 = (f32) ADD(ffn_moe_out-22{6144, 3, 1, 1}, ffn_moe_weighted-22{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1548, 4.1548, 8.1548, ...], [24576.1543, 24580.1543, 24584.1543, ...], [49152.1562, 49156.1562, 49160.1562, ...], ], ] sum = 221221.406250 ggml_debug: ffn_moe_up-22 = (f32) MUL_MAT_ID(blk.22.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-22{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0812, 3.9188, 7.9188, ...], [43007.9180, 43011.9180, 43015.9180, ...], [86015.9219, 86019.9219, 86023.9219, ...], ], ] sum = 387107.312500 ggml_debug: ffn_moe_gate-22 = (f32) MUL_MAT_ID(blk.22.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-22{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.5247, 3.4753, 7.4753, ...], [43007.4766, 43011.4766, 43015.4766, ...], [86015.4766, 86019.4766, 86023.4766, ...], ], ] sum = 387103.281250 ggml_debug: ffn_moe_silu-22 = (f32) UNARY(ffn_moe_gate-22{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1951, 3.8049, 7.8049, ...], [43007.8047, 43011.8047, 43015.8047, ...], [86015.8047, 86019.8047, 86023.8047, ...], ], ] sum = 387106.250000 ggml_debug: ffn_moe_gate_par-22 = (f32) MUL(ffn_moe_up-22{10752, 3, 1, 1}, ffn_moe_silu-22{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0158, 4.0158, 8.0158, ...], [43008.0156, 43012.0156, 43016.0156, ...], [86016.0156, 86020.0156, 86024.0156, ...], ], ] sum = 387108.125000 ggml_debug: ffn_moe_down-22 = (f32) MUL_MAT_ID(blk.22.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-22{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.0767, 2.9233, 6.9233, ...], [24574.9238, 24578.9238, 24582.9238, ...], [49150.9219, 49154.9219, 49158.9219, ...], ], ] sum = 221210.296875 ggml_debug: ffn_moe_weights_norm-22 (view) = (f32) VIEW(ffn_moe_weights_norm-22{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2163], [ 16.2163], [ 32.2163], ], ] sum = 48.649010 ggml_debug: ffn_moe_weighted-22 = (f32) MUL(ffn_moe_down-22{6144, 3, 1, 1}, ffn_moe_weights_norm-22 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2329, 3.7671, 7.7671, ...], [24575.7676, 24579.7676, 24583.7676, ...], [49151.7656, 49155.7656, 49159.7656, ...], ], ] sum = 221217.890625 ggml_debug: ffn_moe_out-22 = (f32) ADD(ffn_moe_out-22{6144, 3, 1, 1}, ffn_moe_weighted-22{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0781, 3.9219, 7.9219, ...], [24575.9219, 24579.9219, 24583.9219, ...], [49151.9219, 49155.9219, 49159.9219, ...], ], ] sum = 221219.296875 ggml_debug: ffn_inp-22 = (f32) ADD(kqv_out-22{6144, 3, 1, 1}, l_out-21{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.8065, 3.1935, 7.1935, ...], [24575.1934, 24579.1934, 24583.1934, ...], [49151.1953, 49155.1953, 49159.1953, ...], ], ] sum = 221212.750000 ggml_debug: l_out-22 = (f32) ADD(ffn_moe_out-22{6144, 3, 1, 1}, ffn_inp-22{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.8846, 3.1154, 7.1154, ...], [24575.1152, 24579.1152, 24583.1152, ...], [49151.1172, 49155.1172, 49159.1172, ...], ], ] sum = 221212.062500 ggml_debug: norm-23 = (f32) NORM(l_out-22{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.7136, 3.2864, 7.2864, ...], [24575.2871, 24579.2871, 24583.2871, ...], [49151.2852, 49155.2852, 49159.2852, ...], ], ] sum = 221213.562500 ggml_debug: attn_norm-23 = (f32) MUL(norm-23{6144, 3, 1, 1}, blk.23.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1854, 3.8146, 7.8146, ...], [24575.8145, 24579.8145, 24583.8145, ...], [49151.8164, 49155.8164, 49159.8164, ...], ], ] sum = 221218.328125 ggml_debug: wqkv-23 = (f32) MUL_MAT(blk.23.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-23{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -2.4908, 1.5092, 5.5092, ...], [32765.5098, 32769.5078, 32773.5078, ...], [65533.5078, 65537.5078, 65541.5078, ...], ], ] sum = 294925.562500 ggml_debug: wqkv_clamped-23 = (f32) CLAMP(wqkv-23{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -2.4908, 1.5092, 5.5092, ...], [32765.5098, 32769.5078, 32773.5078, ...], [65533.5078, 65537.5078, 65541.5078, ...], ], ] sum = 294925.562500 ggml_debug: wqkv_clamped-23 (view) = (f32) VIEW(wqkv_clamped-23{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -2.4908, 1.5092, 5.5092, ...], [32765.5098, 32769.5078, 32773.5078, ...], [65533.5078, 65537.5078, 65541.5078, ...], ], ] sum = 294925.562500 ggml_debug: Qcur-23 = (f32) CONT(wqkv_clamped-23 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -2.4908, 1.5092, 5.5092, ...], [24573.5098, 24577.5098, 24581.5098, ...], [49149.5078, 49153.5078, 49157.5078, ...], ], ] sum = 221197.562500 ggml_debug: Qcur-23 (reshaped) = (f32) RESHAPE(Qcur-23{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -2.4908, 1.5092, 5.5092, ...], [509.5092, 513.5092, 517.5092, ...], [1021.5092, 1025.5092, 1029.5092, ...], ... ], [ [24573.5098, 24577.5098, 24581.5098, ...], [25085.5098, 25089.5098, 25093.5098, ...], [25597.5098, 25601.5098, 25605.5098, ...], ... ], [ [49149.5078, 49153.5078, 49157.5078, ...], [49661.5078, 49665.5078, 49669.5078, ...], [50173.5078, 50177.5078, 50181.5078, ...], ... ], ] sum = 677416.687500 ggml_debug: Qcur-23 = (f32) ROPE(Qcur-23 (reshaped){128, 48, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -2.4908, 1.5092, 5.5092, ...], [509.5092, 513.5092, 517.5092, ...], [1021.5092, 1025.5092, 1029.5092, ...], ... ], [ [24573.5098, 24577.5098, 24581.5098, ...], [25085.5098, 25089.5098, 25093.5098, ...], [25597.5098, 25601.5098, 25605.5098, ...], ... ], [ [49149.5078, 49153.5078, 49157.5078, ...], [49661.5078, 49665.5078, 49669.5078, ...], [50173.5078, 50177.5078, 50181.5078, ...], ... ], ] sum = 677416.687500 ggml_debug: wqkv_clamped-23 (view) = (f32) VIEW(wqkv_clamped-23{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 2.3816, 6.3816, 10.3816, ...], [32770.3828, 32774.3828, 32778.3828, ...], [65538.3828, 65542.3828, 65546.3828, ...], ], ] sum = 294969.437500 ggml_debug: Kcur-23 = (f32) CONT(wqkv_clamped-23 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 2.3816, 6.3816, 10.3816, ...], [4098.3818, 4102.3818, 4106.3818, ...], [8194.3818, 8198.3818, 8202.3818, ...], ], ] sum = 36921.437500 ggml_debug: Kcur-23 (reshaped) = (f32) RESHAPE(Kcur-23{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 2.3816, 6.3816, 10.3816, ...], [514.3817, 518.3817, 522.3817, ...], [1026.3816, 1030.3816, 1034.3816, ...], ... ], [ [4098.3818, 4102.3818, 4106.3818, ...], [4610.3818, 4614.3818, 4618.3818, ...], [5122.3818, 5126.3818, 5130.3818, ...], ... ], [ [8194.3818, 8198.3818, 8202.3818, ...], [8706.3818, 8710.3818, 8714.3818, ...], [9218.3818, 9222.3818, 9226.3818, ...], ... ], ] sum = 124588.320312 ggml_debug: Kcur-23 = (f32) ROPE(Kcur-23 (reshaped){128, 8, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 2.3816, 6.3816, 10.3816, ...], [514.3817, 518.3817, 522.3817, ...], [1026.3816, 1030.3816, 1034.3816, ...], ... ], [ [4098.3818, 4102.3818, 4106.3818, ...], [4610.3818, 4614.3818, 4618.3818, ...], [5122.3818, 5126.3818, 5130.3818, ...], ... ], [ [8194.3818, 8198.3818, 8202.3818, ...], [8706.3818, 8710.3818, 8714.3818, ...], [9218.3818, 9222.3818, 9226.3818, ...], ... ], ] sum = 124588.320312 ggml_debug: wqkv_clamped-23 (view) = (f32) VIEW(wqkv_clamped-23{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.1026, 3.8974, 7.8974, ...], [32767.8965, 32771.8984, 32775.8984, ...], [65535.8984, 65539.8984, 65543.8984, ...], ], ] sum = 294947.093750 ggml_debug: Vcur-23 = (f32) CONT(wqkv_clamped-23 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.1026, 3.8974, 7.8974, ...], [4095.8975, 4099.8975, 4103.8975, ...], [8191.8975, 8195.8975, 8199.8975, ...], ], ] sum = 36899.078125 ggml_debug: k_cache_view-23 = (f16) VIEW(cache_k_l23{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-23 (copy of Kcur-23) = (f16) CPY(Kcur-23{128, 8, 3, 1}, k_cache_view-23{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 2.3809, 2.3848, 2.3887, ...], ], ] sum = 7.154297 ggml_debug: v_cur_t-23 = (f32) TRANSPOSE(Vcur-23{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.1026, 4095.8975, 8191.8975], [ 3.8974, 4099.8975, 8195.8975], [ 7.8974, 4103.8975, 8199.8975], ... ], ] sum = 36899.078125 ggml_debug: v_cache_view-23 = (f16) VIEW(cache_v_l23{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-23 (copy of v_cur_t-23) = (f16) CPY(v_cur_t-23{3, 1024, 1, 1}, v_cache_view-23{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.1026, -0.1027, -0.1028], [ -0.2052, -0.2054, -0.2057], [ -0.4104, -0.4109, -0.4114], ... ], ] sum = -2.157166 ggml_debug: v-23 = (f16) VIEW(cache_v_l23{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.1026, -0.1027, -0.1028, ...], [ -0.2052, -0.2054, -0.2057, ...], [ -0.4104, -0.4109, -0.4114, ...], ... ], [ [ -0.1026, -0.1027, -0.1028, ...], [ -0.2052, -0.2054, -0.2057, ...], [ -0.4104, -0.4109, -0.4114, ...], ... ], [ [ -0.1026, -0.1027, -0.1028, ...], [ -0.2052, -0.2054, -0.2057, ...], [ -0.4104, -0.4109, -0.4114, ...], ... ], ... ] sum = -6.471497 ggml_debug: k-23 = (f16) VIEW(cache_k_l23{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 2.3809, 2.3848, 2.3887, ...], [ 9.5234, 9.5391, 9.5547, ...], [ 38.0938, 38.1562, 38.2188, ...], ... ], [ [ 2.8809, 2.8848, 2.8887, ...], [ 11.5234, 11.5391, 11.5547, ...], [ 46.0938, 46.1562, 46.2188, ...], ... ], [ [ 3.3809, 3.3848, 3.3887, ...], [ 13.5234, 13.5391, 13.5547, ...], [ 54.0938, 54.1562, 54.2188, ...], ... ], ... ] sum = 545.220703 ggml_debug: q-23 = (f32) PERMUTE(Qcur-23{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -2.4908, 1.5092, 5.5092, ...], [24573.5098, 24577.5098, 24581.5098, ...], [49149.5078, 49153.5078, 49157.5078, ...], ], [ [509.5092, 513.5092, 517.5092, ...], [25085.5098, 25089.5098, 25093.5098, ...], [49661.5078, 49665.5078, 49669.5078, ...], ], [ [1021.5092, 1025.5092, 1029.5092, ...], [25597.5098, 25601.5098, 25605.5098, ...], [50173.5078, 50177.5078, 50181.5078, ...], ], ... ] sum = 677416.625000 ggml_debug: kq-23 = (f32) MUL_MAT(k-23{128, 32, 8, 1}, q-23{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [-91.9375, -87.9375, -83.9375, ...], [ 36.0625, 40.0625, 44.0625, ...], [164.0625, 168.0625, 172.0625, ...], ], [ [292.0625, 296.0625, 300.0625, ...], [420.0625, 424.0625, 428.0625, ...], [548.0625, 552.0625, 556.0625, ...], ], [ [676.0625, 680.0625, 684.0625, ...], [804.0625, 808.0625, 812.0625, ...], [932.0625, 936.0625, 940.0625, ...], ], ... ] sum = 11449.687500 ggml_debug: kq_soft_max_ext-23 = (f32) SOFT_MAX(kq-23{32, 3, 48, 1}, CUDA1#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-23 = (f32) MUL_MAT(v-23{32, 128, 8, 1}, kq_soft_max_ext-23{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.1026, 3.8974, 7.8974, ...], [511.8974, 515.8974, 519.8974, ...], [1023.8974, 1027.8975, 1031.8975, ...], ], [ [1535.8975, 1539.8975, 1543.8975, ...], [2047.8975, 2051.8975, 2055.8975, ...], [2559.8975, 2563.8975, 2567.8975, ...], ], [ [3071.8975, 3075.8975, 3079.8975, ...], [3583.8975, 3587.8975, 3591.8975, ...], [4095.8975, 4099.8975, 4103.8975, ...], ], ... ] sum = 55401.242188 ggml_debug: kqv_merged-23 = (f32) PERMUTE(kqv-23{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.1026, 3.8974, 7.8974, ...], [1535.8975, 1539.8975, 1543.8975, ...], [3071.8975, 3075.8975, 3079.8975, ...], ... ], [ [511.8974, 515.8974, 519.8974, ...], [2047.8975, 2051.8975, 2055.8975, ...], [3583.8975, 3587.8975, 3591.8975, ...], ... ], [ [1023.8974, 1027.8975, 1031.8975, ...], [2559.8975, 2563.8975, 2567.8975, ...], [4095.8975, 4099.8975, 4103.8975, ...], ... ], ] sum = 55401.246094 ggml_debug: kqv_merged_cont-23 = (f32) CONT(kqv_merged-23{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1026, 3.8974, 7.8974, ...], [24575.8965, 24579.8965, 24583.8965, ...], [49151.8984, 49155.8984, 49159.8984, ...], ], ] sum = 221219.093750 ggml_debug: kqv_out-23 = (f32) MUL_MAT(blk.23.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-23{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0078, 3.9922, 7.9922, ...], [24575.9922, 24579.9922, 24583.9922, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: norm-23 = (f32) NORM(kqv_out-23{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0387, 3.9613, 7.9613, ...], [24575.9609, 24579.9609, 24583.9609, ...], [49151.9609, 49155.9609, 49159.9609, ...], ], ] sum = 221219.656250 ggml_debug: attn_out_norm-23 = (f32) MUL(norm-23{6144, 3, 1, 1}, blk.23.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0168, 3.9832, 7.9832, ...], [24575.9824, 24579.9824, 24583.9824, ...], [49151.9844, 49155.9844, 49159.9844, ...], ], ] sum = 221219.859375 ggml_debug: ffn_moe_logits-23 = (f32) MUL_MAT(blk.23.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-23{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ 0.6748, 4.6748, 8.6748, ...], [ 64.6748, 68.6748, 72.6748, ...], [128.6748, 132.6748, 136.6748, ...], ], ] sum = 618.073242 ggml_debug: ffn_moe_probs-23 = (f32) SOFT_MAX(ffn_moe_logits-23{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.1196, 4.1196, 8.1196, ...], [ 64.1196, 68.1196, 72.1196, ...], [128.1196, 132.1196, 136.1196, ...], ], ] sum = 613.076538 ggml_debug: ffn_moe_argsort-23 = (i32) ARGSORT(ffn_moe_probs-23{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0000, 4.0000, 8.0000, ...], [ 64.0000, 68.0000, 72.0000, ...], [128.0000, 132.0000, 136.0000, ...], ], ] sum = 612.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-23{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0000, 4.0000, 8.0000, ...], [ 64.0000, 68.0000, 72.0000, ...], [128.0000, 132.0000, 136.0000, ...], ], ] sum = 612.000000 ggml_debug: ffn_moe_up-23 = (f32) MUL_MAT_ID(blk.23.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-23{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0282, 4.0282, 8.0282, ...], [43008.0273, 43012.0273, 43016.0273, ...], [86016.0312, 86020.0312, 86024.0312, ...], ], ] sum = 387108.281250 ggml_debug: ffn_moe_gate-23 = (f32) MUL_MAT_ID(blk.23.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-23{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3581, 3.6419, 7.6419, ...], [43007.6406, 43011.6406, 43015.6406, ...], [86015.6406, 86019.6406, 86023.6406, ...], ], ] sum = 387104.750000 ggml_debug: ffn_moe_silu-23 = (f32) UNARY(ffn_moe_gate-23{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1473, 3.8527, 7.8527, ...], [43007.8516, 43011.8516, 43015.8516, ...], [86015.8516, 86019.8516, 86023.8516, ...], ], ] sum = 387106.656250 ggml_debug: ffn_moe_gate_par-23 = (f32) MUL(ffn_moe_up-23{10752, 3, 1, 1}, ffn_moe_silu-23{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0042, 3.9958, 7.9958, ...], [43007.9961, 43011.9961, 43015.9961, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.968750 ggml_debug: ffn_moe_down-23 = (f32) MUL_MAT_ID(blk.23.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-23{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1820, 3.8180, 7.8180, ...], [24575.8184, 24579.8184, 24583.8184, ...], [49151.8164, 49155.8164, 49159.8164, ...], ], ] sum = 221218.359375 ggml_debug: ffn_moe_probs-23 (reshaped) = (f32) RESHAPE(ffn_moe_probs-23{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.1196], [ 4.1196], [ 8.1196], ... ], [ [ 64.1196], [ 68.1196], [ 72.1196], ... ], [ [128.1196], [132.1196], [136.1196], ... ], ] sum = 613.076538 ggml_debug: ffn_moe_weights-23 = (f32) GET_ROWS(ffn_moe_probs-23 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1196], [ 4.1196], [ 8.1196], ... ], [ [ 16.1196], [ 20.1196], [ 24.1196], ... ], [ [ 32.1196], [ 36.1196], [ 40.1196], ... ], ] sum = 181.076538 ggml_debug: ffn_moe_weights-23 (reshaped) = (f32) RESHAPE(ffn_moe_weights-23{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1196, 4.1196, 8.1196, ...], [ 16.1196, 20.1196, 24.1196, ...], [ 32.1196, 36.1196, 40.1196, ...], ], ] sum = 181.076538 ggml_debug: ffn_moe_weights_sum-23 = (f32) SUM_ROWS(ffn_moe_weights-23 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3622], [ 4.3622], [ 8.3622], ], ] sum = 13.086654 ggml_debug: ffn_moe_weights_norm-23 = (f32) DIV(ffn_moe_weights-23 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-23{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3302, 4.3302, 8.3302, ...], [ 16.3302, 20.3302, 24.3302, ...], [ 32.3302, 36.3302, 40.3302, ...], ], ] sum = 182.972076 ggml_debug: ffn_moe_weights_norm-23 (view) = (f32) VIEW(ffn_moe_weights_norm-23{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3302], [ 16.3302], [ 32.3302], ], ] sum = 48.990692 ggml_debug: ffn_moe_weighted-23 = (f32) MUL(ffn_moe_down-23{6144, 3, 1, 1}, ffn_moe_weights_norm-23 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0601, 3.9399, 7.9399, ...], [24575.9395, 24579.9395, 24583.9395, ...], [49151.9414, 49155.9414, 49159.9414, ...], ], ] sum = 221219.453125 ggml_debug: ffn_moe_up-23 = (f32) MUL_MAT_ID(blk.23.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-23{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2814, 4.2814, 8.2814, ...], [43008.2812, 43012.2812, 43016.2812, ...], [86016.2812, 86020.2812, 86024.2812, ...], ], ] sum = 387110.531250 ggml_debug: ffn_moe_gate-23 = (f32) MUL_MAT_ID(blk.23.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-23{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0070, 4.0070, 8.0070, ...], [43008.0078, 43012.0078, 43016.0078, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.062500 ggml_debug: ffn_moe_silu-23 = (f32) UNARY(ffn_moe_gate-23{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0035, 4.0035, 8.0035, ...], [43008.0039, 43012.0039, 43016.0039, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_gate_par-23 = (f32) MUL(ffn_moe_up-23{10752, 3, 1, 1}, ffn_moe_silu-23{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0010, 4.0010, 8.0010, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-23 = (f32) MUL_MAT_ID(blk.23.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-23{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2471, 3.7529, 7.7529, ...], [24575.7520, 24579.7520, 24583.7520, ...], [49151.7539, 49155.7539, 49159.7539, ...], ], ] sum = 221217.765625 ggml_debug: ffn_moe_weights_norm-23 (view) = (f32) VIEW(ffn_moe_weights_norm-23{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2628], [ 16.2628], [ 32.2628], ], ] sum = 48.788311 ggml_debug: ffn_moe_weighted-23 = (f32) MUL(ffn_moe_down-23{6144, 3, 1, 1}, ffn_moe_weights_norm-23 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0649, 3.9351, 7.9351, ...], [24575.9355, 24579.9355, 24583.9355, ...], [49151.9336, 49155.9336, 49159.9336, ...], ], ] sum = 221219.421875 ggml_debug: ffn_moe_out-23 = (f32) ADD(ffn_moe_weighted-23{6144, 3, 1, 1}, ffn_moe_weighted-23{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1250, 3.8750, 7.8750, ...], [24575.8750, 24579.8750, 24583.8750, ...], [49151.8750, 49155.8750, 49159.8750, ...], ], ] sum = 221218.875000 ggml_debug: ffn_moe_up-23 = (f32) MUL_MAT_ID(blk.23.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-23{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0421, 4.0421, 8.0421, ...], [43008.0430, 43012.0430, 43016.0430, ...], [86016.0391, 86020.0391, 86024.0391, ...], ], ] sum = 387108.343750 ggml_debug: ffn_moe_gate-23 = (f32) MUL_MAT_ID(blk.23.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-23{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0179, 3.9821, 7.9821, ...], [43007.9805, 43011.9805, 43015.9805, ...], [86015.9844, 86019.9844, 86023.9844, ...], ], ] sum = 387107.875000 ggml_debug: ffn_moe_silu-23 = (f32) UNARY(ffn_moe_gate-23{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0089, 3.9911, 7.9911, ...], [43007.9922, 43011.9922, 43015.9922, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.937500 ggml_debug: ffn_moe_gate_par-23 = (f32) MUL(ffn_moe_up-23{10752, 3, 1, 1}, ffn_moe_silu-23{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0004, 3.9996, 7.9996, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-23 = (f32) MUL_MAT_ID(blk.23.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-23{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1688, 3.8312, 7.8312, ...], [24575.8320, 24579.8320, 24583.8320, ...], [49151.8320, 49155.8320, 49159.8320, ...], ], ] sum = 221218.468750 ggml_debug: ffn_moe_weights_norm-23 (view) = (f32) VIEW(ffn_moe_weights_norm-23{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2154], [ 16.2154], [ 32.2154], ], ] sum = 48.646156 ggml_debug: ffn_moe_weighted-23 = (f32) MUL(ffn_moe_down-23{6144, 3, 1, 1}, ffn_moe_weights_norm-23 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0364, 3.9636, 7.9636, ...], [24575.9629, 24579.9629, 24583.9629, ...], [49151.9648, 49155.9648, 49159.9648, ...], ], ] sum = 221219.687500 ggml_debug: ffn_moe_out-23 = (f32) ADD(ffn_moe_out-23{6144, 3, 1, 1}, ffn_moe_weighted-23{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1614, 3.8386, 7.8386, ...], [24575.8379, 24579.8379, 24583.8379, ...], [49151.8398, 49155.8398, 49159.8398, ...], ], ] sum = 221218.562500 ggml_debug: ffn_moe_up-23 = (f32) MUL_MAT_ID(blk.23.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-23{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.4506, 4.4506, 8.4506, ...], [43008.4492, 43012.4492, 43016.4492, ...], [86016.4531, 86020.4531, 86024.4531, ...], ], ] sum = 387112.062500 ggml_debug: ffn_moe_gate-23 = (f32) MUL_MAT_ID(blk.23.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-23{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.5777, 4.5777, 8.5777, ...], [43008.5781, 43012.5781, 43016.5781, ...], [86016.5781, 86020.5781, 86024.5781, ...], ], ] sum = 387113.187500 ggml_debug: ffn_moe_silu-23 = (f32) UNARY(ffn_moe_gate-23{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.3700, 4.3700, 8.3700, ...], [43008.3711, 43012.3711, 43016.3711, ...], [86016.3672, 86020.3672, 86024.3672, ...], ], ] sum = 387111.343750 ggml_debug: ffn_moe_gate_par-23 = (f32) MUL(ffn_moe_up-23{10752, 3, 1, 1}, ffn_moe_silu-23{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1668, 4.1668, 8.1668, ...], [43008.1680, 43012.1680, 43016.1680, ...], [86016.1641, 86020.1641, 86024.1641, ...], ], ] sum = 387109.468750 ggml_debug: ffn_moe_down-23 = (f32) MUL_MAT_ID(blk.23.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-23{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0099, 3.9901, 7.9901, ...], [24575.9902, 24579.9902, 24583.9902, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.906250 ggml_debug: ffn_moe_weights_norm-23 (view) = (f32) VIEW(ffn_moe_weights_norm-23{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1916], [ 16.1916], [ 32.1916], ], ] sum = 48.574841 ggml_debug: ffn_moe_weighted-23 = (f32) MUL(ffn_moe_down-23{6144, 3, 1, 1}, ffn_moe_weights_norm-23 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0019, 3.9981, 7.9981, ...], [24575.9980, 24579.9980, 24583.9980, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: ffn_moe_out-23 = (f32) ADD(ffn_moe_out-23{6144, 3, 1, 1}, ffn_moe_weighted-23{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1633, 3.8367, 7.8367, ...], [24575.8359, 24579.8359, 24583.8359, ...], [49151.8359, 49155.8359, 49159.8359, ...], ], ] sum = 221218.531250 ggml_debug: ffn_inp-23 = (f32) ADD(kqv_out-23{6144, 3, 1, 1}, l_out-22{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.8925, 3.1075, 7.1075, ...], [24575.1074, 24579.1074, 24583.1074, ...], [49151.1094, 49155.1094, 49159.1094, ...], ], ] sum = 221211.984375 ggml_debug: l_out-23 = (f32) ADD(ffn_moe_out-23{6144, 3, 1, 1}, ffn_inp-23{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.0558, 2.9442, 6.9442, ...], [24574.9434, 24578.9434, 24582.9434, ...], [49150.9453, 49154.9453, 49158.9453, ...], ], ] sum = 221210.500000 ggml_debug: norm-24 = (f32) NORM(l_out-23{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.7623, 3.2377, 7.2377, ...], [24575.2383, 24579.2383, 24583.2383, ...], [49151.2383, 49155.2383, 49159.2383, ...], ], ] sum = 221213.125000 ggml_debug: attn_norm-24 = (f32) MUL(norm-24{6144, 3, 1, 1}, blk.24.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2308, 3.7692, 7.7692, ...], [24575.7695, 24579.7695, 24583.7695, ...], [49151.7695, 49155.7695, 49159.7695, ...], ], ] sum = 221217.921875 ggml_debug: wqkv-24 = (f32) MUL_MAT(blk.24.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-24{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -0.0330, 3.9670, 7.9670, ...], [32767.9668, 32771.9688, 32775.9688, ...], [65535.9688, 65539.9688, 65543.9688, ...], ], ] sum = 294947.718750 ggml_debug: wqkv_clamped-24 = (f32) CLAMP(wqkv-24{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -0.0330, 3.9670, 7.9670, ...], [32767.9668, 32771.9688, 32775.9688, ...], [65535.9688, 65539.9688, 65543.9688, ...], ], ] sum = 294947.718750 ggml_debug: wqkv_clamped-24 (view) = (f32) VIEW(wqkv_clamped-24{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0330, 3.9670, 7.9670, ...], [32767.9668, 32771.9688, 32775.9688, ...], [65535.9688, 65539.9688, 65543.9688, ...], ], ] sum = 294947.718750 ggml_debug: Qcur-24 = (f32) CONT(wqkv_clamped-24 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0330, 3.9670, 7.9670, ...], [24575.9668, 24579.9668, 24583.9668, ...], [49151.9688, 49155.9688, 49159.9688, ...], ], ] sum = 221219.718750 ggml_debug: Qcur-24 (reshaped) = (f32) RESHAPE(Qcur-24{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -0.0330, 3.9670, 7.9670, ...], [511.9670, 515.9670, 519.9670, ...], [1023.9670, 1027.9670, 1031.9670, ...], ... ], [ [24575.9668, 24579.9668, 24583.9668, ...], [25087.9668, 25091.9668, 25095.9668, ...], [25599.9668, 25603.9668, 25607.9668, ...], ... ], [ [49151.9688, 49155.9688, 49159.9688, ...], [49663.9688, 49667.9688, 49671.9688, ...], [50175.9688, 50179.9688, 50183.9688, ...], ... ], ] sum = 677483.250000 ggml_debug: Qcur-24 = (f32) ROPE(Qcur-24 (reshaped){128, 48, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -0.0330, 3.9670, 7.9670, ...], [511.9670, 515.9670, 519.9670, ...], [1023.9670, 1027.9670, 1031.9670, ...], ... ], [ [24575.9668, 24579.9668, 24583.9668, ...], [25087.9668, 25091.9668, 25095.9668, ...], [25599.9668, 25603.9668, 25607.9668, ...], ... ], [ [49151.9688, 49155.9688, 49159.9688, ...], [49663.9688, 49667.9688, 49671.9688, ...], [50175.9688, 50179.9688, 50183.9688, ...], ... ], ] sum = 677483.250000 ggml_debug: wqkv_clamped-24 (view) = (f32) VIEW(wqkv_clamped-24{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.7557, 4.7557, 8.7557, ...], [32768.7539, 32772.7539, 32776.7539, ...], [65536.7578, 65540.7578, 65544.7578, ...], ], ] sum = 294954.781250 ggml_debug: Kcur-24 = (f32) CONT(wqkv_clamped-24 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.7557, 4.7557, 8.7557, ...], [4096.7559, 4100.7559, 4104.7559, ...], [8192.7559, 8196.7559, 8200.7559, ...], ], ] sum = 36906.804688 ggml_debug: Kcur-24 (reshaped) = (f32) RESHAPE(Kcur-24{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 0.7557, 4.7557, 8.7557, ...], [512.7557, 516.7557, 520.7557, ...], [1024.7556, 1028.7556, 1032.7556, ...], ... ], [ [4096.7559, 4100.7559, 4104.7559, ...], [4608.7559, 4612.7559, 4616.7559, ...], [5120.7559, 5124.7559, 5128.7559, ...], ... ], [ [8192.7559, 8196.7559, 8200.7559, ...], [8704.7559, 8708.7559, 8712.7559, ...], [9216.7559, 9220.7559, 9224.7559, ...], ... ], ] sum = 124544.429688 ggml_debug: Kcur-24 = (f32) ROPE(Kcur-24 (reshaped){128, 8, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 0.7557, 4.7557, 8.7557, ...], [512.7557, 516.7557, 520.7557, ...], [1024.7556, 1028.7556, 1032.7556, ...], ... ], [ [4096.7559, 4100.7559, 4104.7559, ...], [4608.7559, 4612.7559, 4616.7559, ...], [5120.7559, 5124.7559, 5128.7559, ...], ... ], [ [8192.7559, 8196.7559, 8200.7559, ...], [8704.7559, 8708.7559, 8712.7559, ...], [9216.7559, 9220.7559, 9224.7559, ...], ... ], ] sum = 124544.429688 ggml_debug: wqkv_clamped-24 (view) = (f32) VIEW(wqkv_clamped-24{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0652, 4.0652, 8.0652, ...], [32768.0664, 32772.0664, 32776.0664, ...], [65536.0625, 65540.0625, 65544.0625, ...], ], ] sum = 294948.562500 ggml_debug: Vcur-24 = (f32) CONT(wqkv_clamped-24 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0652, 4.0652, 8.0652, ...], [4096.0654, 4100.0654, 4104.0654, ...], [8192.0654, 8196.0654, 8200.0654, ...], ], ] sum = 36900.589844 ggml_debug: k_cache_view-24 = (f16) VIEW(cache_k_l24{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-24 (copy of Kcur-24) = (f16) CPY(Kcur-24{128, 8, 3, 1}, k_cache_view-24{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 0.7559, 0.7568, 0.7578, ...], ], ] sum = 2.270508 ggml_debug: v_cur_t-24 = (f32) TRANSPOSE(Vcur-24{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0652, 4096.0654, 8192.0654], [ 4.0652, 4100.0654, 8196.0654], [ 8.0652, 4104.0654, 8200.0654], ... ], ] sum = 36900.589844 ggml_debug: v_cache_view-24 = (f16) VIEW(cache_v_l24{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-24 (copy of v_cur_t-24) = (f16) CPY(v_cur_t-24{3, 1024, 1, 1}, v_cache_view-24{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.0652, 0.0654, 0.0655], [ 0.1305, 0.1307, 0.1310], [ 0.2610, 0.2615, 0.2620], ... ], ] sum = 1.372742 ggml_debug: v-24 = (f16) VIEW(cache_v_l24{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.0652, 0.0654, 0.0655, ...], [ 0.1305, 0.1307, 0.1310, ...], [ 0.2610, 0.2615, 0.2620, ...], ... ], [ [ 0.0652, 0.0654, 0.0655, ...], [ 0.1305, 0.1307, 0.1310, ...], [ 0.2610, 0.2615, 0.2620, ...], ... ], [ [ 0.0652, 0.0654, 0.0655, ...], [ 0.1305, 0.1307, 0.1310, ...], [ 0.2610, 0.2615, 0.2620, ...], ... ], ... ] sum = 4.118225 ggml_debug: k-24 = (f16) VIEW(cache_k_l24{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 0.7559, 0.7568, 0.7578, ...], [ 3.0234, 3.0273, 3.0312, ...], [ 12.0938, 12.1094, 12.1250, ...], ... ], [ [ 0.8809, 0.8818, 0.8828, ...], [ 3.5234, 3.5273, 3.5312, ...], [ 14.0938, 14.1094, 14.1250, ...], ... ], [ [ 1.0117, 1.0137, 1.0156, ...], [ 4.0469, 4.0547, 4.0625, ...], [ 16.1875, 16.2188, 16.2500, ...], ... ], ... ] sum = 167.097656 ggml_debug: q-24 = (f32) PERMUTE(Qcur-24{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -0.0330, 3.9670, 7.9670, ...], [24575.9668, 24579.9668, 24583.9668, ...], [49151.9688, 49155.9688, 49159.9688, ...], ], [ [511.9670, 515.9670, 519.9670, ...], [25087.9668, 25091.9668, 25095.9668, ...], [49663.9688, 49667.9688, 49671.9688, ...], ], [ [1023.9670, 1027.9670, 1031.9670, ...], [25599.9668, 25603.9668, 25607.9668, ...], [50175.9688, 50179.9688, 50183.9688, ...], ], ... ] sum = 677483.250000 ggml_debug: kq-24 = (f32) MUL_MAT(k-24{128, 32, 8, 1}, q-24{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [-21.3438, -17.3438, -13.3438, ...], [106.6562, 110.6562, 114.6562, ...], [234.6562, 238.6562, 242.6562, ...], ], [ [362.6562, 366.6562, 370.6562, ...], [490.6562, 494.6562, 498.6562, ...], [618.6562, 622.6562, 626.6562, ...], ], [ [746.6562, 750.6562, 754.6562, ...], [874.6562, 878.6562, 882.6562, ...], [1002.6562, 1006.6562, 1010.6562, ...], ], ... ] sum = 13355.718750 ggml_debug: kq_soft_max_ext-24 = (f32) SOFT_MAX(kq-24{32, 3, 48, 1}, CUDA1#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-24 = (f32) MUL_MAT(v-24{32, 128, 8, 1}, kq_soft_max_ext-24{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.0652, 4.0652, 8.0652, ...], [512.0652, 516.0652, 520.0652, ...], [1024.0652, 1028.0652, 1032.0652, ...], ], [ [1536.0652, 1540.0652, 1544.0652, ...], [2048.0652, 2052.0652, 2056.0652, ...], [2560.0652, 2564.0652, 2568.0652, ...], ], [ [3072.0652, 3076.0652, 3080.0652, ...], [3584.0652, 3588.0652, 3592.0652, ...], [4096.0654, 4100.0654, 4104.0654, ...], ], ... ] sum = 55405.765625 ggml_debug: kqv_merged-24 = (f32) PERMUTE(kqv-24{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0652, 4.0652, 8.0652, ...], [1536.0652, 1540.0652, 1544.0652, ...], [3072.0652, 3076.0652, 3080.0652, ...], ... ], [ [512.0652, 516.0652, 520.0652, ...], [2048.0652, 2052.0652, 2056.0652, ...], [3584.0652, 3588.0652, 3592.0652, ...], ... ], [ [1024.0652, 1028.0652, 1032.0652, ...], [2560.0652, 2564.0652, 2568.0652, ...], [4096.0654, 4100.0654, 4104.0654, ...], ... ], ] sum = 55405.769531 ggml_debug: kqv_merged_cont-24 = (f32) CONT(kqv_merged-24{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0652, 4.0652, 8.0652, ...], [24576.0645, 24580.0645, 24584.0645, ...], [49152.0664, 49156.0664, 49160.0664, ...], ], ] sum = 221220.578125 ggml_debug: kqv_out-24 = (f32) MUL_MAT(blk.24.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-24{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0622, 4.0622, 8.0622, ...], [24576.0625, 24580.0625, 24584.0625, ...], [49152.0625, 49156.0625, 49160.0625, ...], ], ] sum = 221220.562500 ggml_debug: norm-24 = (f32) NORM(kqv_out-24{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.1995, 4.1995, 8.1995, ...], [24576.1992, 24580.1992, 24584.1992, ...], [49152.1992, 49156.1992, 49160.1992, ...], ], ] sum = 221221.796875 ggml_debug: attn_out_norm-24 = (f32) MUL(norm-24{6144, 3, 1, 1}, blk.24.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0869, 4.0869, 8.0869, ...], [24576.0859, 24580.0859, 24584.0859, ...], [49152.0859, 49156.0859, 49160.0859, ...], ], ] sum = 221220.781250 ggml_debug: ffn_moe_logits-24 = (f32) MUL_MAT(blk.24.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-24{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.1821, 3.8179, 7.8179, ...], [ 63.8179, 67.8179, 71.8179, ...], [127.8179, 131.8179, 135.8179, ...], ], ] sum = 610.360840 ggml_debug: ffn_moe_probs-24 = (f32) SOFT_MAX(ffn_moe_logits-24{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0465, 4.0465, 8.0465, ...], [ 64.0465, 68.0465, 72.0465, ...], [128.0465, 132.0465, 136.0465, ...], ], ] sum = 612.418518 ggml_debug: ffn_moe_argsort-24 = (i32) ARGSORT(ffn_moe_probs-24{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 13.0000, 17.0000, 21.0000, ...], [ 77.0000, 81.0000, 85.0000, ...], [141.0000, 145.0000, 149.0000, ...], ], ] sum = 729.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-24{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 13.0000, 17.0000, 21.0000, ...], [ 77.0000, 81.0000, 85.0000, ...], [141.0000, 145.0000, 149.0000, ...], ], ] sum = 729.000000 ggml_debug: ffn_moe_up-24 = (f32) MUL_MAT_ID(blk.24.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-24{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1774, 4.1774, 8.1774, ...], [43008.1758, 43012.1758, 43016.1758, ...], [86016.1797, 86020.1797, 86024.1797, ...], ], ] sum = 387109.625000 ggml_debug: ffn_moe_gate-24 = (f32) MUL_MAT_ID(blk.24.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-24{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1496, 4.1496, 8.1496, ...], [43008.1484, 43012.1484, 43016.1484, ...], [86016.1484, 86020.1484, 86024.1484, ...], ], ] sum = 387109.343750 ggml_debug: ffn_moe_silu-24 = (f32) UNARY(ffn_moe_gate-24{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0804, 4.0804, 8.0804, ...], [43008.0820, 43012.0820, 43016.0820, ...], [86016.0781, 86020.0781, 86024.0781, ...], ], ] sum = 387108.687500 ggml_debug: ffn_moe_gate_par-24 = (f32) MUL(ffn_moe_up-24{10752, 3, 1, 1}, ffn_moe_silu-24{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0143, 4.0143, 8.0143, ...], [43008.0156, 43012.0156, 43016.0156, ...], [86016.0156, 86020.0156, 86024.0156, ...], ], ] sum = 387108.125000 ggml_debug: ffn_moe_down-24 = (f32) MUL_MAT_ID(blk.24.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-24{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0618, 3.9382, 7.9382, ...], [24575.9375, 24579.9375, 24583.9375, ...], [49151.9375, 49155.9375, 49159.9375, ...], ], ] sum = 221219.437500 ggml_debug: ffn_moe_probs-24 (reshaped) = (f32) RESHAPE(ffn_moe_probs-24{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0465], [ 4.0465], [ 8.0465], ... ], [ [ 64.0465], [ 68.0465], [ 72.0465], ... ], [ [128.0465], [132.0465], [136.0465], ... ], ] sum = 612.418518 ggml_debug: ffn_moe_weights-24 = (f32) GET_ROWS(ffn_moe_probs-24 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.0964], [ 4.0964], [ 8.0964], ... ], [ [ 16.0964], [ 20.0964], [ 24.0964], ... ], [ [ 32.0964], [ 36.0964], [ 40.0964], ... ], ] sum = 180.867981 ggml_debug: ffn_moe_weights-24 (reshaped) = (f32) RESHAPE(ffn_moe_weights-24{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0964, 4.0964, 8.0964, ...], [ 16.0964, 20.0964, 24.0964, ...], [ 32.0964, 36.0964, 40.0964, ...], ], ] sum = 180.867981 ggml_debug: ffn_moe_weights_sum-24 = (f32) SUM_ROWS(ffn_moe_weights-24 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3520], [ 4.3520], [ 8.3520], ], ] sum = 13.056126 ggml_debug: ffn_moe_weights_norm-24 = (f32) DIV(ffn_moe_weights-24 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-24{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2740, 4.2740, 8.2740, ...], [ 16.2740, 20.2740, 24.2740, ...], [ 32.2740, 36.2740, 40.2740, ...], ], ] sum = 182.465576 ggml_debug: ffn_moe_weights_norm-24 (view) = (f32) VIEW(ffn_moe_weights_norm-24{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2740], [ 16.2740], [ 32.2740], ], ] sum = 48.821854 ggml_debug: ffn_moe_weighted-24 = (f32) MUL(ffn_moe_down-24{6144, 3, 1, 1}, ffn_moe_weights_norm-24 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0169, 3.9831, 7.9831, ...], [24575.9824, 24579.9824, 24583.9824, ...], [49151.9844, 49155.9844, 49159.9844, ...], ], ] sum = 221219.859375 ggml_debug: ffn_moe_up-24 = (f32) MUL_MAT_ID(blk.24.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-24{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1948, 3.8052, 7.8052, ...], [43007.8047, 43011.8047, 43015.8047, ...], [86015.8047, 86019.8047, 86023.8047, ...], ], ] sum = 387106.250000 ggml_debug: ffn_moe_gate-24 = (f32) MUL_MAT_ID(blk.24.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-24{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3850, 4.3850, 8.3850, ...], [43008.3867, 43012.3867, 43016.3867, ...], [86016.3828, 86020.3828, 86024.3828, ...], ], ] sum = 387111.437500 ggml_debug: ffn_moe_silu-24 = (f32) UNARY(ffn_moe_gate-24{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.2291, 4.2291, 8.2291, ...], [43008.2305, 43012.2305, 43016.2305, ...], [86016.2266, 86020.2266, 86024.2266, ...], ], ] sum = 387110.031250 ggml_debug: ffn_moe_gate_par-24 = (f32) MUL(ffn_moe_up-24{10752, 3, 1, 1}, ffn_moe_silu-24{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0446, 3.9554, 7.9554, ...], [43007.9570, 43011.9570, 43015.9570, ...], [86015.9531, 86019.9531, 86023.9531, ...], ], ] sum = 387107.562500 ggml_debug: ffn_moe_down-24 = (f32) MUL_MAT_ID(blk.24.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-24{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0430, 4.0430, 8.0430, ...], [24576.0430, 24580.0430, 24584.0430, ...], [49152.0430, 49156.0430, 49160.0430, ...], ], ] sum = 221220.390625 ggml_debug: ffn_moe_weights_norm-24 (view) = (f32) VIEW(ffn_moe_weights_norm-24{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2681], [ 16.2681], [ 32.2681], ], ] sum = 48.804386 ggml_debug: ffn_moe_weighted-24 = (f32) MUL(ffn_moe_down-24{6144, 3, 1, 1}, ffn_moe_weights_norm-24 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0115, 4.0115, 8.0115, ...], [24576.0117, 24580.0117, 24584.0117, ...], [49152.0117, 49156.0117, 49160.0117, ...], ], ] sum = 221220.109375 ggml_debug: ffn_moe_out-24 = (f32) ADD(ffn_moe_weighted-24{6144, 3, 1, 1}, ffn_moe_weighted-24{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0054, 3.9946, 7.9946, ...], [24575.9941, 24579.9941, 24583.9941, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.968750 ggml_debug: ffn_moe_up-24 = (f32) MUL_MAT_ID(blk.24.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-24{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.6311, 3.3689, 7.3689, ...], [43007.3672, 43011.3672, 43015.3672, ...], [86015.3672, 86019.3672, 86023.3672, ...], ], ] sum = 387102.312500 ggml_debug: ffn_moe_gate-24 = (f32) MUL_MAT_ID(blk.24.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-24{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1127, 4.1127, 8.1127, ...], [43008.1133, 43012.1133, 43016.1133, ...], [86016.1094, 86020.1094, 86024.1094, ...], ], ] sum = 387109.000000 ggml_debug: ffn_moe_silu-24 = (f32) UNARY(ffn_moe_gate-24{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0595, 4.0595, 8.0595, ...], [43008.0586, 43012.0586, 43016.0586, ...], [86016.0625, 86020.0625, 86024.0625, ...], ], ] sum = 387108.562500 ggml_debug: ffn_moe_gate_par-24 = (f32) MUL(ffn_moe_up-24{10752, 3, 1, 1}, ffn_moe_silu-24{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0376, 3.9624, 7.9624, ...], [43007.9609, 43011.9609, 43015.9609, ...], [86015.9609, 86019.9609, 86023.9609, ...], ], ] sum = 387107.656250 ggml_debug: ffn_moe_down-24 = (f32) MUL_MAT_ID(blk.24.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-24{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0322, 3.9678, 7.9678, ...], [24575.9688, 24579.9688, 24583.9688, ...], [49151.9688, 49155.9688, 49159.9688, ...], ], ] sum = 221219.718750 ggml_debug: ffn_moe_weights_norm-24 (view) = (f32) VIEW(ffn_moe_weights_norm-24{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2438], [ 16.2438], [ 32.2438], ], ] sum = 48.731506 ggml_debug: ffn_moe_weighted-24 = (f32) MUL(ffn_moe_down-24{6144, 3, 1, 1}, ffn_moe_weights_norm-24 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0079, 3.9921, 7.9921, ...], [24575.9922, 24579.9922, 24583.9922, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: ffn_moe_out-24 = (f32) ADD(ffn_moe_out-24{6144, 3, 1, 1}, ffn_moe_weighted-24{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0133, 3.9867, 7.9867, ...], [24575.9863, 24579.9863, 24583.9863, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.875000 ggml_debug: ffn_moe_up-24 = (f32) MUL_MAT_ID(blk.24.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-24{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1654, 4.1654, 8.1654, ...], [43008.1641, 43012.1641, 43016.1641, ...], [86016.1641, 86020.1641, 86024.1641, ...], ], ] sum = 387109.468750 ggml_debug: ffn_moe_gate-24 = (f32) MUL_MAT_ID(blk.24.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-24{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1385, 3.8615, 7.8615, ...], [43007.8633, 43011.8633, 43015.8633, ...], [86015.8594, 86019.8594, 86023.8594, ...], ], ] sum = 387106.750000 ggml_debug: ffn_moe_silu-24 = (f32) UNARY(ffn_moe_gate-24{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0645, 3.9355, 7.9355, ...], [43007.9336, 43011.9336, 43015.9336, ...], [86015.9375, 86019.9375, 86023.9375, ...], ], ] sum = 387107.437500 ggml_debug: ffn_moe_gate_par-24 = (f32) MUL(ffn_moe_up-24{10752, 3, 1, 1}, ffn_moe_silu-24{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0107, 3.9893, 7.9893, ...], [43007.9883, 43011.9883, 43015.9883, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.937500 ggml_debug: ffn_moe_down-24 = (f32) MUL_MAT_ID(blk.24.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-24{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0745, 4.0745, 8.0745, ...], [24576.0742, 24580.0742, 24584.0742, ...], [49152.0742, 49156.0742, 49160.0742, ...], ], ] sum = 221220.671875 ggml_debug: ffn_moe_weights_norm-24 (view) = (f32) VIEW(ffn_moe_weights_norm-24{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2141], [ 16.2141], [ 32.2141], ], ] sum = 48.642254 ggml_debug: ffn_moe_weighted-24 = (f32) MUL(ffn_moe_down-24{6144, 3, 1, 1}, ffn_moe_weights_norm-24 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0160, 4.0160, 8.0160, ...], [24576.0156, 24580.0156, 24584.0156, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: ffn_moe_out-24 = (f32) ADD(ffn_moe_out-24{6144, 3, 1, 1}, ffn_moe_weighted-24{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0027, 4.0027, 8.0027, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.015625 ggml_debug: ffn_inp-24 = (f32) ADD(kqv_out-24{6144, 3, 1, 1}, l_out-23{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.9935, 3.0065, 7.0065, ...], [24575.0059, 24579.0059, 24583.0059, ...], [49151.0078, 49155.0078, 49159.0078, ...], ], ] sum = 221211.062500 ggml_debug: l_out-24 = (f32) ADD(ffn_moe_out-24{6144, 3, 1, 1}, ffn_inp-24{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.9908, 3.0092, 7.0092, ...], [24575.0098, 24579.0098, 24583.0098, ...], [49151.0078, 49155.0078, 49159.0078, ...], ], ] sum = 221211.062500 ggml_debug: norm-25 = (f32) NORM(l_out-24{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.6527, 3.3473, 7.3473, ...], [24575.3477, 24579.3477, 24583.3477, ...], [49151.3477, 49155.3477, 49159.3477, ...], ], ] sum = 221214.109375 ggml_debug: attn_norm-25 = (f32) MUL(norm-25{6144, 3, 1, 1}, blk.25.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1683, 3.8317, 7.8317, ...], [24575.8320, 24579.8320, 24583.8320, ...], [49151.8320, 49155.8320, 49159.8320, ...], ], ] sum = 221218.484375 ggml_debug: wqkv-25 = (f32) MUL_MAT(blk.25.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-25{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 1.5468, 5.5468, 9.5468, ...], [32769.5469, 32773.5469, 32777.5469, ...], [65537.5469, 65541.5469, 65545.5469, ...], ], ] sum = 294961.937500 ggml_debug: wqkv_clamped-25 = (f32) CLAMP(wqkv-25{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 1.5468, 5.5468, 9.5468, ...], [32769.5469, 32773.5469, 32777.5469, ...], [65537.5469, 65541.5469, 65545.5469, ...], ], ] sum = 294961.937500 ggml_debug: wqkv_clamped-25 (view) = (f32) VIEW(wqkv_clamped-25{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 1.5468, 5.5468, 9.5468, ...], [32769.5469, 32773.5469, 32777.5469, ...], [65537.5469, 65541.5469, 65545.5469, ...], ], ] sum = 294961.937500 ggml_debug: Qcur-25 = (f32) CONT(wqkv_clamped-25 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 1.5468, 5.5468, 9.5468, ...], [24577.5469, 24581.5469, 24585.5469, ...], [49153.5469, 49157.5469, 49161.5469, ...], ], ] sum = 221233.921875 ggml_debug: Qcur-25 (reshaped) = (f32) RESHAPE(Qcur-25{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 1.5468, 5.5468, 9.5468, ...], [513.5468, 517.5468, 521.5468, ...], [1025.5468, 1029.5468, 1033.5468, ...], ... ], [ [24577.5469, 24581.5469, 24585.5469, ...], [25089.5469, 25093.5469, 25097.5469, ...], [25601.5469, 25605.5469, 25609.5469, ...], ... ], [ [49153.5469, 49157.5469, 49161.5469, ...], [49665.5469, 49669.5469, 49673.5469, ...], [50177.5469, 50181.5469, 50185.5469, ...], ... ], ] sum = 677525.875000 ggml_debug: Qcur-25 = (f32) ROPE(Qcur-25 (reshaped){128, 48, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 1.5468, 5.5468, 9.5468, ...], [513.5468, 517.5468, 521.5468, ...], [1025.5468, 1029.5468, 1033.5468, ...], ... ], [ [24577.5469, 24581.5469, 24585.5469, ...], [25089.5469, 25093.5469, 25097.5469, ...], [25601.5469, 25605.5469, 25609.5469, ...], ... ], [ [49153.5469, 49157.5469, 49161.5469, ...], [49665.5469, 49669.5469, 49673.5469, ...], [50177.5469, 50181.5469, 50185.5469, ...], ... ], ] sum = 677525.875000 ggml_debug: wqkv_clamped-25 (view) = (f32) VIEW(wqkv_clamped-25{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -1.8000, 2.2000, 6.2000, ...], [32766.1992, 32770.1992, 32774.1992, ...], [65534.1992, 65538.2031, 65542.2031, ...], ], ] sum = 294931.812500 ggml_debug: Kcur-25 = (f32) CONT(wqkv_clamped-25 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -1.8000, 2.2000, 6.2000, ...], [4094.2000, 4098.2002, 4102.2002, ...], [8190.2002, 8194.2002, 8198.2002, ...], ], ] sum = 36883.800781 ggml_debug: Kcur-25 (reshaped) = (f32) RESHAPE(Kcur-25{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ -1.8000, 2.2000, 6.2000, ...], [510.2000, 514.2000, 518.2000, ...], [1022.2000, 1026.2000, 1030.2000, ...], ... ], [ [4094.2000, 4098.2002, 4102.2002, ...], [4606.2002, 4610.2002, 4614.2002, ...], [5118.2002, 5122.2002, 5126.2002, ...], ... ], [ [8190.2002, 8194.2002, 8198.2002, ...], [8702.2002, 8706.2002, 8710.2002, ...], [9214.2002, 9218.2002, 9222.2002, ...], ... ], ] sum = 124475.414062 ggml_debug: Kcur-25 = (f32) ROPE(Kcur-25 (reshaped){128, 8, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ -1.8000, 2.2000, 6.2000, ...], [510.2000, 514.2000, 518.2000, ...], [1022.2000, 1026.2000, 1030.2000, ...], ... ], [ [4094.2000, 4098.2002, 4102.2002, ...], [4606.2002, 4610.2002, 4614.2002, ...], [5118.2002, 5122.2002, 5126.2002, ...], ... ], [ [8190.2002, 8194.2002, 8198.2002, ...], [8702.2002, 8706.2002, 8710.2002, ...], [9214.2002, 9218.2002, 9222.2002, ...], ... ], ] sum = 124475.414062 ggml_debug: wqkv_clamped-25 (view) = (f32) VIEW(wqkv_clamped-25{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0521, 3.9479, 7.9479, ...], [32767.9473, 32771.9492, 32775.9492, ...], [65535.9492, 65539.9453, 65543.9453, ...], ], ] sum = 294947.531250 ggml_debug: Vcur-25 = (f32) CONT(wqkv_clamped-25 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0521, 3.9479, 7.9479, ...], [4095.9480, 4099.9478, 4103.9478, ...], [8191.9478, 8195.9482, 8199.9482, ...], ], ] sum = 36899.531250 ggml_debug: k_cache_view-25 = (f16) VIEW(cache_k_l25{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-25 (copy of Kcur-25) = (f16) CPY(Kcur-25{128, 8, 3, 1}, k_cache_view-25{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ -1.7998, -1.8018, -1.8037, ...], ], ] sum = -5.405273 ggml_debug: v_cur_t-25 = (f32) TRANSPOSE(Vcur-25{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.0521, 4095.9480, 8191.9478], [ 3.9479, 4099.9478, 8195.9482], [ 7.9479, 4103.9478, 8199.9482], ... ], ] sum = 36899.531250 ggml_debug: v_cache_view-25 = (f16) VIEW(cache_v_l25{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-25 (copy of v_cur_t-25) = (f16) CPY(v_cur_t-25{3, 1024, 1, 1}, v_cache_view-25{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.0521, -0.0522, -0.0522], [ -0.1042, -0.1043, -0.1044], [ -0.2084, -0.2086, -0.2089], ... ], ] sum = -1.095245 ggml_debug: v-25 = (f16) VIEW(cache_v_l25{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.0521, -0.0522, -0.0522, ...], [ -0.1042, -0.1043, -0.1044, ...], [ -0.2084, -0.2086, -0.2089, ...], ... ], [ [ -0.0521, -0.0522, -0.0522, ...], [ -0.1042, -0.1043, -0.1044, ...], [ -0.2084, -0.2086, -0.2089, ...], ... ], [ [ -0.0521, -0.0522, -0.0522, ...], [ -0.1042, -0.1043, -0.1044, ...], [ -0.2084, -0.2086, -0.2089, ...], ... ], ... ] sum = -3.285736 ggml_debug: k-25 = (f16) VIEW(cache_k_l25{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ -1.7998, -1.8018, -1.8037, ...], [ -7.1992, -7.2070, -7.2148, ...], [-28.7969, -28.8281, -28.8594, ...], ... ], [ [ -2.0996, -2.1035, -2.1074, ...], [ -8.3984, -8.4141, -8.4297, ...], [-33.5938, -33.6562, -33.7188, ...], ... ], [ [ -2.5996, -2.6035, -2.6074, ...], [-10.3984, -10.4141, -10.4297, ...], [-41.5938, -41.6562, -41.7188, ...], ... ], ... ] sum = -410.053711 ggml_debug: q-25 = (f32) PERMUTE(Qcur-25{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 1.5468, 5.5468, 9.5468, ...], [24577.5469, 24581.5469, 24585.5469, ...], [49153.5469, 49157.5469, 49161.5469, ...], ], [ [513.5468, 517.5468, 521.5468, ...], [25089.5469, 25093.5469, 25097.5469, ...], [49665.5469, 49669.5469, 49673.5469, ...], ], [ [1025.5468, 1029.5468, 1033.5468, ...], [25601.5469, 25605.5469, 25609.5469, ...], [50177.5469, 50181.5469, 50185.5469, ...], ], ... ] sum = 677525.812500 ggml_debug: kq-25 = (f32) MUL_MAT(k-25{128, 32, 8, 1}, q-25{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 1.5146, 5.5146, 9.5146, ...], [129.5146, 133.5146, 137.5146, ...], [257.5146, 261.5146, 265.5146, ...], ], [ [385.5146, 389.5146, 393.5146, ...], [513.5146, 517.5146, 521.5146, ...], [641.5146, 645.5146, 649.5146, ...], ], [ [769.5146, 773.5146, 777.5146, ...], [897.5146, 901.5146, 905.5146, ...], [1025.5146, 1029.5146, 1033.5146, ...], ], ... ] sum = 13972.895508 ggml_debug: kq_soft_max_ext-25 = (f32) SOFT_MAX(kq-25{32, 3, 48, 1}, CUDA1#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-25 = (f32) MUL_MAT(v-25{32, 128, 8, 1}, kq_soft_max_ext-25{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.0521, 3.9479, 7.9479, ...], [511.9479, 515.9479, 519.9479, ...], [1023.9479, 1027.9479, 1031.9479, ...], ], [ [1535.9479, 1539.9479, 1543.9479, ...], [2047.9479, 2051.9480, 2055.9480, ...], [2559.9480, 2563.9480, 2567.9480, ...], ], [ [3071.9480, 3075.9480, 3079.9480, ...], [3583.9480, 3587.9480, 3591.9480, ...], [4095.9480, 4099.9478, 4103.9478, ...], ], ... ] sum = 55402.597656 ggml_debug: kqv_merged-25 = (f32) PERMUTE(kqv-25{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.0521, 3.9479, 7.9479, ...], [1535.9479, 1539.9479, 1543.9479, ...], [3071.9480, 3075.9480, 3079.9480, ...], ... ], [ [511.9479, 515.9479, 519.9479, ...], [2047.9479, 2051.9480, 2055.9480, ...], [3583.9480, 3587.9480, 3591.9480, ...], ... ], [ [1023.9479, 1027.9479, 1031.9479, ...], [2559.9480, 2563.9480, 2567.9480, ...], [4095.9480, 4099.9478, 4103.9478, ...], ... ], ] sum = 55402.601562 ggml_debug: kqv_merged_cont-25 = (f32) CONT(kqv_merged-25{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0521, 3.9479, 7.9479, ...], [24575.9473, 24579.9473, 24583.9473, ...], [49151.9492, 49155.9492, 49159.9492, ...], ], ] sum = 221219.546875 ggml_debug: kqv_out-25 = (f32) MUL_MAT(blk.25.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-25{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0883, 3.9117, 7.9117, ...], [24575.9121, 24579.9121, 24583.9121, ...], [49151.9102, 49155.9102, 49159.9102, ...], ], ] sum = 221219.187500 ggml_debug: norm-25 = (f32) NORM(kqv_out-25{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.3846, 3.6154, 7.6154, ...], [24575.6152, 24579.6152, 24583.6152, ...], [49151.6172, 49155.6172, 49159.6172, ...], ], ] sum = 221216.562500 ggml_debug: attn_out_norm-25 = (f32) MUL(norm-25{6144, 3, 1, 1}, blk.25.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1705, 3.8295, 7.8295, ...], [24575.8301, 24579.8301, 24583.8301, ...], [49151.8281, 49155.8281, 49159.8281, ...], ], ] sum = 221218.453125 ggml_debug: ffn_moe_logits-25 = (f32) MUL_MAT(blk.25.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-25{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ 1.8926, 5.8926, 9.8926, ...], [ 65.8926, 69.8926, 73.8926, ...], [129.8926, 133.8926, 137.8926, ...], ], ] sum = 629.033203 ggml_debug: ffn_moe_probs-25 = (f32) SOFT_MAX(ffn_moe_logits-25{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.2610, 4.2610, 8.2610, ...], [ 64.2610, 68.2610, 72.2610, ...], [128.2610, 132.2610, 136.2610, ...], ], ] sum = 614.348816 ggml_debug: ffn_moe_argsort-25 = (i32) ARGSORT(ffn_moe_probs-25{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0000, 4.0000, 8.0000, ...], [ 64.0000, 68.0000, 72.0000, ...], [128.0000, 132.0000, 136.0000, ...], ], ] sum = 612.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-25{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0000, 4.0000, 8.0000, ...], [ 64.0000, 68.0000, 72.0000, ...], [128.0000, 132.0000, 136.0000, ...], ], ] sum = 612.000000 ggml_debug: ffn_moe_up-25 = (f32) MUL_MAT_ID(blk.25.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-25{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1883, 3.8117, 7.8117, ...], [43007.8125, 43011.8125, 43015.8125, ...], [86015.8125, 86019.8125, 86023.8125, ...], ], ] sum = 387106.312500 ggml_debug: ffn_moe_gate-25 = (f32) MUL_MAT_ID(blk.25.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-25{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0669, 4.0669, 8.0669, ...], [43008.0664, 43012.0664, 43016.0664, ...], [86016.0703, 86020.0703, 86024.0703, ...], ], ] sum = 387108.593750 ggml_debug: ffn_moe_silu-25 = (f32) UNARY(ffn_moe_gate-25{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0346, 4.0346, 8.0346, ...], [43008.0352, 43012.0352, 43016.0352, ...], [86016.0312, 86020.0312, 86024.0312, ...], ], ] sum = 387108.281250 ggml_debug: ffn_moe_gate_par-25 = (f32) MUL(ffn_moe_up-25{10752, 3, 1, 1}, ffn_moe_silu-25{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0065, 3.9935, 7.9935, ...], [43007.9922, 43011.9922, 43015.9922, ...], [86015.9922, 86019.9922, 86023.9922, ...], ], ] sum = 387107.937500 ggml_debug: ffn_moe_down-25 = (f32) MUL_MAT_ID(blk.25.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-25{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0216, 3.9784, 7.9784, ...], [24575.9785, 24579.9785, 24583.9785, ...], [49151.9766, 49155.9766, 49159.9766, ...], ], ] sum = 221219.781250 ggml_debug: ffn_moe_probs-25 (reshaped) = (f32) RESHAPE(ffn_moe_probs-25{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.2610], [ 4.2610], [ 8.2610], ... ], [ [ 64.2610], [ 68.2610], [ 72.2610], ... ], [ [128.2610], [132.2610], [136.2610], ... ], ] sum = 614.348816 ggml_debug: ffn_moe_weights-25 = (f32) GET_ROWS(ffn_moe_probs-25 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.2610], [ 4.2610], [ 8.2610], ... ], [ [ 16.2610], [ 20.2610], [ 24.2610], ... ], [ [ 32.2610], [ 36.2610], [ 40.2610], ... ], ] sum = 182.348770 ggml_debug: ffn_moe_weights-25 (reshaped) = (f32) RESHAPE(ffn_moe_weights-25{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.2610, 4.2610, 8.2610, ...], [ 16.2610, 20.2610, 24.2610, ...], [ 32.2610, 36.2610, 40.2610, ...], ], ] sum = 182.348770 ggml_debug: ffn_moe_weights_sum-25 = (f32) SUM_ROWS(ffn_moe_weights-25 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.5517], [ 4.5517], [ 8.5517], ], ] sum = 13.655109 ggml_debug: ffn_moe_weights_norm-25 = (f32) DIV(ffn_moe_weights-25 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-25{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.4730, 4.4730, 8.4730, ...], [ 16.4730, 20.4730, 24.4730, ...], [ 32.4730, 36.4730, 40.4730, ...], ], ] sum = 184.257339 ggml_debug: ffn_moe_weights_norm-25 (view) = (f32) VIEW(ffn_moe_weights_norm-25{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.4730], [ 16.4730], [ 32.4730], ], ] sum = 49.419113 ggml_debug: ffn_moe_weighted-25 = (f32) MUL(ffn_moe_down-25{6144, 3, 1, 1}, ffn_moe_weights_norm-25 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0102, 3.9898, 7.9898, ...], [24575.9902, 24579.9902, 24583.9902, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.890625 ggml_debug: ffn_moe_up-25 = (f32) MUL_MAT_ID(blk.25.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-25{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1451, 4.1451, 8.1451, ...], [43008.1445, 43012.1445, 43016.1445, ...], [86016.1484, 86020.1484, 86024.1484, ...], ], ] sum = 387109.312500 ggml_debug: ffn_moe_gate-25 = (f32) MUL_MAT_ID(blk.25.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-25{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1958, 3.8042, 7.8042, ...], [43007.8047, 43011.8047, 43015.8047, ...], [86015.8047, 86019.8047, 86023.8047, ...], ], ] sum = 387106.250000 ggml_debug: ffn_moe_silu-25 = (f32) UNARY(ffn_moe_gate-25{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0883, 3.9117, 7.9117, ...], [43007.9102, 43011.9102, 43015.9102, ...], [86015.9141, 86019.9141, 86023.9141, ...], ], ] sum = 387107.187500 ggml_debug: ffn_moe_gate_par-25 = (f32) MUL(ffn_moe_up-25{10752, 3, 1, 1}, ffn_moe_silu-25{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0128, 3.9872, 7.9872, ...], [43007.9883, 43011.9883, 43015.9883, ...], [86015.9844, 86019.9844, 86023.9844, ...], ], ] sum = 387107.875000 ggml_debug: ffn_moe_down-25 = (f32) MUL_MAT_ID(blk.25.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-25{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0686, 3.9314, 7.9314, ...], [24575.9316, 24579.9316, 24583.9316, ...], [49151.9297, 49155.9297, 49159.9297, ...], ], ] sum = 221219.375000 ggml_debug: ffn_moe_weights_norm-25 (view) = (f32) VIEW(ffn_moe_weights_norm-25{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2430], [ 16.2430], [ 32.2430], ], ] sum = 48.729069 ggml_debug: ffn_moe_weighted-25 = (f32) MUL(ffn_moe_down-25{6144, 3, 1, 1}, ffn_moe_weights_norm-25 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0167, 3.9833, 7.9833, ...], [24575.9824, 24579.9824, 24583.9824, ...], [49151.9844, 49155.9844, 49159.9844, ...], ], ] sum = 221219.859375 ggml_debug: ffn_moe_out-25 = (f32) ADD(ffn_moe_weighted-25{6144, 3, 1, 1}, ffn_moe_weighted-25{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0269, 3.9731, 7.9731, ...], [24575.9727, 24579.9727, 24583.9727, ...], [49151.9727, 49155.9727, 49159.9727, ...], ], ] sum = 221219.750000 ggml_debug: ffn_moe_up-25 = (f32) MUL_MAT_ID(blk.25.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-25{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.4455, 3.5545, 7.5545, ...], [43007.5547, 43011.5547, 43015.5547, ...], [86015.5547, 86019.5547, 86023.5547, ...], ], ] sum = 387104.000000 ggml_debug: ffn_moe_gate-25 = (f32) MUL_MAT_ID(blk.25.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-25{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1733, 4.1733, 8.1733, ...], [43008.1719, 43012.1719, 43016.1719, ...], [86016.1719, 86020.1719, 86024.1719, ...], ], ] sum = 387109.562500 ggml_debug: ffn_moe_silu-25 = (f32) UNARY(ffn_moe_gate-25{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0942, 4.0942, 8.0942, ...], [43008.0938, 43012.0938, 43016.0938, ...], [86016.0938, 86020.0938, 86024.0938, ...], ], ] sum = 387108.843750 ggml_debug: ffn_moe_gate_par-25 = (f32) MUL(ffn_moe_up-25{10752, 3, 1, 1}, ffn_moe_silu-25{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0419, 3.9581, 7.9581, ...], [43007.9570, 43011.9570, 43015.9570, ...], [86015.9609, 86019.9609, 86023.9609, ...], ], ] sum = 387107.656250 ggml_debug: ffn_moe_down-25 = (f32) MUL_MAT_ID(blk.25.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-25{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1194, 4.1194, 8.1194, ...], [24576.1191, 24580.1191, 24584.1191, ...], [49152.1211, 49156.1211, 49160.1211, ...], ], ] sum = 221221.078125 ggml_debug: ffn_moe_weights_norm-25 (view) = (f32) VIEW(ffn_moe_weights_norm-25{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1557], [ 16.1557], [ 32.1557], ], ] sum = 48.467060 ggml_debug: ffn_moe_weighted-25 = (f32) MUL(ffn_moe_down-25{6144, 3, 1, 1}, ffn_moe_weights_norm-25 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0186, 4.0186, 8.0186, ...], [24576.0195, 24580.0195, 24584.0195, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.156250 ggml_debug: ffn_moe_out-25 = (f32) ADD(ffn_moe_out-25{6144, 3, 1, 1}, ffn_moe_weighted-25{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0083, 3.9917, 7.9917, ...], [24575.9922, 24579.9922, 24583.9922, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: ffn_moe_up-25 = (f32) MUL_MAT_ID(blk.25.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-25{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.5767, 4.5767, 8.5767, ...], [43008.5781, 43012.5781, 43016.5781, ...], [86016.5781, 86020.5781, 86024.5781, ...], ], ] sum = 387113.187500 ggml_debug: ffn_moe_gate-25 = (f32) MUL_MAT_ID(blk.25.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-25{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1905, 4.1905, 8.1905, ...], [43008.1914, 43012.1914, 43016.1914, ...], [86016.1875, 86020.1875, 86024.1875, ...], ], ] sum = 387109.687500 ggml_debug: ffn_moe_silu-25 = (f32) UNARY(ffn_moe_gate-25{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1043, 4.1043, 8.1043, ...], [43008.1055, 43012.1055, 43016.1055, ...], [86016.1016, 86020.1016, 86024.1016, ...], ], ] sum = 387108.906250 ggml_debug: ffn_moe_gate_par-25 = (f32) MUL(ffn_moe_up-25{10752, 3, 1, 1}, ffn_moe_silu-25{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0601, 4.0601, 8.0601, ...], [43008.0586, 43012.0586, 43016.0586, ...], [86016.0625, 86020.0625, 86024.0625, ...], ], ] sum = 387108.562500 ggml_debug: ffn_moe_down-25 = (f32) MUL_MAT_ID(blk.25.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-25{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0431, 4.0431, 8.0431, ...], [24576.0430, 24580.0430, 24584.0430, ...], [49152.0430, 49156.0430, 49160.0430, ...], ], ] sum = 221220.390625 ggml_debug: ffn_moe_weights_norm-25 (view) = (f32) VIEW(ffn_moe_weights_norm-25{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1283], [ 16.1283], [ 32.1283], ], ] sum = 48.384758 ggml_debug: ffn_moe_weighted-25 = (f32) MUL(ffn_moe_down-25{6144, 3, 1, 1}, ffn_moe_weights_norm-25 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0055, 4.0055, 8.0055, ...], [24576.0059, 24580.0059, 24584.0059, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.031250 ggml_debug: ffn_moe_out-25 = (f32) ADD(ffn_moe_out-25{6144, 3, 1, 1}, ffn_moe_weighted-25{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0028, 3.9972, 7.9972, ...], [24575.9980, 24579.9980, 24583.9980, ...], [49151.9961, 49155.9961, 49159.9961, ...], ], ] sum = 221219.984375 ggml_debug: ffn_inp-25 = (f32) ADD(kqv_out-25{6144, 3, 1, 1}, l_out-24{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.0791, 2.9209, 6.9209, ...], [24574.9199, 24578.9199, 24582.9199, ...], [49150.9219, 49154.9219, 49158.9219, ...], ], ] sum = 221210.296875 ggml_debug: l_out-25 = (f32) ADD(ffn_moe_out-25{6144, 3, 1, 1}, ffn_inp-25{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.0819, 2.9181, 6.9181, ...], [24574.9180, 24578.9180, 24582.9180, ...], [49150.9180, 49154.9180, 49158.9180, ...], ], ] sum = 221210.265625 ggml_debug: norm-26 = (f32) NORM(l_out-25{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.6653, 3.3347, 7.3347, ...], [24575.3340, 24579.3340, 24583.3340, ...], [49151.3359, 49155.3359, 49159.3359, ...], ], ] sum = 221214.031250 ggml_debug: attn_norm-26 = (f32) MUL(norm-26{6144, 3, 1, 1}, blk.26.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1793, 3.8207, 7.8207, ...], [24575.8203, 24579.8203, 24583.8203, ...], [49151.8203, 49155.8203, 49159.8203, ...], ], ] sum = 221218.375000 ggml_debug: wqkv-26 = (f32) MUL_MAT(blk.26.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-26{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -0.4922, 3.5078, 7.5078, ...], [32767.5078, 32771.5078, 32775.5078, ...], [65535.5078, 65539.5078, 65543.5078, ...], ], ] sum = 294943.562500 ggml_debug: wqkv_clamped-26 = (f32) CLAMP(wqkv-26{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -0.4922, 3.5078, 7.5078, ...], [32767.5078, 32771.5078, 32775.5078, ...], [65535.5078, 65539.5078, 65543.5078, ...], ], ] sum = 294943.562500 ggml_debug: wqkv_clamped-26 (view) = (f32) VIEW(wqkv_clamped-26{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.4922, 3.5078, 7.5078, ...], [32767.5078, 32771.5078, 32775.5078, ...], [65535.5078, 65539.5078, 65543.5078, ...], ], ] sum = 294943.562500 ggml_debug: Qcur-26 = (f32) CONT(wqkv_clamped-26 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.4922, 3.5078, 7.5078, ...], [24575.5078, 24579.5078, 24583.5078, ...], [49151.5078, 49155.5078, 49159.5078, ...], ], ] sum = 221215.562500 ggml_debug: Qcur-26 (reshaped) = (f32) RESHAPE(Qcur-26{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -0.4922, 3.5078, 7.5078, ...], [511.5078, 515.5078, 519.5078, ...], [1023.5078, 1027.5078, 1031.5078, ...], ... ], [ [24575.5078, 24579.5078, 24583.5078, ...], [25087.5078, 25091.5078, 25095.5078, ...], [25599.5078, 25603.5078, 25607.5078, ...], ... ], [ [49151.5078, 49155.5078, 49159.5078, ...], [49663.5078, 49667.5078, 49671.5078, ...], [50175.5078, 50179.5078, 50183.5078, ...], ... ], ] sum = 677470.625000 ggml_debug: Qcur-26 = (f32) ROPE(Qcur-26 (reshaped){128, 48, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -0.4922, 3.5078, 7.5078, ...], [511.5078, 515.5078, 519.5078, ...], [1023.5078, 1027.5078, 1031.5078, ...], ... ], [ [24575.5078, 24579.5078, 24583.5078, ...], [25087.5078, 25091.5078, 25095.5078, ...], [25599.5078, 25603.5078, 25607.5078, ...], ... ], [ [49151.5078, 49155.5078, 49159.5078, ...], [49663.5078, 49667.5078, 49671.5078, ...], [50175.5078, 50179.5078, 50183.5078, ...], ... ], ] sum = 677470.625000 ggml_debug: wqkv_clamped-26 (view) = (f32) VIEW(wqkv_clamped-26{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.2158, 3.7842, 7.7842, ...], [32767.7852, 32771.7852, 32775.7852, ...], [65535.7852, 65539.7812, 65543.7812, ...], ], ] sum = 294946.062500 ggml_debug: Kcur-26 = (f32) CONT(wqkv_clamped-26 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.2158, 3.7842, 7.7842, ...], [4095.7842, 4099.7842, 4103.7842, ...], [8191.7842, 8195.7842, 8199.7842, ...], ], ] sum = 36898.058594 ggml_debug: Kcur-26 (reshaped) = (f32) RESHAPE(Kcur-26{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ -0.2158, 3.7842, 7.7842, ...], [511.7842, 515.7842, 519.7842, ...], [1023.7842, 1027.7842, 1031.7842, ...], ... ], [ [4095.7842, 4099.7842, 4103.7842, ...], [4607.7842, 4611.7842, 4615.7842, ...], [5119.7842, 5123.7842, 5127.7842, ...], ... ], [ [8191.7842, 8195.7842, 8199.7842, ...], [8703.7842, 8707.7842, 8711.7842, ...], [9215.7842, 9219.7842, 9223.7842, ...], ... ], ] sum = 124518.164062 ggml_debug: Kcur-26 = (f32) ROPE(Kcur-26 (reshaped){128, 8, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ -0.2158, 3.7842, 7.7842, ...], [511.7842, 515.7842, 519.7842, ...], [1023.7842, 1027.7842, 1031.7842, ...], ... ], [ [4095.7842, 4099.7842, 4103.7842, ...], [4607.7842, 4611.7842, 4615.7842, ...], [5119.7842, 5123.7842, 5127.7842, ...], ... ], [ [8191.7842, 8195.7842, 8199.7842, ...], [8703.7842, 8707.7842, 8711.7842, ...], [9215.7842, 9219.7842, 9223.7842, ...], ... ], ] sum = 124518.164062 ggml_debug: wqkv_clamped-26 (view) = (f32) VIEW(wqkv_clamped-26{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0480, 4.0480, 8.0480, ...], [32768.0469, 32772.0469, 32776.0469, ...], [65536.0469, 65540.0469, 65544.0469, ...], ], ] sum = 294948.437500 ggml_debug: Vcur-26 = (f32) CONT(wqkv_clamped-26 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0480, 4.0480, 8.0480, ...], [4096.0479, 4100.0479, 4104.0479, ...], [8192.0479, 8196.0479, 8200.0479, ...], ], ] sum = 36900.429688 ggml_debug: k_cache_view-26 = (f16) VIEW(cache_k_l26{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-26 (copy of Kcur-26) = (f16) CPY(Kcur-26{128, 8, 3, 1}, k_cache_view-26{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ -0.2158, -0.2161, -0.2163, ...], ], ] sum = -0.648193 ggml_debug: v_cur_t-26 = (f32) TRANSPOSE(Vcur-26{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0480, 4096.0479, 8192.0479], [ 4.0480, 4100.0479, 8196.0479], [ 8.0480, 4104.0479, 8200.0479], ... ], ] sum = 36900.429688 ggml_debug: v_cache_view-26 = (f16) VIEW(cache_v_l26{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-26 (copy of v_cur_t-26) = (f16) CPY(v_cur_t-26{3, 1024, 1, 1}, v_cache_view-26{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.0480, 0.0481, 0.0481], [ 0.0960, 0.0961, 0.0963], [ 0.1920, 0.1923, 0.1925], ... ], ] sum = 1.009369 ggml_debug: v-26 = (f16) VIEW(cache_v_l26{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.0480, 0.0481, 0.0481, ...], [ 0.0960, 0.0961, 0.0963, ...], [ 0.1920, 0.1923, 0.1925, ...], ... ], [ [ 0.0480, 0.0481, 0.0481, ...], [ 0.0960, 0.0961, 0.0963, ...], [ 0.1920, 0.1923, 0.1925, ...], ... ], [ [ 0.0480, 0.0481, 0.0481, ...], [ 0.0960, 0.0961, 0.0963, ...], [ 0.1920, 0.1923, 0.1925, ...], ... ], ... ] sum = 3.028107 ggml_debug: k-26 = (f16) VIEW(cache_k_l26{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ -0.2158, -0.2161, -0.2163, ...], [ -0.8633, -0.8643, -0.8652, ...], [ -3.4531, -3.4570, -3.4609, ...], ... ], [ [ -0.2471, -0.2473, -0.2476, ...], [ -0.9883, -0.9893, -0.9902, ...], [ -3.9531, -3.9570, -3.9609, ...], ... ], [ [ -0.3066, -0.3071, -0.3076, ...], [ -1.2266, -1.2285, -1.2305, ...], [ -4.9062, -4.9141, -4.9219, ...], ... ], ... ] sum = -48.541992 ggml_debug: q-26 = (f32) PERMUTE(Qcur-26{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -0.4922, 3.5078, 7.5078, ...], [24575.5078, 24579.5078, 24583.5078, ...], [49151.5078, 49155.5078, 49159.5078, ...], ], [ [511.5078, 515.5078, 519.5078, ...], [25087.5078, 25091.5078, 25095.5078, ...], [49663.5078, 49667.5078, 49671.5078, ...], ], [ [1023.5078, 1027.5078, 1031.5078, ...], [25599.5078, 25603.5078, 25607.5078, ...], [50175.5078, 50179.5078, 50183.5078, ...], ], ... ] sum = 677470.562500 ggml_debug: kq-26 = (f32) MUL_MAT(k-26{128, 32, 8, 1}, q-26{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ -0.1887, 3.8113, 7.8113, ...], [127.8113, 131.8113, 135.8113, ...], [255.8113, 259.8113, 263.8113, ...], ], [ [383.8113, 387.8113, 391.8113, ...], [511.8113, 515.8113, 519.8113, ...], [639.8113, 643.8113, 647.8113, ...], ], [ [767.8113, 771.8113, 775.8113, ...], [895.8113, 899.8113, 903.8113, ...], [1023.8113, 1027.8113, 1031.8113, ...], ], ... ] sum = 13926.907227 ggml_debug: kq_soft_max_ext-26 = (f32) SOFT_MAX(kq-26{32, 3, 48, 1}, CUDA1#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-26 = (f32) MUL_MAT(v-26{32, 128, 8, 1}, kq_soft_max_ext-26{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.0480, 4.0480, 8.0480, ...], [512.0480, 516.0480, 520.0480, ...], [1024.0480, 1028.0480, 1032.0480, ...], ], [ [1536.0480, 1540.0480, 1544.0480, ...], [2048.0481, 2052.0481, 2056.0481, ...], [2560.0481, 2564.0481, 2568.0481, ...], ], [ [3072.0481, 3076.0481, 3080.0481, ...], [3584.0481, 3588.0481, 3592.0481, ...], [4096.0479, 4100.0479, 4104.0479, ...], ], ... ] sum = 55405.292969 ggml_debug: kqv_merged-26 = (f32) PERMUTE(kqv-26{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0480, 4.0480, 8.0480, ...], [1536.0480, 1540.0480, 1544.0480, ...], [3072.0481, 3076.0481, 3080.0481, ...], ... ], [ [512.0480, 516.0480, 520.0480, ...], [2048.0481, 2052.0481, 2056.0481, ...], [3584.0481, 3588.0481, 3592.0481, ...], ... ], [ [1024.0480, 1028.0480, 1032.0480, ...], [2560.0481, 2564.0481, 2568.0481, ...], [4096.0479, 4100.0479, 4104.0479, ...], ... ], ] sum = 55405.289062 ggml_debug: kqv_merged_cont-26 = (f32) CONT(kqv_merged-26{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0480, 4.0480, 8.0480, ...], [24576.0488, 24580.0488, 24584.0488, ...], [49152.0469, 49156.0469, 49160.0469, ...], ], ] sum = 221220.421875 ggml_debug: kqv_out-26 = (f32) MUL_MAT(blk.26.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-26{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0127, 3.9873, 7.9873, ...], [24575.9863, 24579.9863, 24583.9863, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.875000 ggml_debug: norm-26 = (f32) NORM(kqv_out-26{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0751, 3.9249, 7.9249, ...], [24575.9258, 24579.9258, 24583.9258, ...], [49151.9258, 49155.9258, 49159.9258, ...], ], ] sum = 221219.312500 ggml_debug: attn_out_norm-26 = (f32) MUL(norm-26{6144, 3, 1, 1}, blk.26.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0346, 3.9654, 7.9654, ...], [24575.9648, 24579.9648, 24583.9648, ...], [49151.9648, 49155.9648, 49159.9648, ...], ], ] sum = 221219.703125 ggml_debug: ffn_moe_logits-26 = (f32) MUL_MAT(blk.26.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-26{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.2542, 3.7458, 7.7458, ...], [ 63.7458, 67.7458, 71.7458, ...], [127.7458, 131.7458, 135.7458, ...], ], ] sum = 609.712646 ggml_debug: ffn_moe_probs-26 = (f32) SOFT_MAX(ffn_moe_logits-26{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0472, 4.0472, 8.0472, ...], [ 64.0472, 68.0472, 72.0472, ...], [128.0472, 132.0472, 136.0472, ...], ], ] sum = 612.424744 ggml_debug: ffn_moe_argsort-26 = (i32) ARGSORT(ffn_moe_probs-26{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 3.0000, 7.0000, 11.0000, ...], [ 67.0000, 71.0000, 75.0000, ...], [131.0000, 135.0000, 139.0000, ...], ], ] sum = 639.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-26{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 3.0000, 7.0000, 11.0000, ...], [ 67.0000, 71.0000, 75.0000, ...], [131.0000, 135.0000, 139.0000, ...], ], ] sum = 639.000000 ggml_debug: ffn_moe_up-26 = (f32) MUL_MAT_ID(blk.26.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-26{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.4851, 3.5149, 7.5149, ...], [43007.5156, 43011.5156, 43015.5156, ...], [86015.5156, 86019.5156, 86023.5156, ...], ], ] sum = 387103.625000 ggml_debug: ffn_moe_gate-26 = (f32) MUL_MAT_ID(blk.26.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-26{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0529, 3.9471, 7.9471, ...], [43007.9453, 43011.9453, 43015.9453, ...], [86015.9453, 86019.9453, 86023.9453, ...], ], ] sum = 387107.500000 ggml_debug: ffn_moe_silu-26 = (f32) UNARY(ffn_moe_gate-26{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0257, 3.9743, 7.9743, ...], [43007.9727, 43011.9727, 43015.9727, ...], [86015.9766, 86019.9766, 86023.9766, ...], ], ] sum = 387107.750000 ggml_debug: ffn_moe_gate_par-26 = (f32) MUL(ffn_moe_up-26{10752, 3, 1, 1}, ffn_moe_silu-26{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0125, 4.0125, 8.0125, ...], [43008.0117, 43012.0117, 43016.0117, ...], [86016.0156, 86020.0156, 86024.0156, ...], ], ] sum = 387108.125000 ggml_debug: ffn_moe_down-26 = (f32) MUL_MAT_ID(blk.26.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-26{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0822, 3.9178, 7.9178, ...], [24575.9180, 24579.9180, 24583.9180, ...], [49151.9180, 49155.9180, 49159.9180, ...], ], ] sum = 221219.265625 ggml_debug: ffn_moe_probs-26 (reshaped) = (f32) RESHAPE(ffn_moe_probs-26{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0472], [ 4.0472], [ 8.0472], ... ], [ [ 64.0472], [ 68.0472], [ 72.0472], ... ], [ [128.0472], [132.0472], [136.0472], ... ], ] sum = 612.424744 ggml_debug: ffn_moe_weights-26 = (f32) GET_ROWS(ffn_moe_probs-26 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.0965], [ 4.0965], [ 8.0965], ... ], [ [ 16.0965], [ 20.0965], [ 24.0965], ... ], [ [ 32.0965], [ 36.0965], [ 40.0965], ... ], ] sum = 180.868851 ggml_debug: ffn_moe_weights-26 (reshaped) = (f32) RESHAPE(ffn_moe_weights-26{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0965, 4.0965, 8.0965, ...], [ 16.0965, 20.0965, 24.0965, ...], [ 32.0965, 36.0965, 40.0965, ...], ], ] sum = 180.868851 ggml_debug: ffn_moe_weights_sum-26 = (f32) SUM_ROWS(ffn_moe_weights-26 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3316], [ 4.3316], [ 8.3316], ], ] sum = 12.994849 ggml_debug: ffn_moe_weights_norm-26 = (f32) DIV(ffn_moe_weights-26 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-26{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2911, 4.2911, 8.2911, ...], [ 16.2911, 20.2911, 24.2911, ...], [ 32.2911, 36.2911, 40.2911, ...], ], ] sum = 182.619995 ggml_debug: ffn_moe_weights_norm-26 (view) = (f32) VIEW(ffn_moe_weights_norm-26{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2911], [ 16.2911], [ 32.2911], ], ] sum = 48.873333 ggml_debug: ffn_moe_weighted-26 = (f32) MUL(ffn_moe_down-26{6144, 3, 1, 1}, ffn_moe_weights_norm-26 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0239, 3.9761, 7.9761, ...], [24575.9766, 24579.9766, 24583.9766, ...], [49151.9766, 49155.9766, 49159.9766, ...], ], ] sum = 221219.781250 ggml_debug: ffn_moe_up-26 = (f32) MUL_MAT_ID(blk.26.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-26{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.5891, 2.4109, 6.4109, ...], [43006.4102, 43010.4102, 43014.4102, ...], [86014.4141, 86018.4141, 86022.4141, ...], ], ] sum = 387093.687500 ggml_debug: ffn_moe_gate-26 = (f32) MUL_MAT_ID(blk.26.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-26{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0456, 3.9544, 7.9544, ...], [43007.9531, 43011.9531, 43015.9531, ...], [86015.9531, 86019.9531, 86023.9531, ...], ], ] sum = 387107.562500 ggml_debug: ffn_moe_silu-26 = (f32) UNARY(ffn_moe_gate-26{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0223, 3.9777, 7.9777, ...], [43007.9766, 43011.9766, 43015.9766, ...], [86015.9766, 86019.9766, 86023.9766, ...], ], ] sum = 387107.781250 ggml_debug: ffn_moe_gate_par-26 = (f32) MUL(ffn_moe_up-26{10752, 3, 1, 1}, ffn_moe_silu-26{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0354, 4.0354, 8.0354, ...], [43008.0352, 43012.0352, 43016.0352, ...], [86016.0391, 86020.0391, 86024.0391, ...], ], ] sum = 387108.312500 ggml_debug: ffn_moe_down-26 = (f32) MUL_MAT_ID(blk.26.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-26{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0438, 3.9562, 7.9562, ...], [24575.9570, 24579.9570, 24583.9570, ...], [49151.9570, 49155.9570, 49159.9570, ...], ], ] sum = 221219.593750 ggml_debug: ffn_moe_weights_norm-26 (view) = (f32) VIEW(ffn_moe_weights_norm-26{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2397], [ 16.2397], [ 32.2397], ], ] sum = 48.719090 ggml_debug: ffn_moe_weighted-26 = (f32) MUL(ffn_moe_down-26{6144, 3, 1, 1}, ffn_moe_weights_norm-26 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0105, 3.9895, 7.9895, ...], [24575.9902, 24579.9902, 24583.9902, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.890625 ggml_debug: ffn_moe_out-26 = (f32) ADD(ffn_moe_weighted-26{6144, 3, 1, 1}, ffn_moe_weighted-26{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0344, 3.9656, 7.9656, ...], [24575.9648, 24579.9648, 24583.9648, ...], [49151.9648, 49155.9648, 49159.9648, ...], ], ] sum = 221219.703125 ggml_debug: ffn_moe_up-26 = (f32) MUL_MAT_ID(blk.26.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-26{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.5118, 4.5118, 8.5118, ...], [43008.5117, 43012.5117, 43016.5117, ...], [86016.5156, 86020.5156, 86024.5156, ...], ], ] sum = 387112.625000 ggml_debug: ffn_moe_gate-26 = (f32) MUL_MAT_ID(blk.26.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-26{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3395, 4.3395, 8.3395, ...], [43008.3398, 43012.3398, 43016.3398, ...], [86016.3359, 86020.3359, 86024.3359, ...], ], ] sum = 387111.062500 ggml_debug: ffn_moe_silu-26 = (f32) UNARY(ffn_moe_gate-26{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1983, 4.1983, 8.1983, ...], [43008.1992, 43012.1992, 43016.1992, ...], [86016.1953, 86020.1953, 86024.1953, ...], ], ] sum = 387109.750000 ggml_debug: ffn_moe_gate_par-26 = (f32) MUL(ffn_moe_up-26{10752, 3, 1, 1}, ffn_moe_silu-26{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1015, 4.1015, 8.1015, ...], [43008.1016, 43012.1016, 43016.1016, ...], [86016.1016, 86020.1016, 86024.1016, ...], ], ] sum = 387108.906250 ggml_debug: ffn_moe_down-26 = (f32) MUL_MAT_ID(blk.26.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-26{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1538, 3.8462, 7.8462, ...], [24575.8457, 24579.8457, 24583.8457, ...], [49151.8477, 49155.8477, 49159.8477, ...], ], ] sum = 221218.609375 ggml_debug: ffn_moe_weights_norm-26 (view) = (f32) VIEW(ffn_moe_weights_norm-26{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2396], [ 16.2396], [ 32.2396], ], ] sum = 48.718735 ggml_debug: ffn_moe_weighted-26 = (f32) MUL(ffn_moe_down-26{6144, 3, 1, 1}, ffn_moe_weights_norm-26 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0369, 3.9631, 7.9631, ...], [24575.9629, 24579.9629, 24583.9629, ...], [49151.9648, 49155.9648, 49159.9648, ...], ], ] sum = 221219.671875 ggml_debug: ffn_moe_out-26 = (f32) ADD(ffn_moe_out-26{6144, 3, 1, 1}, ffn_moe_weighted-26{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0713, 3.9287, 7.9287, ...], [24575.9277, 24579.9277, 24583.9277, ...], [49151.9297, 49155.9297, 49159.9297, ...], ], ] sum = 221219.375000 ggml_debug: ffn_moe_up-26 = (f32) MUL_MAT_ID(blk.26.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-26{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3755, 3.6245, 7.6245, ...], [43007.6250, 43011.6250, 43015.6250, ...], [86015.6250, 86019.6250, 86023.6250, ...], ], ] sum = 387104.625000 ggml_debug: ffn_moe_gate-26 = (f32) MUL_MAT_ID(blk.26.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-26{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.9841, 4.9841, 8.9841, ...], [43008.9844, 43012.9844, 43016.9844, ...], [86016.9844, 86020.9844, 86024.9844, ...], ], ] sum = 387116.875000 ggml_debug: ffn_moe_silu-26 = (f32) UNARY(ffn_moe_gate-26{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.7163, 4.7163, 8.7163, ...], [43008.7148, 43012.7148, 43016.7148, ...], [86016.7188, 86020.7188, 86024.7188, ...], ], ] sum = 387114.468750 ggml_debug: ffn_moe_gate_par-26 = (f32) MUL(ffn_moe_up-26{10752, 3, 1, 1}, ffn_moe_silu-26{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2690, 3.7310, 7.7310, ...], [43007.7305, 43011.7305, 43015.7305, ...], [86015.7344, 86019.7344, 86023.7344, ...], ], ] sum = 387105.625000 ggml_debug: ffn_moe_down-26 = (f32) MUL_MAT_ID(blk.26.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-26{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1453, 3.8547, 7.8547, ...], [24575.8555, 24579.8555, 24583.8555, ...], [49151.8555, 49155.8555, 49159.8555, ...], ], ] sum = 221218.703125 ggml_debug: ffn_moe_weights_norm-26 (view) = (f32) VIEW(ffn_moe_weights_norm-26{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2296], [ 16.2296], [ 32.2296], ], ] sum = 48.688843 ggml_debug: ffn_moe_weighted-26 = (f32) MUL(ffn_moe_down-26{6144, 3, 1, 1}, ffn_moe_weights_norm-26 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0334, 3.9666, 7.9666, ...], [24575.9668, 24579.9668, 24583.9668, ...], [49151.9648, 49155.9648, 49159.9648, ...], ], ] sum = 221219.703125 ggml_debug: ffn_moe_out-26 = (f32) ADD(ffn_moe_out-26{6144, 3, 1, 1}, ffn_moe_weighted-26{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1047, 3.8953, 7.8953, ...], [24575.8945, 24579.8945, 24583.8945, ...], [49151.8945, 49155.8945, 49159.8945, ...], ], ] sum = 221219.046875 ggml_debug: ffn_inp-26 = (f32) ADD(kqv_out-26{6144, 3, 1, 1}, l_out-25{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.0946, 2.9054, 6.9054, ...], [24574.9062, 24578.9062, 24582.9062, ...], [49150.9062, 49154.9062, 49158.9062, ...], ], ] sum = 221210.156250 ggml_debug: l_out-26 = (f32) ADD(ffn_moe_out-26{6144, 3, 1, 1}, ffn_inp-26{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.1993, 2.8007, 6.8007, ...], [24574.8008, 24578.8008, 24582.8008, ...], [49150.8008, 49154.8008, 49158.8008, ...], ], ] sum = 221209.203125 ggml_debug: norm-27 = (f32) NORM(l_out-26{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.6955, 3.3045, 7.3045, ...], [24575.3047, 24579.3047, 24583.3047, ...], [49151.3047, 49155.3047, 49159.3047, ...], ], ] sum = 221213.750000 ggml_debug: attn_norm-27 = (f32) MUL(norm-27{6144, 3, 1, 1}, blk.27.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2092, 3.7908, 7.7908, ...], [24575.7910, 24579.7910, 24583.7910, ...], [49151.7891, 49155.7891, 49159.7891, ...], ], ] sum = 221218.093750 ggml_debug: wqkv-27 = (f32) MUL_MAT(blk.27.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-27{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.0227, 4.0227, 8.0227, ...], [32768.0234, 32772.0234, 32776.0234, ...], [65536.0234, 65540.0234, 65544.0234, ...], ], ] sum = 294948.218750 ggml_debug: wqkv_clamped-27 = (f32) CLAMP(wqkv-27{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.0227, 4.0227, 8.0227, ...], [32768.0234, 32772.0234, 32776.0234, ...], [65536.0234, 65540.0234, 65544.0234, ...], ], ] sum = 294948.218750 ggml_debug: wqkv_clamped-27 (view) = (f32) VIEW(wqkv_clamped-27{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0227, 4.0227, 8.0227, ...], [32768.0234, 32772.0234, 32776.0234, ...], [65536.0234, 65540.0234, 65544.0234, ...], ], ] sum = 294948.218750 ggml_debug: Qcur-27 = (f32) CONT(wqkv_clamped-27 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0227, 4.0227, 8.0227, ...], [24576.0234, 24580.0234, 24584.0234, ...], [49152.0234, 49156.0234, 49160.0234, ...], ], ] sum = 221220.218750 ggml_debug: Qcur-27 (reshaped) = (f32) RESHAPE(Qcur-27{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0227, 4.0227, 8.0227, ...], [512.0227, 516.0227, 520.0227, ...], [1024.0227, 1028.0227, 1032.0227, ...], ... ], [ [24576.0234, 24580.0234, 24584.0234, ...], [25088.0234, 25092.0234, 25096.0234, ...], [25600.0234, 25604.0234, 25608.0234, ...], ... ], [ [49152.0234, 49156.0234, 49160.0234, ...], [49664.0234, 49668.0234, 49672.0234, ...], [50176.0234, 50180.0234, 50184.0234, ...], ... ], ] sum = 677484.625000 ggml_debug: Qcur-27 = (f32) ROPE(Qcur-27 (reshaped){128, 48, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.0227, 4.0227, 8.0227, ...], [512.0227, 516.0227, 520.0227, ...], [1024.0227, 1028.0227, 1032.0227, ...], ... ], [ [24576.0234, 24580.0234, 24584.0234, ...], [25088.0234, 25092.0234, 25096.0234, ...], [25600.0234, 25604.0234, 25608.0234, ...], ... ], [ [49152.0234, 49156.0234, 49160.0234, ...], [49664.0234, 49668.0234, 49672.0234, ...], [50176.0234, 50180.0234, 50184.0234, ...], ... ], ] sum = 677484.625000 ggml_debug: wqkv_clamped-27 (view) = (f32) VIEW(wqkv_clamped-27{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -1.4531, 2.5469, 6.5469, ...], [32766.5469, 32770.5469, 32774.5469, ...], [65534.5469, 65538.5469, 65542.5469, ...], ], ] sum = 294934.937500 ggml_debug: Kcur-27 = (f32) CONT(wqkv_clamped-27 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -1.4531, 2.5469, 6.5469, ...], [4094.5469, 4098.5469, 4102.5469, ...], [8190.5469, 8194.5469, 8198.5469, ...], ], ] sum = 36886.921875 ggml_debug: Kcur-27 (reshaped) = (f32) RESHAPE(Kcur-27{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ -1.4531, 2.5469, 6.5469, ...], [510.5469, 514.5469, 518.5469, ...], [1022.5469, 1026.5469, 1030.5469, ...], ... ], [ [4094.5469, 4098.5469, 4102.5469, ...], [4606.5469, 4610.5469, 4614.5469, ...], [5118.5469, 5122.5469, 5126.5469, ...], ... ], [ [8190.5469, 8194.5469, 8198.5469, ...], [8702.5469, 8706.5469, 8710.5469, ...], [9214.5469, 9218.5469, 9222.5469, ...], ... ], ] sum = 124484.765625 ggml_debug: Kcur-27 = (f32) ROPE(Kcur-27 (reshaped){128, 8, 3, 1}, CUDA1#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ -1.4531, 2.5469, 6.5469, ...], [510.5469, 514.5469, 518.5469, ...], [1022.5469, 1026.5469, 1030.5469, ...], ... ], [ [4094.5469, 4098.5469, 4102.5469, ...], [4606.5469, 4610.5469, 4614.5469, ...], [5118.5469, 5122.5469, 5126.5469, ...], ... ], [ [8190.5469, 8194.5469, 8198.5469, ...], [8702.5469, 8706.5469, 8710.5469, ...], [9214.5469, 9218.5469, 9222.5469, ...], ... ], ] sum = 124484.765625 ggml_debug: wqkv_clamped-27 (view) = (f32) VIEW(wqkv_clamped-27{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0015, 3.9985, 7.9985, ...], [32767.9980, 32772.0000, 32776.0000, ...], [65536.0000, 65540.0000, 65544.0000, ...], ], ] sum = 294948.000000 ggml_debug: Vcur-27 = (f32) CONT(wqkv_clamped-27 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.0015, 3.9985, 7.9985, ...], [4095.9985, 4099.9985, 4103.9985, ...], [8191.9985, 8195.9980, 8199.9980, ...], ], ] sum = 36899.984375 ggml_debug: k_cache_view-27 = (f16) VIEW(cache_k_l27{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-27 (copy of Kcur-27) = (f16) CPY(Kcur-27{128, 8, 3, 1}, k_cache_view-27{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ -1.4531, -1.4551, -1.4570, ...], ], ] sum = -4.365234 ggml_debug: v_cur_t-27 = (f32) TRANSPOSE(Vcur-27{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.0015, 4095.9985, 8191.9985], [ 3.9985, 4099.9985, 8195.9980], [ 7.9985, 4103.9985, 8199.9980], ... ], ] sum = 36899.984375 ggml_debug: v_cache_view-27 = (f16) VIEW(cache_v_l27{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-27 (copy of v_cur_t-27) = (f16) CPY(v_cur_t-27{3, 1024, 1, 1}, v_cache_view-27{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.0015, -0.0015, -0.0015], [ -0.0030, -0.0030, -0.0030], [ -0.0060, -0.0060, -0.0061], ... ], ] sum = -0.031743 ggml_debug: v-27 = (f16) VIEW(cache_v_l27{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.0015, -0.0015, -0.0015, ...], [ -0.0030, -0.0030, -0.0030, ...], [ -0.0060, -0.0060, -0.0061, ...], ... ], [ [ -0.0015, -0.0015, -0.0015, ...], [ -0.0030, -0.0030, -0.0030, ...], [ -0.0060, -0.0060, -0.0061, ...], ... ], [ [ -0.0015, -0.0015, -0.0015, ...], [ -0.0030, -0.0030, -0.0030, ...], [ -0.0060, -0.0060, -0.0061, ...], ... ], ... ] sum = -0.095229 ggml_debug: k-27 = (f16) VIEW(cache_k_l27{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ -1.4531, -1.4551, -1.4570, ...], [ -5.8125, -5.8203, -5.8281, ...], [-23.2500, -23.2812, -23.3125, ...], ... ], [ [ -1.7031, -1.7051, -1.7070, ...], [ -6.8125, -6.8203, -6.8281, ...], [-27.2500, -27.2812, -27.3125, ...], ... ], [ [ -1.9531, -1.9551, -1.9570, ...], [ -7.8125, -7.8203, -7.8281, ...], [-31.2500, -31.2812, -31.3125, ...], ... ], ... ] sum = -322.259766 ggml_debug: q-27 = (f32) PERMUTE(Qcur-27{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.0227, 4.0227, 8.0227, ...], [24576.0234, 24580.0234, 24584.0234, ...], [49152.0234, 49156.0234, 49160.0234, ...], ], [ [512.0227, 516.0227, 520.0227, ...], [25088.0234, 25092.0234, 25096.0234, ...], [49664.0234, 49668.0234, 49672.0234, ...], ], [ [1024.0227, 1028.0227, 1032.0227, ...], [25600.0234, 25604.0234, 25608.0234, ...], [50176.0234, 50180.0234, 50184.0234, ...], ], ... ] sum = 677484.625000 ggml_debug: kq-27 = (f32) MUL_MAT(k-27{128, 32, 8, 1}, q-27{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [-22.7812, -18.7812, -14.7812, ...], [105.2188, 109.2188, 113.2188, ...], [233.2188, 237.2188, 241.2188, ...], ], [ [361.2188, 365.2188, 369.2188, ...], [489.2188, 493.2188, 497.2188, ...], [617.2188, 621.2188, 625.2188, ...], ], [ [745.2188, 749.2188, 753.2188, ...], [873.2188, 877.2188, 881.2188, ...], [1001.2188, 1005.2188, 1009.2188, ...], ], ... ] sum = 13316.906250 ggml_debug: kq_soft_max_ext-27 = (f32) SOFT_MAX(kq-27{32, 3, 48, 1}, CUDA1#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-27 = (f32) MUL_MAT(v-27{32, 128, 8, 1}, kq_soft_max_ext-27{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.0015, 3.9985, 7.9985, ...], [511.9985, 515.9985, 519.9985, ...], [1023.9985, 1027.9985, 1031.9985, ...], ], [ [1535.9985, 1539.9985, 1543.9985, ...], [2047.9985, 2051.9985, 2055.9985, ...], [2559.9985, 2563.9985, 2567.9985, ...], ], [ [3071.9985, 3075.9985, 3079.9985, ...], [3583.9985, 3587.9985, 3591.9985, ...], [4095.9985, 4099.9985, 4103.9985, ...], ], ... ] sum = 55403.964844 ggml_debug: kqv_merged-27 = (f32) PERMUTE(kqv-27{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.0015, 3.9985, 7.9985, ...], [1535.9985, 1539.9985, 1543.9985, ...], [3071.9985, 3075.9985, 3079.9985, ...], ... ], [ [511.9985, 515.9985, 519.9985, ...], [2047.9985, 2051.9985, 2055.9985, ...], [3583.9985, 3587.9985, 3591.9985, ...], ... ], [ [1023.9985, 1027.9985, 1031.9985, ...], [2559.9985, 2563.9985, 2567.9985, ...], [4095.9985, 4099.9985, 4103.9985, ...], ... ], ] sum = 55403.968750 ggml_debug: kqv_merged_cont-27 = (f32) CONT(kqv_merged-27{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0015, 3.9985, 7.9985, ...], [24575.9980, 24579.9980, 24583.9980, ...], [49152.0000, 49156.0000, 49160.0000, ...], ], ] sum = 221220.000000 ggml_debug: kqv_out-27 = (f32) MUL_MAT(blk.27.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-27{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1435, 3.8565, 7.8565, ...], [24575.8574, 24579.8574, 24583.8574, ...], [49151.8555, 49155.8555, 49159.8555, ...], ], ] sum = 221218.718750 ggml_debug: norm-27 = (f32) NORM(kqv_out-27{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.6709, 3.3291, 7.3291, ...], [24575.3281, 24579.3281, 24583.3281, ...], [49151.3281, 49155.3281, 49159.3281, ...], ], ] sum = 221213.953125 ggml_debug: attn_out_norm-27 = (f32) MUL(norm-27{6144, 3, 1, 1}, blk.27.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.3158, 3.6842, 7.6842, ...], [24575.6836, 24579.6836, 24583.6836, ...], [49151.6836, 49155.6836, 49159.6836, ...], ], ] sum = 221217.171875 ggml_debug: ffn_moe_logits-27 = (f32) MUL_MAT(blk.27.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-27{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.9092, 3.0908, 7.0908, ...], [ 63.0908, 67.0908, 71.0908, ...], [127.0908, 131.0908, 135.0908, ...], ], ] sum = 603.817383 ggml_debug: ffn_moe_probs-27 = (f32) SOFT_MAX(ffn_moe_logits-27{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0202, 4.0202, 8.0202, ...], [ 64.0202, 68.0202, 72.0202, ...], [128.0202, 132.0202, 136.0202, ...], ], ] sum = 612.182068 ggml_debug: ffn_moe_argsort-27 = (i32) ARGSORT(ffn_moe_probs-27{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 5.0000, 9.0000, 13.0000, ...], [ 69.0000, 73.0000, 77.0000, ...], [133.0000, 137.0000, 141.0000, ...], ], ] sum = 657.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-27{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 5.0000, 9.0000, 13.0000, ...], [ 69.0000, 73.0000, 77.0000, ...], [133.0000, 137.0000, 141.0000, ...], ], ] sum = 657.000000 ggml_debug: ffn_moe_up-27 = (f32) MUL_MAT_ID(blk.27.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-27{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3712, 3.6288, 7.6288, ...], [43007.6289, 43011.6289, 43015.6289, ...], [86015.6250, 86019.6250, 86023.6250, ...], ], ] sum = 387104.625000 ggml_debug: ffn_moe_gate-27 = (f32) MUL_MAT_ID(blk.27.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-27{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.4247, 3.5753, 7.5753, ...], [43007.5742, 43011.5742, 43015.5742, ...], [86015.5781, 86019.5781, 86023.5781, ...], ], ] sum = 387104.187500 ggml_debug: ffn_moe_silu-27 = (f32) UNARY(ffn_moe_gate-27{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1679, 3.8321, 7.8321, ...], [43007.8320, 43011.8320, 43015.8320, ...], [86015.8359, 86019.8359, 86023.8359, ...], ], ] sum = 387106.500000 ggml_debug: ffn_moe_gate_par-27 = (f32) MUL(ffn_moe_up-27{10752, 3, 1, 1}, ffn_moe_silu-27{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0623, 4.0623, 8.0623, ...], [43008.0625, 43012.0625, 43016.0625, ...], [86016.0625, 86020.0625, 86024.0625, ...], ], ] sum = 387108.562500 ggml_debug: ffn_moe_down-27 = (f32) MUL_MAT_ID(blk.27.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-27{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.4995, 4.4995, 8.4995, ...], [24576.5000, 24580.5000, 24584.5000, ...], [49152.5000, 49156.5000, 49160.5000, ...], ], ] sum = 221224.500000 ggml_debug: ffn_moe_probs-27 (reshaped) = (f32) RESHAPE(ffn_moe_probs-27{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0202], [ 4.0202], [ 8.0202], ... ], [ [ 64.0202], [ 68.0202], [ 72.0202], ... ], [ [128.0202], [132.0202], [136.0202], ... ], ] sum = 612.182068 ggml_debug: ffn_moe_weights-27 = (f32) GET_ROWS(ffn_moe_probs-27 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1163], [ 4.1163], [ 8.1163], ... ], [ [ 16.1163], [ 20.1163], [ 24.1163], ... ], [ [ 32.1163], [ 36.1163], [ 40.1163], ... ], ] sum = 181.046585 ggml_debug: ffn_moe_weights-27 (reshaped) = (f32) RESHAPE(ffn_moe_weights-27{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1163, 4.1163, 8.1163, ...], [ 16.1163, 20.1163, 24.1163, ...], [ 32.1163, 36.1163, 40.1163, ...], ], ] sum = 181.046585 ggml_debug: ffn_moe_weights_sum-27 = (f32) SUM_ROWS(ffn_moe_weights-27 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.4249], [ 4.4249], [ 8.4249], ], ] sum = 13.274769 ggml_debug: ffn_moe_weights_norm-27 = (f32) DIV(ffn_moe_weights-27 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-27{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2737, 4.2737, 8.2737, ...], [ 16.2737, 20.2737, 24.2737, ...], [ 32.2737, 36.2737, 40.2737, ...], ], ] sum = 182.463013 ggml_debug: ffn_moe_weights_norm-27 (view) = (f32) VIEW(ffn_moe_weights_norm-27{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2737], [ 16.2737], [ 32.2737], ], ] sum = 48.821011 ggml_debug: ffn_moe_weighted-27 = (f32) MUL(ffn_moe_down-27{6144, 3, 1, 1}, ffn_moe_weights_norm-27 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1367, 4.1367, 8.1367, ...], [24576.1367, 24580.1367, 24584.1367, ...], [49152.1367, 49156.1367, 49160.1367, ...], ], ] sum = 221221.234375 ggml_debug: ffn_moe_up-27 = (f32) MUL_MAT_ID(blk.27.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-27{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.5021, 4.5021, 8.5021, ...], [43008.5039, 43012.5039, 43016.5039, ...], [86016.5000, 86020.5000, 86024.5000, ...], ], ] sum = 387112.500000 ggml_debug: ffn_moe_gate-27 = (f32) MUL_MAT_ID(blk.27.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-27{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.5635, 4.5635, 8.5635, ...], [43008.5625, 43012.5625, 43016.5625, ...], [86016.5625, 86020.5625, 86024.5625, ...], ], ] sum = 387113.062500 ggml_debug: ffn_moe_silu-27 = (f32) UNARY(ffn_moe_gate-27{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.3591, 4.3591, 8.3591, ...], [43008.3594, 43012.3594, 43016.3594, ...], [86016.3594, 86020.3594, 86024.3594, ...], ], ] sum = 387111.250000 ggml_debug: ffn_moe_gate_par-27 = (f32) MUL(ffn_moe_up-27{10752, 3, 1, 1}, ffn_moe_silu-27{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1803, 4.1803, 8.1803, ...], [43008.1797, 43012.1797, 43016.1797, ...], [86016.1797, 86020.1797, 86024.1797, ...], ], ] sum = 387109.625000 ggml_debug: ffn_moe_down-27 = (f32) MUL_MAT_ID(blk.27.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-27{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0470, 3.9530, 7.9530, ...], [24575.9531, 24579.9531, 24583.9531, ...], [49151.9531, 49155.9531, 49159.9531, ...], ], ] sum = 221219.578125 ggml_debug: ffn_moe_weights_norm-27 (view) = (f32) VIEW(ffn_moe_weights_norm-27{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2721], [ 16.2721], [ 32.2721], ], ] sum = 48.816212 ggml_debug: ffn_moe_weighted-27 = (f32) MUL(ffn_moe_down-27{6144, 3, 1, 1}, ffn_moe_weights_norm-27 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0128, 3.9872, 7.9872, ...], [24575.9863, 24579.9863, 24583.9863, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.875000 ggml_debug: ffn_moe_out-27 = (f32) ADD(ffn_moe_weighted-27{6144, 3, 1, 1}, ffn_moe_weighted-27{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1239, 4.1239, 8.1239, ...], [24576.1230, 24580.1230, 24584.1230, ...], [49152.1250, 49156.1250, 49160.1250, ...], ], ] sum = 221221.125000 ggml_debug: ffn_moe_up-27 = (f32) MUL_MAT_ID(blk.27.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-27{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.5296, 2.4704, 6.4704, ...], [43006.4688, 43010.4688, 43014.4688, ...], [86014.4688, 86018.4688, 86022.4688, ...], ], ] sum = 387094.218750 ggml_debug: ffn_moe_gate-27 = (f32) MUL_MAT_ID(blk.27.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-27{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.4372, 4.4372, 8.4372, ...], [43008.4375, 43012.4375, 43016.4375, ...], [86016.4375, 86020.4375, 86024.4375, ...], ], ] sum = 387111.937500 ggml_debug: ffn_moe_silu-27 = (f32) UNARY(ffn_moe_gate-27{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.2656, 4.2656, 8.2656, ...], [43008.2656, 43012.2656, 43016.2656, ...], [86016.2656, 86020.2656, 86024.2656, ...], ], ] sum = 387110.375000 ggml_debug: ffn_moe_gate_par-27 = (f32) MUL(ffn_moe_up-27{10752, 3, 1, 1}, ffn_moe_silu-27{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.4063, 3.5937, 7.5937, ...], [43007.5938, 43011.5938, 43015.5938, ...], [86015.5938, 86019.5938, 86023.5938, ...], ], ] sum = 387104.343750 ggml_debug: ffn_moe_down-27 = (f32) MUL_MAT_ID(blk.27.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-27{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2378, 4.2378, 8.2378, ...], [24576.2383, 24580.2383, 24584.2383, ...], [49152.2383, 49156.2383, 49160.2383, ...], ], ] sum = 221222.125000 ggml_debug: ffn_moe_weights_norm-27 (view) = (f32) VIEW(ffn_moe_weights_norm-27{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2475], [ 16.2475], [ 32.2475], ], ] sum = 48.742447 ggml_debug: ffn_moe_weighted-27 = (f32) MUL(ffn_moe_down-27{6144, 3, 1, 1}, ffn_moe_weights_norm-27 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0589, 4.0589, 8.0589, ...], [24576.0586, 24580.0586, 24584.0586, ...], [49152.0586, 49156.0586, 49160.0586, ...], ], ] sum = 221220.531250 ggml_debug: ffn_moe_out-27 = (f32) ADD(ffn_moe_out-27{6144, 3, 1, 1}, ffn_moe_weighted-27{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1828, 4.1828, 8.1828, ...], [24576.1836, 24580.1836, 24584.1836, ...], [49152.1836, 49156.1836, 49160.1836, ...], ], ] sum = 221221.656250 ggml_debug: ffn_moe_up-27 = (f32) MUL_MAT_ID(blk.27.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-27{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.7545, 3.2455, 7.2455, ...], [43007.2461, 43011.2461, 43015.2461, ...], [86015.2422, 86019.2422, 86023.2422, ...], ], ] sum = 387101.218750 ggml_debug: ffn_moe_gate-27 = (f32) MUL_MAT_ID(blk.27.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-27{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1805, 3.8195, 7.8195, ...], [43007.8203, 43011.8203, 43015.8203, ...], [86015.8203, 86019.8203, 86023.8203, ...], ], ] sum = 387106.375000 ggml_debug: ffn_moe_silu-27 = (f32) UNARY(ffn_moe_gate-27{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0821, 3.9179, 7.9179, ...], [43007.9180, 43011.9180, 43015.9180, ...], [86015.9141, 86019.9141, 86023.9141, ...], ], ] sum = 387107.250000 ggml_debug: ffn_moe_gate_par-27 = (f32) MUL(ffn_moe_up-27{10752, 3, 1, 1}, ffn_moe_silu-27{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0620, 4.0620, 8.0620, ...], [43008.0625, 43012.0625, 43016.0625, ...], [86016.0625, 86020.0625, 86024.0625, ...], ], ] sum = 387108.562500 ggml_debug: ffn_moe_down-27 = (f32) MUL_MAT_ID(blk.27.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-27{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2963, 3.7037, 7.7037, ...], [24575.7031, 24579.7031, 24583.7031, ...], [49151.7031, 49155.7031, 49159.7031, ...], ], ] sum = 221217.328125 ggml_debug: ffn_moe_weights_norm-27 (view) = (f32) VIEW(ffn_moe_weights_norm-27{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2068], [ 16.2068], [ 32.2068], ], ] sum = 48.620335 ggml_debug: ffn_moe_weighted-27 = (f32) MUL(ffn_moe_down-27{6144, 3, 1, 1}, ffn_moe_weights_norm-27 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0613, 3.9387, 7.9387, ...], [24575.9395, 24579.9395, 24583.9395, ...], [49151.9375, 49155.9375, 49159.9375, ...], ], ] sum = 221219.437500 ggml_debug: ffn_moe_out-27 = (f32) ADD(ffn_moe_out-27{6144, 3, 1, 1}, ffn_moe_weighted-27{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1215, 4.1215, 8.1215, ...], [24576.1211, 24580.1211, 24584.1211, ...], [49152.1211, 49156.1211, 49160.1211, ...], ], ] sum = 221221.109375 ggml_debug: ffn_inp-27 = (f32) ADD(kqv_out-27{6144, 3, 1, 1}, l_out-26{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.3428, 2.6572, 6.6572, ...], [24574.6582, 24578.6582, 24582.6582, ...], [49150.6562, 49154.6562, 49158.6562, ...], ], ] sum = 221207.906250 ggml_debug: l_out-27 = (f32) ADD(ffn_moe_out-27{6144, 3, 1, 1}, ffn_inp-27{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.2212, 2.7788, 6.7788, ...], [24574.7793, 24578.7793, 24582.7793, ...], [49150.7773, 49154.7773, 49158.7773, ...], ], ] sum = 221209.015625 ggml_debug: norm-28 = (f32) NORM(CUDA2#l_out-27#0{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.6660, 3.3340, 7.3340, ...], [24575.3340, 24579.3340, 24583.3340, ...], [49151.3320, 49155.3320, 49159.3320, ...], ], ] sum = 221214.000000 ggml_debug: attn_norm-28 = (f32) MUL(norm-28{6144, 3, 1, 1}, blk.28.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1860, 3.8140, 7.8140, ...], [24575.8145, 24579.8145, 24583.8145, ...], [49151.8125, 49155.8125, 49159.8125, ...], ], ] sum = 221218.312500 ggml_debug: wqkv-28 = (f32) MUL_MAT(blk.28.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-28{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -0.3904, 3.6096, 7.6096, ...], [32767.6094, 32771.6094, 32775.6094, ...], [65535.6094, 65539.6094, 65543.6094, ...], ], ] sum = 294944.500000 ggml_debug: wqkv_clamped-28 = (f32) CLAMP(wqkv-28{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -0.3904, 3.6096, 7.6096, ...], [32767.6094, 32771.6094, 32775.6094, ...], [65535.6094, 65539.6094, 65543.6094, ...], ], ] sum = 294944.500000 ggml_debug: wqkv_clamped-28 (view) = (f32) VIEW(wqkv_clamped-28{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.3904, 3.6096, 7.6096, ...], [32767.6094, 32771.6094, 32775.6094, ...], [65535.6094, 65539.6094, 65543.6094, ...], ], ] sum = 294944.500000 ggml_debug: Qcur-28 = (f32) CONT(wqkv_clamped-28 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.3904, 3.6096, 7.6096, ...], [24575.6094, 24579.6094, 24583.6094, ...], [49151.6094, 49155.6094, 49159.6094, ...], ], ] sum = 221216.484375 ggml_debug: Qcur-28 (reshaped) = (f32) RESHAPE(Qcur-28{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -0.3904, 3.6096, 7.6096, ...], [511.6096, 515.6096, 519.6096, ...], [1023.6096, 1027.6096, 1031.6096, ...], ... ], [ [24575.6094, 24579.6094, 24583.6094, ...], [25087.6094, 25091.6094, 25095.6094, ...], [25599.6094, 25603.6094, 25607.6094, ...], ... ], [ [49151.6094, 49155.6094, 49159.6094, ...], [49663.6094, 49667.6094, 49671.6094, ...], [50175.6094, 50179.6094, 50183.6094, ...], ... ], ] sum = 677473.562500 ggml_debug: Qcur-28 = (f32) ROPE(Qcur-28 (reshaped){128, 48, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -0.3904, 3.6096, 7.6096, ...], [511.6096, 515.6096, 519.6096, ...], [1023.6096, 1027.6096, 1031.6096, ...], ... ], [ [24575.6094, 24579.6094, 24583.6094, ...], [25087.6094, 25091.6094, 25095.6094, ...], [25599.6094, 25603.6094, 25607.6094, ...], ... ], [ [49151.6094, 49155.6094, 49159.6094, ...], [49663.6094, 49667.6094, 49671.6094, ...], [50175.6094, 50179.6094, 50183.6094, ...], ... ], ] sum = 677473.562500 ggml_debug: wqkv_clamped-28 (view) = (f32) VIEW(wqkv_clamped-28{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -1.2558, 2.7442, 6.7442, ...], [32766.7441, 32770.7461, 32774.7461, ...], [65534.7461, 65538.7422, 65542.7422, ...], ], ] sum = 294936.718750 ggml_debug: Kcur-28 = (f32) CONT(wqkv_clamped-28 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -1.2558, 2.7442, 6.7442, ...], [4094.7441, 4098.7441, 4102.7441, ...], [8190.7441, 8194.7441, 8198.7441, ...], ], ] sum = 36888.695312 ggml_debug: Kcur-28 (reshaped) = (f32) RESHAPE(Kcur-28{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ -1.2558, 2.7442, 6.7442, ...], [510.7442, 514.7442, 518.7442, ...], [1022.7442, 1026.7443, 1030.7443, ...], ... ], [ [4094.7441, 4098.7441, 4102.7441, ...], [4606.7441, 4610.7441, 4614.7441, ...], [5118.7441, 5122.7441, 5126.7441, ...], ... ], [ [8190.7441, 8194.7441, 8198.7441, ...], [8702.7441, 8706.7441, 8710.7441, ...], [9214.7441, 9218.7441, 9222.7441, ...], ... ], ] sum = 124490.070312 ggml_debug: Kcur-28 = (f32) ROPE(Kcur-28 (reshaped){128, 8, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ -1.2558, 2.7442, 6.7442, ...], [510.7442, 514.7442, 518.7442, ...], [1022.7442, 1026.7443, 1030.7443, ...], ... ], [ [4094.7441, 4098.7441, 4102.7441, ...], [4606.7441, 4610.7441, 4614.7441, ...], [5118.7441, 5122.7441, 5126.7441, ...], ... ], [ [8190.7441, 8194.7441, 8198.7441, ...], [8702.7441, 8706.7441, 8710.7441, ...], [9214.7441, 9218.7441, 9222.7441, ...], ... ], ] sum = 124490.070312 ggml_debug: wqkv_clamped-28 (view) = (f32) VIEW(wqkv_clamped-28{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0321, 4.0321, 8.0321, ...], [32768.0312, 32772.0312, 32776.0312, ...], [65536.0312, 65540.0312, 65544.0312, ...], ], ] sum = 294948.281250 ggml_debug: Vcur-28 = (f32) CONT(wqkv_clamped-28 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0321, 4.0321, 8.0321, ...], [4096.0322, 4100.0322, 4104.0322, ...], [8192.0322, 8196.0322, 8200.0322, ...], ], ] sum = 36900.289062 ggml_debug: k_cache_view-28 = (f16) VIEW(cache_k_l28{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-28 (copy of Kcur-28) = (f16) CPY(Kcur-28{128, 8, 3, 1}, k_cache_view-28{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ -1.2559, -1.2578, -1.2598, ...], ], ] sum = -3.773438 ggml_debug: v_cur_t-28 = (f32) TRANSPOSE(Vcur-28{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0321, 4096.0322, 8192.0322], [ 4.0321, 4100.0322, 8196.0322], [ 8.0321, 4104.0322, 8200.0322], ... ], ] sum = 36900.285156 ggml_debug: v_cache_view-28 = (f16) VIEW(cache_v_l28{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-28 (copy of v_cur_t-28) = (f16) CPY(v_cur_t-28{3, 1024, 1, 1}, v_cache_view-28{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.0321, 0.0322, 0.0323], [ 0.0643, 0.0644, 0.0645], [ 0.1285, 0.1288, 0.1290], ... ], ] sum = 0.676117 ggml_debug: v-28 = (f16) VIEW(cache_v_l28{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.0321, 0.0322, 0.0323, ...], [ 0.0643, 0.0644, 0.0645, ...], [ 0.1285, 0.1288, 0.1290, ...], ... ], [ [ 0.0321, 0.0322, 0.0323, ...], [ 0.0643, 0.0644, 0.0645, ...], [ 0.1285, 0.1288, 0.1290, ...], ... ], [ [ 0.0321, 0.0322, 0.0323, ...], [ 0.0643, 0.0644, 0.0645, ...], [ 0.1285, 0.1288, 0.1290, ...], ... ], ... ] sum = 2.028351 ggml_debug: k-28 = (f16) VIEW(cache_k_l28{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ -1.2559, -1.2578, -1.2598, ...], [ -5.0234, -5.0312, -5.0391, ...], [-20.0938, -20.1250, -20.1562, ...], ... ], [ [ -1.5059, -1.5078, -1.5098, ...], [ -6.0234, -6.0312, -6.0391, ...], [-24.0938, -24.1250, -24.1562, ...], ... ], [ [ -1.7559, -1.7578, -1.7598, ...], [ -7.0234, -7.0312, -7.0391, ...], [-28.0938, -28.1250, -28.1562, ...], ... ], ... ] sum = -284.976562 ggml_debug: q-28 = (f32) PERMUTE(Qcur-28{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -0.3904, 3.6096, 7.6096, ...], [24575.6094, 24579.6094, 24583.6094, ...], [49151.6094, 49155.6094, 49159.6094, ...], ], [ [511.6096, 515.6096, 519.6096, ...], [25087.6094, 25091.6094, 25095.6094, ...], [49663.6094, 49667.6094, 49671.6094, ...], ], [ [1023.6096, 1027.6096, 1031.6096, ...], [25599.6094, 25603.6094, 25607.6094, ...], [50175.6094, 50179.6094, 50183.6094, ...], ], ... ] sum = 677473.625000 ggml_debug: kq-28 = (f32) MUL_MAT(k-28{128, 32, 8, 1}, q-28{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 1.8447, 5.8447, 9.8447, ...], [129.8447, 133.8447, 137.8447, ...], [257.8447, 261.8447, 265.8447, ...], ], [ [385.8447, 389.8447, 393.8447, ...], [513.8447, 517.8447, 521.8447, ...], [641.8447, 645.8447, 649.8447, ...], ], [ [769.8447, 773.8447, 777.8447, ...], [897.8447, 901.8447, 905.8447, ...], [1025.8447, 1029.8447, 1033.8447, ...], ], ... ] sum = 13981.807617 ggml_debug: kq_soft_max_ext-28 = (f32) SOFT_MAX(kq-28{32, 3, 48, 1}, CUDA2#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-28 = (f32) MUL_MAT(v-28{32, 128, 8, 1}, kq_soft_max_ext-28{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.0321, 4.0321, 8.0321, ...], [512.0321, 516.0321, 520.0321, ...], [1024.0321, 1028.0321, 1032.0321, ...], ], [ [1536.0321, 1540.0321, 1544.0321, ...], [2048.0322, 2052.0322, 2056.0322, ...], [2560.0322, 2564.0322, 2568.0322, ...], ], [ [3072.0322, 3076.0322, 3080.0322, ...], [3584.0322, 3588.0322, 3592.0322, ...], [4096.0322, 4100.0322, 4104.0322, ...], ], ... ] sum = 55404.859375 ggml_debug: kqv_merged-28 = (f32) PERMUTE(kqv-28{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0321, 4.0321, 8.0321, ...], [1536.0321, 1540.0321, 1544.0321, ...], [3072.0322, 3076.0322, 3080.0322, ...], ... ], [ [512.0321, 516.0321, 520.0321, ...], [2048.0322, 2052.0322, 2056.0322, ...], [3584.0322, 3588.0322, 3592.0322, ...], ... ], [ [1024.0321, 1028.0321, 1032.0321, ...], [2560.0322, 2564.0322, 2568.0322, ...], [4096.0322, 4100.0322, 4104.0322, ...], ... ], ] sum = 55404.855469 ggml_debug: kqv_merged_cont-28 = (f32) CONT(kqv_merged-28{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0321, 4.0321, 8.0321, ...], [24576.0312, 24580.0312, 24584.0312, ...], [49152.0312, 49156.0312, 49160.0312, ...], ], ] sum = 221220.281250 ggml_debug: kqv_out-28 = (f32) MUL_MAT(blk.28.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-28{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0623, 4.0623, 8.0623, ...], [24576.0625, 24580.0625, 24584.0625, ...], [49152.0625, 49156.0625, 49160.0625, ...], ], ] sum = 221220.562500 ggml_debug: norm-28 = (f32) NORM(kqv_out-28{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.1260, 4.1260, 8.1260, ...], [24576.1270, 24580.1270, 24584.1270, ...], [49152.1250, 49156.1250, 49160.1250, ...], ], ] sum = 221221.125000 ggml_debug: attn_out_norm-28 = (f32) MUL(norm-28{6144, 3, 1, 1}, blk.28.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0605, 4.0605, 8.0605, ...], [24576.0605, 24580.0605, 24584.0605, ...], [49152.0586, 49156.0586, 49160.0586, ...], ], ] sum = 221220.546875 ggml_debug: ffn_moe_logits-28 = (f32) MUL_MAT(blk.28.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-28{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.2302, 3.7698, 7.7698, ...], [ 63.7698, 67.7698, 71.7698, ...], [127.7698, 131.7698, 135.7698, ...], ], ] sum = 609.927979 ggml_debug: ffn_moe_probs-28 = (f32) SOFT_MAX(ffn_moe_logits-28{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0295, 4.0295, 8.0295, ...], [ 64.0295, 68.0295, 72.0295, ...], [128.0295, 132.0295, 136.0295, ...], ], ] sum = 612.265869 ggml_debug: ffn_moe_argsort-28 = (i32) ARGSORT(ffn_moe_probs-28{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 2.0000, 6.0000, 10.0000, ...], [ 66.0000, 70.0000, 74.0000, ...], [130.0000, 134.0000, 138.0000, ...], ], ] sum = 630.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-28{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 2.0000, 6.0000, 10.0000, ...], [ 66.0000, 70.0000, 74.0000, ...], [130.0000, 134.0000, 138.0000, ...], ], ] sum = 630.000000 ggml_debug: ffn_moe_up-28 = (f32) MUL_MAT_ID(blk.28.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-28{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.6886, 4.6886, 8.6886, ...], [43008.6875, 43012.6875, 43016.6875, ...], [86016.6875, 86020.6875, 86024.6875, ...], ], ] sum = 387114.187500 ggml_debug: ffn_moe_gate-28 = (f32) MUL_MAT_ID(blk.28.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-28{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1064, 3.8936, 7.8936, ...], [43007.8945, 43011.8945, 43015.8945, ...], [86015.8906, 86019.8906, 86023.8906, ...], ], ] sum = 387107.000000 ggml_debug: ffn_moe_silu-28 = (f32) UNARY(ffn_moe_gate-28{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0504, 3.9496, 7.9496, ...], [43007.9492, 43011.9492, 43015.9492, ...], [86015.9531, 86019.9531, 86023.9531, ...], ], ] sum = 387107.562500 ggml_debug: ffn_moe_gate_par-28 = (f32) MUL(ffn_moe_up-28{10752, 3, 1, 1}, ffn_moe_silu-28{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0347, 3.9653, 7.9653, ...], [43007.9648, 43011.9648, 43015.9648, ...], [86015.9688, 86019.9688, 86023.9688, ...], ], ] sum = 387107.718750 ggml_debug: ffn_moe_down-28 = (f32) MUL_MAT_ID(blk.28.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-28{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0169, 4.0169, 8.0169, ...], [24576.0176, 24580.0176, 24584.0176, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: ffn_moe_probs-28 (reshaped) = (f32) RESHAPE(ffn_moe_probs-28{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0295], [ 4.0295], [ 8.0295], ... ], [ [ 64.0295], [ 68.0295], [ 72.0295], ... ], [ [128.0295], [132.0295], [136.0295], ... ], ] sum = 612.265869 ggml_debug: ffn_moe_weights-28 = (f32) GET_ROWS(ffn_moe_probs-28 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1929], [ 4.1929], [ 8.1929], ... ], [ [ 16.1929], [ 20.1929], [ 24.1929], ... ], [ [ 32.1929], [ 36.1929], [ 40.1929], ... ], ] sum = 181.736237 ggml_debug: ffn_moe_weights-28 (reshaped) = (f32) RESHAPE(ffn_moe_weights-28{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1929, 4.1929, 8.1929, ...], [ 16.1929, 20.1929, 24.1929, ...], [ 32.1929, 36.1929, 40.1929, ...], ], ] sum = 181.736237 ggml_debug: ffn_moe_weights_sum-28 = (f32) SUM_ROWS(ffn_moe_weights-28 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.6133], [ 4.6133], [ 8.6133], ], ] sum = 13.839875 ggml_debug: ffn_moe_weights_norm-28 = (f32) DIV(ffn_moe_weights-28 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-28{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3146, 4.3146, 8.3146, ...], [ 16.3146, 20.3146, 24.3146, ...], [ 32.3146, 36.3146, 40.3146, ...], ], ] sum = 182.831009 ggml_debug: ffn_moe_weights_norm-28 (view) = (f32) VIEW(ffn_moe_weights_norm-28{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3146], [ 16.3146], [ 32.3146], ], ] sum = 48.943668 ggml_debug: ffn_moe_weighted-28 = (f32) MUL(ffn_moe_down-28{6144, 3, 1, 1}, ffn_moe_weights_norm-28 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0053, 4.0053, 8.0053, ...], [24576.0059, 24580.0059, 24584.0059, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.031250 ggml_debug: ffn_moe_up-28 = (f32) MUL_MAT_ID(blk.28.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-28{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.6946, 4.6946, 8.6946, ...], [43008.6953, 43012.6953, 43016.6953, ...], [86016.6953, 86020.6953, 86024.6953, ...], ], ] sum = 387114.250000 ggml_debug: ffn_moe_gate-28 = (f32) MUL_MAT_ID(blk.28.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-28{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0265, 4.0265, 8.0266, ...], [43008.0273, 43012.0273, 43016.0273, ...], [86016.0234, 86020.0234, 86024.0234, ...], ], ] sum = 387108.250000 ggml_debug: ffn_moe_silu-28 = (f32) UNARY(ffn_moe_gate-28{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0135, 4.0135, 8.0135, ...], [43008.0117, 43012.0117, 43016.0117, ...], [86016.0156, 86020.0156, 86024.0156, ...], ], ] sum = 387108.125000 ggml_debug: ffn_moe_gate_par-28 = (f32) MUL(ffn_moe_up-28{10752, 3, 1, 1}, ffn_moe_silu-28{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0093, 4.0093, 8.0093, ...], [43008.0078, 43012.0078, 43016.0078, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.062500 ggml_debug: ffn_moe_down-28 = (f32) MUL_MAT_ID(blk.28.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-28{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0537, 3.9463, 7.9463, ...], [24575.9453, 24579.9453, 24583.9453, ...], [49151.9453, 49155.9453, 49159.9453, ...], ], ] sum = 221219.500000 ggml_debug: ffn_moe_weights_norm-28 (view) = (f32) VIEW(ffn_moe_weights_norm-28{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3016], [ 16.3016], [ 32.3016], ], ] sum = 48.904861 ggml_debug: ffn_moe_weighted-28 = (f32) MUL(ffn_moe_down-28{6144, 3, 1, 1}, ffn_moe_weights_norm-28 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0162, 3.9838, 7.9838, ...], [24575.9844, 24579.9844, 24583.9844, ...], [49151.9844, 49155.9844, 49159.9844, ...], ], ] sum = 221219.859375 ggml_debug: ffn_moe_out-28 = (f32) ADD(ffn_moe_weighted-28{6144, 3, 1, 1}, ffn_moe_weighted-28{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0109, 3.9891, 7.9891, ...], [24575.9883, 24579.9883, 24583.9883, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.890625 ggml_debug: ffn_moe_up-28 = (f32) MUL_MAT_ID(blk.28.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-28{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3173, 4.3173, 8.3173, ...], [43008.3164, 43012.3164, 43016.3164, ...], [86016.3203, 86020.3203, 86024.3203, ...], ], ] sum = 387110.843750 ggml_debug: ffn_moe_gate-28 = (f32) MUL_MAT_ID(blk.28.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-28{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1699, 4.1699, 8.1699, ...], [43008.1680, 43012.1680, 43016.1680, ...], [86016.1719, 86020.1719, 86024.1719, ...], ], ] sum = 387109.562500 ggml_debug: ffn_moe_silu-28 = (f32) UNARY(ffn_moe_gate-28{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0922, 4.0922, 8.0922, ...], [43008.0938, 43012.0938, 43016.0938, ...], [86016.0938, 86020.0938, 86024.0938, ...], ], ] sum = 387108.843750 ggml_debug: ffn_moe_gate_par-28 = (f32) MUL(ffn_moe_up-28{10752, 3, 1, 1}, ffn_moe_silu-28{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0292, 4.0292, 8.0292, ...], [43008.0273, 43012.0273, 43016.0273, ...], [86016.0312, 86020.0312, 86024.0312, ...], ], ] sum = 387108.281250 ggml_debug: ffn_moe_down-28 = (f32) MUL_MAT_ID(blk.28.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-28{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1501, 3.8499, 7.8499, ...], [24575.8496, 24579.8496, 24583.8496, ...], [49151.8516, 49155.8516, 49159.8516, ...], ], ] sum = 221218.656250 ggml_debug: ffn_moe_weights_norm-28 (view) = (f32) VIEW(ffn_moe_weights_norm-28{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1930], [ 16.1930], [ 32.1930], ], ] sum = 48.579109 ggml_debug: ffn_moe_weighted-28 = (f32) MUL(ffn_moe_down-28{6144, 3, 1, 1}, ffn_moe_weights_norm-28 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0290, 3.9710, 7.9710, ...], [24575.9707, 24579.9707, 24583.9707, ...], [49151.9727, 49155.9727, 49159.9727, ...], ], ] sum = 221219.734375 ggml_debug: ffn_moe_out-28 = (f32) ADD(ffn_moe_out-28{6144, 3, 1, 1}, ffn_moe_weighted-28{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0399, 3.9601, 7.9601, ...], [24575.9609, 24579.9609, 24583.9609, ...], [49151.9609, 49155.9609, 49159.9609, ...], ], ] sum = 221219.656250 ggml_debug: ffn_moe_up-28 = (f32) MUL_MAT_ID(blk.28.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-28{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0264, 4.0264, 8.0264, ...], [43008.0273, 43012.0273, 43016.0273, ...], [86016.0234, 86020.0234, 86024.0234, ...], ], ] sum = 387108.250000 ggml_debug: ffn_moe_gate-28 = (f32) MUL_MAT_ID(blk.28.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-28{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2872, 4.2872, 8.2872, ...], [43008.2891, 43012.2891, 43016.2891, ...], [86016.2891, 86020.2891, 86024.2891, ...], ], ] sum = 387110.593750 ggml_debug: ffn_moe_silu-28 = (f32) UNARY(ffn_moe_gate-28{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1641, 4.1641, 8.1641, ...], [43008.1641, 43012.1641, 43016.1641, ...], [86016.1641, 86020.1641, 86024.1641, ...], ], ] sum = 387109.468750 ggml_debug: ffn_moe_gate_par-28 = (f32) MUL(ffn_moe_up-28{10752, 3, 1, 1}, ffn_moe_silu-28{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0043, 4.0043, 8.0043, ...], [43008.0039, 43012.0039, 43016.0039, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.031250 ggml_debug: ffn_moe_down-28 = (f32) MUL_MAT_ID(blk.28.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-28{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0177, 4.0177, 8.0177, ...], [24576.0176, 24580.0176, 24584.0176, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.156250 ggml_debug: ffn_moe_weights_norm-28 (view) = (f32) VIEW(ffn_moe_weights_norm-28{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1908], [ 16.1908], [ 32.1908], ], ] sum = 48.572365 ggml_debug: ffn_moe_weighted-28 = (f32) MUL(ffn_moe_down-28{6144, 3, 1, 1}, ffn_moe_weights_norm-28 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0034, 4.0034, 8.0034, ...], [24576.0039, 24580.0039, 24584.0039, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.015625 ggml_debug: ffn_moe_out-28 = (f32) ADD(ffn_moe_out-28{6144, 3, 1, 1}, ffn_moe_weighted-28{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0365, 3.9635, 7.9635, ...], [24575.9629, 24579.9629, 24583.9629, ...], [49151.9648, 49155.9648, 49159.9648, ...], ], ] sum = 221219.687500 ggml_debug: ffn_inp-28 = (f32) ADD(kqv_out-28{6144, 3, 1, 1}, CUDA2#l_out-27#0{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.1590, 2.8410, 6.8410, ...], [24574.8418, 24578.8418, 24582.8418, ...], [49150.8398, 49154.8398, 49158.8398, ...], ], ] sum = 221209.578125 ggml_debug: l_out-28 = (f32) ADD(ffn_moe_out-28{6144, 3, 1, 1}, ffn_inp-28{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.1955, 2.8045, 6.8045, ...], [24574.8047, 24578.8047, 24582.8047, ...], [49150.8047, 49154.8047, 49158.8047, ...], ], ] sum = 221209.250000 ggml_debug: norm-29 = (f32) NORM(l_out-28{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.5952, 3.4048, 7.4048, ...], [24575.4043, 24579.4043, 24583.4043, ...], [49151.4062, 49155.4062, 49159.4062, ...], ], ] sum = 221214.656250 ggml_debug: attn_norm-29 = (f32) MUL(norm-29{6144, 3, 1, 1}, blk.29.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1965, 3.8035, 7.8035, ...], [24575.8027, 24579.8027, 24583.8027, ...], [49151.8047, 49155.8047, 49159.8047, ...], ], ] sum = 221218.250000 ggml_debug: wqkv-29 = (f32) MUL_MAT(blk.29.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-29{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.1892, 4.1892, 8.1892, ...], [32768.1875, 32772.1875, 32776.1875, ...], [65536.1875, 65540.1875, 65544.1875, ...], ], ] sum = 294949.687500 ggml_debug: wqkv_clamped-29 = (f32) CLAMP(wqkv-29{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.1892, 4.1892, 8.1892, ...], [32768.1875, 32772.1875, 32776.1875, ...], [65536.1875, 65540.1875, 65544.1875, ...], ], ] sum = 294949.687500 ggml_debug: wqkv_clamped-29 (view) = (f32) VIEW(wqkv_clamped-29{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.1892, 4.1892, 8.1892, ...], [32768.1875, 32772.1875, 32776.1875, ...], [65536.1875, 65540.1875, 65544.1875, ...], ], ] sum = 294949.687500 ggml_debug: Qcur-29 = (f32) CONT(wqkv_clamped-29 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.1892, 4.1892, 8.1892, ...], [24576.1895, 24580.1895, 24584.1895, ...], [49152.1875, 49156.1875, 49160.1875, ...], ], ] sum = 221221.687500 ggml_debug: Qcur-29 (reshaped) = (f32) RESHAPE(Qcur-29{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.1892, 4.1892, 8.1892, ...], [512.1892, 516.1892, 520.1892, ...], [1024.1892, 1028.1892, 1032.1892, ...], ... ], [ [24576.1895, 24580.1895, 24584.1895, ...], [25088.1895, 25092.1895, 25096.1895, ...], [25600.1895, 25604.1895, 25608.1895, ...], ... ], [ [49152.1875, 49156.1875, 49160.1875, ...], [49664.1875, 49668.1875, 49672.1875, ...], [50176.1875, 50180.1875, 50184.1875, ...], ... ], ] sum = 677489.062500 ggml_debug: Qcur-29 = (f32) ROPE(Qcur-29 (reshaped){128, 48, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.1892, 4.1892, 8.1892, ...], [512.1892, 516.1892, 520.1892, ...], [1024.1892, 1028.1892, 1032.1892, ...], ... ], [ [24576.1895, 24580.1895, 24584.1895, ...], [25088.1895, 25092.1895, 25096.1895, ...], [25600.1895, 25604.1895, 25608.1895, ...], ... ], [ [49152.1875, 49156.1875, 49160.1875, ...], [49664.1875, 49668.1875, 49672.1875, ...], [50176.1875, 50180.1875, 50184.1875, ...], ... ], ] sum = 677489.062500 ggml_debug: wqkv_clamped-29 (view) = (f32) VIEW(wqkv_clamped-29{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 1.7471, 5.7471, 9.7471, ...], [32769.7461, 32773.7461, 32777.7461, ...], [65537.7500, 65541.7500, 65545.7500, ...], ], ] sum = 294963.750000 ggml_debug: Kcur-29 = (f32) CONT(wqkv_clamped-29 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 1.7471, 5.7471, 9.7471, ...], [4097.7471, 4101.7471, 4105.7471, ...], [8193.7471, 8197.7471, 8201.7471, ...], ], ] sum = 36915.722656 ggml_debug: Kcur-29 (reshaped) = (f32) RESHAPE(Kcur-29{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 1.7471, 5.7471, 9.7471, ...], [513.7471, 517.7471, 521.7471, ...], [1025.7471, 1029.7471, 1033.7471, ...], ... ], [ [4097.7471, 4101.7471, 4105.7471, ...], [4609.7471, 4613.7471, 4617.7471, ...], [5121.7471, 5125.7471, 5129.7471, ...], ... ], [ [8193.7471, 8197.7471, 8201.7471, ...], [8705.7471, 8709.7471, 8713.7471, ...], [9217.7471, 9221.7471, 9225.7471, ...], ... ], ] sum = 124571.179688 ggml_debug: Kcur-29 = (f32) ROPE(Kcur-29 (reshaped){128, 8, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 1.7471, 5.7471, 9.7471, ...], [513.7471, 517.7471, 521.7471, ...], [1025.7471, 1029.7471, 1033.7471, ...], ... ], [ [4097.7471, 4101.7471, 4105.7471, ...], [4609.7471, 4613.7471, 4617.7471, ...], [5121.7471, 5125.7471, 5129.7471, ...], ... ], [ [8193.7471, 8197.7471, 8201.7471, ...], [8705.7471, 8709.7471, 8713.7471, ...], [9217.7471, 9221.7471, 9225.7471, ...], ... ], ] sum = 124571.179688 ggml_debug: wqkv_clamped-29 (view) = (f32) VIEW(wqkv_clamped-29{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0181, 4.0181, 8.0181, ...], [32768.0195, 32772.0195, 32776.0195, ...], [65536.0156, 65540.0156, 65544.0156, ...], ], ] sum = 294948.156250 ggml_debug: Vcur-29 = (f32) CONT(wqkv_clamped-29 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0181, 4.0181, 8.0181, ...], [4096.0181, 4100.0181, 4104.0181, ...], [8192.0176, 8196.0176, 8200.0176, ...], ], ] sum = 36900.160156 ggml_debug: k_cache_view-29 = (f16) VIEW(cache_k_l29{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-29 (copy of Kcur-29) = (f16) CPY(Kcur-29{128, 8, 3, 1}, k_cache_view-29{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 1.7471, 1.7490, 1.7510, ...], ], ] sum = 5.247070 ggml_debug: v_cur_t-29 = (f32) TRANSPOSE(Vcur-29{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0181, 4096.0181, 8192.0176], [ 4.0181, 4100.0181, 8196.0176], [ 8.0181, 4104.0181, 8200.0176], ... ], ] sum = 36900.160156 ggml_debug: v_cache_view-29 = (f16) VIEW(cache_v_l29{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-29 (copy of v_cur_t-29) = (f16) CPY(v_cur_t-29{3, 1024, 1, 1}, v_cache_view-29{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.0181, 0.0181, 0.0181], [ 0.0361, 0.0362, 0.0362], [ 0.0722, 0.0723, 0.0724], ... ], ] sum = 0.379715 ggml_debug: v-29 = (f16) VIEW(cache_v_l29{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.0181, 0.0181, 0.0181, ...], [ 0.0361, 0.0362, 0.0362, ...], [ 0.0722, 0.0723, 0.0724, ...], ... ], [ [ 0.0181, 0.0181, 0.0181, ...], [ 0.0361, 0.0362, 0.0362, ...], [ 0.0722, 0.0723, 0.0724, ...], ... ], [ [ 0.0181, 0.0181, 0.0181, ...], [ 0.0361, 0.0362, 0.0362, ...], [ 0.0722, 0.0723, 0.0724, ...], ... ], ... ] sum = 1.139145 ggml_debug: k-29 = (f16) VIEW(cache_k_l29{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 1.7471, 1.7490, 1.7510, ...], [ 6.9883, 6.9961, 7.0039, ...], [ 27.9531, 27.9844, 28.0156, ...], ... ], [ [ 1.9971, 1.9990, 2.0020, ...], [ 7.9883, 7.9961, 8.0078, ...], [ 31.9531, 31.9844, 32.0312, ...], ... ], [ [ 2.4941, 2.4980, 2.5020, ...], [ 9.9766, 9.9922, 10.0078, ...], [ 39.9062, 39.9688, 40.0312, ...], ... ], ... ] sum = 393.524414 ggml_debug: q-29 = (f32) PERMUTE(Qcur-29{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.1892, 4.1892, 8.1892, ...], [24576.1895, 24580.1895, 24584.1895, ...], [49152.1875, 49156.1875, 49160.1875, ...], ], [ [512.1892, 516.1892, 520.1892, ...], [25088.1895, 25092.1895, 25096.1895, ...], [49664.1875, 49668.1875, 49672.1875, ...], ], [ [1024.1892, 1028.1892, 1032.1892, ...], [25600.1895, 25604.1895, 25608.1895, ...], [50176.1875, 50180.1875, 50184.1875, ...], ], ... ] sum = 677489.062500 ggml_debug: kq-29 = (f32) MUL_MAT(k-29{128, 32, 8, 1}, q-29{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 12.9141, 16.9141, 20.9141, ...], [140.9141, 144.9141, 148.9141, ...], [268.9141, 272.9141, 276.9141, ...], ], [ [396.9141, 400.9141, 404.9141, ...], [524.9141, 528.9141, 532.9141, ...], [652.9141, 656.9141, 660.9141, ...], ], [ [780.9141, 784.9141, 788.9141, ...], [908.9141, 912.9141, 916.9141, ...], [1036.9141, 1040.9141, 1044.9141, ...], ], ... ] sum = 14280.679688 ggml_debug: kq_soft_max_ext-29 = (f32) SOFT_MAX(kq-29{32, 3, 48, 1}, CUDA2#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-29 = (f32) MUL_MAT(v-29{32, 128, 8, 1}, kq_soft_max_ext-29{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.0181, 4.0181, 8.0181, ...], [512.0181, 516.0181, 520.0181, ...], [1024.0181, 1028.0181, 1032.0181, ...], ], [ [1536.0181, 1540.0181, 1544.0181, ...], [2048.0181, 2052.0181, 2056.0181, ...], [2560.0181, 2564.0181, 2568.0181, ...], ], [ [3072.0181, 3076.0181, 3080.0181, ...], [3584.0181, 3588.0181, 3592.0181, ...], [4096.0181, 4100.0181, 4104.0181, ...], ], ... ] sum = 55404.492188 ggml_debug: kqv_merged-29 = (f32) PERMUTE(kqv-29{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0181, 4.0181, 8.0181, ...], [1536.0181, 1540.0181, 1544.0181, ...], [3072.0181, 3076.0181, 3080.0181, ...], ... ], [ [512.0181, 516.0181, 520.0181, ...], [2048.0181, 2052.0181, 2056.0181, ...], [3584.0181, 3588.0181, 3592.0181, ...], ... ], [ [1024.0181, 1028.0181, 1032.0181, ...], [2560.0181, 2564.0181, 2568.0181, ...], [4096.0181, 4100.0181, 4104.0181, ...], ... ], ] sum = 55404.496094 ggml_debug: kqv_merged_cont-29 = (f32) CONT(kqv_merged-29{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0181, 4.0181, 8.0181, ...], [24576.0176, 24580.0176, 24584.0176, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.156250 ggml_debug: kqv_out-29 = (f32) MUL_MAT(blk.29.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-29{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0366, 4.0366, 8.0366, ...], [24576.0371, 24580.0371, 24584.0371, ...], [49152.0352, 49156.0352, 49160.0352, ...], ], ] sum = 221220.312500 ggml_debug: norm-29 = (f32) NORM(kqv_out-29{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.1015, 4.1015, 8.1015, ...], [24576.1016, 24580.1016, 24584.1016, ...], [49152.1016, 49156.1016, 49160.1016, ...], ], ] sum = 221220.906250 ggml_debug: attn_out_norm-29 = (f32) MUL(norm-29{6144, 3, 1, 1}, blk.29.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0480, 4.0480, 8.0480, ...], [24576.0488, 24580.0488, 24584.0488, ...], [49152.0469, 49156.0469, 49160.0469, ...], ], ] sum = 221220.421875 ggml_debug: ffn_moe_logits-29 = (f32) MUL_MAT(blk.29.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-29{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.4546, 3.5454, 7.5454, ...], [ 63.5454, 67.5454, 71.5454, ...], [127.5454, 131.5454, 135.5454, ...], ], ] sum = 607.908691 ggml_debug: ffn_moe_probs-29 = (f32) SOFT_MAX(ffn_moe_logits-29{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0279, 4.0279, 8.0279, ...], [ 64.0279, 68.0279, 72.0279, ...], [128.0279, 132.0279, 136.0279, ...], ], ] sum = 612.251465 ggml_debug: ffn_moe_argsort-29 = (i32) ARGSORT(ffn_moe_probs-29{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 7.0000, 11.0000, 15.0000, ...], [ 71.0000, 75.0000, 79.0000, ...], [135.0000, 139.0000, 143.0000, ...], ], ] sum = 675.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-29{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 7.0000, 11.0000, 15.0000, ...], [ 71.0000, 75.0000, 79.0000, ...], [135.0000, 139.0000, 143.0000, ...], ], ] sum = 675.000000 ggml_debug: ffn_moe_up-29 = (f32) MUL_MAT_ID(blk.29.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-29{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2227, 3.7773, 7.7773, ...], [43007.7773, 43011.7773, 43015.7773, ...], [86015.7734, 86019.7734, 86023.7734, ...], ], ] sum = 387106.000000 ggml_debug: ffn_moe_gate-29 = (f32) MUL_MAT_ID(blk.29.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-29{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2919, 3.7081, 7.7081, ...], [43007.7070, 43011.7070, 43015.7070, ...], [86015.7109, 86019.7109, 86023.7109, ...], ], ] sum = 387105.406250 ggml_debug: ffn_moe_silu-29 = (f32) UNARY(ffn_moe_gate-29{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1248, 3.8752, 7.8752, ...], [43007.8750, 43011.8750, 43015.8750, ...], [86015.8750, 86019.8750, 86023.8750, ...], ], ] sum = 387106.875000 ggml_debug: ffn_moe_gate_par-29 = (f32) MUL(ffn_moe_up-29{10752, 3, 1, 1}, ffn_moe_silu-29{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0278, 4.0278, 8.0278, ...], [43008.0273, 43012.0273, 43016.0273, ...], [86016.0312, 86020.0312, 86024.0312, ...], ], ] sum = 387108.281250 ggml_debug: ffn_moe_down-29 = (f32) MUL_MAT_ID(blk.29.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-29{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.8762, 2.1238, 6.1238, ...], [24574.1230, 24578.1230, 24582.1230, ...], [49150.1250, 49154.1250, 49158.1250, ...], ], ] sum = 221203.125000 ggml_debug: ffn_moe_probs-29 (reshaped) = (f32) RESHAPE(ffn_moe_probs-29{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0279], [ 4.0279], [ 8.0279], ... ], [ [ 64.0279], [ 68.0279], [ 72.0279], ... ], [ [128.0279], [132.0279], [136.0279], ... ], ] sum = 612.251465 ggml_debug: ffn_moe_weights-29 = (f32) GET_ROWS(ffn_moe_probs-29 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1660], [ 4.1660], [ 8.1660], ... ], [ [ 16.1660], [ 20.1660], [ 24.1660], ... ], [ [ 32.1660], [ 36.1660], [ 40.1660], ... ], ] sum = 181.493713 ggml_debug: ffn_moe_weights-29 (reshaped) = (f32) RESHAPE(ffn_moe_weights-29{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1660, 4.1660, 8.1660, ...], [ 16.1660, 20.1660, 24.1660, ...], [ 32.1660, 36.1660, 40.1660, ...], ], ] sum = 181.493713 ggml_debug: ffn_moe_weights_sum-29 = (f32) SUM_ROWS(ffn_moe_weights-29 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.5314], [ 4.5314], [ 8.5314], ], ] sum = 13.594058 ggml_debug: ffn_moe_weights_norm-29 = (f32) DIV(ffn_moe_weights-29 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-29{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3123, 4.3123, 8.3123, ...], [ 16.3123, 20.3123, 24.3123, ...], [ 32.3123, 36.3123, 40.3123, ...], ], ] sum = 182.811127 ggml_debug: ffn_moe_weights_norm-29 (view) = (f32) VIEW(ffn_moe_weights_norm-29{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3123], [ 16.3123], [ 32.3123], ], ] sum = 48.937046 ggml_debug: ffn_moe_weighted-29 = (f32) MUL(ffn_moe_down-29{6144, 3, 1, 1}, ffn_moe_weights_norm-29 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.5860, 3.4140, 7.4140, ...], [24575.4141, 24579.4141, 24583.4141, ...], [49151.4141, 49155.4141, 49159.4141, ...], ], ] sum = 221214.718750 ggml_debug: ffn_moe_up-29 = (f32) MUL_MAT_ID(blk.29.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-29{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.7105, 3.2895, 7.2895, ...], [43007.2891, 43011.2891, 43015.2891, ...], [86015.2891, 86019.2891, 86023.2891, ...], ], ] sum = 387101.593750 ggml_debug: ffn_moe_gate-29 = (f32) MUL_MAT_ID(blk.29.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-29{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.8161, 4.8161, 8.8161, ...], [43008.8164, 43012.8164, 43016.8164, ...], [86016.8125, 86020.8125, 86024.8125, ...], ], ] sum = 387115.312500 ggml_debug: ffn_moe_silu-29 = (f32) UNARY(ffn_moe_gate-29{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.5658, 4.5658, 8.5658, ...], [43008.5664, 43012.5664, 43016.5664, ...], [86016.5625, 86020.5625, 86024.5625, ...], ], ] sum = 387113.062500 ggml_debug: ffn_moe_gate_par-29 = (f32) MUL(ffn_moe_up-29{10752, 3, 1, 1}, ffn_moe_silu-29{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.4020, 3.5980, 7.5980, ...], [43007.5977, 43011.5977, 43015.5977, ...], [86015.6016, 86019.6016, 86023.6016, ...], ], ] sum = 387104.375000 ggml_debug: ffn_moe_down-29 = (f32) MUL_MAT_ID(blk.29.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-29{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.5226, 4.5226, 8.5226, ...], [24576.5234, 24580.5234, 24584.5234, ...], [49152.5234, 49156.5234, 49160.5234, ...], ], ] sum = 221224.718750 ggml_debug: ffn_moe_weights_norm-29 (view) = (f32) VIEW(ffn_moe_weights_norm-29{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2906], [ 16.2906], [ 32.2906], ], ] sum = 48.871719 ggml_debug: ffn_moe_weighted-29 = (f32) MUL(ffn_moe_down-29{6144, 3, 1, 1}, ffn_moe_weights_norm-29 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1519, 4.1519, 8.1519, ...], [24576.1523, 24580.1523, 24584.1523, ...], [49152.1523, 49156.1523, 49160.1523, ...], ], ] sum = 221221.375000 ggml_debug: ffn_moe_out-29 = (f32) ADD(ffn_moe_weighted-29{6144, 3, 1, 1}, ffn_moe_weighted-29{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.4342, 3.5658, 7.5658, ...], [24575.5664, 24579.5664, 24583.5664, ...], [49151.5664, 49155.5664, 49159.5664, ...], ], ] sum = 221216.078125 ggml_debug: ffn_moe_up-29 = (f32) MUL_MAT_ID(blk.29.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-29{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0670, 3.9330, 7.9330, ...], [43007.9336, 43011.9336, 43015.9336, ...], [86015.9297, 86019.9297, 86023.9297, ...], ], ] sum = 387107.406250 ggml_debug: ffn_moe_gate-29 = (f32) MUL_MAT_ID(blk.29.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-29{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0898, 4.0898, 8.0898, ...], [43008.0898, 43012.0898, 43016.0898, ...], [86016.0859, 86020.0859, 86024.0859, ...], ], ] sum = 387108.812500 ggml_debug: ffn_moe_silu-29 = (f32) UNARY(ffn_moe_gate-29{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0469, 4.0469, 8.0469, ...], [43008.0469, 43012.0469, 43016.0469, ...], [86016.0469, 86020.0469, 86024.0469, ...], ], ] sum = 387108.437500 ggml_debug: ffn_moe_gate_par-29 = (f32) MUL(ffn_moe_up-29{10752, 3, 1, 1}, ffn_moe_silu-29{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0031, 3.9969, 7.9969, ...], [43007.9961, 43011.9961, 43015.9961, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-29 = (f32) MUL_MAT_ID(blk.29.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-29{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1393, 4.1393, 8.1393, ...], [24576.1387, 24580.1387, 24584.1387, ...], [49152.1406, 49156.1406, 49160.1406, ...], ], ] sum = 221221.265625 ggml_debug: ffn_moe_weights_norm-29 (view) = (f32) VIEW(ffn_moe_weights_norm-29{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2287], [ 16.2287], [ 32.2287], ], ] sum = 48.686230 ggml_debug: ffn_moe_weighted-29 = (f32) MUL(ffn_moe_down-29{6144, 3, 1, 1}, ffn_moe_weights_norm-29 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0319, 4.0319, 8.0319, ...], [24576.0312, 24580.0312, 24584.0312, ...], [49152.0312, 49156.0312, 49160.0312, ...], ], ] sum = 221220.281250 ggml_debug: ffn_moe_out-29 = (f32) ADD(ffn_moe_out-29{6144, 3, 1, 1}, ffn_moe_weighted-29{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.4023, 3.5977, 7.5977, ...], [24575.5977, 24579.5977, 24583.5977, ...], [49151.5977, 49155.5977, 49159.5977, ...], ], ] sum = 221216.375000 ggml_debug: ffn_moe_up-29 = (f32) MUL_MAT_ID(blk.29.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-29{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1234, 4.1234, 8.1234, ...], [43008.1250, 43012.1250, 43016.1250, ...], [86016.1250, 86020.1250, 86024.1250, ...], ], ] sum = 387109.125000 ggml_debug: ffn_moe_gate-29 = (f32) MUL_MAT_ID(blk.29.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-29{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3730, 3.6270, 7.6270, ...], [43007.6289, 43011.6289, 43015.6289, ...], [86015.6250, 86019.6250, 86023.6250, ...], ], ] sum = 387104.625000 ggml_debug: ffn_moe_silu-29 = (f32) UNARY(ffn_moe_gate-29{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1521, 3.8479, 7.8479, ...], [43007.8477, 43011.8477, 43015.8477, ...], [86015.8516, 86019.8516, 86023.8516, ...], ], ] sum = 387106.625000 ggml_debug: ffn_moe_gate_par-29 = (f32) MUL(ffn_moe_up-29{10752, 3, 1, 1}, ffn_moe_silu-29{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0188, 3.9812, 7.9812, ...], [43007.9805, 43011.9805, 43015.9805, ...], [86015.9844, 86019.9844, 86023.9844, ...], ], ] sum = 387107.875000 ggml_debug: ffn_moe_down-29 = (f32) MUL_MAT_ID(blk.29.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-29{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0214, 4.0214, 8.0214, ...], [24576.0215, 24580.0215, 24584.0215, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.187500 ggml_debug: ffn_moe_weights_norm-29 (view) = (f32) VIEW(ffn_moe_weights_norm-29{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1683], [ 16.1683], [ 32.1683], ], ] sum = 48.505005 ggml_debug: ffn_moe_weighted-29 = (f32) MUL(ffn_moe_down-29{6144, 3, 1, 1}, ffn_moe_weights_norm-29 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0036, 4.0036, 8.0036, ...], [24576.0039, 24580.0039, 24584.0039, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.031250 ggml_debug: ffn_moe_out-29 = (f32) ADD(ffn_moe_out-29{6144, 3, 1, 1}, ffn_moe_weighted-29{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.3987, 3.6013, 7.6013, ...], [24575.6016, 24579.6016, 24583.6016, ...], [49151.6016, 49155.6016, 49159.6016, ...], ], ] sum = 221216.406250 ggml_debug: ffn_inp-29 = (f32) ADD(kqv_out-29{6144, 3, 1, 1}, l_out-28{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.1588, 2.8412, 6.8412, ...], [24574.8418, 24578.8418, 24582.8418, ...], [49150.8398, 49154.8398, 49158.8398, ...], ], ] sum = 221209.578125 ggml_debug: l_out-29 = (f32) ADD(ffn_moe_out-29{6144, 3, 1, 1}, ffn_inp-29{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.5575, 2.4425, 6.4425, ...], [24574.4434, 24578.4434, 24582.4434, ...], [49150.4414, 49154.4414, 49158.4414, ...], ], ] sum = 221205.968750 ggml_debug: norm-30 = (f32) NORM(l_out-29{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.6311, 3.3689, 7.3689, ...], [24575.3691, 24579.3691, 24583.3691, ...], [49151.3672, 49155.3672, 49159.3672, ...], ], ] sum = 221214.312500 ggml_debug: attn_norm-30 = (f32) MUL(norm-30{6144, 3, 1, 1}, blk.30.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1837, 3.8163, 7.8163, ...], [24575.8164, 24579.8164, 24583.8164, ...], [49151.8164, 49155.8164, 49159.8164, ...], ], ] sum = 221218.343750 ggml_debug: wqkv-30 = (f32) MUL_MAT(blk.30.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-30{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -0.8536, 3.1464, 7.1464, ...], [32767.1465, 32771.1445, 32775.1445, ...], [65535.1445, 65539.1484, 65543.1484, ...], ], ] sum = 294940.312500 ggml_debug: wqkv_clamped-30 = (f32) CLAMP(wqkv-30{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -0.8536, 3.1464, 7.1464, ...], [32767.1465, 32771.1445, 32775.1445, ...], [65535.1445, 65539.1484, 65543.1484, ...], ], ] sum = 294940.312500 ggml_debug: wqkv_clamped-30 (view) = (f32) VIEW(wqkv_clamped-30{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.8536, 3.1464, 7.1464, ...], [32767.1465, 32771.1445, 32775.1445, ...], [65535.1445, 65539.1484, 65543.1484, ...], ], ] sum = 294940.312500 ggml_debug: Qcur-30 = (f32) CONT(wqkv_clamped-30 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.8536, 3.1464, 7.1464, ...], [24575.1465, 24579.1465, 24583.1465, ...], [49151.1445, 49155.1445, 49159.1445, ...], ], ] sum = 221212.312500 ggml_debug: Qcur-30 (reshaped) = (f32) RESHAPE(Qcur-30{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -0.8536, 3.1464, 7.1464, ...], [511.1464, 515.1464, 519.1464, ...], [1023.1464, 1027.1465, 1031.1465, ...], ... ], [ [24575.1465, 24579.1465, 24583.1465, ...], [25087.1465, 25091.1465, 25095.1465, ...], [25599.1465, 25603.1465, 25607.1465, ...], ... ], [ [49151.1445, 49155.1445, 49159.1445, ...], [49663.1445, 49667.1445, 49671.1445, ...], [50175.1445, 50179.1445, 50183.1445, ...], ... ], ] sum = 677460.937500 ggml_debug: Qcur-30 = (f32) ROPE(Qcur-30 (reshaped){128, 48, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -0.8536, 3.1464, 7.1464, ...], [511.1464, 515.1464, 519.1464, ...], [1023.1464, 1027.1465, 1031.1465, ...], ... ], [ [24575.1465, 24579.1465, 24583.1465, ...], [25087.1465, 25091.1465, 25095.1465, ...], [25599.1465, 25603.1465, 25607.1465, ...], ... ], [ [49151.1445, 49155.1445, 49159.1445, ...], [49663.1445, 49667.1445, 49671.1445, ...], [50175.1445, 50179.1445, 50183.1445, ...], ... ], ] sum = 677460.937500 ggml_debug: wqkv_clamped-30 (view) = (f32) VIEW(wqkv_clamped-30{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.4402, 4.4402, 8.4402, ...], [32768.4414, 32772.4414, 32776.4414, ...], [65536.4375, 65540.4375, 65544.4375, ...], ], ] sum = 294951.937500 ggml_debug: Kcur-30 = (f32) CONT(wqkv_clamped-30 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.4402, 4.4402, 8.4402, ...], [4096.4399, 4100.4399, 4104.4399, ...], [8192.4404, 8196.4404, 8200.4404, ...], ], ] sum = 36903.964844 ggml_debug: Kcur-30 (reshaped) = (f32) RESHAPE(Kcur-30{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 0.4402, 4.4402, 8.4402, ...], [512.4402, 516.4402, 520.4402, ...], [1024.4402, 1028.4402, 1032.4402, ...], ... ], [ [4096.4399, 4100.4399, 4104.4399, ...], [4608.4399, 4612.4399, 4616.4399, ...], [5120.4399, 5124.4399, 5128.4399, ...], ... ], [ [8192.4404, 8196.4404, 8200.4404, ...], [8704.4404, 8708.4404, 8712.4404, ...], [9216.4404, 9220.4404, 9224.4404, ...], ... ], ] sum = 124535.867188 ggml_debug: Kcur-30 = (f32) ROPE(Kcur-30 (reshaped){128, 8, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 0.4402, 4.4402, 8.4402, ...], [512.4402, 516.4402, 520.4402, ...], [1024.4402, 1028.4402, 1032.4402, ...], ... ], [ [4096.4399, 4100.4399, 4104.4399, ...], [4608.4399, 4612.4399, 4616.4399, ...], [5120.4399, 5124.4399, 5128.4399, ...], ... ], [ [8192.4404, 8196.4404, 8200.4404, ...], [8704.4404, 8708.4404, 8712.4404, ...], [9216.4404, 9220.4404, 9224.4404, ...], ... ], ] sum = 124535.867188 ggml_debug: wqkv_clamped-30 (view) = (f32) VIEW(wqkv_clamped-30{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0150, 4.0150, 8.0150, ...], [32768.0156, 32772.0156, 32776.0156, ...], [65536.0156, 65540.0156, 65544.0156, ...], ], ] sum = 294948.125000 ggml_debug: Vcur-30 = (f32) CONT(wqkv_clamped-30 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0150, 4.0150, 8.0150, ...], [4096.0151, 4100.0151, 4104.0151, ...], [8192.0146, 8196.0146, 8200.0146, ...], ], ] sum = 36900.136719 ggml_debug: k_cache_view-30 = (f16) VIEW(cache_k_l30{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-30 (copy of Kcur-30) = (f16) CPY(Kcur-30{128, 8, 3, 1}, k_cache_view-30{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 0.4402, 0.4407, 0.4412, ...], ], ] sum = 1.322021 ggml_debug: v_cur_t-30 = (f32) TRANSPOSE(Vcur-30{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0150, 4096.0151, 8192.0146], [ 4.0150, 4100.0151, 8196.0146], [ 8.0150, 4104.0151, 8200.0146], ... ], ] sum = 36900.136719 ggml_debug: v_cache_view-30 = (f16) VIEW(cache_v_l30{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-30 (copy of v_cur_t-30) = (f16) CPY(v_cur_t-30{3, 1024, 1, 1}, v_cache_view-30{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.0150, 0.0150, 0.0150], [ 0.0299, 0.0299, 0.0300], [ 0.0598, 0.0599, 0.0599], ... ], ] sum = 0.314346 ggml_debug: v-30 = (f16) VIEW(cache_v_l30{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.0150, 0.0150, 0.0150, ...], [ 0.0299, 0.0299, 0.0300, ...], [ 0.0598, 0.0599, 0.0599, ...], ... ], [ [ 0.0150, 0.0150, 0.0150, ...], [ 0.0299, 0.0299, 0.0300, ...], [ 0.0598, 0.0599, 0.0599, ...], ... ], [ [ 0.0150, 0.0150, 0.0150, ...], [ 0.0299, 0.0299, 0.0300, ...], [ 0.0598, 0.0599, 0.0599, ...], ... ], ... ] sum = 0.943039 ggml_debug: k-30 = (f16) VIEW(cache_k_l30{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 0.4402, 0.4407, 0.4412, ...], [ 1.7607, 1.7627, 1.7646, ...], [ 7.0430, 7.0508, 7.0586, ...], ... ], [ [ 0.5054, 0.5063, 0.5073, ...], [ 2.0215, 2.0254, 2.0293, ...], [ 8.0859, 8.1016, 8.1172, ...], ... ], [ [ 0.6304, 0.6313, 0.6323, ...], [ 2.5215, 2.5254, 2.5293, ...], [ 10.0859, 10.1016, 10.1172, ...], ... ], ... ] sum = 99.437256 ggml_debug: q-30 = (f32) PERMUTE(Qcur-30{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -0.8536, 3.1464, 7.1464, ...], [24575.1465, 24579.1465, 24583.1465, ...], [49151.1445, 49155.1445, 49159.1445, ...], ], [ [511.1464, 515.1464, 519.1464, ...], [25087.1465, 25091.1465, 25095.1465, ...], [49663.1445, 49667.1445, 49671.1445, ...], ], [ [1023.1464, 1027.1465, 1031.1465, ...], [25599.1465, 25603.1465, 25607.1465, ...], [50175.1445, 50179.1445, 50183.1445, ...], ], ... ] sum = 677460.937500 ggml_debug: kq-30 = (f32) MUL_MAT(k-30{128, 32, 8, 1}, q-30{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 20.9375, 24.9375, 28.9375, ...], [148.9375, 152.9375, 156.9375, ...], [276.9375, 280.9375, 284.9375, ...], ], [ [404.9375, 408.9375, 412.9375, ...], [532.9375, 536.9375, 540.9375, ...], [660.9375, 664.9375, 668.9375, ...], ], [ [788.9375, 792.9375, 796.9375, ...], [916.9375, 920.9375, 924.9375, ...], [1044.9375, 1048.9375, 1052.9375, ...], ], ... ] sum = 14497.312500 ggml_debug: kq_soft_max_ext-30 = (f32) SOFT_MAX(kq-30{32, 3, 48, 1}, CUDA2#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-30 = (f32) MUL_MAT(v-30{32, 128, 8, 1}, kq_soft_max_ext-30{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.0150, 4.0150, 8.0150, ...], [512.0150, 516.0150, 520.0150, ...], [1024.0149, 1028.0149, 1032.0149, ...], ], [ [1536.0149, 1540.0149, 1544.0149, ...], [2048.0149, 2052.0149, 2056.0149, ...], [2560.0149, 2564.0149, 2568.0149, ...], ], [ [3072.0149, 3076.0149, 3080.0149, ...], [3584.0149, 3588.0149, 3592.0149, ...], [4096.0151, 4100.0151, 4104.0151, ...], ], ... ] sum = 55404.410156 ggml_debug: kqv_merged-30 = (f32) PERMUTE(kqv-30{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0150, 4.0150, 8.0150, ...], [1536.0149, 1540.0149, 1544.0149, ...], [3072.0149, 3076.0149, 3080.0149, ...], ... ], [ [512.0150, 516.0150, 520.0150, ...], [2048.0149, 2052.0149, 2056.0149, ...], [3584.0149, 3588.0149, 3592.0149, ...], ... ], [ [1024.0149, 1028.0149, 1032.0149, ...], [2560.0149, 2564.0149, 2568.0149, ...], [4096.0151, 4100.0151, 4104.0151, ...], ... ], ] sum = 55404.410156 ggml_debug: kqv_merged_cont-30 = (f32) CONT(kqv_merged-30{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0150, 4.0150, 8.0150, ...], [24576.0156, 24580.0156, 24584.0156, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: kqv_out-30 = (f32) MUL_MAT(blk.30.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-30{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.3480, 4.3480, 8.3480, ...], [24576.3477, 24580.3477, 24584.3477, ...], [49152.3477, 49156.3477, 49160.3477, ...], ], ] sum = 221223.125000 ggml_debug: norm-30 = (f32) NORM(kqv_out-30{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.7860, 4.7860, 8.7860, ...], [24576.7852, 24580.7852, 24584.7852, ...], [49152.7852, 49156.7852, 49160.7852, ...], ], ] sum = 221227.062500 ggml_debug: attn_out_norm-30 = (f32) MUL(norm-30{6144, 3, 1, 1}, blk.30.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.3961, 4.3961, 8.3961, ...], [24576.3965, 24580.3965, 24584.3965, ...], [49152.3945, 49156.3945, 49160.3945, ...], ], ] sum = 221223.546875 ggml_debug: ffn_moe_logits-30 = (f32) MUL_MAT(blk.30.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-30{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ 0.4839, 4.4839, 8.4839, ...], [ 64.4839, 68.4839, 72.4839, ...], [128.4839, 132.4839, 136.4839, ...], ], ] sum = 616.354980 ggml_debug: ffn_moe_probs-30 = (f32) SOFT_MAX(ffn_moe_logits-30{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0937, 4.0937, 8.0937, ...], [ 64.0937, 68.0937, 72.0937, ...], [128.0937, 132.0937, 136.0937, ...], ], ] sum = 612.843140 ggml_debug: ffn_moe_argsort-30 = (i32) ARGSORT(ffn_moe_probs-30{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 12.0000, 16.0000, 20.0000, ...], [ 76.0000, 80.0000, 84.0000, ...], [140.0000, 144.0000, 148.0000, ...], ], ] sum = 720.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-30{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 12.0000, 16.0000, 20.0000, ...], [ 76.0000, 80.0000, 84.0000, ...], [140.0000, 144.0000, 148.0000, ...], ], ] sum = 720.000000 ggml_debug: ffn_moe_up-30 = (f32) MUL_MAT_ID(blk.30.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-30{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.4474, 4.4474, 8.4474, ...], [43008.4492, 43012.4492, 43016.4492, ...], [86016.4453, 86020.4453, 86024.4453, ...], ], ] sum = 387112.000000 ggml_debug: ffn_moe_gate-30 = (f32) MUL_MAT_ID(blk.30.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-30{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.0738, 2.9262, 6.9262, ...], [43006.9258, 43010.9258, 43014.9258, ...], [86014.9297, 86018.9297, 86022.9297, ...], ], ] sum = 387098.343750 ggml_debug: ffn_moe_silu-30 = (f32) UNARY(ffn_moe_gate-30{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.2735, 3.7265, 7.7265, ...], [43007.7266, 43011.7266, 43015.7266, ...], [86015.7266, 86019.7266, 86023.7266, ...], ], ] sum = 387105.531250 ggml_debug: ffn_moe_gate_par-30 = (f32) MUL(ffn_moe_up-30{10752, 3, 1, 1}, ffn_moe_silu-30{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1224, 3.8776, 7.8776, ...], [43007.8789, 43011.8789, 43015.8789, ...], [86015.8750, 86019.8750, 86023.8750, ...], ], ] sum = 387106.875000 ggml_debug: ffn_moe_down-30 = (f32) MUL_MAT_ID(blk.30.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-30{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0934, 4.0934, 8.0934, ...], [24576.0938, 24580.0938, 24584.0938, ...], [49152.0938, 49156.0938, 49160.0938, ...], ], ] sum = 221220.843750 ggml_debug: ffn_moe_probs-30 (reshaped) = (f32) RESHAPE(ffn_moe_probs-30{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0937], [ 4.0937], [ 8.0937], ... ], [ [ 64.0937], [ 68.0937], [ 72.0937], ... ], [ [128.0937], [132.0937], [136.0937], ... ], ] sum = 612.843140 ggml_debug: ffn_moe_weights-30 = (f32) GET_ROWS(ffn_moe_probs-30 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1001], [ 4.1001], [ 8.1001], ... ], [ [ 16.1001], [ 20.1001], [ 24.1001], ... ], [ [ 32.1001], [ 36.1001], [ 40.1001], ... ], ] sum = 180.900604 ggml_debug: ffn_moe_weights-30 (reshaped) = (f32) RESHAPE(ffn_moe_weights-30{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1001, 4.1001, 8.1001, ...], [ 16.1001, 20.1001, 24.1001, ...], [ 32.1001, 36.1001, 40.1001, ...], ], ] sum = 180.900604 ggml_debug: ffn_moe_weights_sum-30 = (f32) SUM_ROWS(ffn_moe_weights-30 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3557], [ 4.3557], [ 8.3557], ], ] sum = 13.067064 ggml_debug: ffn_moe_weights_norm-30 = (f32) DIV(ffn_moe_weights-30 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-30{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2813, 4.2813, 8.2813, ...], [ 16.2813, 20.2813, 24.2813, ...], [ 32.2813, 36.2813, 40.2813, ...], ], ] sum = 182.532013 ggml_debug: ffn_moe_weights_norm-30 (view) = (f32) VIEW(ffn_moe_weights_norm-30{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2813], [ 16.2813], [ 32.2813], ], ] sum = 48.844002 ggml_debug: ffn_moe_weighted-30 = (f32) MUL(ffn_moe_down-30{6144, 3, 1, 1}, ffn_moe_weights_norm-30 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0263, 4.0263, 8.0263, ...], [24576.0254, 24580.0254, 24584.0254, ...], [49152.0273, 49156.0273, 49160.0273, ...], ], ] sum = 221220.250000 ggml_debug: ffn_moe_up-30 = (f32) MUL_MAT_ID(blk.30.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-30{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2513, 4.2513, 8.2513, ...], [43008.2500, 43012.2500, 43016.2500, ...], [86016.2500, 86020.2500, 86024.2500, ...], ], ] sum = 387110.250000 ggml_debug: ffn_moe_gate-30 = (f32) MUL_MAT_ID(blk.30.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-30{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3433, 4.3433, 8.3433, ...], [43008.3438, 43012.3438, 43016.3438, ...], [86016.3438, 86020.3438, 86024.3438, ...], ], ] sum = 387111.093750 ggml_debug: ffn_moe_silu-30 = (f32) UNARY(ffn_moe_gate-30{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.2008, 4.2008, 8.2008, ...], [43008.1992, 43012.1992, 43016.1992, ...], [86016.2031, 86020.2031, 86024.2031, ...], ], ] sum = 387109.812500 ggml_debug: ffn_moe_gate_par-30 = (f32) MUL(ffn_moe_up-30{10752, 3, 1, 1}, ffn_moe_silu-30{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0505, 4.0505, 8.0505, ...], [43008.0508, 43012.0508, 43016.0508, ...], [86016.0469, 86020.0469, 86024.0469, ...], ], ] sum = 387108.437500 ggml_debug: ffn_moe_down-30 = (f32) MUL_MAT_ID(blk.30.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-30{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0431, 3.9569, 7.9569, ...], [24575.9570, 24579.9570, 24583.9570, ...], [49151.9570, 49155.9570, 49159.9570, ...], ], ] sum = 221219.609375 ggml_debug: ffn_moe_weights_norm-30 (view) = (f32) VIEW(ffn_moe_weights_norm-30{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2634], [ 16.2634], [ 32.2634], ], ] sum = 48.790157 ggml_debug: ffn_moe_weighted-30 = (f32) MUL(ffn_moe_down-30{6144, 3, 1, 1}, ffn_moe_weights_norm-30 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0114, 3.9886, 7.9886, ...], [24575.9883, 24579.9883, 24583.9883, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.890625 ggml_debug: ffn_moe_out-30 = (f32) ADD(ffn_moe_weighted-30{6144, 3, 1, 1}, ffn_moe_weighted-30{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0149, 4.0149, 8.0149, ...], [24576.0156, 24580.0156, 24584.0156, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: ffn_moe_up-30 = (f32) MUL_MAT_ID(blk.30.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-30{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.7039, 4.7039, 8.7039, ...], [43008.7031, 43012.7031, 43016.7031, ...], [86016.7031, 86020.7031, 86024.7031, ...], ], ] sum = 387114.312500 ggml_debug: ffn_moe_gate-30 = (f32) MUL_MAT_ID(blk.30.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-30{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3876, 4.3876, 8.3876, ...], [43008.3867, 43012.3867, 43016.3867, ...], [86016.3906, 86020.3906, 86024.3906, ...], ], ] sum = 387111.500000 ggml_debug: ffn_moe_silu-30 = (f32) UNARY(ffn_moe_gate-30{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.2309, 4.2309, 8.2309, ...], [43008.2305, 43012.2305, 43016.2305, ...], [86016.2344, 86020.2344, 86024.2344, ...], ], ] sum = 387110.125000 ggml_debug: ffn_moe_gate_par-30 = (f32) MUL(ffn_moe_up-30{10752, 3, 1, 1}, ffn_moe_silu-30{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1625, 4.1625, 8.1625, ...], [43008.1641, 43012.1641, 43016.1641, ...], [86016.1641, 86020.1641, 86024.1641, ...], ], ] sum = 387109.468750 ggml_debug: ffn_moe_down-30 = (f32) MUL_MAT_ID(blk.30.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-30{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.4085, 3.5915, 7.5915, ...], [24575.5918, 24579.5918, 24583.5918, ...], [49151.5898, 49155.5898, 49159.5898, ...], ], ] sum = 221216.328125 ggml_debug: ffn_moe_weights_norm-30 (view) = (f32) VIEW(ffn_moe_weights_norm-30{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2371], [ 16.2371], [ 32.2371], ], ] sum = 48.711414 ggml_debug: ffn_moe_weighted-30 = (f32) MUL(ffn_moe_down-30{6144, 3, 1, 1}, ffn_moe_weights_norm-30 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0969, 3.9031, 7.9031, ...], [24575.9023, 24579.9023, 24583.9023, ...], [49151.9023, 49155.9023, 49159.9023, ...], ], ] sum = 221219.140625 ggml_debug: ffn_moe_out-30 = (f32) ADD(ffn_moe_out-30{6144, 3, 1, 1}, ffn_moe_weighted-30{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0820, 3.9180, 7.9180, ...], [24575.9180, 24579.9180, 24583.9180, ...], [49151.9180, 49155.9180, 49159.9180, ...], ], ] sum = 221219.265625 ggml_debug: ffn_moe_up-30 = (f32) MUL_MAT_ID(blk.30.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-30{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.5356, 4.5356, 8.5356, ...], [43008.5352, 43012.5352, 43016.5352, ...], [86016.5391, 86020.5391, 86024.5391, ...], ], ] sum = 387112.812500 ggml_debug: ffn_moe_gate-30 = (f32) MUL_MAT_ID(blk.30.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-30{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0499, 4.0499, 8.0499, ...], [43008.0508, 43012.0508, 43016.0508, ...], [86016.0469, 86020.0469, 86024.0469, ...], ], ] sum = 387108.437500 ggml_debug: ffn_moe_silu-30 = (f32) UNARY(ffn_moe_gate-30{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0255, 4.0255, 8.0255, ...], [43008.0273, 43012.0273, 43016.0273, ...], [86016.0234, 86020.0234, 86024.0234, ...], ], ] sum = 387108.250000 ggml_debug: ffn_moe_gate_par-30 = (f32) MUL(ffn_moe_up-30{10752, 3, 1, 1}, ffn_moe_silu-30{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0137, 4.0137, 8.0137, ...], [43008.0156, 43012.0156, 43016.0156, ...], [86016.0156, 86020.0156, 86024.0156, ...], ], ] sum = 387108.125000 ggml_debug: ffn_moe_down-30 = (f32) MUL_MAT_ID(blk.30.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-30{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.3007, 3.6993, 7.6993, ...], [24575.6992, 24579.6992, 24583.6992, ...], [49151.6992, 49155.6992, 49159.6992, ...], ], ] sum = 221217.296875 ggml_debug: ffn_moe_weights_norm-30 (view) = (f32) VIEW(ffn_moe_weights_norm-30{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2181], [ 16.2181], [ 32.2181], ], ] sum = 48.654427 ggml_debug: ffn_moe_weighted-30 = (f32) MUL(ffn_moe_down-30{6144, 3, 1, 1}, ffn_moe_weights_norm-30 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0656, 3.9344, 7.9344, ...], [24575.9336, 24579.9336, 24583.9336, ...], [49151.9336, 49155.9336, 49159.9336, ...], ], ] sum = 221219.421875 ggml_debug: ffn_moe_out-30 = (f32) ADD(ffn_moe_out-30{6144, 3, 1, 1}, ffn_moe_weighted-30{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1476, 3.8524, 7.8524, ...], [24575.8516, 24579.8516, 24583.8516, ...], [49151.8516, 49155.8516, 49159.8516, ...], ], ] sum = 221218.656250 ggml_debug: ffn_inp-30 = (f32) ADD(kqv_out-30{6144, 3, 1, 1}, l_out-29{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.2096, 2.7904, 6.7904, ...], [24574.7910, 24578.7910, 24582.7910, ...], [49150.7891, 49154.7891, 49158.7891, ...], ], ] sum = 221209.093750 ggml_debug: l_out-30 = (f32) ADD(ffn_moe_out-30{6144, 3, 1, 1}, ffn_inp-30{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.3572, 2.6428, 6.6428, ...], [24574.6426, 24578.6426, 24582.6426, ...], [49150.6445, 49154.6445, 49158.6445, ...], ], ] sum = 221207.781250 ggml_debug: norm-31 = (f32) NORM(l_out-30{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.5768, 3.4232, 7.4232, ...], [24575.4238, 24579.4238, 24583.4238, ...], [49151.4219, 49155.4219, 49159.4219, ...], ], ] sum = 221214.796875 ggml_debug: attn_norm-31 = (f32) MUL(norm-31{6144, 3, 1, 1}, blk.31.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1600, 3.8400, 7.8400, ...], [24575.8398, 24579.8398, 24583.8398, ...], [49151.8398, 49155.8398, 49159.8398, ...], ], ] sum = 221218.562500 ggml_debug: wqkv-31 = (f32) MUL_MAT(blk.31.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-31{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -0.1408, 3.8592, 7.8592, ...], [32767.8594, 32771.8594, 32775.8594, ...], [65535.8594, 65539.8594, 65543.8594, ...], ], ] sum = 294946.750000 ggml_debug: wqkv_clamped-31 = (f32) CLAMP(wqkv-31{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -0.1408, 3.8592, 7.8592, ...], [32767.8594, 32771.8594, 32775.8594, ...], [65535.8594, 65539.8594, 65543.8594, ...], ], ] sum = 294946.750000 ggml_debug: wqkv_clamped-31 (view) = (f32) VIEW(wqkv_clamped-31{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1408, 3.8592, 7.8592, ...], [32767.8594, 32771.8594, 32775.8594, ...], [65535.8594, 65539.8594, 65543.8594, ...], ], ] sum = 294946.750000 ggml_debug: Qcur-31 = (f32) CONT(wqkv_clamped-31 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1408, 3.8592, 7.8592, ...], [24575.8594, 24579.8594, 24583.8594, ...], [49151.8594, 49155.8594, 49159.8594, ...], ], ] sum = 221218.734375 ggml_debug: Qcur-31 (reshaped) = (f32) RESHAPE(Qcur-31{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -0.1408, 3.8592, 7.8592, ...], [511.8592, 515.8592, 519.8592, ...], [1023.8592, 1027.8593, 1031.8593, ...], ... ], [ [24575.8594, 24579.8594, 24583.8594, ...], [25087.8594, 25091.8594, 25095.8594, ...], [25599.8594, 25603.8594, 25607.8594, ...], ... ], [ [49151.8594, 49155.8594, 49159.8594, ...], [49663.8594, 49667.8594, 49671.8594, ...], [50175.8594, 50179.8594, 50183.8594, ...], ... ], ] sum = 677480.312500 ggml_debug: Qcur-31 = (f32) ROPE(Qcur-31 (reshaped){128, 48, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -0.1408, 3.8592, 7.8592, ...], [511.8592, 515.8592, 519.8592, ...], [1023.8592, 1027.8593, 1031.8593, ...], ... ], [ [24575.8594, 24579.8594, 24583.8594, ...], [25087.8594, 25091.8594, 25095.8594, ...], [25599.8594, 25603.8594, 25607.8594, ...], ... ], [ [49151.8594, 49155.8594, 49159.8594, ...], [49663.8594, 49667.8594, 49671.8594, ...], [50175.8594, 50179.8594, 50183.8594, ...], ... ], ] sum = 677480.312500 ggml_debug: wqkv_clamped-31 (view) = (f32) VIEW(wqkv_clamped-31{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0003, 4.0003, 8.0003, ...], [32768.0000, 32772.0000, 32776.0000, ...], [65536.0000, 65540.0000, 65544.0000, ...], ], ] sum = 294948.000000 ggml_debug: Kcur-31 = (f32) CONT(wqkv_clamped-31 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0003, 4.0003, 8.0003, ...], [4096.0005, 4100.0005, 4104.0005, ...], [8192.0000, 8196.0000, 8200.0000, ...], ], ] sum = 36900.000000 ggml_debug: Kcur-31 (reshaped) = (f32) RESHAPE(Kcur-31{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 0.0003, 4.0003, 8.0003, ...], [512.0003, 516.0003, 520.0003, ...], [1024.0002, 1028.0002, 1032.0002, ...], ... ], [ [4096.0005, 4100.0005, 4104.0005, ...], [4608.0005, 4612.0005, 4616.0005, ...], [5120.0005, 5124.0005, 5128.0005, ...], ... ], [ [8192.0000, 8196.0000, 8200.0000, ...], [8704.0000, 8708.0000, 8712.0000, ...], [9216.0000, 9220.0000, 9224.0000, ...], ... ], ] sum = 124524.000000 ggml_debug: Kcur-31 = (f32) ROPE(Kcur-31 (reshaped){128, 8, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 0.0003, 4.0003, 8.0003, ...], [512.0003, 516.0003, 520.0003, ...], [1024.0002, 1028.0002, 1032.0002, ...], ... ], [ [4096.0005, 4100.0005, 4104.0005, ...], [4608.0005, 4612.0005, 4616.0005, ...], [5120.0005, 5124.0005, 5128.0005, ...], ... ], [ [8192.0000, 8196.0000, 8200.0000, ...], [8704.0000, 8708.0000, 8712.0000, ...], [9216.0000, 9220.0000, 9224.0000, ...], ... ], ] sum = 124524.000000 ggml_debug: wqkv_clamped-31 (view) = (f32) VIEW(wqkv_clamped-31{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.1079, 3.8921, 7.8921, ...], [32767.8926, 32771.8906, 32775.8906, ...], [65535.8906, 65539.8906, 65543.8906, ...], ], ] sum = 294947.031250 ggml_debug: Vcur-31 = (f32) CONT(wqkv_clamped-31 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.1079, 3.8921, 7.8921, ...], [4095.8921, 4099.8921, 4103.8921, ...], [8191.8921, 8195.8926, 8199.8926, ...], ], ] sum = 36899.031250 ggml_debug: k_cache_view-31 = (f16) VIEW(cache_k_l31{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-31 (copy of Kcur-31) = (f16) CPY(Kcur-31{128, 8, 3, 1}, k_cache_view-31{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 0.0003, 0.0003, 0.0003, ...], ], ] sum = 0.000896 ggml_debug: v_cur_t-31 = (f32) TRANSPOSE(Vcur-31{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.1079, 4095.8921, 8191.8921], [ 3.8921, 4099.8921, 8195.8926], [ 7.8921, 4103.8921, 8199.8926], ... ], ] sum = 36899.031250 ggml_debug: v_cache_view-31 = (f16) VIEW(cache_v_l31{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-31 (copy of v_cur_t-31) = (f16) CPY(v_cur_t-31{3, 1024, 1, 1}, v_cache_view-31{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.1078, -0.1080, -0.1081], [ -0.2157, -0.2159, -0.2162], [ -0.4314, -0.4319, -0.4324], ... ], ] sum = -2.267395 ggml_debug: v-31 = (f16) VIEW(cache_v_l31{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.1078, -0.1080, -0.1081, ...], [ -0.2157, -0.2159, -0.2162, ...], [ -0.4314, -0.4319, -0.4324, ...], ... ], [ [ -0.1078, -0.1080, -0.1081, ...], [ -0.2157, -0.2159, -0.2162, ...], [ -0.4314, -0.4319, -0.4324, ...], ... ], [ [ -0.1078, -0.1080, -0.1081, ...], [ -0.2157, -0.2159, -0.2162, ...], [ -0.4314, -0.4319, -0.4324, ...], ... ], ... ] sum = -6.802185 ggml_debug: k-31 = (f16) VIEW(cache_k_l31{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 0.0003, 0.0003, 0.0003, ...], [ 0.0012, 0.0012, 0.0012, ...], [ 0.0048, 0.0048, 0.0048, ...], ... ], [ [ 0.0004, 0.0004, 0.0004, ...], [ 0.0014, 0.0014, 0.0014, ...], [ 0.0057, 0.0058, 0.0058, ...], ... ], [ [ 0.0004, 0.0004, 0.0004, ...], [ 0.0017, 0.0017, 0.0017, ...], [ 0.0067, 0.0067, 0.0067, ...], ... ], ... ] sum = 0.067997 ggml_debug: q-31 = (f32) PERMUTE(Qcur-31{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -0.1408, 3.8592, 7.8592, ...], [24575.8594, 24579.8594, 24583.8594, ...], [49151.8594, 49155.8594, 49159.8594, ...], ], [ [511.8592, 515.8592, 519.8592, ...], [25087.8594, 25091.8594, 25095.8594, ...], [49663.8594, 49667.8594, 49671.8594, ...], ], [ [1023.8592, 1027.8593, 1031.8593, ...], [25599.8594, 25603.8594, 25607.8594, ...], [50175.8594, 50179.8594, 50183.8594, ...], ], ... ] sum = 677480.250000 ggml_debug: kq-31 = (f32) MUL_MAT(k-31{128, 32, 8, 1}, q-31{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 26.6719, 30.6719, 34.6719, ...], [154.6719, 158.6719, 162.6719, ...], [282.6719, 286.6719, 290.6719, ...], ], [ [410.6719, 414.6719, 418.6719, ...], [538.6719, 542.6719, 546.6719, ...], [666.6719, 670.6719, 674.6719, ...], ], [ [794.6719, 798.6719, 802.6719, ...], [922.6719, 926.6719, 930.6719, ...], [1050.6719, 1054.6719, 1058.6719, ...], ], ... ] sum = 14652.140625 ggml_debug: kq_soft_max_ext-31 = (f32) SOFT_MAX(kq-31{32, 3, 48, 1}, CUDA2#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-31 = (f32) MUL_MAT(v-31{32, 128, 8, 1}, kq_soft_max_ext-31{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.1078, 3.8922, 7.8922, ...], [511.8922, 515.8922, 519.8922, ...], [1023.8922, 1027.8921, 1031.8921, ...], ], [ [1535.8921, 1539.8921, 1543.8921, ...], [2047.8921, 2051.8921, 2055.8921, ...], [2559.8921, 2563.8921, 2567.8921, ...], ], [ [3071.8921, 3075.8921, 3079.8921, ...], [3583.8921, 3587.8921, 3591.8921, ...], [4095.8921, 4099.8921, 4103.8921, ...], ], ... ] sum = 55401.082031 ggml_debug: kqv_merged-31 = (f32) PERMUTE(kqv-31{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.1078, 3.8922, 7.8922, ...], [1535.8921, 1539.8921, 1543.8921, ...], [3071.8921, 3075.8921, 3079.8921, ...], ... ], [ [511.8922, 515.8922, 519.8922, ...], [2047.8921, 2051.8921, 2055.8921, ...], [3583.8921, 3587.8921, 3591.8921, ...], ... ], [ [1023.8922, 1027.8921, 1031.8921, ...], [2559.8921, 2563.8921, 2567.8921, ...], [4095.8921, 4099.8921, 4103.8921, ...], ... ], ] sum = 55401.078125 ggml_debug: kqv_merged_cont-31 = (f32) CONT(kqv_merged-31{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1078, 3.8922, 7.8922, ...], [24575.8926, 24579.8926, 24583.8926, ...], [49151.8906, 49155.8906, 49159.8906, ...], ], ] sum = 221219.015625 ggml_debug: kqv_out-31 = (f32) MUL_MAT(blk.31.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-31{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.4836, 3.5164, 7.5164, ...], [24575.5156, 24579.5156, 24583.5156, ...], [49151.5156, 49155.5156, 49159.5156, ...], ], ] sum = 221215.640625 ggml_debug: norm-31 = (f32) NORM(kqv_out-31{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -1.0592, 2.9408, 6.9408, ...], [24574.9414, 24578.9414, 24582.9414, ...], [49150.9414, 49154.9414, 49158.9414, ...], ], ] sum = 221210.453125 ggml_debug: attn_out_norm-31 = (f32) MUL(norm-31{6144, 3, 1, 1}, blk.31.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.5544, 3.4456, 7.4456, ...], [24575.4453, 24579.4453, 24583.4453, ...], [49151.4453, 49155.4453, 49159.4453, ...], ], ] sum = 221215.000000 ggml_debug: ffn_moe_logits-31 = (f32) MUL_MAT(blk.31.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-31{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.2203, 3.7797, 7.7797, ...], [ 63.7797, 67.7797, 71.7797, ...], [127.7797, 131.7797, 135.7797, ...], ], ] sum = 610.016968 ggml_debug: ffn_moe_probs-31 = (f32) SOFT_MAX(ffn_moe_logits-31{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0474, 4.0474, 8.0474, ...], [ 64.0474, 68.0474, 72.0474, ...], [128.0475, 132.0475, 136.0475, ...], ], ] sum = 612.427063 ggml_debug: ffn_moe_argsort-31 = (i32) ARGSORT(ffn_moe_probs-31{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 9.0000, 13.0000, 17.0000, ...], [ 73.0000, 77.0000, 81.0000, ...], [137.0000, 141.0000, 145.0000, ...], ], ] sum = 693.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-31{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 9.0000, 13.0000, 17.0000, ...], [ 73.0000, 77.0000, 81.0000, ...], [137.0000, 141.0000, 145.0000, ...], ], ] sum = 693.000000 ggml_debug: ffn_moe_up-31 = (f32) MUL_MAT_ID(blk.31.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-31{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.4442, 4.4442, 8.4442, ...], [43008.4453, 43012.4453, 43016.4453, ...], [86016.4453, 86020.4453, 86024.4453, ...], ], ] sum = 387112.000000 ggml_debug: ffn_moe_gate-31 = (f32) MUL_MAT_ID(blk.31.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-31{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2659, 3.7341, 7.7341, ...], [43007.7344, 43011.7344, 43015.7344, ...], [86015.7344, 86019.7344, 86023.7344, ...], ], ] sum = 387105.625000 ggml_debug: ffn_moe_silu-31 = (f32) UNARY(ffn_moe_gate-31{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1154, 3.8846, 7.8846, ...], [43007.8828, 43011.8828, 43015.8828, ...], [86015.8828, 86019.8828, 86023.8828, ...], ], ] sum = 387106.937500 ggml_debug: ffn_moe_gate_par-31 = (f32) MUL(ffn_moe_up-31{10752, 3, 1, 1}, ffn_moe_silu-31{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0512, 3.9488, 7.9488, ...], [43007.9492, 43011.9492, 43015.9492, ...], [86015.9453, 86019.9453, 86023.9453, ...], ], ] sum = 387107.531250 ggml_debug: ffn_moe_down-31 = (f32) MUL_MAT_ID(blk.31.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-31{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1354, 4.1354, 8.1354, ...], [24576.1348, 24580.1348, 24584.1348, ...], [49152.1367, 49156.1367, 49160.1367, ...], ], ] sum = 221221.234375 ggml_debug: ffn_moe_probs-31 (reshaped) = (f32) RESHAPE(ffn_moe_probs-31{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0474], [ 4.0474], [ 8.0474], ... ], [ [ 64.0474], [ 68.0474], [ 72.0474], ... ], [ [128.0475], [132.0475], [136.0475], ... ], ] sum = 612.427063 ggml_debug: ffn_moe_weights-31 = (f32) GET_ROWS(ffn_moe_probs-31 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1264], [ 4.1264], [ 8.1264], ... ], [ [ 16.1264], [ 20.1264], [ 24.1264], ... ], [ [ 32.1264], [ 36.1264], [ 40.1264], ... ], ] sum = 181.137955 ggml_debug: ffn_moe_weights-31 (reshaped) = (f32) RESHAPE(ffn_moe_weights-31{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1264, 4.1264, 8.1264, ...], [ 16.1264, 20.1264, 24.1264, ...], [ 32.1264, 36.1264, 40.1264, ...], ], ] sum = 181.137955 ggml_debug: ffn_moe_weights_sum-31 = (f32) SUM_ROWS(ffn_moe_weights-31 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3496], [ 4.3496], [ 8.3496], ], ] sum = 13.048886 ggml_debug: ffn_moe_weights_norm-31 = (f32) DIV(ffn_moe_weights-31 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-31{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3616, 4.3616, 8.3616, ...], [ 16.3616, 20.3616, 24.3616, ...], [ 32.3616, 36.3616, 40.3616, ...], ], ] sum = 183.254761 ggml_debug: ffn_moe_weights_norm-31 (view) = (f32) VIEW(ffn_moe_weights_norm-31{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3616], [ 16.3616], [ 32.3616], ], ] sum = 49.084923 ggml_debug: ffn_moe_weighted-31 = (f32) MUL(ffn_moe_down-31{6144, 3, 1, 1}, ffn_moe_weights_norm-31 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0490, 4.0490, 8.0490, ...], [24576.0488, 24580.0488, 24584.0488, ...], [49152.0508, 49156.0508, 49160.0508, ...], ], ] sum = 221220.437500 ggml_debug: ffn_moe_up-31 = (f32) MUL_MAT_ID(blk.31.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-31{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2101, 3.7899, 7.7899, ...], [43007.7891, 43011.7891, 43015.7891, ...], [86015.7891, 86019.7891, 86023.7891, ...], ], ] sum = 387106.093750 ggml_debug: ffn_moe_gate-31 = (f32) MUL_MAT_ID(blk.31.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-31{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0001, 3.9999, 7.9999, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_silu-31 = (f32) UNARY(ffn_moe_gate-31{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0001, 3.9999, 7.9999, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_gate_par-31 = (f32) MUL(ffn_moe_up-31{10752, 3, 1, 1}, ffn_moe_silu-31{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0000, 4.0000, 8.0000, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-31 = (f32) MUL_MAT_ID(blk.31.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-31{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2002, 3.7998, 7.7998, ...], [24575.8008, 24579.8008, 24583.8008, ...], [49151.8008, 49155.8008, 49159.8008, ...], ], ] sum = 221218.187500 ggml_debug: ffn_moe_weights_norm-31 (view) = (f32) VIEW(ffn_moe_weights_norm-31{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2230], [ 16.2230], [ 32.2230], ], ] sum = 48.668892 ggml_debug: ffn_moe_weighted-31 = (f32) MUL(ffn_moe_down-31{6144, 3, 1, 1}, ffn_moe_weights_norm-31 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0446, 3.9554, 7.9554, ...], [24575.9551, 24579.9551, 24583.9551, ...], [49151.9570, 49155.9570, 49159.9570, ...], ], ] sum = 221219.593750 ggml_debug: ffn_moe_out-31 = (f32) ADD(ffn_moe_weighted-31{6144, 3, 1, 1}, ffn_moe_weighted-31{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0043, 4.0043, 8.0043, ...], [24576.0039, 24580.0039, 24584.0039, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.031250 ggml_debug: ffn_moe_up-31 = (f32) MUL_MAT_ID(blk.31.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-31{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.7955, 4.7955, 8.7955, ...], [43008.7969, 43012.7969, 43016.7969, ...], [86016.7969, 86020.7969, 86024.7969, ...], ], ] sum = 387115.187500 ggml_debug: ffn_moe_gate-31 = (f32) MUL_MAT_ID(blk.31.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-31{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2955, 3.7045, 7.7045, ...], [43007.7031, 43011.7031, 43015.7031, ...], [86015.7031, 86019.7031, 86023.7031, ...], ], ] sum = 387105.312500 ggml_debug: ffn_moe_silu-31 = (f32) UNARY(ffn_moe_gate-31{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1261, 3.8739, 7.8739, ...], [43007.8750, 43011.8750, 43015.8750, ...], [86015.8750, 86019.8750, 86023.8750, ...], ], ] sum = 387106.875000 ggml_debug: ffn_moe_gate_par-31 = (f32) MUL(ffn_moe_up-31{10752, 3, 1, 1}, ffn_moe_silu-31{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1003, 3.8997, 7.8997, ...], [43007.8984, 43011.8984, 43015.8984, ...], [86015.8984, 86019.8984, 86023.8984, ...], ], ] sum = 387107.093750 ggml_debug: ffn_moe_down-31 = (f32) MUL_MAT_ID(blk.31.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-31{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0485, 4.0485, 8.0485, ...], [24576.0488, 24580.0488, 24584.0488, ...], [49152.0469, 49156.0469, 49160.0469, ...], ], ] sum = 221220.421875 ggml_debug: ffn_moe_weights_norm-31 (view) = (f32) VIEW(ffn_moe_weights_norm-31{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2174], [ 16.2174], [ 32.2174], ], ] sum = 48.652279 ggml_debug: ffn_moe_weighted-31 = (f32) MUL(ffn_moe_down-31{6144, 3, 1, 1}, ffn_moe_weights_norm-31 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0105, 4.0105, 8.0105, ...], [24576.0098, 24580.0098, 24584.0098, ...], [49152.0117, 49156.0117, 49160.0117, ...], ], ] sum = 221220.109375 ggml_debug: ffn_moe_out-31 = (f32) ADD(ffn_moe_out-31{6144, 3, 1, 1}, ffn_moe_weighted-31{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0149, 4.0149, 8.0149, ...], [24576.0156, 24580.0156, 24584.0156, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: ffn_moe_up-31 = (f32) MUL_MAT_ID(blk.31.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-31{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.4278, 3.5722, 7.5722, ...], [43007.5703, 43011.5703, 43015.5703, ...], [86015.5703, 86019.5703, 86023.5703, ...], ], ] sum = 387104.125000 ggml_debug: ffn_moe_gate-31 = (f32) MUL_MAT_ID(blk.31.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-31{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0626, 3.9374, 7.9374, ...], [43007.9375, 43011.9375, 43015.9375, ...], [86015.9375, 86019.9375, 86023.9375, ...], ], ] sum = 387107.437500 ggml_debug: ffn_moe_silu-31 = (f32) UNARY(ffn_moe_gate-31{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0303, 3.9697, 7.9697, ...], [43007.9688, 43011.9688, 43015.9688, ...], [86015.9688, 86019.9688, 86023.9688, ...], ], ] sum = 387107.718750 ggml_debug: ffn_moe_gate_par-31 = (f32) MUL(ffn_moe_up-31{10752, 3, 1, 1}, ffn_moe_silu-31{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0130, 4.0130, 8.0130, ...], [43008.0117, 43012.0117, 43016.0117, ...], [86016.0156, 86020.0156, 86024.0156, ...], ], ] sum = 387108.125000 ggml_debug: ffn_moe_down-31 = (f32) MUL_MAT_ID(blk.31.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-31{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0384, 3.9616, 7.9616, ...], [24575.9609, 24579.9609, 24583.9609, ...], [49151.9609, 49155.9609, 49159.9609, ...], ], ] sum = 221219.656250 ggml_debug: ffn_moe_weights_norm-31 (view) = (f32) VIEW(ffn_moe_weights_norm-31{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1980], [ 16.1980], [ 32.1980], ], ] sum = 48.593906 ggml_debug: ffn_moe_weighted-31 = (f32) MUL(ffn_moe_down-31{6144, 3, 1, 1}, ffn_moe_weights_norm-31 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0076, 3.9924, 7.9924, ...], [24575.9922, 24579.9922, 24583.9922, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: ffn_moe_out-31 = (f32) ADD(ffn_moe_out-31{6144, 3, 1, 1}, ffn_moe_weighted-31{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0073, 4.0073, 8.0073, ...], [24576.0078, 24580.0078, 24584.0078, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: ffn_inp-31 = (f32) ADD(kqv_out-31{6144, 3, 1, 1}, l_out-30{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.8407, 2.1593, 6.1593, ...], [24574.1602, 24578.1602, 24582.1602, ...], [49150.1602, 49154.1602, 49158.1602, ...], ], ] sum = 221203.421875 ggml_debug: l_out-31 = (f32) ADD(ffn_moe_out-31{6144, 3, 1, 1}, ffn_inp-31{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.8334, 2.1666, 6.1666, ...], [24574.1660, 24578.1660, 24582.1660, ...], [49150.1680, 49154.1680, 49158.1680, ...], ], ] sum = 221203.515625 ggml_debug: norm-32 = (f32) NORM(l_out-31{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.8425, 3.1575, 7.1575, ...], [24575.1582, 24579.1582, 24583.1582, ...], [49151.1562, 49155.1562, 49159.1562, ...], ], ] sum = 221212.406250 ggml_debug: attn_norm-32 = (f32) MUL(norm-32{6144, 3, 1, 1}, blk.32.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2583, 3.7417, 7.7417, ...], [24575.7422, 24579.7422, 24583.7422, ...], [49151.7422, 49155.7422, 49159.7422, ...], ], ] sum = 221217.687500 ggml_debug: wqkv-32 = (f32) MUL_MAT(blk.32.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-32{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -1.7610, 2.2390, 6.2390, ...], [32766.2383, 32770.2383, 32774.2383, ...], [65534.2383, 65538.2422, 65542.2422, ...], ], ] sum = 294932.156250 ggml_debug: wqkv_clamped-32 = (f32) CLAMP(wqkv-32{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -1.7610, 2.2390, 6.2390, ...], [32766.2383, 32770.2383, 32774.2383, ...], [65534.2383, 65538.2422, 65542.2422, ...], ], ] sum = 294932.156250 ggml_debug: wqkv_clamped-32 (view) = (f32) VIEW(wqkv_clamped-32{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -1.7610, 2.2390, 6.2390, ...], [32766.2383, 32770.2383, 32774.2383, ...], [65534.2383, 65538.2422, 65542.2422, ...], ], ] sum = 294932.156250 ggml_debug: Qcur-32 = (f32) CONT(wqkv_clamped-32 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -1.7610, 2.2390, 6.2390, ...], [24574.2383, 24578.2383, 24582.2383, ...], [49150.2383, 49154.2383, 49158.2383, ...], ], ] sum = 221204.140625 ggml_debug: Qcur-32 (reshaped) = (f32) RESHAPE(Qcur-32{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -1.7610, 2.2390, 6.2390, ...], [510.2390, 514.2390, 518.2390, ...], [1022.2390, 1026.2390, 1030.2390, ...], ... ], [ [24574.2383, 24578.2383, 24582.2383, ...], [25086.2383, 25090.2383, 25094.2383, ...], [25598.2383, 25602.2383, 25606.2383, ...], ... ], [ [49150.2383, 49154.2383, 49158.2383, ...], [49662.2383, 49666.2383, 49670.2383, ...], [50174.2383, 50178.2383, 50182.2383, ...], ... ], ] sum = 677436.500000 ggml_debug: Qcur-32 = (f32) ROPE(Qcur-32 (reshaped){128, 48, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -1.7610, 2.2390, 6.2390, ...], [510.2390, 514.2390, 518.2390, ...], [1022.2390, 1026.2390, 1030.2390, ...], ... ], [ [24574.2383, 24578.2383, 24582.2383, ...], [25086.2383, 25090.2383, 25094.2383, ...], [25598.2383, 25602.2383, 25606.2383, ...], ... ], [ [49150.2383, 49154.2383, 49158.2383, ...], [49662.2383, 49666.2383, 49670.2383, ...], [50174.2383, 50178.2383, 50182.2383, ...], ... ], ] sum = 677436.500000 ggml_debug: wqkv_clamped-32 (view) = (f32) VIEW(wqkv_clamped-32{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 3.9973, 7.9973, 11.9973, ...], [32771.9961, 32775.9961, 32779.9961, ...], [65540.0000, 65544.0000, 65548.0000, ...], ], ] sum = 294984.000000 ggml_debug: Kcur-32 = (f32) CONT(wqkv_clamped-32 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 3.9973, 7.9973, 11.9973, ...], [4099.9976, 4103.9976, 4107.9976, ...], [8195.9971, 8199.9971, 8203.9971, ...], ], ] sum = 36935.972656 ggml_debug: Kcur-32 (reshaped) = (f32) RESHAPE(Kcur-32{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 3.9973, 7.9973, 11.9973, ...], [515.9973, 519.9973, 523.9973, ...], [1027.9973, 1031.9973, 1035.9973, ...], ... ], [ [4099.9976, 4103.9976, 4107.9976, ...], [4611.9976, 4615.9976, 4619.9976, ...], [5123.9976, 5127.9976, 5131.9976, ...], ... ], [ [8195.9971, 8199.9971, 8203.9971, ...], [8707.9971, 8711.9971, 8715.9971, ...], [9219.9971, 9223.9971, 9227.9971, ...], ... ], ] sum = 124631.945312 ggml_debug: Kcur-32 = (f32) ROPE(Kcur-32 (reshaped){128, 8, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 3.9973, 7.9973, 11.9973, ...], [515.9973, 519.9973, 523.9973, ...], [1027.9973, 1031.9973, 1035.9973, ...], ... ], [ [4099.9976, 4103.9976, 4107.9976, ...], [4611.9976, 4615.9976, 4619.9976, ...], [5123.9976, 5127.9976, 5131.9976, ...], ... ], [ [8195.9971, 8199.9971, 8203.9971, ...], [8707.9971, 8711.9971, 8715.9971, ...], [9219.9971, 9223.9971, 9227.9971, ...], ... ], ] sum = 124631.945312 ggml_debug: wqkv_clamped-32 (view) = (f32) VIEW(wqkv_clamped-32{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0170, 4.0170, 8.0170, ...], [32768.0156, 32772.0156, 32776.0156, ...], [65536.0156, 65540.0156, 65544.0156, ...], ], ] sum = 294948.125000 ggml_debug: Vcur-32 = (f32) CONT(wqkv_clamped-32 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.0170, 4.0170, 8.0170, ...], [4096.0171, 4100.0171, 4104.0171, ...], [8192.0166, 8196.0166, 8200.0166, ...], ], ] sum = 36900.152344 ggml_debug: k_cache_view-32 = (f16) VIEW(cache_k_l32{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-32 (copy of Kcur-32) = (f16) CPY(Kcur-32{128, 8, 3, 1}, k_cache_view-32{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 3.9980, 4.0039, 4.0117, ...], ], ] sum = 12.013672 ggml_debug: v_cur_t-32 = (f32) TRANSPOSE(Vcur-32{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0170, 4096.0171, 8192.0166], [ 4.0170, 4100.0171, 8196.0166], [ 8.0170, 4104.0171, 8200.0166], ... ], ] sum = 36900.152344 ggml_debug: v_cache_view-32 = (f16) VIEW(cache_v_l32{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-32 (copy of v_cur_t-32) = (f16) CPY(v_cur_t-32{3, 1024, 1, 1}, v_cache_view-32{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.0170, 0.0170, 0.0171], [ 0.0340, 0.0341, 0.0341], [ 0.0681, 0.0682, 0.0683], ... ], ] sum = 0.357925 ggml_debug: v-32 = (f16) VIEW(cache_v_l32{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.0170, 0.0170, 0.0171, ...], [ 0.0340, 0.0341, 0.0341, ...], [ 0.0681, 0.0682, 0.0683, ...], ... ], [ [ 0.0170, 0.0170, 0.0171, ...], [ 0.0340, 0.0341, 0.0341, ...], [ 0.0681, 0.0682, 0.0683, ...], ... ], [ [ 0.0170, 0.0170, 0.0171, ...], [ 0.0340, 0.0341, 0.0341, ...], [ 0.0681, 0.0682, 0.0683, ...], ... ], ... ] sum = 1.073776 ggml_debug: k-32 = (f16) VIEW(cache_k_l32{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 3.9980, 4.0039, 4.0117, ...], [ 15.9922, 16.0156, 16.0469, ...], [ 63.9688, 64.0625, 64.1875, ...], ... ], [ [ 4.9961, 5.0039, 5.0117, ...], [ 19.9844, 20.0156, 20.0469, ...], [ 79.9375, 80.0625, 80.1875, ...], ... ], [ [ 5.9961, 6.0039, 6.0117, ...], [ 23.9844, 24.0156, 24.0469, ...], [ 95.9375, 96.0625, 96.1875, ...], ... ], ... ] sum = 945.779297 ggml_debug: q-32 = (f32) PERMUTE(Qcur-32{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -1.7610, 2.2390, 6.2390, ...], [24574.2383, 24578.2383, 24582.2383, ...], [49150.2383, 49154.2383, 49158.2383, ...], ], [ [510.2390, 514.2390, 518.2390, ...], [25086.2383, 25090.2383, 25094.2383, ...], [49662.2383, 49666.2383, 49670.2383, ...], ], [ [1022.2390, 1026.2390, 1030.2390, ...], [25598.2383, 25602.2383, 25606.2383, ...], [50174.2383, 50178.2383, 50182.2383, ...], ], ... ] sum = 677436.562500 ggml_debug: kq-32 = (f32) MUL_MAT(k-32{128, 32, 8, 1}, q-32{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 4.6328, 8.6328, 12.6328, ...], [132.6328, 136.6328, 140.6328, ...], [260.6328, 264.6328, 268.6328, ...], ], [ [388.6328, 392.6328, 396.6328, ...], [516.6328, 520.6328, 524.6328, ...], [644.6328, 648.6328, 652.6328, ...], ], [ [772.6328, 776.6328, 780.6328, ...], [900.6328, 904.6328, 908.6328, ...], [1028.6328, 1032.6328, 1036.6328, ...], ], ... ] sum = 14057.085938 ggml_debug: kq_soft_max_ext-32 = (f32) SOFT_MAX(kq-32{32, 3, 48, 1}, CUDA2#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-32 = (f32) MUL_MAT(v-32{32, 128, 8, 1}, kq_soft_max_ext-32{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.0170, 4.0170, 8.0170, ...], [512.0170, 516.0170, 520.0170, ...], [1024.0170, 1028.0170, 1032.0170, ...], ], [ [1536.0170, 1540.0170, 1544.0170, ...], [2048.0171, 2052.0171, 2056.0171, ...], [2560.0171, 2564.0171, 2568.0171, ...], ], [ [3072.0171, 3076.0171, 3080.0171, ...], [3584.0171, 3588.0171, 3592.0171, ...], [4096.0171, 4100.0171, 4104.0171, ...], ], ... ] sum = 55404.457031 ggml_debug: kqv_merged-32 = (f32) PERMUTE(kqv-32{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0170, 4.0170, 8.0170, ...], [1536.0170, 1540.0170, 1544.0170, ...], [3072.0171, 3076.0171, 3080.0171, ...], ... ], [ [512.0170, 516.0170, 520.0170, ...], [2048.0171, 2052.0171, 2056.0171, ...], [3584.0171, 3588.0171, 3592.0171, ...], ... ], [ [1024.0170, 1028.0170, 1032.0170, ...], [2560.0171, 2564.0171, 2568.0171, ...], [4096.0171, 4100.0171, 4104.0171, ...], ... ], ] sum = 55404.449219 ggml_debug: kqv_merged_cont-32 = (f32) CONT(kqv_merged-32{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0170, 4.0170, 8.0170, ...], [24576.0176, 24580.0176, 24584.0176, ...], [49152.0156, 49156.0156, 49160.0156, ...], ], ] sum = 221220.140625 ggml_debug: kqv_out-32 = (f32) MUL_MAT(blk.32.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-32{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0108, 3.9892, 7.9892, ...], [24575.9883, 24579.9883, 24583.9883, ...], [49151.9883, 49155.9883, 49159.9883, ...], ], ] sum = 221219.890625 ggml_debug: norm-32 = (f32) NORM(kqv_out-32{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0285, 3.9715, 7.9715, ...], [24575.9707, 24579.9707, 24583.9707, ...], [49151.9727, 49155.9727, 49159.9727, ...], ], ] sum = 221219.734375 ggml_debug: attn_out_norm-32 = (f32) MUL(norm-32{6144, 3, 1, 1}, blk.32.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0155, 3.9845, 7.9845, ...], [24575.9844, 24579.9844, 24583.9844, ...], [49151.9844, 49155.9844, 49159.9844, ...], ], ] sum = 221219.859375 ggml_debug: ffn_moe_logits-32 = (f32) MUL_MAT(blk.32.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-32{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ 0.4119, 4.4119, 8.4119, ...], [ 64.4119, 68.4119, 72.4119, ...], [128.4119, 132.4119, 136.4119, ...], ], ] sum = 615.706787 ggml_debug: ffn_moe_probs-32 = (f32) SOFT_MAX(ffn_moe_logits-32{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0829, 4.0829, 8.0829, ...], [ 64.0829, 68.0829, 72.0829, ...], [128.0829, 132.0829, 136.0829, ...], ], ] sum = 612.745850 ggml_debug: ffn_moe_argsort-32 = (i32) ARGSORT(ffn_moe_probs-32{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 14.0000, 18.0000, 22.0000, ...], [ 78.0000, 82.0000, 86.0000, ...], [142.0000, 146.0000, 150.0000, ...], ], ] sum = 738.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-32{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 14.0000, 18.0000, 22.0000, ...], [ 78.0000, 82.0000, 86.0000, ...], [142.0000, 146.0000, 150.0000, ...], ], ] sum = 738.000000 ggml_debug: ffn_moe_up-32 = (f32) MUL_MAT_ID(blk.32.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-32{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0412, 4.0412, 8.0412, ...], [43008.0430, 43012.0430, 43016.0430, ...], [86016.0391, 86020.0391, 86024.0391, ...], ], ] sum = 387108.343750 ggml_debug: ffn_moe_gate-32 = (f32) MUL_MAT_ID(blk.32.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-32{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.7285, 4.7285, 8.7285, ...], [43008.7266, 43012.7266, 43016.7266, ...], [86016.7266, 86020.7266, 86024.7266, ...], ], ] sum = 387114.531250 ggml_debug: ffn_moe_silu-32 = (f32) UNARY(ffn_moe_gate-32{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.4913, 4.4913, 8.4913, ...], [43008.4922, 43012.4922, 43016.4922, ...], [86016.4922, 86020.4922, 86024.4922, ...], ], ] sum = 387112.437500 ggml_debug: ffn_moe_gate_par-32 = (f32) MUL(ffn_moe_up-32{10752, 3, 1, 1}, ffn_moe_silu-32{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0202, 4.0202, 8.0202, ...], [43008.0195, 43012.0195, 43016.0195, ...], [86016.0234, 86020.0234, 86024.0234, ...], ], ] sum = 387108.218750 ggml_debug: ffn_moe_down-32 = (f32) MUL_MAT_ID(blk.32.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-32{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2819, 3.7181, 7.7181, ...], [24575.7188, 24579.7188, 24583.7188, ...], [49151.7188, 49155.7188, 49159.7188, ...], ], ] sum = 221217.468750 ggml_debug: ffn_moe_probs-32 (reshaped) = (f32) RESHAPE(ffn_moe_probs-32{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0829], [ 4.0829], [ 8.0829], ... ], [ [ 64.0829], [ 68.0829], [ 72.0829], ... ], [ [128.0829], [132.0829], [136.0829], ... ], ] sum = 612.745850 ggml_debug: ffn_moe_weights-32 = (f32) GET_ROWS(ffn_moe_probs-32 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1549], [ 4.1549], [ 8.1549], ... ], [ [ 16.1549], [ 20.1549], [ 24.1549], ... ], [ [ 32.1549], [ 36.1549], [ 40.1549], ... ], ] sum = 181.393753 ggml_debug: ffn_moe_weights-32 (reshaped) = (f32) RESHAPE(ffn_moe_weights-32{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1549, 4.1549, 8.1549, ...], [ 16.1549, 20.1549, 24.1549, ...], [ 32.1549, 36.1549, 40.1549, ...], ], ] sum = 181.393753 ggml_debug: ffn_moe_weights_sum-32 = (f32) SUM_ROWS(ffn_moe_weights-32 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.4229], [ 4.4229], [ 8.4229], ], ] sum = 13.268730 ggml_debug: ffn_moe_weights_norm-32 = (f32) DIV(ffn_moe_weights-32 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-32{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3662, 4.3662, 8.3662, ...], [ 16.3662, 20.3662, 24.3662, ...], [ 32.3662, 36.3662, 40.3662, ...], ], ] sum = 183.295639 ggml_debug: ffn_moe_weights_norm-32 (view) = (f32) VIEW(ffn_moe_weights_norm-32{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3662], [ 16.3662], [ 32.3662], ], ] sum = 49.098549 ggml_debug: ffn_moe_weighted-32 = (f32) MUL(ffn_moe_down-32{6144, 3, 1, 1}, ffn_moe_weights_norm-32 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1032, 3.8968, 7.8968, ...], [24575.8965, 24579.8965, 24583.8965, ...], [49151.8984, 49155.8984, 49159.8984, ...], ], ] sum = 221219.093750 ggml_debug: ffn_moe_up-32 = (f32) MUL_MAT_ID(blk.32.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-32{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.9182, 4.9182, 8.9182, ...], [43008.9180, 43012.9180, 43016.9180, ...], [86016.9219, 86020.9219, 86024.9219, ...], ], ] sum = 387116.312500 ggml_debug: ffn_moe_gate-32 = (f32) MUL_MAT_ID(blk.32.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-32{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1760, 3.8240, 7.8240, ...], [43007.8242, 43011.8242, 43015.8242, ...], [86015.8203, 86019.8203, 86023.8203, ...], ], ] sum = 387106.406250 ggml_debug: ffn_moe_silu-32 = (f32) UNARY(ffn_moe_gate-32{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0803, 3.9197, 7.9197, ...], [43007.9180, 43011.9180, 43015.9180, ...], [86015.9219, 86019.9219, 86023.9219, ...], ], ] sum = 387107.312500 ggml_debug: ffn_moe_gate_par-32 = (f32) MUL(ffn_moe_up-32{10752, 3, 1, 1}, ffn_moe_silu-32{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0737, 3.9263, 7.9263, ...], [43007.9258, 43011.9258, 43015.9258, ...], [86015.9297, 86019.9297, 86023.9297, ...], ], ] sum = 387107.343750 ggml_debug: ffn_moe_down-32 = (f32) MUL_MAT_ID(blk.32.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-32{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1339, 4.1339, 8.1339, ...], [24576.1348, 24580.1348, 24584.1348, ...], [49152.1328, 49156.1328, 49160.1328, ...], ], ] sum = 221221.187500 ggml_debug: ffn_moe_weights_norm-32 (view) = (f32) VIEW(ffn_moe_weights_norm-32{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2354], [ 16.2354], [ 32.2354], ], ] sum = 48.706169 ggml_debug: ffn_moe_weighted-32 = (f32) MUL(ffn_moe_down-32{6144, 3, 1, 1}, ffn_moe_weights_norm-32 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0315, 4.0315, 8.0315, ...], [24576.0312, 24580.0312, 24584.0312, ...], [49152.0312, 49156.0312, 49160.0312, ...], ], ] sum = 221220.281250 ggml_debug: ffn_moe_out-32 = (f32) ADD(ffn_moe_weighted-32{6144, 3, 1, 1}, ffn_moe_weighted-32{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0717, 3.9283, 7.9283, ...], [24575.9277, 24579.9277, 24583.9277, ...], [49151.9297, 49155.9297, 49159.9297, ...], ], ] sum = 221219.375000 ggml_debug: ffn_moe_up-32 = (f32) MUL_MAT_ID(blk.32.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-32{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0803, 3.9197, 7.9197, ...], [43007.9180, 43011.9180, 43015.9180, ...], [86015.9219, 86019.9219, 86023.9219, ...], ], ] sum = 387107.312500 ggml_debug: ffn_moe_gate-32 = (f32) MUL_MAT_ID(blk.32.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-32{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2737, 3.7263, 7.7263, ...], [43007.7266, 43011.7266, 43015.7266, ...], [86015.7266, 86019.7266, 86023.7266, ...], ], ] sum = 387105.531250 ggml_debug: ffn_moe_silu-32 = (f32) UNARY(ffn_moe_gate-32{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1183, 3.8817, 7.8817, ...], [43007.8828, 43011.8828, 43015.8828, ...], [86015.8828, 86019.8828, 86023.8828, ...], ], ] sum = 387106.937500 ggml_debug: ffn_moe_gate_par-32 = (f32) MUL(ffn_moe_up-32{10752, 3, 1, 1}, ffn_moe_silu-32{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0095, 4.0095, 8.0095, ...], [43008.0078, 43012.0078, 43016.0078, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.062500 ggml_debug: ffn_moe_down-32 = (f32) MUL_MAT_ID(blk.32.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-32{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1278, 4.1278, 8.1278, ...], [24576.1270, 24580.1270, 24584.1270, ...], [49152.1289, 49156.1289, 49160.1289, ...], ], ] sum = 221221.140625 ggml_debug: ffn_moe_weights_norm-32 (view) = (f32) VIEW(ffn_moe_weights_norm-32{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2025], [ 16.2025], [ 32.2025], ], ] sum = 48.607418 ggml_debug: ffn_moe_weighted-32 = (f32) MUL(ffn_moe_down-32{6144, 3, 1, 1}, ffn_moe_weights_norm-32 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0259, 4.0259, 8.0259, ...], [24576.0254, 24580.0254, 24584.0254, ...], [49152.0273, 49156.0273, 49160.0273, ...], ], ] sum = 221220.250000 ggml_debug: ffn_moe_out-32 = (f32) ADD(ffn_moe_out-32{6144, 3, 1, 1}, ffn_moe_weighted-32{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0458, 3.9542, 7.9542, ...], [24575.9551, 24579.9551, 24583.9551, ...], [49151.9531, 49155.9531, 49159.9531, ...], ], ] sum = 221219.578125 ggml_debug: ffn_moe_up-32 = (f32) MUL_MAT_ID(blk.32.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-32{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.5856, 3.4144, 7.4144, ...], [43007.4141, 43011.4141, 43015.4141, ...], [86015.4141, 86019.4141, 86023.4141, ...], ], ] sum = 387102.718750 ggml_debug: ffn_moe_gate-32 = (f32) MUL_MAT_ID(blk.32.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-32{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.5742, 4.5742, 8.5742, ...], [43008.5742, 43012.5742, 43016.5742, ...], [86016.5781, 86020.5781, 86024.5781, ...], ], ] sum = 387113.187500 ggml_debug: ffn_moe_silu-32 = (f32) UNARY(ffn_moe_gate-32{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.3674, 4.3674, 8.3674, ...], [43008.3672, 43012.3672, 43016.3672, ...], [86016.3672, 86020.3672, 86024.3672, ...], ], ] sum = 387111.312500 ggml_debug: ffn_moe_gate_par-32 = (f32) MUL(ffn_moe_up-32{10752, 3, 1, 1}, ffn_moe_silu-32{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2151, 3.7849, 7.7849, ...], [43007.7852, 43011.7852, 43015.7852, ...], [86015.7812, 86019.7812, 86023.7812, ...], ], ] sum = 387106.031250 ggml_debug: ffn_moe_down-32 = (f32) MUL_MAT_ID(blk.32.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-32{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.8253, 4.8253, 8.8253, ...], [24576.8262, 24580.8262, 24584.8262, ...], [49152.8242, 49156.8242, 49160.8242, ...], ], ] sum = 221227.437500 ggml_debug: ffn_moe_weights_norm-32 (view) = (f32) VIEW(ffn_moe_weights_norm-32{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1960], [ 16.1960], [ 32.1960], ], ] sum = 48.587868 ggml_debug: ffn_moe_weighted-32 = (f32) MUL(ffn_moe_down-32{6144, 3, 1, 1}, ffn_moe_weights_norm-32 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1617, 4.1617, 8.1617, ...], [24576.1621, 24580.1621, 24584.1621, ...], [49152.1602, 49156.1602, 49160.1602, ...], ], ] sum = 221221.437500 ggml_debug: ffn_moe_out-32 = (f32) ADD(ffn_moe_out-32{6144, 3, 1, 1}, ffn_moe_weighted-32{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1159, 4.1159, 8.1159, ...], [24576.1152, 24580.1152, 24584.1152, ...], [49152.1172, 49156.1172, 49160.1172, ...], ], ] sum = 221221.062500 ggml_debug: ffn_inp-32 = (f32) ADD(kqv_out-32{6144, 3, 1, 1}, l_out-31{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.8443, 2.1557, 6.1557, ...], [24574.1562, 24578.1562, 24582.1562, ...], [49150.1562, 49154.1562, 49158.1562, ...], ], ] sum = 221203.406250 ggml_debug: l_out-32 = (f32) ADD(ffn_moe_out-32{6144, 3, 1, 1}, ffn_inp-32{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.7284, 2.2716, 6.2716, ...], [24574.2715, 24578.2715, 24582.2715, ...], [49150.2734, 49154.2734, 49158.2734, ...], ], ] sum = 221204.468750 ggml_debug: norm-33 = (f32) NORM(l_out-32{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.8158, 3.1842, 7.1842, ...], [24575.1836, 24579.1836, 24583.1836, ...], [49151.1836, 49155.1836, 49159.1836, ...], ], ] sum = 221212.671875 ggml_debug: attn_norm-33 = (f32) MUL(norm-33{6144, 3, 1, 1}, blk.33.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2326, 3.7674, 7.7674, ...], [24575.7676, 24579.7676, 24583.7676, ...], [49151.7656, 49155.7656, 49159.7656, ...], ], ] sum = 221217.890625 ggml_debug: wqkv-33 = (f32) MUL_MAT(blk.33.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-33{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -0.0671, 3.9329, 7.9329, ...], [32767.9336, 32771.9336, 32775.9336, ...], [65535.9336, 65539.9297, 65543.9297, ...], ], ] sum = 294947.406250 ggml_debug: wqkv_clamped-33 = (f32) CLAMP(wqkv-33{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -0.0671, 3.9329, 7.9329, ...], [32767.9336, 32771.9336, 32775.9336, ...], [65535.9336, 65539.9297, 65543.9297, ...], ], ] sum = 294947.406250 ggml_debug: wqkv_clamped-33 (view) = (f32) VIEW(wqkv_clamped-33{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0671, 3.9329, 7.9329, ...], [32767.9336, 32771.9336, 32775.9336, ...], [65535.9336, 65539.9297, 65543.9297, ...], ], ] sum = 294947.406250 ggml_debug: Qcur-33 = (f32) CONT(wqkv_clamped-33 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0671, 3.9329, 7.9329, ...], [24575.9336, 24579.9336, 24583.9336, ...], [49151.9336, 49155.9336, 49159.9336, ...], ], ] sum = 221219.406250 ggml_debug: Qcur-33 (reshaped) = (f32) RESHAPE(Qcur-33{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -0.0671, 3.9329, 7.9329, ...], [511.9330, 515.9329, 519.9329, ...], [1023.9329, 1027.9330, 1031.9330, ...], ... ], [ [24575.9336, 24579.9336, 24583.9336, ...], [25087.9336, 25091.9336, 25095.9336, ...], [25599.9336, 25603.9336, 25607.9336, ...], ... ], [ [49151.9336, 49155.9336, 49159.9336, ...], [49663.9336, 49667.9336, 49671.9336, ...], [50175.9336, 50179.9336, 50183.9336, ...], ... ], ] sum = 677482.250000 ggml_debug: Qcur-33 = (f32) ROPE(Qcur-33 (reshaped){128, 48, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -0.0671, 3.9329, 7.9329, ...], [511.9330, 515.9329, 519.9329, ...], [1023.9329, 1027.9330, 1031.9330, ...], ... ], [ [24575.9336, 24579.9336, 24583.9336, ...], [25087.9336, 25091.9336, 25095.9336, ...], [25599.9336, 25603.9336, 25607.9336, ...], ... ], [ [49151.9336, 49155.9336, 49159.9336, ...], [49663.9336, 49667.9336, 49671.9336, ...], [50175.9336, 50179.9336, 50183.9336, ...], ... ], ] sum = 677482.250000 ggml_debug: wqkv_clamped-33 (view) = (f32) VIEW(wqkv_clamped-33{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.2357, 3.7643, 7.7643, ...], [32767.7637, 32771.7656, 32775.7656, ...], [65535.7656, 65539.7656, 65543.7656, ...], ], ] sum = 294945.875000 ggml_debug: Kcur-33 = (f32) CONT(wqkv_clamped-33 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.2357, 3.7643, 7.7643, ...], [4095.7644, 4099.7642, 4103.7642, ...], [8191.7642, 8195.7646, 8199.7646, ...], ], ] sum = 36897.878906 ggml_debug: Kcur-33 (reshaped) = (f32) RESHAPE(Kcur-33{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ -0.2357, 3.7643, 7.7643, ...], [511.7643, 515.7643, 519.7643, ...], [1023.7643, 1027.7643, 1031.7643, ...], ... ], [ [4095.7644, 4099.7642, 4103.7642, ...], [4607.7642, 4611.7642, 4615.7642, ...], [5119.7642, 5123.7642, 5127.7642, ...], ... ], [ [8191.7642, 8195.7646, 8199.7646, ...], [8703.7646, 8707.7646, 8711.7646, ...], [9215.7646, 9219.7646, 9223.7646, ...], ... ], ] sum = 124517.648438 ggml_debug: Kcur-33 = (f32) ROPE(Kcur-33 (reshaped){128, 8, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ -0.2357, 3.7643, 7.7643, ...], [511.7643, 515.7643, 519.7643, ...], [1023.7643, 1027.7643, 1031.7643, ...], ... ], [ [4095.7644, 4099.7642, 4103.7642, ...], [4607.7642, 4611.7642, 4615.7642, ...], [5119.7642, 5123.7642, 5127.7642, ...], ... ], [ [8191.7642, 8195.7646, 8199.7646, ...], [8703.7646, 8707.7646, 8711.7646, ...], [9215.7646, 9219.7646, 9223.7646, ...], ... ], ] sum = 124517.648438 ggml_debug: wqkv_clamped-33 (view) = (f32) VIEW(wqkv_clamped-33{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.6489, 4.6489, 8.6489, ...], [32768.6484, 32772.6484, 32776.6484, ...], [65536.6484, 65540.6484, 65544.6484, ...], ], ] sum = 294953.843750 ggml_debug: Vcur-33 = (f32) CONT(wqkv_clamped-33 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.6489, 4.6489, 8.6489, ...], [4096.6489, 4100.6489, 4104.6489, ...], [8192.6484, 8196.6484, 8200.6484, ...], ], ] sum = 36905.835938 ggml_debug: k_cache_view-33 = (f16) VIEW(cache_k_l33{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-33 (copy of Kcur-33) = (f16) CPY(Kcur-33{128, 8, 3, 1}, k_cache_view-33{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ -0.2357, -0.2360, -0.2362, ...], ], ] sum = -0.707886 ggml_debug: v_cur_t-33 = (f32) TRANSPOSE(Vcur-33{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.6489, 4096.6489, 8192.6484], [ 4.6489, 4100.6489, 8196.6484], [ 8.6489, 4104.6489, 8200.6484], ... ], ] sum = 36905.835938 ggml_debug: v_cache_view-33 = (f16) VIEW(cache_v_l33{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-33 (copy of v_cur_t-33) = (f16) CPY(v_cur_t-33{3, 1024, 1, 1}, v_cache_view-33{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.6489, 0.6499, 0.6509], [ 1.2979, 1.2998, 1.3018], [ 2.5957, 2.5996, 2.6035], ... ], ] sum = 13.647949 ggml_debug: v-33 = (f16) VIEW(cache_v_l33{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.6489, 0.6499, 0.6509, ...], [ 1.2979, 1.2998, 1.3018, ...], [ 2.5957, 2.5996, 2.6035, ...], ... ], [ [ 0.6489, 0.6499, 0.6509, ...], [ 1.2979, 1.2998, 1.3018, ...], [ 2.5957, 2.5996, 2.6035, ...], ... ], [ [ 0.6489, 0.6499, 0.6509, ...], [ 1.2979, 1.2998, 1.3018, ...], [ 2.5957, 2.5996, 2.6035, ...], ... ], ... ] sum = 40.943848 ggml_debug: k-33 = (f16) VIEW(cache_k_l33{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ -0.2357, -0.2360, -0.2362, ...], [ -0.9429, -0.9438, -0.9448, ...], [ -3.7715, -3.7754, -3.7793, ...], ... ], [ [ -0.2839, -0.2844, -0.2849, ...], [ -1.1357, -1.1377, -1.1396, ...], [ -4.5430, -4.5508, -4.5586, ...], ... ], [ [ -0.3464, -0.3469, -0.3474, ...], [ -1.3857, -1.3877, -1.3896, ...], [ -5.5430, -5.5508, -5.5586, ...], ... ], ... ] sum = -54.640503 ggml_debug: q-33 = (f32) PERMUTE(Qcur-33{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -0.0671, 3.9329, 7.9329, ...], [24575.9336, 24579.9336, 24583.9336, ...], [49151.9336, 49155.9336, 49159.9336, ...], ], [ [511.9330, 515.9329, 519.9329, ...], [25087.9336, 25091.9336, 25095.9336, ...], [49663.9336, 49667.9336, 49671.9336, ...], ], [ [1023.9329, 1027.9330, 1031.9330, ...], [25599.9336, 25603.9336, 25607.9336, ...], [50175.9336, 50179.9336, 50183.9336, ...], ], ... ] sum = 677482.250000 ggml_debug: kq-33 = (f32) MUL_MAT(k-33{128, 32, 8, 1}, q-33{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ -8.7578, -4.7578, -0.7578, ...], [119.2422, 123.2422, 127.2422, ...], [247.2422, 251.2422, 255.2422, ...], ], [ [375.2422, 379.2422, 383.2422, ...], [503.2422, 507.2422, 511.2422, ...], [631.2422, 635.2422, 639.2422, ...], ], [ [759.2422, 763.2422, 767.2422, ...], [887.2422, 891.2422, 895.2422, ...], [1015.2422, 1019.2422, 1023.2422, ...], ], ... ] sum = 13695.539062 ggml_debug: kq_soft_max_ext-33 = (f32) SOFT_MAX(kq-33{32, 3, 48, 1}, CUDA2#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-33 = (f32) MUL_MAT(v-33{32, 128, 8, 1}, kq_soft_max_ext-33{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.6489, 4.6489, 8.6489, ...], [512.6489, 516.6489, 520.6489, ...], [1024.6489, 1028.6489, 1032.6489, ...], ], [ [1536.6489, 1540.6489, 1544.6489, ...], [2048.6489, 2052.6489, 2056.6489, ...], [2560.6489, 2564.6489, 2568.6489, ...], ], [ [3072.6489, 3076.6489, 3080.6489, ...], [3584.6489, 3588.6489, 3592.6489, ...], [4096.6489, 4100.6489, 4104.6489, ...], ], ... ] sum = 55421.515625 ggml_debug: kqv_merged-33 = (f32) PERMUTE(kqv-33{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.6489, 4.6489, 8.6489, ...], [1536.6489, 1540.6489, 1544.6489, ...], [3072.6489, 3076.6489, 3080.6489, ...], ... ], [ [512.6489, 516.6489, 520.6489, ...], [2048.6489, 2052.6489, 2056.6489, ...], [3584.6489, 3588.6489, 3592.6489, ...], ... ], [ [1024.6489, 1028.6489, 1032.6489, ...], [2560.6489, 2564.6489, 2568.6489, ...], [4096.6489, 4100.6489, 4104.6489, ...], ... ], ] sum = 55421.511719 ggml_debug: kqv_merged_cont-33 = (f32) CONT(kqv_merged-33{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.6489, 4.6489, 8.6489, ...], [24576.6484, 24580.6484, 24584.6484, ...], [49152.6484, 49156.6484, 49160.6484, ...], ], ] sum = 221225.843750 ggml_debug: kqv_out-33 = (f32) MUL_MAT(blk.33.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-33{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0520, 3.9480, 7.9480, ...], [24575.9473, 24579.9473, 24583.9473, ...], [49151.9492, 49155.9492, 49159.9492, ...], ], ] sum = 221219.546875 ggml_debug: norm-33 = (f32) NORM(kqv_out-33{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1209, 3.8791, 7.8791, ...], [24575.8789, 24579.8789, 24583.8789, ...], [49151.8789, 49155.8789, 49159.8789, ...], ], ] sum = 221218.906250 ggml_debug: attn_out_norm-33 = (f32) MUL(norm-33{6144, 3, 1, 1}, blk.33.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0680, 3.9320, 7.9320, ...], [24575.9316, 24579.9316, 24583.9316, ...], [49151.9336, 49155.9336, 49159.9336, ...], ], ] sum = 221219.406250 ggml_debug: ffn_moe_logits-33 = (f32) MUL_MAT(blk.33.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-33{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.2006, 3.7994, 7.7994, ...], [ 63.7994, 67.7994, 71.7994, ...], [127.7994, 131.7994, 135.7994, ...], ], ] sum = 610.194946 ggml_debug: ffn_moe_probs-33 = (f32) SOFT_MAX(ffn_moe_logits-33{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0430, 4.0430, 8.0430, ...], [ 64.0430, 68.0430, 72.0430, ...], [128.0430, 132.0430, 136.0430, ...], ], ] sum = 612.387207 ggml_debug: ffn_moe_argsort-33 = (i32) ARGSORT(ffn_moe_probs-33{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 4.0000, 8.0000, 12.0000, ...], [ 68.0000, 72.0000, 76.0000, ...], [132.0000, 136.0000, 140.0000, ...], ], ] sum = 648.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-33{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 4.0000, 8.0000, 12.0000, ...], [ 68.0000, 72.0000, 76.0000, ...], [132.0000, 136.0000, 140.0000, ...], ], ] sum = 648.000000 ggml_debug: ffn_moe_up-33 = (f32) MUL_MAT_ID(blk.33.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-33{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.9725, 3.0275, 7.0275, ...], [43007.0273, 43011.0273, 43015.0273, ...], [86015.0312, 86019.0312, 86023.0312, ...], ], ] sum = 387099.281250 ggml_debug: ffn_moe_gate-33 = (f32) MUL_MAT_ID(blk.33.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-33{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.0800, 2.9200, 6.9200, ...], [43006.9219, 43010.9219, 43014.9219, ...], [86014.9219, 86018.9219, 86022.9219, ...], ], ] sum = 387098.312500 ggml_debug: ffn_moe_silu-33 = (f32) UNARY(ffn_moe_gate-33{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.2738, 3.7262, 7.7262, ...], [43007.7266, 43011.7266, 43015.7266, ...], [86015.7266, 86019.7266, 86023.7266, ...], ], ] sum = 387105.531250 ggml_debug: ffn_moe_gate_par-33 = (f32) MUL(ffn_moe_up-33{10752, 3, 1, 1}, ffn_moe_silu-33{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2663, 4.2663, 8.2663, ...], [43008.2656, 43012.2656, 43016.2656, ...], [86016.2656, 86020.2656, 86024.2656, ...], ], ] sum = 387110.375000 ggml_debug: ffn_moe_down-33 = (f32) MUL_MAT_ID(blk.33.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-33{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 2.8461, 6.8461, 10.8461, ...], [24578.8457, 24582.8457, 24586.8457, ...], [49154.8477, 49158.8477, 49162.8477, ...], ], ] sum = 221245.609375 ggml_debug: ffn_moe_probs-33 (reshaped) = (f32) RESHAPE(ffn_moe_probs-33{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0430], [ 4.0430], [ 8.0430], ... ], [ [ 64.0430], [ 68.0430], [ 72.0430], ... ], [ [128.0430], [132.0430], [136.0430], ... ], ] sum = 612.387207 ggml_debug: ffn_moe_weights-33 = (f32) GET_ROWS(ffn_moe_probs-33 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1882], [ 4.1882], [ 8.1882], ... ], [ [ 16.1882], [ 20.1882], [ 24.1882], ... ], [ [ 32.1882], [ 36.1882], [ 40.1882], ... ], ] sum = 181.694214 ggml_debug: ffn_moe_weights-33 (reshaped) = (f32) RESHAPE(ffn_moe_weights-33{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1882, 4.1882, 8.1882, ...], [ 16.1882, 20.1882, 24.1882, ...], [ 32.1882, 36.1882, 40.1882, ...], ], ] sum = 181.694214 ggml_debug: ffn_moe_weights_sum-33 = (f32) SUM_ROWS(ffn_moe_weights-33 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.5079], [ 4.5079], [ 8.5079], ], ] sum = 13.523735 ggml_debug: ffn_moe_weights_norm-33 = (f32) DIV(ffn_moe_weights-33 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-33{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3706, 4.3706, 8.3706, ...], [ 16.3706, 20.3706, 24.3706, ...], [ 32.3706, 36.3706, 40.3706, ...], ], ] sum = 183.335617 ggml_debug: ffn_moe_weights_norm-33 (view) = (f32) VIEW(ffn_moe_weights_norm-33{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3706], [ 16.3706], [ 32.3706], ], ] sum = 49.111874 ggml_debug: ffn_moe_weighted-33 = (f32) MUL(ffn_moe_down-33{6144, 3, 1, 1}, ffn_moe_weights_norm-33 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 1.0549, 5.0549, 9.0549, ...], [24577.0547, 24581.0547, 24585.0547, ...], [49153.0547, 49157.0547, 49161.0547, ...], ], ] sum = 221229.500000 ggml_debug: ffn_moe_up-33 = (f32) MUL_MAT_ID(blk.33.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-33{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.5784, 3.4216, 7.4216, ...], [43007.4219, 43011.4219, 43015.4219, ...], [86015.4219, 86019.4219, 86023.4219, ...], ], ] sum = 387102.812500 ggml_debug: ffn_moe_gate-33 = (f32) MUL_MAT_ID(blk.33.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-33{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3844, 3.6156, 7.6156, ...], [43007.6172, 43011.6172, 43015.6172, ...], [86015.6172, 86019.6172, 86023.6172, ...], ], ] sum = 387104.562500 ggml_debug: ffn_moe_silu-33 = (f32) UNARY(ffn_moe_gate-33{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1557, 3.8443, 7.8443, ...], [43007.8438, 43011.8438, 43015.8438, ...], [86015.8438, 86019.8438, 86023.8438, ...], ], ] sum = 387106.593750 ggml_debug: ffn_moe_gate_par-33 = (f32) MUL(ffn_moe_up-33{10752, 3, 1, 1}, ffn_moe_silu-33{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0901, 4.0901, 8.0901, ...], [43008.0898, 43012.0898, 43016.0898, ...], [86016.0938, 86020.0938, 86024.0938, ...], ], ] sum = 387108.843750 ggml_debug: ffn_moe_down-33 = (f32) MUL_MAT_ID(blk.33.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-33{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.6003, 3.3997, 7.3997, ...], [24575.4004, 24579.4004, 24583.4004, ...], [49151.3984, 49155.3984, 49159.3984, ...], ], ] sum = 221214.593750 ggml_debug: ffn_moe_weights_norm-33 (view) = (f32) VIEW(ffn_moe_weights_norm-33{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2416], [ 16.2416], [ 32.2416], ], ] sum = 48.724926 ggml_debug: ffn_moe_weighted-33 = (f32) MUL(ffn_moe_down-33{6144, 3, 1, 1}, ffn_moe_weights_norm-33 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1451, 3.8549, 7.8549, ...], [24575.8555, 24579.8555, 24583.8555, ...], [49151.8555, 49155.8555, 49159.8555, ...], ], ] sum = 221218.703125 ggml_debug: ffn_moe_out-33 = (f32) ADD(ffn_moe_weighted-33{6144, 3, 1, 1}, ffn_moe_weighted-33{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.9098, 4.9098, 8.9098, ...], [24576.9102, 24580.9102, 24584.9102, ...], [49152.9102, 49156.9102, 49160.9102, ...], ], ] sum = 221228.171875 ggml_debug: ffn_moe_up-33 = (f32) MUL_MAT_ID(blk.33.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-33{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.5102, 4.5102, 8.5102, ...], [43008.5117, 43012.5117, 43016.5117, ...], [86016.5078, 86020.5078, 86024.5078, ...], ], ] sum = 387112.562500 ggml_debug: ffn_moe_gate-33 = (f32) MUL_MAT_ID(blk.33.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-33{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.9868, 3.0132, 7.0132, ...], [43007.0117, 43011.0117, 43015.0117, ...], [86015.0156, 86019.0156, 86023.0156, ...], ], ] sum = 387099.125000 ggml_debug: ffn_moe_silu-33 = (f32) UNARY(ffn_moe_gate-33{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.2680, 3.7320, 7.7320, ...], [43007.7305, 43011.7305, 43015.7305, ...], [86015.7344, 86019.7344, 86023.7344, ...], ], ] sum = 387105.625000 ggml_debug: ffn_moe_gate_par-33 = (f32) MUL(ffn_moe_up-33{10752, 3, 1, 1}, ffn_moe_silu-33{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1367, 3.8633, 7.8633, ...], [43007.8633, 43011.8633, 43015.8633, ...], [86015.8594, 86019.8594, 86023.8594, ...], ], ] sum = 387106.750000 ggml_debug: ffn_moe_down-33 = (f32) MUL_MAT_ID(blk.33.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-33{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 1.9556, 5.9556, 9.9556, ...], [24577.9551, 24581.9551, 24585.9551, ...], [49153.9570, 49157.9570, 49161.9570, ...], ], ] sum = 221237.593750 ggml_debug: ffn_moe_weights_norm-33 (view) = (f32) VIEW(ffn_moe_weights_norm-33{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2072], [ 16.2072], [ 32.2072], ], ] sum = 48.621578 ggml_debug: ffn_moe_weighted-33 = (f32) MUL(ffn_moe_down-33{6144, 3, 1, 1}, ffn_moe_weights_norm-33 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.4052, 4.4052, 8.4052, ...], [24576.4043, 24580.4043, 24584.4043, ...], [49152.4062, 49156.4062, 49160.4062, ...], ], ] sum = 221223.656250 ggml_debug: ffn_moe_out-33 = (f32) ADD(ffn_moe_out-33{6144, 3, 1, 1}, ffn_moe_weighted-33{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 1.3150, 5.3150, 9.3150, ...], [24577.3145, 24581.3145, 24585.3145, ...], [49153.3164, 49157.3164, 49161.3164, ...], ], ] sum = 221231.828125 ggml_debug: ffn_moe_up-33 = (f32) MUL_MAT_ID(blk.33.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-33{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0900, 4.0900, 8.0900, ...], [43008.0898, 43012.0898, 43016.0898, ...], [86016.0938, 86020.0938, 86024.0938, ...], ], ] sum = 387108.843750 ggml_debug: ffn_moe_gate-33 = (f32) MUL_MAT_ID(blk.33.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-33{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.4387, 4.4387, 8.4387, ...], [43008.4375, 43012.4375, 43016.4375, ...], [86016.4375, 86020.4375, 86024.4375, ...], ], ] sum = 387111.937500 ggml_debug: ffn_moe_silu-33 = (f32) UNARY(ffn_moe_gate-33{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.2667, 4.2667, 8.2667, ...], [43008.2656, 43012.2656, 43016.2656, ...], [86016.2656, 86020.2656, 86024.2656, ...], ], ] sum = 387110.375000 ggml_debug: ffn_moe_gate_par-33 = (f32) MUL(ffn_moe_up-33{10752, 3, 1, 1}, ffn_moe_silu-33{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0240, 4.0240, 8.0240, ...], [43008.0234, 43012.0234, 43016.0234, ...], [86016.0234, 86020.0234, 86024.0234, ...], ], ] sum = 387108.218750 ggml_debug: ffn_moe_down-33 = (f32) MUL_MAT_ID(blk.33.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-33{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0936, 3.9064, 7.9064, ...], [24575.9062, 24579.9062, 24583.9062, ...], [49151.9062, 49155.9062, 49159.9062, ...], ], ] sum = 221219.156250 ggml_debug: ffn_moe_weights_norm-33 (view) = (f32) VIEW(ffn_moe_weights_norm-33{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1805], [ 16.1805], [ 32.1805], ], ] sum = 48.541622 ggml_debug: ffn_moe_weighted-33 = (f32) MUL(ffn_moe_down-33{6144, 3, 1, 1}, ffn_moe_weights_norm-33 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0169, 3.9831, 7.9831, ...], [24575.9824, 24579.9824, 24583.9824, ...], [49151.9844, 49155.9844, 49159.9844, ...], ], ] sum = 221219.859375 ggml_debug: ffn_moe_out-33 = (f32) ADD(ffn_moe_out-33{6144, 3, 1, 1}, ffn_moe_weighted-33{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 1.2981, 5.2981, 9.2981, ...], [24577.2988, 24581.2988, 24585.2988, ...], [49153.2969, 49157.2969, 49161.2969, ...], ], ] sum = 221231.671875 ggml_debug: ffn_inp-33 = (f32) ADD(kqv_out-33{6144, 3, 1, 1}, l_out-32{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.7804, 2.2196, 6.2196, ...], [24574.2188, 24578.2188, 24582.2188, ...], [49150.2188, 49154.2188, 49158.2188, ...], ], ] sum = 221203.968750 ggml_debug: l_out-33 = (f32) ADD(ffn_moe_out-33{6144, 3, 1, 1}, ffn_inp-33{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.4823, 3.5177, 7.5177, ...], [24575.5176, 24579.5176, 24583.5176, ...], [49151.5195, 49155.5195, 49159.5195, ...], ], ] sum = 221215.656250 ggml_debug: norm-34 = (f32) NORM(l_out-33{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.2215, 3.7785, 7.7785, ...], [24575.7793, 24579.7793, 24583.7793, ...], [49151.7773, 49155.7773, 49159.7773, ...], ], ] sum = 221218.015625 ggml_debug: attn_norm-34 = (f32) MUL(norm-34{6144, 3, 1, 1}, blk.34.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0688, 3.9312, 7.9312, ...], [24575.9316, 24579.9316, 24583.9316, ...], [49151.9297, 49155.9297, 49159.9297, ...], ], ] sum = 221219.375000 ggml_debug: wqkv-34 = (f32) MUL_MAT(blk.34.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-34{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 1.5957, 5.5957, 9.5957, ...], [32769.5977, 32773.5977, 32777.5977, ...], [65537.5938, 65541.5938, 65545.5938, ...], ], ] sum = 294962.375000 ggml_debug: wqkv_clamped-34 = (f32) CLAMP(wqkv-34{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 1.5957, 5.5957, 9.5957, ...], [32769.5977, 32773.5977, 32777.5977, ...], [65537.5938, 65541.5938, 65545.5938, ...], ], ] sum = 294962.375000 ggml_debug: wqkv_clamped-34 (view) = (f32) VIEW(wqkv_clamped-34{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 1.5957, 5.5957, 9.5957, ...], [32769.5977, 32773.5977, 32777.5977, ...], [65537.5938, 65541.5938, 65545.5938, ...], ], ] sum = 294962.375000 ggml_debug: Qcur-34 = (f32) CONT(wqkv_clamped-34 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 1.5957, 5.5957, 9.5957, ...], [24577.5957, 24581.5957, 24585.5957, ...], [49153.5977, 49157.5977, 49161.5977, ...], ], ] sum = 221234.359375 ggml_debug: Qcur-34 (reshaped) = (f32) RESHAPE(Qcur-34{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 1.5957, 5.5957, 9.5957, ...], [513.5957, 517.5957, 521.5957, ...], [1025.5957, 1029.5957, 1033.5957, ...], ... ], [ [24577.5957, 24581.5957, 24585.5957, ...], [25089.5957, 25093.5957, 25097.5957, ...], [25601.5957, 25605.5957, 25609.5957, ...], ... ], [ [49153.5977, 49157.5977, 49161.5977, ...], [49665.5977, 49669.5977, 49673.5977, ...], [50177.5977, 50181.5977, 50185.5977, ...], ... ], ] sum = 677527.187500 ggml_debug: Qcur-34 = (f32) ROPE(Qcur-34 (reshaped){128, 48, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 1.5957, 5.5957, 9.5957, ...], [513.5957, 517.5957, 521.5957, ...], [1025.5957, 1029.5957, 1033.5957, ...], ... ], [ [24577.5957, 24581.5957, 24585.5957, ...], [25089.5957, 25093.5957, 25097.5957, ...], [25601.5957, 25605.5957, 25609.5957, ...], ... ], [ [49153.5977, 49157.5977, 49161.5977, ...], [49665.5977, 49669.5977, 49673.5977, ...], [50177.5977, 50181.5977, 50185.5977, ...], ... ], ] sum = 677527.187500 ggml_debug: wqkv_clamped-34 (view) = (f32) VIEW(wqkv_clamped-34{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 4.3143, 8.3143, 12.3143, ...], [32772.3125, 32776.3125, 32780.3125, ...], [65540.3125, 65544.3125, 65548.3125, ...], ], ] sum = 294986.812500 ggml_debug: Kcur-34 = (f32) CONT(wqkv_clamped-34 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 4.3143, 8.3143, 12.3143, ...], [4100.3145, 4104.3145, 4108.3145, ...], [8196.3145, 8200.3145, 8204.3145, ...], ], ] sum = 36938.828125 ggml_debug: Kcur-34 (reshaped) = (f32) RESHAPE(Kcur-34{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 4.3143, 8.3143, 12.3143, ...], [516.3143, 520.3143, 524.3143, ...], [1028.3143, 1032.3143, 1036.3143, ...], ... ], [ [4100.3145, 4104.3145, 4108.3145, ...], [4612.3145, 4616.3145, 4620.3145, ...], [5124.3145, 5128.3145, 5132.3145, ...], ... ], [ [8196.3145, 8200.3145, 8204.3145, ...], [8708.3145, 8712.3145, 8716.3145, ...], [9220.3145, 9224.3145, 9228.3145, ...], ... ], ] sum = 124640.468750 ggml_debug: Kcur-34 = (f32) ROPE(Kcur-34 (reshaped){128, 8, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 4.3143, 8.3143, 12.3143, ...], [516.3143, 520.3143, 524.3143, ...], [1028.3143, 1032.3143, 1036.3143, ...], ... ], [ [4100.3145, 4104.3145, 4108.3145, ...], [4612.3145, 4616.3145, 4620.3145, ...], [5124.3145, 5128.3145, 5132.3145, ...], ... ], [ [8196.3145, 8200.3145, 8204.3145, ...], [8708.3145, 8712.3145, 8716.3145, ...], [9220.3145, 9224.3145, 9228.3145, ...], ... ], ] sum = 124640.468750 ggml_debug: wqkv_clamped-34 (view) = (f32) VIEW(wqkv_clamped-34{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.9814, 4.9814, 8.9814, ...], [32768.9805, 32772.9805, 32776.9805, ...], [65536.9844, 65540.9844, 65544.9844, ...], ], ] sum = 294956.843750 ggml_debug: Vcur-34 = (f32) CONT(wqkv_clamped-34 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.9814, 4.9814, 8.9814, ...], [4096.9814, 4100.9814, 4104.9814, ...], [8192.9814, 8196.9814, 8200.9814, ...], ], ] sum = 36908.832031 ggml_debug: k_cache_view-34 = (f16) VIEW(cache_k_l34{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-34 (copy of Kcur-34) = (f16) CPY(Kcur-34{128, 8, 3, 1}, k_cache_view-34{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 4.3125, 4.3203, 4.3281, ...], ], ] sum = 12.960938 ggml_debug: v_cur_t-34 = (f32) TRANSPOSE(Vcur-34{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.9814, 4096.9814, 8192.9814], [ 4.9814, 4100.9814, 8196.9814], [ 8.9814, 4104.9814, 8200.9814], ... ], ] sum = 36908.828125 ggml_debug: v_cache_view-34 = (f16) VIEW(cache_v_l34{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-34 (copy of v_cur_t-34) = (f16) CPY(v_cur_t-34{3, 1024, 1, 1}, v_cache_view-34{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.9814, 0.9824, 0.9834], [ 1.9629, 1.9648, 1.9668], [ 3.9258, 3.9297, 3.9336], ... ], ] sum = 20.630859 ggml_debug: v-34 = (f16) VIEW(cache_v_l34{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.9814, 0.9824, 0.9834, ...], [ 1.9629, 1.9648, 1.9668, ...], [ 3.9258, 3.9297, 3.9336, ...], ... ], [ [ 0.9814, 0.9824, 0.9834, ...], [ 1.9629, 1.9648, 1.9668, ...], [ 3.9258, 3.9297, 3.9336, ...], ... ], [ [ 0.9814, 0.9824, 0.9834, ...], [ 1.9629, 1.9648, 1.9668, ...], [ 3.9258, 3.9297, 3.9336, ...], ... ], ... ] sum = 61.892578 ggml_debug: k-34 = (f16) VIEW(cache_k_l34{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 4.3125, 4.3203, 4.3281, ...], [ 17.2500, 17.2812, 17.3125, ...], [ 69.0000, 69.1250, 69.2500, ...], ... ], [ [ 5.3125, 5.3203, 5.3281, ...], [ 21.2500, 21.2812, 21.3125, ...], [ 85.0000, 85.1250, 85.2500, ...], ... ], [ [ 6.3125, 6.3203, 6.3281, ...], [ 25.2500, 25.2812, 25.3125, ...], [101.0000, 101.1250, 101.2500, ...], ... ], ... ] sum = 1005.539062 ggml_debug: q-34 = (f32) PERMUTE(Qcur-34{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 1.5957, 5.5957, 9.5957, ...], [24577.5957, 24581.5957, 24585.5957, ...], [49153.5977, 49157.5977, 49161.5977, ...], ], [ [513.5957, 517.5957, 521.5957, ...], [25089.5957, 25093.5957, 25097.5957, ...], [49665.5977, 49669.5977, 49673.5977, ...], ], [ [1025.5957, 1029.5957, 1033.5957, ...], [25601.5957, 25605.5957, 25609.5957, ...], [50177.5977, 50181.5977, 50185.5977, ...], ], ... ] sum = 677527.187500 ggml_debug: kq-34 = (f32) MUL_MAT(k-34{128, 32, 8, 1}, q-34{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 78.0000, 82.0000, 86.0000, ...], [206.0000, 210.0000, 214.0000, ...], [334.0000, 338.0000, 342.0000, ...], ], [ [462.0000, 466.0000, 470.0000, ...], [590.0000, 594.0000, 598.0000, ...], [718.0000, 722.0000, 726.0000, ...], ], [ [846.0000, 850.0000, 854.0000, ...], [974.0000, 978.0000, 982.0000, ...], [1102.0000, 1106.0000, 1110.0000, ...], ], ... ] sum = 16038.000000 ggml_debug: kq_soft_max_ext-34 = (f32) SOFT_MAX(kq-34{32, 3, 48, 1}, CUDA2#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-34 = (f32) MUL_MAT(v-34{32, 128, 8, 1}, kq_soft_max_ext-34{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.9814, 4.9814, 8.9814, ...], [512.9814, 516.9814, 520.9814, ...], [1024.9814, 1028.9814, 1032.9814, ...], ], [ [1536.9814, 1540.9814, 1544.9814, ...], [2048.9814, 2052.9814, 2056.9814, ...], [2560.9814, 2564.9814, 2568.9814, ...], ], [ [3072.9814, 3076.9814, 3080.9814, ...], [3584.9814, 3588.9814, 3592.9814, ...], [4096.9814, 4100.9814, 4104.9814, ...], ], ... ] sum = 55430.488281 ggml_debug: kqv_merged-34 = (f32) PERMUTE(kqv-34{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.9814, 4.9814, 8.9814, ...], [1536.9814, 1540.9814, 1544.9814, ...], [3072.9814, 3076.9814, 3080.9814, ...], ... ], [ [512.9814, 516.9814, 520.9814, ...], [2048.9814, 2052.9814, 2056.9814, ...], [3584.9814, 3588.9814, 3592.9814, ...], ... ], [ [1024.9814, 1028.9814, 1032.9814, ...], [2560.9814, 2564.9814, 2568.9814, ...], [4096.9814, 4100.9814, 4104.9814, ...], ... ], ] sum = 55430.484375 ggml_debug: kqv_merged_cont-34 = (f32) CONT(kqv_merged-34{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.9814, 4.9814, 8.9814, ...], [24576.9805, 24580.9805, 24584.9805, ...], [49152.9805, 49156.9805, 49160.9805, ...], ], ] sum = 221228.843750 ggml_debug: kqv_out-34 = (f32) MUL_MAT(blk.34.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-34{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1002, 4.1002, 8.1002, ...], [24576.0996, 24580.0996, 24584.0996, ...], [49152.1016, 49156.1016, 49160.1016, ...], ], ] sum = 221220.906250 ggml_debug: norm-34 = (f32) NORM(kqv_out-34{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.1661, 4.1661, 8.1661, ...], [24576.1660, 24580.1660, 24584.1660, ...], [49152.1680, 49156.1680, 49160.1680, ...], ], ] sum = 221221.500000 ggml_debug: attn_out_norm-34 = (f32) MUL(norm-34{6144, 3, 1, 1}, blk.34.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0973, 4.0973, 8.0973, ...], [24576.0977, 24580.0977, 24584.0977, ...], [49152.0977, 49156.0977, 49160.0977, ...], ], ] sum = 221220.875000 ggml_debug: ffn_moe_logits-34 = (f32) MUL_MAT(blk.34.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-34{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ 0.1993, 4.1993, 8.1993, ...], [ 64.1993, 68.1993, 72.1993, ...], [128.1993, 132.1993, 136.1993, ...], ], ] sum = 613.794067 ggml_debug: ffn_moe_probs-34 = (f32) SOFT_MAX(ffn_moe_logits-34{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0673, 4.0673, 8.0673, ...], [ 64.0673, 68.0673, 72.0673, ...], [128.0673, 132.0673, 136.0673, ...], ], ] sum = 612.605408 ggml_debug: ffn_moe_argsort-34 = (i32) ARGSORT(ffn_moe_probs-34{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 14.0000, 18.0000, 22.0000, ...], [ 78.0000, 82.0000, 86.0000, ...], [142.0000, 146.0000, 150.0000, ...], ], ] sum = 738.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-34{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 14.0000, 18.0000, 22.0000, ...], [ 78.0000, 82.0000, 86.0000, ...], [142.0000, 146.0000, 150.0000, ...], ], ] sum = 738.000000 ggml_debug: ffn_moe_up-34 = (f32) MUL_MAT_ID(blk.34.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-34{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.9474, 3.0526, 7.0526, ...], [43007.0508, 43011.0508, 43015.0508, ...], [86015.0547, 86019.0547, 86023.0547, ...], ], ] sum = 387099.500000 ggml_debug: ffn_moe_gate-34 = (f32) MUL_MAT_ID(blk.34.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-34{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.6284, 4.6284, 8.6284, ...], [43008.6289, 43012.6289, 43016.6289, ...], [86016.6250, 86020.6250, 86024.6250, ...], ], ] sum = 387113.625000 ggml_debug: ffn_moe_silu-34 = (f32) UNARY(ffn_moe_gate-34{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.4098, 4.4098, 8.4098, ...], [43008.4102, 43012.4102, 43016.4102, ...], [86016.4062, 86020.4062, 86024.4062, ...], ], ] sum = 387111.656250 ggml_debug: ffn_moe_gate_par-34 = (f32) MUL(ffn_moe_up-34{10752, 3, 1, 1}, ffn_moe_silu-34{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3883, 3.6117, 7.6117, ...], [43007.6133, 43011.6133, 43015.6133, ...], [86015.6094, 86019.6094, 86023.6094, ...], ], ] sum = 387104.500000 ggml_debug: ffn_moe_down-34 = (f32) MUL_MAT_ID(blk.34.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-34{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.3225, 4.3225, 8.3225, ...], [24576.3223, 24580.3223, 24584.3223, ...], [49152.3242, 49156.3242, 49160.3242, ...], ], ] sum = 221222.906250 ggml_debug: ffn_moe_probs-34 (reshaped) = (f32) RESHAPE(ffn_moe_probs-34{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0673], [ 4.0673], [ 8.0673], ... ], [ [ 64.0673], [ 68.0673], [ 72.0673], ... ], [ [128.0673], [132.0673], [136.0673], ... ], ] sum = 612.605408 ggml_debug: ffn_moe_weights-34 = (f32) GET_ROWS(ffn_moe_probs-34 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1726], [ 4.1726], [ 8.1726], ... ], [ [ 16.1726], [ 20.1726], [ 24.1726], ... ], [ [ 32.1726], [ 36.1726], [ 40.1726], ... ], ] sum = 181.553436 ggml_debug: ffn_moe_weights-34 (reshaped) = (f32) RESHAPE(ffn_moe_weights-34{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1726, 4.1726, 8.1726, ...], [ 16.1726, 20.1726, 24.1726, ...], [ 32.1726, 36.1726, 40.1726, ...], ], ] sum = 181.553436 ggml_debug: ffn_moe_weights_sum-34 = (f32) SUM_ROWS(ffn_moe_weights-34 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.4177], [ 4.4177], [ 8.4177], ], ] sum = 13.253238 ggml_debug: ffn_moe_weights_norm-34 = (f32) DIV(ffn_moe_weights-34 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-34{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.4132, 4.4132, 8.4132, ...], [ 16.4132, 20.4132, 24.4132, ...], [ 32.4132, 36.4132, 40.4132, ...], ], ] sum = 183.718597 ggml_debug: ffn_moe_weights_norm-34 (view) = (f32) VIEW(ffn_moe_weights_norm-34{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.4132], [ 16.4132], [ 32.4132], ], ] sum = 49.239536 ggml_debug: ffn_moe_weighted-34 = (f32) MUL(ffn_moe_down-34{6144, 3, 1, 1}, ffn_moe_weights_norm-34 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1333, 4.1333, 8.1333, ...], [24576.1328, 24580.1328, 24584.1328, ...], [49152.1328, 49156.1328, 49160.1328, ...], ], ] sum = 221221.187500 ggml_debug: ffn_moe_up-34 = (f32) MUL_MAT_ID(blk.34.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-34{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.9910, 2.0090, 6.0090, ...], [43006.0078, 43010.0078, 43014.0078, ...], [86014.0078, 86018.0078, 86022.0078, ...], ], ] sum = 387090.062500 ggml_debug: ffn_moe_gate-34 = (f32) MUL_MAT_ID(blk.34.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-34{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.1362, 4.1362, 8.1362, ...], [43008.1367, 43012.1367, 43016.1367, ...], [86016.1328, 86020.1328, 86024.1328, ...], ], ] sum = 387109.218750 ggml_debug: ffn_moe_silu-34 = (f32) UNARY(ffn_moe_gate-34{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0727, 4.0727, 8.0727, ...], [43008.0742, 43012.0742, 43016.0742, ...], [86016.0703, 86020.0703, 86024.0703, ...], ], ] sum = 387108.625000 ggml_debug: ffn_moe_gate_par-34 = (f32) MUL(ffn_moe_up-34{10752, 3, 1, 1}, ffn_moe_silu-34{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1448, 3.8552, 7.8552, ...], [43007.8555, 43011.8555, 43015.8555, ...], [86015.8516, 86019.8516, 86023.8516, ...], ], ] sum = 387106.687500 ggml_debug: ffn_moe_down-34 = (f32) MUL_MAT_ID(blk.34.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-34{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1135, 3.8865, 7.8865, ...], [24575.8867, 24579.8867, 24583.8867, ...], [49151.8867, 49155.8867, 49159.8867, ...], ], ] sum = 221218.984375 ggml_debug: ffn_moe_weights_norm-34 (view) = (f32) VIEW(ffn_moe_weights_norm-34{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2094], [ 16.2094], [ 32.2094], ], ] sum = 48.628166 ggml_debug: ffn_moe_weighted-34 = (f32) MUL(ffn_moe_down-34{6144, 3, 1, 1}, ffn_moe_weights_norm-34 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0238, 3.9762, 7.9762, ...], [24575.9766, 24579.9766, 24583.9766, ...], [49151.9766, 49155.9766, 49159.9766, ...], ], ] sum = 221219.781250 ggml_debug: ffn_moe_out-34 = (f32) ADD(ffn_moe_weighted-34{6144, 3, 1, 1}, ffn_moe_weighted-34{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1095, 4.1095, 8.1095, ...], [24576.1094, 24580.1094, 24584.1094, ...], [49152.1094, 49156.1094, 49160.1094, ...], ], ] sum = 221220.984375 ggml_debug: ffn_moe_up-34 = (f32) MUL_MAT_ID(blk.34.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-34{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.9265, 3.0735, 7.0735, ...], [43007.0742, 43011.0742, 43015.0742, ...], [86015.0703, 86019.0703, 86023.0703, ...], ], ] sum = 387099.625000 ggml_debug: ffn_moe_gate-34 = (f32) MUL_MAT_ID(blk.34.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-34{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0671, 4.0671, 8.0671, ...], [43008.0664, 43012.0664, 43016.0664, ...], [86016.0703, 86020.0703, 86024.0703, ...], ], ] sum = 387108.593750 ggml_debug: ffn_moe_silu-34 = (f32) UNARY(ffn_moe_gate-34{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0347, 4.0347, 8.0347, ...], [43008.0352, 43012.0352, 43016.0352, ...], [86016.0312, 86020.0312, 86024.0312, ...], ], ] sum = 387108.281250 ggml_debug: ffn_moe_gate_par-34 = (f32) MUL(ffn_moe_up-34{10752, 3, 1, 1}, ffn_moe_silu-34{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0321, 3.9679, 7.9679, ...], [43007.9688, 43011.9688, 43015.9688, ...], [86015.9688, 86019.9688, 86023.9688, ...], ], ] sum = 387107.718750 ggml_debug: ffn_moe_down-34 = (f32) MUL_MAT_ID(blk.34.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-34{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.5200, 3.4800, 7.4800, ...], [24575.4805, 24579.4805, 24583.4805, ...], [49151.4805, 49155.4805, 49159.4805, ...], ], ] sum = 221215.328125 ggml_debug: ffn_moe_weights_norm-34 (view) = (f32) VIEW(ffn_moe_weights_norm-34{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2014], [ 16.2014], [ 32.2014], ], ] sum = 48.604099 ggml_debug: ffn_moe_weighted-34 = (f32) MUL(ffn_moe_down-34{6144, 3, 1, 1}, ffn_moe_weights_norm-34 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.1047, 3.8953, 7.8953, ...], [24575.8945, 24579.8945, 24583.8945, ...], [49151.8945, 49155.8945, 49159.8945, ...], ], ] sum = 221219.046875 ggml_debug: ffn_moe_out-34 = (f32) ADD(ffn_moe_out-34{6144, 3, 1, 1}, ffn_moe_weighted-34{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0048, 4.0048, 8.0048, ...], [24576.0039, 24580.0039, 24584.0039, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.031250 ggml_debug: ffn_moe_up-34 = (f32) MUL_MAT_ID(blk.34.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-34{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3879, 3.6121, 7.6121, ...], [43007.6133, 43011.6133, 43015.6133, ...], [86015.6094, 86019.6094, 86023.6094, ...], ], ] sum = 387104.500000 ggml_debug: ffn_moe_gate-34 = (f32) MUL_MAT_ID(blk.34.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-34{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3283, 4.3283, 8.3283, ...], [43008.3281, 43012.3281, 43016.3281, ...], [86016.3281, 86020.3281, 86024.3281, ...], ], ] sum = 387110.937500 ggml_debug: ffn_moe_silu-34 = (f32) UNARY(ffn_moe_gate-34{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1908, 4.1908, 8.1908, ...], [43008.1914, 43012.1914, 43016.1914, ...], [86016.1875, 86020.1875, 86024.1875, ...], ], ] sum = 387109.687500 ggml_debug: ffn_moe_gate_par-34 = (f32) MUL(ffn_moe_up-34{10752, 3, 1, 1}, ffn_moe_silu-34{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0740, 3.9260, 7.9260, ...], [43007.9258, 43011.9258, 43015.9258, ...], [86015.9297, 86019.9297, 86023.9297, ...], ], ] sum = 387107.343750 ggml_debug: ffn_moe_down-34 = (f32) MUL_MAT_ID(blk.34.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-34{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.4179, 3.5821, 7.5821, ...], [24575.5820, 24579.5820, 24583.5820, ...], [49151.5820, 49155.5820, 49159.5820, ...], ], ] sum = 221216.234375 ggml_debug: ffn_moe_weights_norm-34 (view) = (f32) VIEW(ffn_moe_weights_norm-34{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1761], [ 16.1761], [ 32.1761], ], ] sum = 48.528194 ggml_debug: ffn_moe_weighted-34 = (f32) MUL(ffn_moe_down-34{6144, 3, 1, 1}, ffn_moe_weights_norm-34 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0736, 3.9264, 7.9264, ...], [24575.9258, 24579.9258, 24583.9258, ...], [49151.9258, 49155.9258, 49159.9258, ...], ], ] sum = 221219.328125 ggml_debug: ffn_moe_out-34 = (f32) ADD(ffn_moe_out-34{6144, 3, 1, 1}, ffn_moe_weighted-34{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0688, 3.9312, 7.9312, ...], [24575.9316, 24579.9316, 24583.9316, ...], [49151.9297, 49155.9297, 49159.9297, ...], ], ] sum = 221219.375000 ggml_debug: ffn_inp-34 = (f32) ADD(kqv_out-34{6144, 3, 1, 1}, l_out-33{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.3822, 3.6178, 7.6178, ...], [24575.6172, 24579.6172, 24583.6172, ...], [49151.6172, 49155.6172, 49159.6172, ...], ], ] sum = 221216.562500 ggml_debug: l_out-34 = (f32) ADD(ffn_moe_out-34{6144, 3, 1, 1}, ffn_inp-34{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.4509, 3.5491, 7.5491, ...], [24575.5488, 24579.5488, 24583.5488, ...], [49151.5508, 49155.5508, 49159.5508, ...], ], ] sum = 221215.937500 ggml_debug: norm-35 = (f32) NORM(l_out-34{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1991, 3.8009, 7.8009, ...], [24575.8008, 24579.8008, 24583.8008, ...], [49151.8008, 49155.8008, 49159.8008, ...], ], ] sum = 221218.203125 ggml_debug: attn_norm-35 = (f32) MUL(norm-35{6144, 3, 1, 1}, blk.35.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0548, 3.9452, 7.9452, ...], [24575.9453, 24579.9453, 24583.9453, ...], [49151.9453, 49155.9453, 49159.9453, ...], ], ] sum = 221219.500000 ggml_debug: wqkv-35 = (f32) MUL_MAT(blk.35.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-35{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.4697, 4.4697, 8.4697, ...], [32768.4688, 32772.4688, 32776.4688, ...], [65536.4688, 65540.4688, 65544.4688, ...], ], ] sum = 294952.218750 ggml_debug: wqkv_clamped-35 = (f32) CLAMP(wqkv-35{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.4697, 4.4697, 8.4697, ...], [32768.4688, 32772.4688, 32776.4688, ...], [65536.4688, 65540.4688, 65544.4688, ...], ], ] sum = 294952.218750 ggml_debug: wqkv_clamped-35 (view) = (f32) VIEW(wqkv_clamped-35{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.4697, 4.4697, 8.4697, ...], [32768.4688, 32772.4688, 32776.4688, ...], [65536.4688, 65540.4688, 65544.4688, ...], ], ] sum = 294952.218750 ggml_debug: Qcur-35 = (f32) CONT(wqkv_clamped-35 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.4697, 4.4697, 8.4697, ...], [24576.4707, 24580.4707, 24584.4707, ...], [49152.4688, 49156.4688, 49160.4688, ...], ], ] sum = 221224.218750 ggml_debug: Qcur-35 (reshaped) = (f32) RESHAPE(Qcur-35{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.4697, 4.4697, 8.4697, ...], [512.4697, 516.4697, 520.4697, ...], [1024.4697, 1028.4697, 1032.4697, ...], ... ], [ [24576.4707, 24580.4707, 24584.4707, ...], [25088.4707, 25092.4707, 25096.4707, ...], [25600.4707, 25604.4707, 25608.4707, ...], ... ], [ [49152.4688, 49156.4688, 49160.4688, ...], [49664.4688, 49668.4688, 49672.4688, ...], [50176.4688, 50180.4688, 50184.4688, ...], ... ], ] sum = 677496.750000 ggml_debug: Qcur-35 = (f32) ROPE(Qcur-35 (reshaped){128, 48, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.4697, 4.4697, 8.4697, ...], [512.4697, 516.4697, 520.4697, ...], [1024.4697, 1028.4697, 1032.4697, ...], ... ], [ [24576.4707, 24580.4707, 24584.4707, ...], [25088.4707, 25092.4707, 25096.4707, ...], [25600.4707, 25604.4707, 25608.4707, ...], ... ], [ [49152.4688, 49156.4688, 49160.4688, ...], [49664.4688, 49668.4688, 49672.4688, ...], [50176.4688, 50180.4688, 50184.4688, ...], ... ], ] sum = 677496.750000 ggml_debug: wqkv_clamped-35 (view) = (f32) VIEW(wqkv_clamped-35{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 2.7786, 6.7786, 10.7786, ...], [32770.7773, 32774.7773, 32778.7773, ...], [65538.7812, 65542.7812, 65546.7812, ...], ], ] sum = 294973.000000 ggml_debug: Kcur-35 = (f32) CONT(wqkv_clamped-35 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 2.7786, 6.7786, 10.7786, ...], [4098.7788, 4102.7788, 4106.7788, ...], [8194.7783, 8198.7783, 8202.7783, ...], ], ] sum = 36925.003906 ggml_debug: Kcur-35 (reshaped) = (f32) RESHAPE(Kcur-35{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 2.7786, 6.7786, 10.7786, ...], [514.7786, 518.7786, 522.7786, ...], [1026.7787, 1030.7787, 1034.7787, ...], ... ], [ [4098.7788, 4102.7788, 4106.7788, ...], [4610.7788, 4614.7788, 4618.7788, ...], [5122.7788, 5126.7788, 5130.7788, ...], ... ], [ [8194.7783, 8198.7783, 8202.7783, ...], [8706.7783, 8710.7783, 8714.7783, ...], [9218.7783, 9222.7783, 9226.7783, ...], ... ], ] sum = 124599.039062 ggml_debug: Kcur-35 = (f32) ROPE(Kcur-35 (reshaped){128, 8, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 2.7786, 6.7786, 10.7786, ...], [514.7786, 518.7786, 522.7786, ...], [1026.7787, 1030.7787, 1034.7787, ...], ... ], [ [4098.7788, 4102.7788, 4106.7788, ...], [4610.7788, 4614.7788, 4618.7788, ...], [5122.7788, 5126.7788, 5130.7788, ...], ... ], [ [8194.7783, 8198.7783, 8202.7783, ...], [8706.7783, 8710.7783, 8714.7783, ...], [9218.7783, 9222.7783, 9226.7783, ...], ... ], ] sum = 124599.039062 ggml_debug: wqkv_clamped-35 (view) = (f32) VIEW(wqkv_clamped-35{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.2054, 4.2054, 8.2054, ...], [32768.2070, 32772.2070, 32776.2070, ...], [65536.2031, 65540.2031, 65544.2031, ...], ], ] sum = 294949.843750 ggml_debug: Vcur-35 = (f32) CONT(wqkv_clamped-35 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.2054, 4.2054, 8.2054, ...], [4096.2056, 4100.2056, 4104.2056, ...], [8192.2051, 8196.2051, 8200.2051, ...], ], ] sum = 36901.847656 ggml_debug: k_cache_view-35 = (f16) VIEW(cache_k_l35{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-35 (copy of Kcur-35) = (f16) CPY(Kcur-35{128, 8, 3, 1}, k_cache_view-35{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 2.7793, 2.7832, 2.7871, ...], ], ] sum = 8.349609 ggml_debug: v_cur_t-35 = (f32) TRANSPOSE(Vcur-35{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.2054, 4096.2056, 8192.2051], [ 4.2054, 4100.2056, 8196.2051], [ 8.2054, 4104.2056, 8200.2051], ... ], ] sum = 36901.847656 ggml_debug: v_cache_view-35 = (f16) VIEW(cache_v_l35{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-35 (copy of v_cur_t-35) = (f16) CPY(v_cur_t-35{3, 1024, 1, 1}, v_cache_view-35{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.2054, 0.2057, 0.2059], [ 0.4109, 0.4114, 0.4119], [ 0.8218, 0.8228, 0.8237], ... ], ] sum = 4.319458 ggml_debug: v-35 = (f16) VIEW(cache_v_l35{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.2054, 0.2057, 0.2059, ...], [ 0.4109, 0.4114, 0.4119, ...], [ 0.8218, 0.8228, 0.8237, ...], ... ], [ [ 0.2054, 0.2057, 0.2059, ...], [ 0.4109, 0.4114, 0.4119, ...], [ 0.8218, 0.8228, 0.8237, ...], ... ], [ [ 0.2054, 0.2057, 0.2059, ...], [ 0.4109, 0.4114, 0.4119, ...], [ 0.8218, 0.8228, 0.8237, ...], ... ], ... ] sum = 12.958374 ggml_debug: k-35 = (f16) VIEW(cache_k_l35{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 2.7793, 2.7832, 2.7871, ...], [ 11.1172, 11.1328, 11.1484, ...], [ 44.4688, 44.5312, 44.5938, ...], ... ], [ [ 3.2793, 3.2832, 3.2871, ...], [ 13.1172, 13.1328, 13.1484, ...], [ 52.4688, 52.5312, 52.5938, ...], ... ], [ [ 3.7793, 3.7832, 3.7871, ...], [ 15.1172, 15.1328, 15.1484, ...], [ 60.4688, 60.5312, 60.5938, ...], ... ], ... ] sum = 620.525391 ggml_debug: q-35 = (f32) PERMUTE(Qcur-35{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.4697, 4.4697, 8.4697, ...], [24576.4707, 24580.4707, 24584.4707, ...], [49152.4688, 49156.4688, 49160.4688, ...], ], [ [512.4697, 516.4697, 520.4697, ...], [25088.4707, 25092.4707, 25096.4707, ...], [49664.4688, 49668.4688, 49672.4688, ...], ], [ [1024.4697, 1028.4697, 1032.4697, ...], [25600.4707, 25604.4707, 25608.4707, ...], [50176.4688, 50180.4688, 50184.4688, ...], ], ... ] sum = 677496.750000 ggml_debug: kq-35 = (f32) MUL_MAT(k-35{128, 32, 8, 1}, q-35{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ -1.2432, 2.7568, 6.7568, ...], [126.7568, 130.7568, 134.7568, ...], [254.7568, 258.7568, 262.7568, ...], ], [ [382.7568, 386.7568, 390.7568, ...], [510.7568, 514.7568, 518.7568, ...], [638.7568, 642.7568, 646.7568, ...], ], [ [766.7568, 770.7568, 774.7568, ...], [894.7568, 898.7568, 902.7568, ...], [1022.7568, 1026.7568, 1030.7568, ...], ], ... ] sum = 13898.434570 ggml_debug: kq_soft_max_ext-35 = (f32) SOFT_MAX(kq-35{32, 3, 48, 1}, CUDA2#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-35 = (f32) MUL_MAT(v-35{32, 128, 8, 1}, kq_soft_max_ext-35{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.2054, 4.2054, 8.2054, ...], [512.2054, 516.2054, 520.2054, ...], [1024.2054, 1028.2054, 1032.2054, ...], ], [ [1536.2054, 1540.2054, 1544.2054, ...], [2048.2056, 2052.2056, 2056.2056, ...], [2560.2056, 2564.2056, 2568.2056, ...], ], [ [3072.2056, 3076.2056, 3080.2056, ...], [3584.2056, 3588.2056, 3592.2056, ...], [4096.2056, 4100.2056, 4104.2056, ...], ], ... ] sum = 55409.554688 ggml_debug: kqv_merged-35 = (f32) PERMUTE(kqv-35{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.2054, 4.2054, 8.2054, ...], [1536.2054, 1540.2054, 1544.2054, ...], [3072.2056, 3076.2056, 3080.2056, ...], ... ], [ [512.2054, 516.2054, 520.2054, ...], [2048.2056, 2052.2056, 2056.2056, ...], [3584.2056, 3588.2056, 3592.2056, ...], ... ], [ [1024.2054, 1028.2054, 1032.2054, ...], [2560.2056, 2564.2056, 2568.2056, ...], [4096.2056, 4100.2056, 4104.2056, ...], ... ], ] sum = 55409.558594 ggml_debug: kqv_merged_cont-35 = (f32) CONT(kqv_merged-35{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.2054, 4.2054, 8.2054, ...], [24576.2051, 24580.2051, 24584.2051, ...], [49152.2070, 49156.2070, 49160.2070, ...], ], ] sum = 221221.843750 ggml_debug: kqv_out-35 = (f32) MUL_MAT(blk.35.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-35{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2314, 3.7686, 7.7686, ...], [24575.7695, 24579.7695, 24583.7695, ...], [49151.7695, 49155.7695, 49159.7695, ...], ], ] sum = 221217.906250 ggml_debug: norm-35 = (f32) NORM(kqv_out-35{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.3054, 3.6946, 7.6946, ...], [24575.6953, 24579.6953, 24583.6953, ...], [49151.6953, 49155.6953, 49159.6953, ...], ], ] sum = 221217.250000 ggml_debug: attn_out_norm-35 = (f32) MUL(norm-35{6144, 3, 1, 1}, blk.35.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2172, 3.7828, 7.7828, ...], [24575.7832, 24579.7832, 24583.7832, ...], [49151.7812, 49155.7812, 49159.7812, ...], ], ] sum = 221218.031250 ggml_debug: ffn_moe_logits-35 = (f32) MUL_MAT(blk.35.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-35{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ 0.7500, 4.7500, 8.7500, ...], [ 64.7500, 68.7500, 72.7500, ...], [128.7500, 132.7500, 136.7500, ...], ], ] sum = 618.750000 ggml_debug: ffn_moe_probs-35 = (f32) SOFT_MAX(ffn_moe_logits-35{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.1048, 4.1048, 8.1048, ...], [ 64.1048, 68.1048, 72.1048, ...], [128.1048, 132.1048, 136.1048, ...], ], ] sum = 612.942932 ggml_debug: ffn_moe_argsort-35 = (i32) ARGSORT(ffn_moe_probs-35{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0000, 4.0000, 8.0000, ...], [ 64.0000, 68.0000, 72.0000, ...], [128.0000, 132.0000, 136.0000, ...], ], ] sum = 612.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-35{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0000, 4.0000, 8.0000, ...], [ 64.0000, 68.0000, 72.0000, ...], [128.0000, 132.0000, 136.0000, ...], ], ] sum = 612.000000 ggml_debug: ffn_moe_up-35 = (f32) MUL_MAT_ID(blk.35.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-35{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -3.1376, 0.8624, 4.8624, ...], [43004.8633, 43008.8633, 43012.8633, ...], [86012.8594, 86016.8594, 86020.8594, ...], ], ] sum = 387079.750000 ggml_debug: ffn_moe_gate-35 = (f32) MUL_MAT_ID(blk.35.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-35{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.8724, 4.8724, 8.8724, ...], [43008.8711, 43012.8711, 43016.8711, ...], [86016.8750, 86020.8750, 86024.8750, ...], ], ] sum = 387115.875000 ggml_debug: ffn_moe_silu-35 = (f32) UNARY(ffn_moe_gate-35{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.6153, 4.6153, 8.6153, ...], [43008.6172, 43012.6172, 43016.6172, ...], [86016.6172, 86020.6172, 86024.6172, ...], ], ] sum = 387113.562500 ggml_debug: ffn_moe_gate_par-35 = (f32) MUL(ffn_moe_up-35{10752, 3, 1, 1}, ffn_moe_silu-35{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.9304, 2.0696, 6.0696, ...], [43006.0703, 43010.0703, 43014.0703, ...], [86014.0703, 86018.0703, 86022.0703, ...], ], ] sum = 387090.625000 ggml_debug: ffn_moe_down-35 = (f32) MUL_MAT_ID(blk.35.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-35{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 2.2550, 6.2550, 10.2550, ...], [24578.2559, 24582.2559, 24586.2559, ...], [49154.2539, 49158.2539, 49162.2539, ...], ], ] sum = 221240.281250 ggml_debug: ffn_moe_probs-35 (reshaped) = (f32) RESHAPE(ffn_moe_probs-35{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.1048], [ 4.1048], [ 8.1048], ... ], [ [ 64.1048], [ 68.1048], [ 72.1048], ... ], [ [128.1048], [132.1048], [136.1048], ... ], ] sum = 612.942932 ggml_debug: ffn_moe_weights-35 = (f32) GET_ROWS(ffn_moe_probs-35 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1048], [ 4.1048], [ 8.1048], ... ], [ [ 16.1048], [ 20.1048], [ 24.1048], ... ], [ [ 32.1048], [ 36.1048], [ 40.1048], ... ], ] sum = 180.942963 ggml_debug: ffn_moe_weights-35 (reshaped) = (f32) RESHAPE(ffn_moe_weights-35{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1048, 4.1048, 8.1048, ...], [ 16.1048, 20.1048, 24.1048, ...], [ 32.1048, 36.1048, 40.1048, ...], ], ] sum = 180.942963 ggml_debug: ffn_moe_weights_sum-35 = (f32) SUM_ROWS(ffn_moe_weights-35 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3799], [ 4.3799], [ 8.3799], ], ] sum = 13.139795 ggml_debug: ffn_moe_weights_norm-35 = (f32) DIV(ffn_moe_weights-35 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-35{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2758, 4.2758, 8.2758, ...], [ 16.2758, 20.2758, 24.2758, ...], [ 32.2758, 36.2758, 40.2758, ...], ], ] sum = 182.481949 ggml_debug: ffn_moe_weights_norm-35 (view) = (f32) VIEW(ffn_moe_weights_norm-35{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2758], [ 16.2758], [ 32.2758], ], ] sum = 48.827316 ggml_debug: ffn_moe_weighted-35 = (f32) MUL(ffn_moe_down-35{6144, 3, 1, 1}, ffn_moe_weights_norm-35 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.6219, 4.6219, 8.6219, ...], [24576.6211, 24580.6211, 24584.6211, ...], [49152.6211, 49156.6211, 49160.6211, ...], ], ] sum = 221225.609375 ggml_debug: ffn_moe_up-35 = (f32) MUL_MAT_ID(blk.35.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-35{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -2.2447, 1.7553, 5.7553, ...], [43005.7539, 43009.7539, 43013.7539, ...], [86013.7578, 86017.7578, 86021.7578, ...], ], ] sum = 387087.781250 ggml_debug: ffn_moe_gate-35 = (f32) MUL_MAT_ID(blk.35.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-35{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 1.1576, 5.1576, 9.1576, ...], [43009.1562, 43013.1562, 43017.1562, ...], [86017.1562, 86021.1562, 86025.1562, ...], ], ] sum = 387118.406250 ggml_debug: ffn_moe_silu-35 = (f32) UNARY(ffn_moe_gate-35{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.8808, 4.8808, 8.8808, ...], [43008.8789, 43012.8789, 43016.8789, ...], [86016.8828, 86020.8828, 86024.8828, ...], ], ] sum = 387115.906250 ggml_debug: ffn_moe_gate_par-35 = (f32) MUL(ffn_moe_up-35{10752, 3, 1, 1}, ffn_moe_silu-35{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.9771, 2.0229, 6.0229, ...], [43006.0234, 43010.0234, 43014.0234, ...], [86014.0234, 86018.0234, 86022.0234, ...], ], ] sum = 387090.218750 ggml_debug: ffn_moe_down-35 = (f32) MUL_MAT_ID(blk.35.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-35{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 3.4764, 7.4764, 11.4764, ...], [24579.4766, 24583.4766, 24587.4766, ...], [49155.4766, 49159.4766, 49163.4766, ...], ], ] sum = 221251.281250 ggml_debug: ffn_moe_weights_norm-35 (view) = (f32) VIEW(ffn_moe_weights_norm-35{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2657], [ 16.2657], [ 32.2657], ], ] sum = 48.797176 ggml_debug: ffn_moe_weighted-35 = (f32) MUL(ffn_moe_down-35{6144, 3, 1, 1}, ffn_moe_weights_norm-35 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.9238, 4.9238, 8.9238, ...], [24576.9238, 24580.9238, 24584.9238, ...], [49152.9219, 49156.9219, 49160.9219, ...], ], ] sum = 221228.296875 ggml_debug: ffn_moe_out-35 = (f32) ADD(ffn_moe_weighted-35{6144, 3, 1, 1}, ffn_moe_weighted-35{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 1.5456, 5.5456, 9.5456, ...], [24577.5449, 24581.5449, 24585.5449, ...], [49153.5469, 49157.5469, 49161.5469, ...], ], ] sum = 221233.921875 ggml_debug: ffn_moe_up-35 = (f32) MUL_MAT_ID(blk.35.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-35{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -2.6838, 1.3162, 5.3162, ...], [43005.3164, 43009.3164, 43013.3164, ...], [86013.3125, 86017.3125, 86021.3125, ...], ], ] sum = 387083.812500 ggml_debug: ffn_moe_gate-35 = (f32) MUL_MAT_ID(blk.35.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-35{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -2.6345, 1.3655, 5.3655, ...], [43005.3672, 43009.3672, 43013.3672, ...], [86013.3672, 86017.3672, 86021.3672, ...], ], ] sum = 387084.312500 ggml_debug: ffn_moe_silu-35 = (f32) UNARY(ffn_moe_gate-35{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1764, 3.8236, 7.8236, ...], [43007.8242, 43011.8242, 43015.8242, ...], [86015.8203, 86019.8203, 86023.8203, ...], ], ] sum = 387106.406250 ggml_debug: ffn_moe_gate_par-35 = (f32) MUL(ffn_moe_up-35{10752, 3, 1, 1}, ffn_moe_silu-35{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.4734, 4.4734, 8.4734, ...], [43008.4727, 43012.4727, 43016.4727, ...], [86016.4766, 86020.4766, 86024.4766, ...], ], ] sum = 387112.250000 ggml_debug: ffn_moe_down-35 = (f32) MUL_MAT_ID(blk.35.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-35{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 6.5578, 10.5578, 14.5578, ...], [24582.5586, 24586.5586, 24590.5586, ...], [49158.5586, 49162.5586, 49166.5586, ...], ], ] sum = 221279.031250 ggml_debug: ffn_moe_weights_norm-35 (view) = (f32) VIEW(ffn_moe_weights_norm-35{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2318], [ 16.2318], [ 32.2318], ], ] sum = 48.695309 ggml_debug: ffn_moe_weighted-35 = (f32) MUL(ffn_moe_down-35{6144, 3, 1, 1}, ffn_moe_weights_norm-35 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 1.5199, 5.5199, 9.5199, ...], [24577.5195, 24581.5195, 24585.5195, ...], [49153.5195, 49157.5195, 49161.5195, ...], ], ] sum = 221233.671875 ggml_debug: ffn_moe_out-35 = (f32) ADD(ffn_moe_out-35{6144, 3, 1, 1}, ffn_moe_weighted-35{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 3.0655, 7.0655, 11.0655, ...], [24579.0664, 24583.0664, 24587.0664, ...], [49155.0664, 49159.0664, 49163.0664, ...], ], ] sum = 221247.578125 ggml_debug: ffn_moe_up-35 = (f32) MUL_MAT_ID(blk.35.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-35{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.6951, 2.3049, 6.3049, ...], [43006.3047, 43010.3047, 43014.3047, ...], [86014.3047, 86018.3047, 86022.3047, ...], ], ] sum = 387092.750000 ggml_debug: ffn_moe_gate-35 = (f32) MUL_MAT_ID(blk.35.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-35{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.3314, 2.6686, 6.6686, ...], [43006.6680, 43010.6680, 43014.6680, ...], [86014.6719, 86018.6719, 86022.6719, ...], ], ] sum = 387096.062500 ggml_debug: ffn_moe_silu-35 = (f32) UNARY(ffn_moe_gate-35{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.2782, 3.7218, 7.7218, ...], [43007.7227, 43011.7227, 43015.7227, ...], [86015.7188, 86019.7188, 86023.7188, ...], ], ] sum = 387105.468750 ggml_debug: ffn_moe_gate_par-35 = (f32) MUL(ffn_moe_up-35{10752, 3, 1, 1}, ffn_moe_silu-35{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.4715, 4.4715, 8.4715, ...], [43008.4727, 43012.4727, 43016.4727, ...], [86016.4688, 86020.4688, 86024.4688, ...], ], ] sum = 387112.218750 ggml_debug: ffn_moe_down-35 = (f32) MUL_MAT_ID(blk.35.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-35{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.5203, 4.5203, 8.5203, ...], [24576.5195, 24580.5195, 24584.5195, ...], [49152.5195, 49156.5195, 49160.5195, ...], ], ] sum = 221224.671875 ggml_debug: ffn_moe_weights_norm-35 (view) = (f32) VIEW(ffn_moe_weights_norm-35{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2267], [ 16.2267], [ 32.2267], ], ] sum = 48.680199 ggml_debug: ffn_moe_weighted-35 = (f32) MUL(ffn_moe_down-35{6144, 3, 1, 1}, ffn_moe_weights_norm-35 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1180, 4.1180, 8.1180, ...], [24576.1172, 24580.1172, 24584.1172, ...], [49152.1172, 49156.1172, 49160.1172, ...], ], ] sum = 221221.062500 ggml_debug: ffn_moe_out-35 = (f32) ADD(ffn_moe_out-35{6144, 3, 1, 1}, ffn_moe_weighted-35{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 3.1835, 7.1835, 11.1835, ...], [24579.1836, 24583.1836, 24587.1836, ...], [49155.1836, 49159.1836, 49163.1836, ...], ], ] sum = 221248.656250 ggml_debug: ffn_inp-35 = (f32) ADD(kqv_out-35{6144, 3, 1, 1}, l_out-34{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.6823, 3.3177, 7.3177, ...], [24575.3184, 24579.3184, 24583.3184, ...], [49151.3164, 49155.3164, 49159.3164, ...], ], ] sum = 221213.843750 ggml_debug: l_out-35 = (f32) ADD(ffn_moe_out-35{6144, 3, 1, 1}, ffn_inp-35{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 2.5012, 6.5012, 10.5012, ...], [24578.5020, 24582.5020, 24586.5020, ...], [49154.5000, 49158.5000, 49162.5000, ...], ], ] sum = 221242.500000 ggml_debug: norm-36 = (f32) NORM(l_out-35{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.4653, 4.4653, 8.4653, ...], [24576.4648, 24580.4648, 24584.4648, ...], [49152.4648, 49156.4648, 49160.4648, ...], ], ] sum = 221224.203125 ggml_debug: attn_norm-36 = (f32) MUL(norm-36{6144, 3, 1, 1}, blk.36.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1354, 4.1354, 8.1354, ...], [24576.1348, 24580.1348, 24584.1348, ...], [49152.1367, 49156.1367, 49160.1367, ...], ], ] sum = 221221.234375 ggml_debug: wqkv-36 = (f32) MUL_MAT(blk.36.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-36{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.4847, 4.4847, 8.4847, ...], [32768.4844, 32772.4844, 32776.4844, ...], [65536.4844, 65540.4844, 65544.4844, ...], ], ] sum = 294952.375000 ggml_debug: wqkv_clamped-36 = (f32) CLAMP(wqkv-36{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.4847, 4.4847, 8.4847, ...], [32768.4844, 32772.4844, 32776.4844, ...], [65536.4844, 65540.4844, 65544.4844, ...], ], ] sum = 294952.375000 ggml_debug: wqkv_clamped-36 (view) = (f32) VIEW(wqkv_clamped-36{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.4847, 4.4847, 8.4847, ...], [32768.4844, 32772.4844, 32776.4844, ...], [65536.4844, 65540.4844, 65544.4844, ...], ], ] sum = 294952.375000 ggml_debug: Qcur-36 = (f32) CONT(wqkv_clamped-36 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.4847, 4.4847, 8.4847, ...], [24576.4844, 24580.4844, 24584.4844, ...], [49152.4844, 49156.4844, 49160.4844, ...], ], ] sum = 221224.359375 ggml_debug: Qcur-36 (reshaped) = (f32) RESHAPE(Qcur-36{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.4847, 4.4847, 8.4847, ...], [512.4847, 516.4847, 520.4847, ...], [1024.4846, 1028.4846, 1032.4846, ...], ... ], [ [24576.4844, 24580.4844, 24584.4844, ...], [25088.4844, 25092.4844, 25096.4844, ...], [25600.4844, 25604.4844, 25608.4844, ...], ... ], [ [49152.4844, 49156.4844, 49160.4844, ...], [49664.4844, 49668.4844, 49672.4844, ...], [50176.4844, 50180.4844, 50184.4844, ...], ... ], ] sum = 677497.187500 ggml_debug: Qcur-36 = (f32) ROPE(Qcur-36 (reshaped){128, 48, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.4847, 4.4847, 8.4847, ...], [512.4847, 516.4847, 520.4847, ...], [1024.4846, 1028.4846, 1032.4846, ...], ... ], [ [24576.4844, 24580.4844, 24584.4844, ...], [25088.4844, 25092.4844, 25096.4844, ...], [25600.4844, 25604.4844, 25608.4844, ...], ... ], [ [49152.4844, 49156.4844, 49160.4844, ...], [49664.4844, 49668.4844, 49672.4844, ...], [50176.4844, 50180.4844, 50184.4844, ...], ... ], ] sum = 677497.187500 ggml_debug: wqkv_clamped-36 (view) = (f32) VIEW(wqkv_clamped-36{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 2.7556, 6.7556, 10.7556, ...], [32770.7539, 32774.7539, 32778.7539, ...], [65538.7578, 65542.7578, 65546.7578, ...], ], ] sum = 294972.781250 ggml_debug: Kcur-36 = (f32) CONT(wqkv_clamped-36 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 2.7556, 6.7556, 10.7556, ...], [4098.7554, 4102.7554, 4106.7554, ...], [8194.7559, 8198.7559, 8202.7559, ...], ], ] sum = 36924.800781 ggml_debug: Kcur-36 (reshaped) = (f32) RESHAPE(Kcur-36{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 2.7556, 6.7556, 10.7556, ...], [514.7556, 518.7556, 522.7556, ...], [1026.7555, 1030.7555, 1034.7555, ...], ... ], [ [4098.7554, 4102.7554, 4106.7554, ...], [4610.7554, 4614.7554, 4618.7554, ...], [5122.7554, 5126.7554, 5130.7554, ...], ... ], [ [8194.7559, 8198.7559, 8202.7559, ...], [8706.7559, 8710.7559, 8714.7559, ...], [9218.7559, 9222.7559, 9226.7559, ...], ... ], ] sum = 124598.414062 ggml_debug: Kcur-36 = (f32) ROPE(Kcur-36 (reshaped){128, 8, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 2.7556, 6.7556, 10.7556, ...], [514.7556, 518.7556, 522.7556, ...], [1026.7555, 1030.7555, 1034.7555, ...], ... ], [ [4098.7554, 4102.7554, 4106.7554, ...], [4610.7554, 4614.7554, 4618.7554, ...], [5122.7554, 5126.7554, 5130.7554, ...], ... ], [ [8194.7559, 8198.7559, 8202.7559, ...], [8706.7559, 8710.7559, 8714.7559, ...], [9218.7559, 9222.7559, 9226.7559, ...], ... ], ] sum = 124598.414062 ggml_debug: wqkv_clamped-36 (view) = (f32) VIEW(wqkv_clamped-36{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.4392, 4.4392, 8.4392, ...], [32768.4375, 32772.4375, 32776.4375, ...], [65536.4375, 65540.4375, 65544.4375, ...], ], ] sum = 294951.937500 ggml_debug: Vcur-36 = (f32) CONT(wqkv_clamped-36 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.4392, 4.4392, 8.4392, ...], [4096.4395, 4100.4395, 4104.4395, ...], [8192.4395, 8196.4395, 8200.4395, ...], ], ] sum = 36903.953125 ggml_debug: k_cache_view-36 = (f16) VIEW(cache_k_l36{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-36 (copy of Kcur-36) = (f16) CPY(Kcur-36{128, 8, 3, 1}, k_cache_view-36{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 2.7559, 2.7598, 2.7637, ...], ], ] sum = 8.279297 ggml_debug: v_cur_t-36 = (f32) TRANSPOSE(Vcur-36{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.4392, 4096.4395, 8192.4395], [ 4.4392, 4100.4395, 8196.4395], [ 8.4392, 4104.4395, 8200.4395], ... ], ] sum = 36903.953125 ggml_debug: v_cache_view-36 = (f16) VIEW(cache_v_l36{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-36 (copy of v_cur_t-36) = (f16) CPY(v_cur_t-36{3, 1024, 1, 1}, v_cache_view-36{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ 0.4392, 0.4397, 0.4402], [ 0.8784, 0.8794, 0.8804], [ 1.7568, 1.7588, 1.7607], ... ], ] sum = 9.233643 ggml_debug: v-36 = (f16) VIEW(cache_v_l36{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ 0.4392, 0.4397, 0.4402, ...], [ 0.8784, 0.8794, 0.8804, ...], [ 1.7568, 1.7588, 1.7607, ...], ... ], [ [ 0.4392, 0.4397, 0.4402, ...], [ 0.8784, 0.8794, 0.8804, ...], [ 1.7568, 1.7588, 1.7607, ...], ... ], [ [ 0.4392, 0.4397, 0.4402, ...], [ 0.8784, 0.8794, 0.8804, ...], [ 1.7568, 1.7588, 1.7607, ...], ... ], ... ] sum = 27.700928 ggml_debug: k-36 = (f16) VIEW(cache_k_l36{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 2.7559, 2.7598, 2.7637, ...], [ 11.0234, 11.0391, 11.0547, ...], [ 44.0938, 44.1562, 44.2188, ...], ... ], [ [ 3.2559, 3.2598, 3.2637, ...], [ 13.0234, 13.0391, 13.0547, ...], [ 52.0938, 52.1562, 52.2188, ...], ... ], [ [ 3.7559, 3.7598, 3.7637, ...], [ 15.0234, 15.0391, 15.0547, ...], [ 60.0938, 60.1562, 60.2188, ...], ... ], ... ] sum = 616.095703 ggml_debug: q-36 = (f32) PERMUTE(Qcur-36{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.4847, 4.4847, 8.4847, ...], [24576.4844, 24580.4844, 24584.4844, ...], [49152.4844, 49156.4844, 49160.4844, ...], ], [ [512.4847, 516.4847, 520.4847, ...], [25088.4844, 25092.4844, 25096.4844, ...], [49664.4844, 49668.4844, 49672.4844, ...], ], [ [1024.4846, 1028.4846, 1032.4846, ...], [25600.4844, 25604.4844, 25608.4844, ...], [50176.4844, 50180.4844, 50184.4844, ...], ], ... ] sum = 677497.250000 ggml_debug: kq-36 = (f32) MUL_MAT(k-36{128, 32, 8, 1}, q-36{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 12.2031, 16.2031, 20.2031, ...], [140.2031, 144.2031, 148.2031, ...], [268.2031, 272.2031, 276.2031, ...], ], [ [396.2031, 400.2031, 404.2031, ...], [524.2031, 528.2031, 532.2031, ...], [652.2031, 656.2031, 660.2031, ...], ], [ [780.2031, 784.2031, 788.2031, ...], [908.2031, 912.2031, 916.2031, ...], [1036.2031, 1040.2031, 1044.2031, ...], ], ... ] sum = 14261.484375 ggml_debug: kq_soft_max_ext-36 = (f32) SOFT_MAX(kq-36{32, 3, 48, 1}, CUDA2#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-36 = (f32) MUL_MAT(v-36{32, 128, 8, 1}, kq_soft_max_ext-36{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ 0.4392, 4.4392, 8.4392, ...], [512.4392, 516.4392, 520.4392, ...], [1024.4392, 1028.4392, 1032.4392, ...], ], [ [1536.4392, 1540.4392, 1544.4392, ...], [2048.4392, 2052.4392, 2056.4392, ...], [2560.4392, 2564.4392, 2568.4392, ...], ], [ [3072.4392, 3076.4392, 3080.4392, ...], [3584.4392, 3588.4392, 3592.4392, ...], [4096.4395, 4100.4395, 4104.4395, ...], ], ... ] sum = 55415.851562 ggml_debug: kqv_merged-36 = (f32) PERMUTE(kqv-36{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ 0.4392, 4.4392, 8.4392, ...], [1536.4392, 1540.4392, 1544.4392, ...], [3072.4392, 3076.4392, 3080.4392, ...], ... ], [ [512.4392, 516.4392, 520.4392, ...], [2048.4392, 2052.4392, 2056.4392, ...], [3584.4392, 3588.4392, 3592.4392, ...], ... ], [ [1024.4392, 1028.4392, 1032.4392, ...], [2560.4392, 2564.4392, 2568.4392, ...], [4096.4395, 4100.4395, 4104.4395, ...], ... ], ] sum = 55415.851562 ggml_debug: kqv_merged_cont-36 = (f32) CONT(kqv_merged-36{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.4392, 4.4392, 8.4392, ...], [24576.4395, 24580.4395, 24584.4395, ...], [49152.4375, 49156.4375, 49160.4375, ...], ], ] sum = 221223.937500 ggml_debug: kqv_out-36 = (f32) MUL_MAT(blk.36.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-36{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.9694, 4.9694, 8.9694, ...], [24576.9688, 24580.9688, 24584.9688, ...], [49152.9688, 49156.9688, 49160.9688, ...], ], ] sum = 221228.718750 ggml_debug: norm-36 = (f32) NORM(kqv_out-36{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.8512, 4.8512, 8.8512, ...], [24576.8516, 24580.8516, 24584.8516, ...], [49152.8516, 49156.8516, 49160.8516, ...], ], ] sum = 221227.656250 ggml_debug: attn_out_norm-36 = (f32) MUL(norm-36{6144, 3, 1, 1}, blk.36.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.5919, 4.5919, 8.5919, ...], [24576.5918, 24580.5918, 24584.5918, ...], [49152.5938, 49156.5938, 49160.5938, ...], ], ] sum = 221225.343750 ggml_debug: ffn_moe_logits-36 = (f32) MUL_MAT(blk.36.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-36{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ 0.2301, 4.2301, 8.2301, ...], [ 64.2301, 68.2301, 72.2301, ...], [128.2301, 132.2301, 136.2301, ...], ], ] sum = 614.070923 ggml_debug: ffn_moe_probs-36 = (f32) SOFT_MAX(ffn_moe_logits-36{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0780, 4.0780, 8.0780, ...], [ 64.0780, 68.0780, 72.0780, ...], [128.0780, 132.0780, 136.0780, ...], ], ] sum = 612.702087 ggml_debug: ffn_moe_argsort-36 = (i32) ARGSORT(ffn_moe_probs-36{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 10.0000, 14.0000, 18.0000, ...], [ 74.0000, 78.0000, 82.0000, ...], [138.0000, 142.0000, 146.0000, ...], ], ] sum = 702.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-36{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 10.0000, 14.0000, 18.0000, ...], [ 74.0000, 78.0000, 82.0000, ...], [138.0000, 142.0000, 146.0000, ...], ], ] sum = 702.000000 ggml_debug: ffn_moe_up-36 = (f32) MUL_MAT_ID(blk.36.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-36{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.6514, 3.3486, 7.3486, ...], [43007.3477, 43011.3477, 43015.3477, ...], [86015.3516, 86019.3516, 86023.3516, ...], ], ] sum = 387102.125000 ggml_debug: ffn_moe_gate-36 = (f32) MUL_MAT_ID(blk.36.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-36{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0517, 3.9483, 7.9483, ...], [43007.9492, 43011.9492, 43015.9492, ...], [86015.9453, 86019.9453, 86023.9453, ...], ], ] sum = 387107.500000 ggml_debug: ffn_moe_silu-36 = (f32) UNARY(ffn_moe_gate-36{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0252, 3.9748, 7.9748, ...], [43007.9766, 43011.9766, 43015.9766, ...], [86015.9766, 86019.9766, 86023.9766, ...], ], ] sum = 387107.781250 ggml_debug: ffn_moe_gate_par-36 = (f32) MUL(ffn_moe_up-36{10752, 3, 1, 1}, ffn_moe_silu-36{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0164, 4.0164, 8.0164, ...], [43008.0156, 43012.0156, 43016.0156, ...], [86016.0156, 86020.0156, 86024.0156, ...], ], ] sum = 387108.125000 ggml_debug: ffn_moe_down-36 = (f32) MUL_MAT_ID(blk.36.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-36{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.4064, 4.4064, 8.4064, ...], [24576.4062, 24580.4062, 24584.4062, ...], [49152.4062, 49156.4062, 49160.4062, ...], ], ] sum = 221223.656250 ggml_debug: ffn_moe_probs-36 (reshaped) = (f32) RESHAPE(ffn_moe_probs-36{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0780], [ 4.0780], [ 8.0780], ... ], [ [ 64.0780], [ 68.0780], [ 72.0780], ... ], [ [128.0780], [132.0780], [136.0780], ... ], ] sum = 612.702087 ggml_debug: ffn_moe_weights-36 = (f32) GET_ROWS(ffn_moe_probs-36 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.0807], [ 4.0807], [ 8.0807], ... ], [ [ 16.0807], [ 20.0807], [ 24.0807], ... ], [ [ 32.0807], [ 36.0807], [ 40.0807], ... ], ] sum = 180.726608 ggml_debug: ffn_moe_weights-36 (reshaped) = (f32) RESHAPE(ffn_moe_weights-36{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.0807, 4.0807, 8.0807, ...], [ 16.0807, 20.0807, 24.0807, ...], [ 32.0807, 36.0807, 40.0807, ...], ], ] sum = 180.726608 ggml_debug: ffn_moe_weights_sum-36 = (f32) SUM_ROWS(ffn_moe_weights-36 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3164], [ 4.3164], [ 8.3164], ], ] sum = 12.949282 ggml_debug: ffn_moe_weights_norm-36 = (f32) DIV(ffn_moe_weights-36 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-36{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.2551, 4.2551, 8.2551, ...], [ 16.2551, 20.2551, 24.2551, ...], [ 32.2551, 36.2551, 40.2551, ...], ], ] sum = 182.296280 ggml_debug: ffn_moe_weights_norm-36 (view) = (f32) VIEW(ffn_moe_weights_norm-36{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2551], [ 16.2551], [ 32.2551], ], ] sum = 48.765423 ggml_debug: ffn_moe_weighted-36 = (f32) MUL(ffn_moe_down-36{6144, 3, 1, 1}, ffn_moe_weights_norm-36 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1037, 4.1037, 8.1037, ...], [24576.1035, 24580.1035, 24584.1035, ...], [49152.1055, 49156.1055, 49160.1055, ...], ], ] sum = 221220.937500 ggml_debug: ffn_moe_up-36 = (f32) MUL_MAT_ID(blk.36.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-36{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0078, 4.0078, 8.0078, ...], [43008.0078, 43012.0078, 43016.0078, ...], [86016.0078, 86020.0078, 86024.0078, ...], ], ] sum = 387108.062500 ggml_debug: ffn_moe_gate-36 = (f32) MUL_MAT_ID(blk.36.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-36{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.1593, 3.8407, 7.8407, ...], [43007.8398, 43011.8398, 43015.8398, ...], [86015.8438, 86019.8438, 86023.8438, ...], ], ] sum = 387106.593750 ggml_debug: ffn_moe_silu-36 = (f32) UNARY(ffn_moe_gate-36{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.0733, 3.9267, 7.9267, ...], [43007.9258, 43011.9258, 43015.9258, ...], [86015.9297, 86019.9297, 86023.9297, ...], ], ] sum = 387107.375000 ggml_debug: ffn_moe_gate_par-36 = (f32) MUL(ffn_moe_up-36{10752, 3, 1, 1}, ffn_moe_silu-36{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0006, 3.9994, 7.9994, ...], [43008.0000, 43012.0000, 43016.0000, ...], [86016.0000, 86020.0000, 86024.0000, ...], ], ] sum = 387108.000000 ggml_debug: ffn_moe_down-36 = (f32) MUL_MAT_ID(blk.36.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-36{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0256, 4.0256, 8.0256, ...], [24576.0254, 24580.0254, 24584.0254, ...], [49152.0273, 49156.0273, 49160.0273, ...], ], ] sum = 221220.234375 ggml_debug: ffn_moe_weights_norm-36 (view) = (f32) VIEW(ffn_moe_weights_norm-36{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2509], [ 16.2509], [ 32.2509], ], ] sum = 48.752819 ggml_debug: ffn_moe_weighted-36 = (f32) MUL(ffn_moe_down-36{6144, 3, 1, 1}, ffn_moe_weights_norm-36 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0064, 4.0064, 8.0064, ...], [24576.0059, 24580.0059, 24584.0059, ...], [49152.0078, 49156.0078, 49160.0078, ...], ], ] sum = 221220.062500 ggml_debug: ffn_moe_out-36 = (f32) ADD(ffn_moe_weighted-36{6144, 3, 1, 1}, ffn_moe_weighted-36{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1101, 4.1101, 8.1101, ...], [24576.1094, 24580.1094, 24584.1094, ...], [49152.1094, 49156.1094, 49160.1094, ...], ], ] sum = 221220.984375 ggml_debug: ffn_moe_up-36 = (f32) MUL_MAT_ID(blk.36.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-36{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.5878, 3.4122, 7.4122, ...], [43007.4141, 43011.4141, 43015.4141, ...], [86015.4141, 86019.4141, 86023.4141, ...], ], ] sum = 387102.718750 ggml_debug: ffn_moe_gate-36 = (f32) MUL_MAT_ID(blk.36.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-36{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3054, 3.6946, 7.6946, ...], [43007.6953, 43011.6953, 43015.6953, ...], [86015.6953, 86019.6953, 86023.6953, ...], ], ] sum = 387105.250000 ggml_debug: ffn_moe_silu-36 = (f32) UNARY(ffn_moe_gate-36{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1296, 3.8704, 7.8704, ...], [43007.8711, 43011.8711, 43015.8711, ...], [86015.8672, 86019.8672, 86023.8672, ...], ], ] sum = 387106.843750 ggml_debug: ffn_moe_gate_par-36 = (f32) MUL(ffn_moe_up-36{10752, 3, 1, 1}, ffn_moe_silu-36{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0762, 4.0762, 8.0762, ...], [43008.0742, 43012.0742, 43016.0742, ...], [86016.0781, 86020.0781, 86024.0781, ...], ], ] sum = 387108.687500 ggml_debug: ffn_moe_down-36 = (f32) MUL_MAT_ID(blk.36.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-36{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2434, 4.2434, 8.2434, ...], [24576.2441, 24580.2441, 24584.2441, ...], [49152.2422, 49156.2422, 49160.2422, ...], ], ] sum = 221222.187500 ggml_debug: ffn_moe_weights_norm-36 (view) = (f32) VIEW(ffn_moe_weights_norm-36{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2474], [ 16.2474], [ 32.2474], ], ] sum = 48.742146 ggml_debug: ffn_moe_weighted-36 = (f32) MUL(ffn_moe_down-36{6144, 3, 1, 1}, ffn_moe_weights_norm-36 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0602, 4.0602, 8.0602, ...], [24576.0605, 24580.0605, 24584.0605, ...], [49152.0586, 49156.0586, 49160.0586, ...], ], ] sum = 221220.546875 ggml_debug: ffn_moe_out-36 = (f32) ADD(ffn_moe_out-36{6144, 3, 1, 1}, ffn_moe_weighted-36{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1703, 4.1703, 8.1703, ...], [24576.1699, 24580.1699, 24584.1699, ...], [49152.1719, 49156.1719, 49160.1719, ...], ], ] sum = 221221.546875 ggml_debug: ffn_moe_up-36 = (f32) MUL_MAT_ID(blk.36.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-36{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.5424, 2.4576, 6.4576, ...], [43006.4570, 43010.4570, 43014.4570, ...], [86014.4609, 86018.4609, 86022.4609, ...], ], ] sum = 387094.125000 ggml_debug: ffn_moe_gate-36 = (f32) MUL_MAT_ID(blk.36.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-36{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3651, 3.6349, 7.6349, ...], [43007.6367, 43011.6367, 43015.6367, ...], [86015.6328, 86019.6328, 86023.6328, ...], ], ] sum = 387104.687500 ggml_debug: ffn_moe_silu-36 = (f32) UNARY(ffn_moe_gate-36{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ -0.1496, 3.8504, 7.8504, ...], [43007.8516, 43011.8516, 43015.8516, ...], [86015.8516, 86019.8516, 86023.8516, ...], ], ] sum = 387106.656250 ggml_debug: ffn_moe_gate_par-36 = (f32) MUL(ffn_moe_up-36{10752, 3, 1, 1}, ffn_moe_silu-36{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2307, 4.2307, 8.2307, ...], [43008.2305, 43012.2305, 43016.2305, ...], [86016.2344, 86020.2344, 86024.2344, ...], ], ] sum = 387110.125000 ggml_debug: ffn_moe_down-36 = (f32) MUL_MAT_ID(blk.36.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-36{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.4646, 4.4646, 8.4646, ...], [24576.4648, 24580.4648, 24584.4648, ...], [49152.4648, 49156.4648, 49160.4648, ...], ], ] sum = 221224.187500 ggml_debug: ffn_moe_weights_norm-36 (view) = (f32) VIEW(ffn_moe_weights_norm-36{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2465], [ 16.2465], [ 32.2465], ], ] sum = 48.739613 ggml_debug: ffn_moe_weighted-36 = (f32) MUL(ffn_moe_down-36{6144, 3, 1, 1}, ffn_moe_weights_norm-36 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1145, 4.1145, 8.1145, ...], [24576.1152, 24580.1152, 24584.1152, ...], [49152.1133, 49156.1133, 49160.1133, ...], ], ] sum = 221221.015625 ggml_debug: ffn_moe_out-36 = (f32) ADD(ffn_moe_out-36{6144, 3, 1, 1}, ffn_moe_weighted-36{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2848, 4.2848, 8.2848, ...], [24576.2852, 24580.2852, 24584.2852, ...], [49152.2852, 49156.2852, 49160.2852, ...], ], ] sum = 221222.562500 ggml_debug: ffn_inp-36 = (f32) ADD(kqv_out-36{6144, 3, 1, 1}, l_out-35{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 3.4706, 7.4706, 11.4706, ...], [24579.4707, 24583.4707, 24587.4707, ...], [49155.4688, 49159.4688, 49163.4688, ...], ], ] sum = 221251.218750 ggml_debug: l_out-36 = (f32) ADD(ffn_moe_out-36{6144, 3, 1, 1}, ffn_inp-36{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 3.7554, 7.7554, 11.7554, ...], [24579.7559, 24583.7559, 24587.7559, ...], [49155.7539, 49159.7539, 49163.7539, ...], ], ] sum = 221253.781250 ggml_debug: norm-37 = (f32) NORM(l_out-36{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.7809, 4.7809, 8.7809, ...], [24576.7812, 24580.7812, 24584.7812, ...], [49152.7812, 49156.7812, 49160.7812, ...], ], ] sum = 221227.031250 ggml_debug: attn_norm-37 = (f32) MUL(norm-37{6144, 3, 1, 1}, blk.37.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2166, 4.2166, 8.2166, ...], [24576.2168, 24580.2168, 24584.2168, ...], [49152.2148, 49156.2148, 49160.2148, ...], ], ] sum = 221221.953125 ggml_debug: wqkv-37 = (f32) MUL_MAT(blk.37.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-37{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -1.1214, 2.8786, 6.8786, ...], [32766.8789, 32770.8789, 32774.8789, ...], [65534.8789, 65538.8750, 65542.8750, ...], ], ] sum = 294937.875000 ggml_debug: wqkv_clamped-37 = (f32) CLAMP(wqkv-37{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -1.1214, 2.8786, 6.8786, ...], [32766.8789, 32770.8789, 32774.8789, ...], [65534.8789, 65538.8750, 65542.8750, ...], ], ] sum = 294937.875000 ggml_debug: wqkv_clamped-37 (view) = (f32) VIEW(wqkv_clamped-37{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -1.1214, 2.8786, 6.8786, ...], [32766.8789, 32770.8789, 32774.8789, ...], [65534.8789, 65538.8750, 65542.8750, ...], ], ] sum = 294937.875000 ggml_debug: Qcur-37 = (f32) CONT(wqkv_clamped-37 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -1.1214, 2.8786, 6.8786, ...], [24574.8789, 24578.8789, 24582.8789, ...], [49150.8789, 49154.8789, 49158.8789, ...], ], ] sum = 221209.890625 ggml_debug: Qcur-37 (reshaped) = (f32) RESHAPE(Qcur-37{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -1.1214, 2.8786, 6.8786, ...], [510.8786, 514.8786, 518.8786, ...], [1022.8786, 1026.8785, 1030.8785, ...], ... ], [ [24574.8789, 24578.8789, 24582.8789, ...], [25086.8789, 25090.8789, 25094.8789, ...], [25598.8789, 25602.8789, 25606.8789, ...], ... ], [ [49150.8789, 49154.8789, 49158.8789, ...], [49662.8789, 49666.8789, 49670.8789, ...], [50174.8789, 50178.8789, 50182.8789, ...], ... ], ] sum = 677453.687500 ggml_debug: Qcur-37 = (f32) ROPE(Qcur-37 (reshaped){128, 48, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -1.1214, 2.8786, 6.8786, ...], [510.8786, 514.8786, 518.8786, ...], [1022.8786, 1026.8785, 1030.8785, ...], ... ], [ [24574.8789, 24578.8789, 24582.8789, ...], [25086.8789, 25090.8789, 25094.8789, ...], [25598.8789, 25602.8789, 25606.8789, ...], ... ], [ [49150.8789, 49154.8789, 49158.8789, ...], [49662.8789, 49666.8789, 49670.8789, ...], [50174.8789, 50178.8789, 50182.8789, ...], ... ], ] sum = 677453.687500 ggml_debug: wqkv_clamped-37 (view) = (f32) VIEW(wqkv_clamped-37{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 1.6297, 5.6297, 9.6297, ...], [32769.6289, 32773.6289, 32777.6289, ...], [65537.6328, 65541.6328, 65545.6328, ...], ], ] sum = 294962.656250 ggml_debug: Kcur-37 = (f32) CONT(wqkv_clamped-37 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 1.6297, 5.6297, 9.6297, ...], [4097.6299, 4101.6299, 4105.6299, ...], [8193.6299, 8197.6299, 8201.6299, ...], ], ] sum = 36914.667969 ggml_debug: Kcur-37 (reshaped) = (f32) RESHAPE(Kcur-37{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 1.6297, 5.6297, 9.6297, ...], [513.6297, 517.6297, 521.6297, ...], [1025.6298, 1029.6298, 1033.6298, ...], ... ], [ [4097.6299, 4101.6299, 4105.6299, ...], [4609.6299, 4613.6299, 4617.6299, ...], [5121.6299, 5125.6299, 5129.6299, ...], ... ], [ [8193.6299, 8197.6299, 8201.6299, ...], [8705.6299, 8709.6299, 8713.6299, ...], [9217.6299, 9221.6299, 9225.6299, ...], ... ], ] sum = 124568.015625 ggml_debug: Kcur-37 = (f32) ROPE(Kcur-37 (reshaped){128, 8, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 1.6297, 5.6297, 9.6297, ...], [513.6297, 517.6297, 521.6297, ...], [1025.6298, 1029.6298, 1033.6298, ...], ... ], [ [4097.6299, 4101.6299, 4105.6299, ...], [4609.6299, 4613.6299, 4617.6299, ...], [5121.6299, 5125.6299, 5129.6299, ...], ... ], [ [8193.6299, 8197.6299, 8201.6299, ...], [8705.6299, 8709.6299, 8713.6299, ...], [9217.6299, 9221.6299, 9225.6299, ...], ... ], ] sum = 124568.015625 ggml_debug: wqkv_clamped-37 (view) = (f32) VIEW(wqkv_clamped-37{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -1.2367, 2.7633, 6.7633, ...], [32766.7637, 32770.7617, 32774.7617, ...], [65534.7617, 65538.7656, 65542.7656, ...], ], ] sum = 294936.875000 ggml_debug: Vcur-37 = (f32) CONT(wqkv_clamped-37 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -1.2367, 2.7633, 6.7633, ...], [4094.7632, 4098.7632, 4102.7632, ...], [8190.7632, 8194.7637, 8198.7637, ...], ], ] sum = 36888.871094 ggml_debug: k_cache_view-37 = (f16) VIEW(cache_k_l37{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-37 (copy of Kcur-37) = (f16) CPY(Kcur-37{128, 8, 3, 1}, k_cache_view-37{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 1.6299, 1.6318, 1.6338, ...], ], ] sum = 4.895508 ggml_debug: v_cur_t-37 = (f32) TRANSPOSE(Vcur-37{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -1.2367, 4094.7632, 8190.7632], [ 2.7633, 4098.7632, 8194.7637], [ 6.7633, 4102.7632, 8198.7637], ... ], ] sum = 36888.871094 ggml_debug: v_cache_view-37 = (f16) VIEW(cache_v_l37{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-37 (copy of v_cur_t-37) = (f16) CPY(v_cur_t-37{3, 1024, 1, 1}, v_cache_view-37{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -1.2363, -1.2383, -1.2402], [ -2.4727, -2.4766, -2.4805], [ -4.9453, -4.9531, -4.9609], ... ], ] sum = -26.003906 ggml_debug: v-37 = (f16) VIEW(cache_v_l37{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -1.2363, -1.2383, -1.2402, ...], [ -2.4727, -2.4766, -2.4805, ...], [ -4.9453, -4.9531, -4.9609, ...], ... ], [ [ -1.2363, -1.2383, -1.2402, ...], [ -2.4727, -2.4766, -2.4805, ...], [ -4.9453, -4.9531, -4.9609, ...], ... ], [ [ -1.2363, -1.2383, -1.2402, ...], [ -2.4727, -2.4766, -2.4805, ...], [ -4.9453, -4.9531, -4.9609, ...], ... ], ... ] sum = -78.011719 ggml_debug: k-37 = (f16) VIEW(cache_k_l37{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 1.6299, 1.6318, 1.6338, ...], [ 6.5195, 6.5273, 6.5352, ...], [ 26.0781, 26.1094, 26.1406, ...], ... ], [ [ 1.8799, 1.8818, 1.8838, ...], [ 7.5195, 7.5273, 7.5352, ...], [ 30.0781, 30.1094, 30.1406, ...], ... ], [ [ 2.2598, 2.2637, 2.2676, ...], [ 9.0391, 9.0547, 9.0703, ...], [ 36.1562, 36.2188, 36.2812, ...], ... ], ... ] sum = 363.972656 ggml_debug: q-37 = (f32) PERMUTE(Qcur-37{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -1.1214, 2.8786, 6.8786, ...], [24574.8789, 24578.8789, 24582.8789, ...], [49150.8789, 49154.8789, 49158.8789, ...], ], [ [510.8786, 514.8786, 518.8786, ...], [25086.8789, 25090.8789, 25094.8789, ...], [49662.8789, 49666.8789, 49670.8789, ...], ], [ [1022.8786, 1026.8785, 1030.8785, ...], [25598.8789, 25602.8789, 25606.8789, ...], [50174.8789, 50178.8789, 50182.8789, ...], ], ... ] sum = 677453.687500 ggml_debug: kq-37 = (f32) MUL_MAT(k-37{128, 32, 8, 1}, q-37{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 25.8750, 29.8750, 33.8750, ...], [153.8750, 157.8750, 161.8750, ...], [281.8750, 285.8750, 289.8750, ...], ], [ [409.8750, 413.8750, 417.8750, ...], [537.8750, 541.8750, 545.8750, ...], [665.8750, 669.8750, 673.8750, ...], ], [ [793.8750, 797.8750, 801.8750, ...], [921.8750, 925.8750, 929.8750, ...], [1049.8750, 1053.8750, 1057.8750, ...], ], ... ] sum = 14630.625000 ggml_debug: kq_soft_max_ext-37 = (f32) SOFT_MAX(kq-37{32, 3, 48, 1}, CUDA2#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-37 = (f32) MUL_MAT(v-37{32, 128, 8, 1}, kq_soft_max_ext-37{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -1.2363, 2.7637, 6.7637, ...], [510.7637, 514.7637, 518.7637, ...], [1022.7637, 1026.7637, 1030.7637, ...], ], [ [1534.7637, 1538.7637, 1542.7637, ...], [2046.7637, 2050.7637, 2054.7637, ...], [2558.7637, 2562.7637, 2566.7637, ...], ], [ [3070.7637, 3074.7637, 3078.7637, ...], [3582.7637, 3586.7637, 3590.7637, ...], [4094.7637, 4098.7637, 4102.7637, ...], ], ... ] sum = 55370.625000 ggml_debug: kqv_merged-37 = (f32) PERMUTE(kqv-37{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -1.2363, 2.7637, 6.7637, ...], [1534.7637, 1538.7637, 1542.7637, ...], [3070.7637, 3074.7637, 3078.7637, ...], ... ], [ [510.7637, 514.7637, 518.7637, ...], [2046.7637, 2050.7637, 2054.7637, ...], [3582.7637, 3586.7637, 3590.7637, ...], ... ], [ [1022.7637, 1026.7637, 1030.7637, ...], [2558.7637, 2562.7637, 2566.7637, ...], [4094.7637, 4098.7637, 4102.7637, ...], ... ], ] sum = 55370.632812 ggml_debug: kqv_merged_cont-37 = (f32) CONT(kqv_merged-37{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -1.2363, 2.7637, 6.7637, ...], [24574.7637, 24578.7637, 24582.7637, ...], [49150.7656, 49154.7656, 49158.7656, ...], ], ] sum = 221208.890625 ggml_debug: kqv_out-37 = (f32) MUL_MAT(blk.37.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-37{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.5856, 3.4144, 7.4144, ...], [24575.4141, 24579.4141, 24583.4141, ...], [49151.4141, 49155.4141, 49159.4141, ...], ], ] sum = 221214.718750 ggml_debug: norm-37 = (f32) NORM(kqv_out-37{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.9493, 3.0507, 7.0507, ...], [24575.0508, 24579.0508, 24583.0508, ...], [49151.0508, 49155.0508, 49159.0508, ...], ], ] sum = 221211.453125 ggml_debug: attn_out_norm-37 = (f32) MUL(norm-37{6144, 3, 1, 1}, blk.37.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.6934, 3.3066, 7.3066, ...], [24575.3066, 24579.3066, 24583.3066, ...], [49151.3047, 49155.3047, 49159.3047, ...], ], ] sum = 221213.750000 ggml_debug: ffn_moe_logits-37 = (f32) MUL_MAT(blk.37.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-37{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.1666, 3.8334, 7.8334, ...], [ 63.8334, 67.8334, 71.8334, ...], [127.8334, 131.8334, 135.8334, ...], ], ] sum = 610.500366 ggml_debug: ffn_moe_probs-37 = (f32) SOFT_MAX(ffn_moe_logits-37{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0587, 4.0587, 8.0587, ...], [ 64.0587, 68.0587, 72.0587, ...], [128.0587, 132.0587, 136.0587, ...], ], ] sum = 612.527954 ggml_debug: ffn_moe_argsort-37 = (i32) ARGSORT(ffn_moe_probs-37{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 10.0000, 14.0000, 18.0000, ...], [ 74.0000, 78.0000, 82.0000, ...], [138.0000, 142.0000, 146.0000, ...], ], ] sum = 702.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-37{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 10.0000, 14.0000, 18.0000, ...], [ 74.0000, 78.0000, 82.0000, ...], [138.0000, 142.0000, 146.0000, ...], ], ] sum = 702.000000 ggml_debug: ffn_moe_up-37 = (f32) MUL_MAT_ID(blk.37.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-37{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.7978, 3.2022, 7.2022, ...], [43007.2031, 43011.2031, 43015.2031, ...], [86015.2031, 86019.2031, 86023.2031, ...], ], ] sum = 387100.812500 ggml_debug: ffn_moe_gate-37 = (f32) MUL_MAT_ID(blk.37.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-37{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2228, 4.2228, 8.2228, ...], [43008.2227, 43012.2227, 43016.2227, ...], [86016.2266, 86020.2266, 86024.2266, ...], ], ] sum = 387110.000000 ggml_debug: ffn_moe_silu-37 = (f32) UNARY(ffn_moe_gate-37{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.1238, 4.1238, 8.1238, ...], [43008.1250, 43012.1250, 43016.1250, ...], [86016.1250, 86020.1250, 86024.1250, ...], ], ] sum = 387109.125000 ggml_debug: ffn_moe_gate_par-37 = (f32) MUL(ffn_moe_up-37{10752, 3, 1, 1}, ffn_moe_silu-37{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.0987, 3.9013, 7.9013, ...], [43007.9023, 43011.9023, 43015.9023, ...], [86015.8984, 86019.8984, 86023.8984, ...], ], ] sum = 387107.125000 ggml_debug: ffn_moe_down-37 = (f32) MUL_MAT_ID(blk.37.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-37{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.7714, 4.7714, 8.7714, ...], [24576.7715, 24580.7715, 24584.7715, ...], [49152.7695, 49156.7695, 49160.7695, ...], ], ] sum = 221226.937500 ggml_debug: ffn_moe_probs-37 (reshaped) = (f32) RESHAPE(ffn_moe_probs-37{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0587], [ 4.0587], [ 8.0587], ... ], [ [ 64.0587], [ 68.0587], [ 72.0587], ... ], [ [128.0587], [132.0587], [136.0587], ... ], ] sum = 612.527954 ggml_debug: ffn_moe_weights-37 = (f32) GET_ROWS(ffn_moe_probs-37 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1582], [ 4.1582], [ 8.1582], ... ], [ [ 16.1582], [ 20.1582], [ 24.1582], ... ], [ [ 32.1582], [ 36.1582], [ 40.1582], ... ], ] sum = 181.424240 ggml_debug: ffn_moe_weights-37 (reshaped) = (f32) RESHAPE(ffn_moe_weights-37{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1582, 4.1582, 8.1582, ...], [ 16.1582, 20.1582, 24.1582, ...], [ 32.1582, 36.1582, 40.1582, ...], ], ] sum = 181.424240 ggml_debug: ffn_moe_weights_sum-37 = (f32) SUM_ROWS(ffn_moe_weights-37 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.4415], [ 4.4415], [ 8.4415], ], ] sum = 13.324609 ggml_debug: ffn_moe_weights_norm-37 = (f32) DIV(ffn_moe_weights-37 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-37{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3584, 4.3584, 8.3584, ...], [ 16.3584, 20.3584, 24.3584, ...], [ 32.3584, 36.3584, 40.3584, ...], ], ] sum = 183.225616 ggml_debug: ffn_moe_weights_norm-37 (view) = (f32) VIEW(ffn_moe_weights_norm-37{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3584], [ 16.3584], [ 32.3584], ], ] sum = 49.075207 ggml_debug: ffn_moe_weighted-37 = (f32) MUL(ffn_moe_down-37{6144, 3, 1, 1}, ffn_moe_weights_norm-37 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2765, 4.2765, 8.2765, ...], [24576.2773, 24580.2773, 24584.2773, ...], [49152.2773, 49156.2773, 49160.2773, ...], ], ] sum = 221222.500000 ggml_debug: ffn_moe_up-37 = (f32) MUL_MAT_ID(blk.37.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-37{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -1.5185, 2.4815, 6.4815, ...], [43006.4805, 43010.4805, 43014.4805, ...], [86014.4844, 86018.4844, 86022.4844, ...], ], ] sum = 387094.375000 ggml_debug: ffn_moe_gate-37 = (f32) MUL_MAT_ID(blk.37.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-37{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.6527, 4.6527, 8.6527, ...], [43008.6523, 43012.6523, 43016.6523, ...], [86016.6562, 86020.6562, 86024.6562, ...], ], ] sum = 387113.906250 ggml_debug: ffn_moe_silu-37 = (f32) UNARY(ffn_moe_gate-37{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.4292, 4.4292, 8.4292, ...], [43008.4297, 43012.4297, 43016.4297, ...], [86016.4297, 86020.4297, 86024.4297, ...], ], ] sum = 387111.875000 ggml_debug: ffn_moe_gate_par-37 = (f32) MUL(ffn_moe_up-37{10752, 3, 1, 1}, ffn_moe_silu-37{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.6518, 3.3482, 7.3482, ...], [43007.3477, 43011.3477, 43015.3477, ...], [86015.3516, 86019.3516, 86023.3516, ...], ], ] sum = 387102.125000 ggml_debug: ffn_moe_down-37 = (f32) MUL_MAT_ID(blk.37.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-37{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.8254, 4.8254, 8.8254, ...], [24576.8262, 24580.8262, 24584.8262, ...], [49152.8242, 49156.8242, 49160.8242, ...], ], ] sum = 221227.437500 ggml_debug: ffn_moe_weights_norm-37 (view) = (f32) VIEW(ffn_moe_weights_norm-37{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.2733], [ 16.2733], [ 32.2733], ], ] sum = 48.819973 ggml_debug: ffn_moe_weighted-37 = (f32) MUL(ffn_moe_down-37{6144, 3, 1, 1}, ffn_moe_weights_norm-37 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2256, 4.2256, 8.2256, ...], [24576.2266, 24580.2266, 24584.2266, ...], [49152.2266, 49156.2266, 49160.2266, ...], ], ] sum = 221222.031250 ggml_debug: ffn_moe_out-37 = (f32) ADD(ffn_moe_weighted-37{6144, 3, 1, 1}, ffn_moe_weighted-37{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.5021, 4.5021, 8.5021, ...], [24576.5020, 24580.5020, 24584.5020, ...], [49152.5039, 49156.5039, 49160.5039, ...], ], ] sum = 221224.515625 ggml_debug: ffn_moe_up-37 = (f32) MUL_MAT_ID(blk.37.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-37{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.6102, 3.3898, 7.3898, ...], [43007.3906, 43011.3906, 43015.3906, ...], [86015.3906, 86019.3906, 86023.3906, ...], ], ] sum = 387102.500000 ggml_debug: ffn_moe_gate-37 = (f32) MUL_MAT_ID(blk.37.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-37{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 1.4857, 5.4857, 9.4857, ...], [43009.4844, 43013.4844, 43017.4844, ...], [86017.4844, 86021.4844, 86025.4844, ...], ], ] sum = 387121.375000 ggml_debug: ffn_moe_silu-37 = (f32) UNARY(ffn_moe_gate-37{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 1.2115, 5.2115, 9.2115, ...], [43009.2109, 43013.2109, 43017.2109, ...], [86017.2109, 86021.2109, 86025.2109, ...], ], ] sum = 387118.906250 ggml_debug: ffn_moe_gate_par-37 = (f32) MUL(ffn_moe_up-37{10752, 3, 1, 1}, ffn_moe_silu-37{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.7393, 3.2607, 7.2607, ...], [43007.2617, 43011.2617, 43015.2617, ...], [86015.2578, 86019.2578, 86023.2578, ...], ], ] sum = 387101.312500 ggml_debug: ffn_moe_down-37 = (f32) MUL_MAT_ID(blk.37.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-37{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -2.4474, 1.5526, 5.5526, ...], [24573.5527, 24577.5527, 24581.5527, ...], [49149.5508, 49153.5508, 49157.5508, ...], ], ] sum = 221197.968750 ggml_debug: ffn_moe_weights_norm-37 (view) = (f32) VIEW(ffn_moe_weights_norm-37{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1998], [ 16.1998], [ 32.1998], ], ] sum = 48.599464 ggml_debug: ffn_moe_weighted-37 = (f32) MUL(ffn_moe_down-37{6144, 3, 1, 1}, ffn_moe_weights_norm-37 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.4890, 3.5110, 7.5110, ...], [24575.5117, 24579.5117, 24583.5117, ...], [49151.5117, 49155.5117, 49159.5117, ...], ], ] sum = 221215.609375 ggml_debug: ffn_moe_out-37 = (f32) ADD(ffn_moe_out-37{6144, 3, 1, 1}, ffn_moe_weighted-37{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0130, 4.0130, 8.0130, ...], [24576.0137, 24580.0137, 24584.0137, ...], [49152.0117, 49156.0117, 49160.0117, ...], ], ] sum = 221220.125000 ggml_debug: ffn_moe_up-37 = (f32) MUL_MAT_ID(blk.37.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-37{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0756, 4.0756, 8.0756, ...], [43008.0742, 43012.0742, 43016.0742, ...], [86016.0781, 86020.0781, 86024.0781, ...], ], ] sum = 387108.687500 ggml_debug: ffn_moe_gate-37 = (f32) MUL_MAT_ID(blk.37.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-37{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.4796, 4.4796, 8.4796, ...], [43008.4805, 43012.4805, 43016.4805, ...], [86016.4766, 86020.4766, 86024.4766, ...], ], ] sum = 387112.281250 ggml_debug: ffn_moe_silu-37 = (f32) UNARY(ffn_moe_gate-37{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.2962, 4.2962, 8.2962, ...], [43008.2969, 43012.2969, 43016.2969, ...], [86016.2969, 86020.2969, 86024.2969, ...], ], ] sum = 387110.687500 ggml_debug: ffn_moe_gate_par-37 = (f32) MUL(ffn_moe_up-37{10752, 3, 1, 1}, ffn_moe_silu-37{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0224, 4.0224, 8.0224, ...], [43008.0234, 43012.0234, 43016.0234, ...], [86016.0234, 86020.0234, 86024.0234, ...], ], ] sum = 387108.218750 ggml_debug: ffn_moe_down-37 = (f32) MUL_MAT_ID(blk.37.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-37{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -1.2898, 2.7102, 6.7102, ...], [24574.7109, 24578.7109, 24582.7109, ...], [49150.7109, 49154.7109, 49158.7109, ...], ], ] sum = 221208.406250 ggml_debug: ffn_moe_weights_norm-37 (view) = (f32) VIEW(ffn_moe_weights_norm-37{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1685], [ 16.1685], [ 32.1685], ], ] sum = 48.505356 ggml_debug: ffn_moe_weighted-37 = (f32) MUL(ffn_moe_down-37{6144, 3, 1, 1}, ffn_moe_weights_norm-37 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2173, 3.7827, 7.7827, ...], [24575.7832, 24579.7832, 24583.7832, ...], [49151.7812, 49155.7812, 49159.7812, ...], ], ] sum = 221218.031250 ggml_debug: ffn_moe_out-37 = (f32) ADD(ffn_moe_out-37{6144, 3, 1, 1}, ffn_moe_weighted-37{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2042, 3.7958, 7.7958, ...], [24575.7949, 24579.7949, 24583.7949, ...], [49151.7969, 49155.7969, 49159.7969, ...], ], ] sum = 221218.171875 ggml_debug: ffn_inp-37 = (f32) ADD(kqv_out-37{6144, 3, 1, 1}, l_out-36{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 3.1699, 7.1699, 11.1699, ...], [24579.1699, 24583.1699, 24587.1699, ...], [49155.1680, 49159.1680, 49163.1680, ...], ], ] sum = 221248.531250 ggml_debug: l_out-37 = (f32) ADD(ffn_moe_out-37{6144, 3, 1, 1}, ffn_inp-37{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 2.9656, 6.9656, 10.9656, ...], [24578.9648, 24582.9648, 24586.9648, ...], [49154.9648, 49158.9648, 49162.9648, ...], ], ] sum = 221246.703125 ggml_debug: norm-38 = (f32) NORM(l_out-37{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.6362, 4.6362, 8.6362, ...], [24576.6367, 24580.6367, 24584.6367, ...], [49152.6367, 49156.6367, 49160.6367, ...], ], ] sum = 221225.734375 ggml_debug: attn_norm-38 = (f32) MUL(norm-38{6144, 3, 1, 1}, blk.38.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1976, 4.1976, 8.1976, ...], [24576.1973, 24580.1973, 24584.1973, ...], [49152.1992, 49156.1992, 49160.1992, ...], ], ] sum = 221221.781250 ggml_debug: wqkv-38 = (f32) MUL_MAT(blk.38.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-38{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ 0.0176, 4.0176, 8.0176, ...], [32768.0195, 32772.0195, 32776.0195, ...], [65536.0156, 65540.0156, 65544.0156, ...], ], ] sum = 294948.156250 ggml_debug: wqkv_clamped-38 = (f32) CLAMP(wqkv-38{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ 0.0176, 4.0176, 8.0176, ...], [32768.0195, 32772.0195, 32776.0195, ...], [65536.0156, 65540.0156, 65544.0156, ...], ], ] sum = 294948.156250 ggml_debug: wqkv_clamped-38 (view) = (f32) VIEW(wqkv_clamped-38{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0176, 4.0176, 8.0176, ...], [32768.0195, 32772.0195, 32776.0195, ...], [65536.0156, 65540.0156, 65544.0156, ...], ], ] sum = 294948.156250 ggml_debug: Qcur-38 = (f32) CONT(wqkv_clamped-38 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.0176, 4.0176, 8.0176, ...], [24576.0176, 24580.0176, 24584.0176, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], ] sum = 221220.156250 ggml_debug: Qcur-38 (reshaped) = (f32) RESHAPE(Qcur-38{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ 0.0176, 4.0176, 8.0176, ...], [512.0176, 516.0176, 520.0176, ...], [1024.0176, 1028.0176, 1032.0176, ...], ... ], [ [24576.0176, 24580.0176, 24584.0176, ...], [25088.0176, 25092.0176, 25096.0176, ...], [25600.0176, 25604.0176, 25608.0176, ...], ... ], [ [49152.0195, 49156.0195, 49160.0195, ...], [49664.0195, 49668.0195, 49672.0195, ...], [50176.0195, 50180.0195, 50184.0195, ...], ... ], ] sum = 677484.500000 ggml_debug: Qcur-38 = (f32) ROPE(Qcur-38 (reshaped){128, 48, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ 0.0176, 4.0176, 8.0176, ...], [512.0176, 516.0176, 520.0176, ...], [1024.0176, 1028.0176, 1032.0176, ...], ... ], [ [24576.0176, 24580.0176, 24584.0176, ...], [25088.0176, 25092.0176, 25096.0176, ...], [25600.0176, 25604.0176, 25608.0176, ...], ... ], [ [49152.0195, 49156.0195, 49160.0195, ...], [49664.0195, 49668.0195, 49672.0195, ...], [50176.0195, 50180.0195, 50184.0195, ...], ... ], ] sum = 677484.500000 ggml_debug: wqkv_clamped-38 (view) = (f32) VIEW(wqkv_clamped-38{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.1676, 4.1676, 8.1676, ...], [32768.1680, 32772.1680, 32776.1680, ...], [65536.1641, 65540.1641, 65544.1641, ...], ], ] sum = 294949.500000 ggml_debug: Kcur-38 = (f32) CONT(wqkv_clamped-38 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ 0.1676, 4.1676, 8.1676, ...], [4096.1675, 4100.1675, 4104.1675, ...], [8192.1680, 8196.1680, 8200.1680, ...], ], ] sum = 36901.507812 ggml_debug: Kcur-38 (reshaped) = (f32) RESHAPE(Kcur-38{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ 0.1676, 4.1676, 8.1676, ...], [512.1676, 516.1676, 520.1676, ...], [1024.1676, 1028.1676, 1032.1676, ...], ... ], [ [4096.1675, 4100.1675, 4104.1675, ...], [4608.1675, 4612.1675, 4616.1675, ...], [5120.1675, 5124.1675, 5128.1675, ...], ... ], [ [8192.1680, 8196.1680, 8200.1680, ...], [8704.1680, 8708.1680, 8712.1680, ...], [9216.1680, 9220.1680, 9224.1680, ...], ... ], ] sum = 124528.546875 ggml_debug: Kcur-38 = (f32) ROPE(Kcur-38 (reshaped){128, 8, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ 0.1676, 4.1676, 8.1676, ...], [512.1676, 516.1676, 520.1676, ...], [1024.1676, 1028.1676, 1032.1676, ...], ... ], [ [4096.1675, 4100.1675, 4104.1675, ...], [4608.1675, 4612.1675, 4616.1675, ...], [5120.1675, 5124.1675, 5128.1675, ...], ... ], [ [8192.1680, 8196.1680, 8200.1680, ...], [8704.1680, 8708.1680, 8712.1680, ...], [9216.1680, 9220.1680, 9224.1680, ...], ... ], ] sum = 124528.546875 ggml_debug: wqkv_clamped-38 (view) = (f32) VIEW(wqkv_clamped-38{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.2022, 3.7978, 7.7978, ...], [32767.7969, 32771.7969, 32775.7969, ...], [65535.7969, 65539.7969, 65543.7969, ...], ], ] sum = 294946.187500 ggml_debug: Vcur-38 = (f32) CONT(wqkv_clamped-38 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.2022, 3.7978, 7.7978, ...], [4095.7979, 4099.7979, 4103.7979, ...], [8191.7979, 8195.7979, 8199.7979, ...], ], ] sum = 36898.179688 ggml_debug: k_cache_view-38 = (f16) VIEW(cache_k_l38{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-38 (copy of Kcur-38) = (f16) CPY(Kcur-38{128, 8, 3, 1}, k_cache_view-38{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ 0.1676, 0.1678, 0.1681, ...], ], ] sum = 0.503540 ggml_debug: v_cur_t-38 = (f32) TRANSPOSE(Vcur-38{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.2022, 4095.7979, 8191.7979], [ 3.7978, 4099.7979, 8195.7979], [ 7.7978, 4103.7979, 8199.7979], ... ], ] sum = 36898.175781 ggml_debug: v_cache_view-38 = (f16) VIEW(cache_v_l38{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-38 (copy of v_cur_t-38) = (f16) CPY(v_cur_t-38{3, 1024, 1, 1}, v_cache_view-38{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.2021, -0.2024, -0.2026], [ -0.4043, -0.4048, -0.4053], [ -0.8086, -0.8096, -0.8105], ... ], ] sum = -4.250244 ggml_debug: v-38 = (f16) VIEW(cache_v_l38{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.2021, -0.2024, -0.2026, ...], [ -0.4043, -0.4048, -0.4053, ...], [ -0.8086, -0.8096, -0.8105, ...], ... ], [ [ -0.2021, -0.2024, -0.2026, ...], [ -0.4043, -0.4048, -0.4053, ...], [ -0.8086, -0.8096, -0.8105, ...], ... ], [ [ -0.2021, -0.2024, -0.2026, ...], [ -0.4043, -0.4048, -0.4053, ...], [ -0.8086, -0.8096, -0.8105, ...], ... ], ... ] sum = -12.750732 ggml_debug: k-38 = (f16) VIEW(cache_k_l38{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ 0.1676, 0.1678, 0.1681, ...], [ 0.6704, 0.6714, 0.6724, ...], [ 2.6816, 2.6855, 2.6895, ...], ... ], [ [ 0.1989, 0.1991, 0.1993, ...], [ 0.7954, 0.7964, 0.7974, ...], [ 3.1816, 3.1855, 3.1895, ...], ... ], [ [ 0.2301, 0.2303, 0.2306, ...], [ 0.9204, 0.9214, 0.9224, ...], [ 3.6816, 3.6855, 3.6895, ...], ... ], ... ] sum = 37.629272 ggml_debug: q-38 = (f32) PERMUTE(Qcur-38{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ 0.0176, 4.0176, 8.0176, ...], [24576.0176, 24580.0176, 24584.0176, ...], [49152.0195, 49156.0195, 49160.0195, ...], ], [ [512.0176, 516.0176, 520.0176, ...], [25088.0176, 25092.0176, 25096.0176, ...], [49664.0195, 49668.0195, 49672.0195, ...], ], [ [1024.0176, 1028.0176, 1032.0176, ...], [25600.0176, 25604.0176, 25608.0176, ...], [50176.0195, 50180.0195, 50184.0195, ...], ], ... ] sum = 677484.562500 ggml_debug: kq-38 = (f32) MUL_MAT(k-38{128, 32, 8, 1}, q-38{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ 5.0664, 9.0664, 13.0664, ...], [133.0664, 137.0664, 141.0664, ...], [261.0664, 265.0664, 269.0664, ...], ], [ [389.0664, 393.0664, 397.0664, ...], [517.0664, 521.0664, 525.0664, ...], [645.0664, 649.0664, 653.0664, ...], ], [ [773.0664, 777.0664, 781.0664, ...], [901.0664, 905.0664, 909.0664, ...], [1029.0664, 1033.0664, 1037.0664, ...], ], ... ] sum = 14068.792969 ggml_debug: kq_soft_max_ext-38 = (f32) SOFT_MAX(kq-38{32, 3, 48, 1}, CUDA2#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-38 = (f32) MUL_MAT(v-38{32, 128, 8, 1}, kq_soft_max_ext-38{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.2021, 3.7979, 7.7979, ...], [511.7979, 515.7979, 519.7979, ...], [1023.7979, 1027.7979, 1031.7979, ...], ], [ [1535.7979, 1539.7979, 1543.7979, ...], [2047.7979, 2051.7979, 2055.7979, ...], [2559.7979, 2563.7979, 2567.7979, ...], ], [ [3071.7979, 3075.7979, 3079.7979, ...], [3583.7979, 3587.7979, 3591.7979, ...], [4095.7979, 4099.7979, 4103.7979, ...], ], ... ] sum = 55398.531250 ggml_debug: kqv_merged-38 = (f32) PERMUTE(kqv-38{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.2021, 3.7979, 7.7979, ...], [1535.7979, 1539.7979, 1543.7979, ...], [3071.7979, 3075.7979, 3079.7979, ...], ... ], [ [511.7979, 515.7979, 519.7979, ...], [2047.7979, 2051.7979, 2055.7979, ...], [3583.7979, 3587.7979, 3591.7979, ...], ... ], [ [1023.7979, 1027.7979, 1031.7979, ...], [2559.7979, 2563.7979, 2567.7979, ...], [4095.7979, 4099.7979, 4103.7979, ...], ... ], ] sum = 55398.527344 ggml_debug: kqv_merged_cont-38 = (f32) CONT(kqv_merged-38{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.2021, 3.7979, 7.7979, ...], [24575.7969, 24579.7969, 24583.7969, ...], [49151.7969, 49155.7969, 49159.7969, ...], ], ] sum = 221218.171875 ggml_debug: kqv_out-38 = (f32) MUL_MAT(blk.38.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-38{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2736, 4.2736, 8.2736, ...], [24576.2734, 24580.2734, 24584.2734, ...], [49152.2734, 49156.2734, 49160.2734, ...], ], ] sum = 221222.468750 ggml_debug: norm-38 = (f32) NORM(kqv_out-38{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.3383, 4.3383, 8.3383, ...], [24576.3379, 24580.3379, 24584.3379, ...], [49152.3398, 49156.3398, 49160.3398, ...], ], ] sum = 221223.062500 ggml_debug: attn_out_norm-38 = (f32) MUL(norm-38{6144, 3, 1, 1}, blk.38.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2445, 4.2445, 8.2445, ...], [24576.2441, 24580.2441, 24584.2441, ...], [49152.2461, 49156.2461, 49160.2461, ...], ], ] sum = 221222.218750 ggml_debug: ffn_moe_logits-38 = (f32) MUL_MAT(blk.38.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-38{6144, 3, 1, 1}}) = {16, 3, 1, 1} [ [ [ -0.9321, 3.0679, 7.0679, ...], [ 63.0679, 67.0679, 71.0679, ...], [127.0679, 131.0679, 135.0679, ...], ], ] sum = 603.610840 ggml_debug: ffn_moe_probs-38 = (f32) SOFT_MAX(ffn_moe_logits-38{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 0.0203, 4.0203, 8.0203, ...], [ 64.0203, 68.0203, 72.0203, ...], [128.0203, 132.0203, 136.0203, ...], ], ] sum = 612.182617 ggml_debug: ffn_moe_argsort-38 = (i32) ARGSORT(ffn_moe_probs-38{16, 3, 1, 1}, }) = {16, 3, 1, 1} [ [ [ 6.0000, 10.0000, 14.0000, ...], [ 70.0000, 74.0000, 78.0000, ...], [134.0000, 138.0000, 142.0000, ...], ], ] sum = 666.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-38{16, 3, 1, 1}, }) = {4, 3, 1, 1} [ [ [ 6.0000, 10.0000, 14.0000, ...], [ 70.0000, 74.0000, 78.0000, ...], [134.0000, 138.0000, 142.0000, ...], ], ] sum = 666.000000 ggml_debug: ffn_moe_up-38 = (f32) MUL_MAT_ID(blk.38.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-38{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.5133, 3.4867, 7.4867, ...], [43007.4883, 43011.4883, 43015.4883, ...], [86015.4844, 86019.4844, 86023.4844, ...], ], ] sum = 387103.375000 ggml_debug: ffn_moe_gate-38 = (f32) MUL_MAT_ID(blk.38.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-38{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 1.4209, 5.4209, 9.4209, ...], [43009.4219, 43013.4219, 43017.4219, ...], [86017.4219, 86021.4219, 86025.4219, ...], ], ] sum = 387120.812500 ggml_debug: ffn_moe_silu-38 = (f32) UNARY(ffn_moe_gate-38{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 1.1445, 5.1445, 9.1445, ...], [43009.1445, 43013.1445, 43017.1445, ...], [86017.1406, 86021.1406, 86025.1406, ...], ], ] sum = 387118.250000 ggml_debug: ffn_moe_gate_par-38 = (f32) MUL(ffn_moe_up-38{10752, 3, 1, 1}, ffn_moe_silu-38{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.5875, 3.4125, 7.4125, ...], [43007.4141, 43011.4141, 43015.4141, ...], [86015.4141, 86019.4141, 86023.4141, ...], ], ] sum = 387102.718750 ggml_debug: ffn_moe_down-38 = (f32) MUL_MAT_ID(blk.38.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-38{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.6985, 4.6985, 8.6985, ...], [24576.6992, 24580.6992, 24584.6992, ...], [49152.6992, 49156.6992, 49160.6992, ...], ], ] sum = 221226.296875 ggml_debug: ffn_moe_probs-38 (reshaped) = (f32) RESHAPE(ffn_moe_probs-38{16, 3, 1, 1}, }) = {1, 16, 3, 1} [ [ [ 0.0203], [ 4.0203], [ 8.0203], ... ], [ [ 64.0203], [ 68.0203], [ 72.0203], ... ], [ [128.0203], [132.0203], [136.0203], ... ], ] sum = 612.182617 ggml_debug: ffn_moe_weights-38 = (f32) GET_ROWS(ffn_moe_probs-38 (reshaped){1, 16, 3, 1}, (view){4, 3, 1, 1}}) = {1, 4, 3, 1} [ [ [ 0.1376], [ 4.1376], [ 8.1376], ... ], [ [ 16.1376], [ 20.1376], [ 24.1376], ... ], [ [ 32.1376], [ 36.1376], [ 40.1376], ... ], ] sum = 181.238770 ggml_debug: ffn_moe_weights-38 (reshaped) = (f32) RESHAPE(ffn_moe_weights-38{1, 4, 3, 1}, }) = {4, 3, 1, 1} [ [ [ 0.1376, 4.1376, 8.1376, ...], [ 16.1376, 20.1376, 24.1376, ...], [ 32.1376, 36.1376, 40.1376, ...], ], ] sum = 181.238770 ggml_debug: ffn_moe_weights_sum-38 = (f32) SUM_ROWS(ffn_moe_weights-38 (reshaped){4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.4336], [ 4.4336], [ 8.4336], ], ] sum = 13.300925 ggml_debug: ffn_moe_weights_norm-38 = (f32) DIV(ffn_moe_weights-38 (reshaped){4, 3, 1, 1}, ffn_moe_weights_sum-38{1, 3, 1, 1}}) = {4, 3, 1, 1} [ [ [ 0.3174, 4.3174, 8.3174, ...], [ 16.3174, 20.3174, 24.3174, ...], [ 32.3174, 36.3174, 40.3174, ...], ], ] sum = 182.856705 ggml_debug: ffn_moe_weights_norm-38 (view) = (f32) VIEW(ffn_moe_weights_norm-38{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3174], [ 16.3174], [ 32.3174], ], ] sum = 48.952229 ggml_debug: ffn_moe_weighted-38 = (f32) MUL(ffn_moe_down-38{6144, 3, 1, 1}, ffn_moe_weights_norm-38 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.2217, 4.2217, 8.2217, ...], [24576.2227, 24580.2227, 24584.2227, ...], [49152.2227, 49156.2227, 49160.2227, ...], ], ] sum = 221221.984375 ggml_debug: ffn_moe_up-38 = (f32) MUL_MAT_ID(blk.38.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-38{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.3720, 4.3720, 8.3720, ...], [43008.3711, 43012.3711, 43016.3711, ...], [86016.3750, 86020.3750, 86024.3750, ...], ], ] sum = 387111.375000 ggml_debug: ffn_moe_gate-38 = (f32) MUL_MAT_ID(blk.38.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-38{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.8963, 4.8963, 8.8963, ...], [43008.8945, 43012.8945, 43016.8945, ...], [86016.8984, 86020.8984, 86024.8984, ...], ], ] sum = 387116.093750 ggml_debug: ffn_moe_silu-38 = (f32) UNARY(ffn_moe_gate-38{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.6365, 4.6365, 8.6365, ...], [43008.6367, 43012.6367, 43016.6367, ...], [86016.6328, 86020.6328, 86024.6328, ...], ], ] sum = 387113.718750 ggml_debug: ffn_moe_gate_par-38 = (f32) MUL(ffn_moe_up-38{10752, 3, 1, 1}, ffn_moe_silu-38{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.2368, 4.2368, 8.2368, ...], [43008.2383, 43012.2383, 43016.2383, ...], [86016.2344, 86020.2344, 86024.2344, ...], ], ] sum = 387110.125000 ggml_debug: ffn_moe_down-38 = (f32) MUL_MAT_ID(blk.38.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-38{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.7466, 3.2534, 7.2534, ...], [24575.2539, 24579.2539, 24583.2539, ...], [49151.2539, 49155.2539, 49159.2539, ...], ], ] sum = 221213.265625 ggml_debug: ffn_moe_weights_norm-38 (view) = (f32) VIEW(ffn_moe_weights_norm-38{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.3081], [ 16.3081], [ 32.3081], ], ] sum = 48.924286 ggml_debug: ffn_moe_weighted-38 = (f32) MUL(ffn_moe_down-38{6144, 3, 1, 1}, ffn_moe_weights_norm-38 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.2300, 3.7700, 7.7700, ...], [24575.7695, 24579.7695, 24583.7695, ...], [49151.7695, 49155.7695, 49159.7695, ...], ], ] sum = 221217.921875 ggml_debug: ffn_moe_out-38 = (f32) ADD(ffn_moe_weighted-38{6144, 3, 1, 1}, ffn_moe_weighted-38{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0083, 3.9917, 7.9917, ...], [24575.9922, 24579.9922, 24583.9922, ...], [49151.9922, 49155.9922, 49159.9922, ...], ], ] sum = 221219.937500 ggml_debug: ffn_moe_up-38 = (f32) MUL_MAT_ID(blk.38.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-38{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.6130, 4.6130, 8.6130, ...], [43008.6133, 43012.6133, 43016.6133, ...], [86016.6094, 86020.6094, 86024.6094, ...], ], ] sum = 387113.500000 ggml_debug: ffn_moe_gate-38 = (f32) MUL_MAT_ID(blk.38.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-38{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0820, 4.0820, 8.0820, ...], [43008.0820, 43012.0820, 43016.0820, ...], [86016.0781, 86020.0781, 86024.0781, ...], ], ] sum = 387108.687500 ggml_debug: ffn_moe_silu-38 = (f32) UNARY(ffn_moe_gate-38{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.0427, 4.0427, 8.0427, ...], [43008.0430, 43012.0430, 43016.0430, ...], [86016.0391, 86020.0391, 86024.0391, ...], ], ] sum = 387108.375000 ggml_debug: ffn_moe_gate_par-38 = (f32) MUL(ffn_moe_up-38{10752, 3, 1, 1}, ffn_moe_silu-38{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.0261, 4.0261, 8.0261, ...], [43008.0273, 43012.0273, 43016.0273, ...], [86016.0234, 86020.0234, 86024.0234, ...], ], ] sum = 387108.250000 ggml_debug: ffn_moe_down-38 = (f32) MUL_MAT_ID(blk.38.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-38{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.4155, 3.5845, 7.5845, ...], [24575.5840, 24579.5840, 24583.5840, ...], [49151.5859, 49155.5859, 49159.5859, ...], ], ] sum = 221216.281250 ggml_debug: ffn_moe_weights_norm-38 (view) = (f32) VIEW(ffn_moe_weights_norm-38{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1894], [ 16.1894], [ 32.1894], ], ] sum = 48.568188 ggml_debug: ffn_moe_weighted-38 = (f32) MUL(ffn_moe_down-38{6144, 3, 1, 1}, ffn_moe_weights_norm-38 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0787, 3.9213, 7.9213, ...], [24575.9219, 24579.9219, 24583.9219, ...], [49151.9219, 49155.9219, 49159.9219, ...], ], ] sum = 221219.296875 ggml_debug: ffn_moe_out-38 = (f32) ADD(ffn_moe_out-38{6144, 3, 1, 1}, ffn_moe_weighted-38{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ -0.0870, 3.9130, 7.9130, ...], [24575.9121, 24579.9121, 24583.9121, ...], [49151.9141, 49155.9141, 49159.9141, ...], ], ] sum = 221219.218750 ggml_debug: ffn_moe_up-38 = (f32) MUL_MAT_ID(blk.38.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-38{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.3904, 3.6096, 7.6096, ...], [43007.6094, 43011.6094, 43015.6094, ...], [86015.6094, 86019.6094, 86023.6094, ...], ], ] sum = 387104.500000 ggml_debug: ffn_moe_gate-38 = (f32) MUL_MAT_ID(blk.38.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-38{6144, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ 0.7866, 4.7866, 8.7866, ...], [43008.7852, 43012.7852, 43016.7852, ...], [86016.7891, 86020.7891, 86024.7891, ...], ], ] sum = 387115.062500 ggml_debug: ffn_moe_silu-38 = (f32) UNARY(ffn_moe_gate-38{10752, 3, 1, 1}, }) = {10752, 3, 1, 1} [ [ [ 0.5405, 4.5405, 8.5405, ...], [43008.5391, 43012.5391, 43016.5391, ...], [86016.5391, 86020.5391, 86024.5391, ...], ], ] sum = 387112.843750 ggml_debug: ffn_moe_gate_par-38 = (f32) MUL(ffn_moe_up-38{10752, 3, 1, 1}, ffn_moe_silu-38{10752, 3, 1, 1}}) = {10752, 3, 1, 1} [ [ [ -0.2110, 3.7890, 7.7890, ...], [43007.7891, 43011.7891, 43015.7891, ...], [86015.7891, 86019.7891, 86023.7891, ...], ], ] sum = 387106.093750 ggml_debug: ffn_moe_down-38 = (f32) MUL_MAT_ID(blk.38.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-38{10752, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.4837, 4.4837, 8.4837, ...], [24576.4844, 24580.4844, 24584.4844, ...], [49152.4844, 49156.4844, 49160.4844, ...], ], ] sum = 221224.359375 ggml_debug: ffn_moe_weights_norm-38 (view) = (f32) VIEW(ffn_moe_weights_norm-38{4, 3, 1, 1}, }) = {1, 3, 1, 1} [ [ [ 0.1851], [ 16.1851], [ 32.1851], ], ] sum = 48.555294 ggml_debug: ffn_moe_weighted-38 = (f32) MUL(ffn_moe_down-38{6144, 3, 1, 1}, ffn_moe_weights_norm-38 (view){1, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0895, 4.0895, 8.0895, ...], [24576.0898, 24580.0898, 24584.0898, ...], [49152.0898, 49156.0898, 49160.0898, ...], ], ] sum = 221220.812500 ggml_debug: ffn_moe_out-38 = (f32) ADD(ffn_moe_out-38{6144, 3, 1, 1}, ffn_moe_weighted-38{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.0025, 4.0025, 8.0025, ...], [24576.0020, 24580.0020, 24584.0020, ...], [49152.0039, 49156.0039, 49160.0039, ...], ], ] sum = 221220.015625 ggml_debug: ffn_inp-38 = (f32) ADD(kqv_out-38{6144, 3, 1, 1}, l_out-37{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 3.2392, 7.2392, 11.2392, ...], [24579.2383, 24583.2383, 24587.2383, ...], [49155.2383, 49159.2383, 49163.2383, ...], ], ] sum = 221249.140625 ggml_debug: l_out-38 = (f32) ADD(ffn_moe_out-38{6144, 3, 1, 1}, ffn_inp-38{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 3.2417, 7.2417, 11.2417, ...], [24579.2422, 24583.2422, 24587.2422, ...], [49155.2422, 49159.2422, 49163.2422, ...], ], ] sum = 221249.187500 ggml_debug: norm-39 = (f32) NORM(l_out-38{6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ 0.7301, 4.7301, 8.7301, ...], [24576.7305, 24580.7305, 24584.7305, ...], [49152.7305, 49156.7305, 49160.7305, ...], ], ] sum = 221226.578125 ggml_debug: attn_norm-39 = (f32) MUL(norm-39{6144, 3, 1, 1}, blk.39.attn_norm.weight{6144, 1, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.1041, 4.1041, 8.1041, ...], [24576.1035, 24580.1035, 24584.1035, ...], [49152.1055, 49156.1055, 49160.1055, ...], ], ] sum = 221220.953125 ggml_debug: wqkv-39 = (f32) MUL_MAT(blk.39.attn_qkv.weight{6144, 8192, 1, 1}, attn_norm-39{6144, 3, 1, 1}}) = {8192, 3, 1, 1} [ [ [ -0.0918, 3.9082, 7.9082, ...], [32767.9082, 32771.9062, 32775.9062, ...], [65535.9062, 65539.9062, 65543.9062, ...], ], ] sum = 294947.156250 ggml_debug: wqkv_clamped-39 = (f32) CLAMP(wqkv-39{8192, 3, 1, 1}, }) = {8192, 3, 1, 1} [ [ [ -0.0918, 3.9082, 7.9082, ...], [32767.9082, 32771.9062, 32775.9062, ...], [65535.9062, 65539.9062, 65543.9062, ...], ], ] sum = 294947.156250 ggml_debug: wqkv_clamped-39 (view) = (f32) VIEW(wqkv_clamped-39{8192, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0918, 3.9082, 7.9082, ...], [32767.9082, 32771.9062, 32775.9062, ...], [65535.9062, 65539.9062, 65543.9062, ...], ], ] sum = 294947.156250 ggml_debug: Qcur-39 = (f32) CONT(wqkv_clamped-39 (view){6144, 3, 1, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.0918, 3.9082, 7.9082, ...], [24575.9082, 24579.9082, 24583.9082, ...], [49151.9062, 49155.9062, 49159.9062, ...], ], ] sum = 221219.156250 ggml_debug: Qcur-39 (reshaped) = (f32) RESHAPE(Qcur-39{6144, 3, 1, 1}, }) = {128, 48, 3, 1} [ [ [ -0.0918, 3.9082, 7.9082, ...], [511.9082, 515.9082, 519.9082, ...], [1023.9082, 1027.9082, 1031.9082, ...], ... ], [ [24575.9082, 24579.9082, 24583.9082, ...], [25087.9082, 25091.9082, 25095.9082, ...], [25599.9082, 25603.9082, 25607.9082, ...], ... ], [ [49151.9062, 49155.9062, 49159.9062, ...], [49663.9062, 49667.9062, 49671.9062, ...], [50175.9062, 50179.9062, 50183.9062, ...], ... ], ] sum = 677481.375000 ggml_debug: Qcur-39 = (f32) ROPE(Qcur-39 (reshaped){128, 48, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 48, 3, 1} [ [ [ -0.0918, 3.9082, 7.9082, ...], [511.9082, 515.9082, 519.9082, ...], [1023.9082, 1027.9082, 1031.9082, ...], ... ], [ [24575.9082, 24579.9082, 24583.9082, ...], [25087.9082, 25091.9082, 25095.9082, ...], [25599.9082, 25603.9082, 25607.9082, ...], ... ], [ [49151.9062, 49155.9062, 49159.9062, ...], [49663.9062, 49667.9062, 49671.9062, ...], [50175.9062, 50179.9062, 50183.9062, ...], ... ], ] sum = 677481.375000 ggml_debug: wqkv_clamped-39 (view) = (f32) VIEW(wqkv_clamped-39{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.8865, 3.1135, 7.1135, ...], [32767.1133, 32771.1133, 32775.1133, ...], [65535.1133, 65539.1172, 65543.1172, ...], ], ] sum = 294940.031250 ggml_debug: Kcur-39 = (f32) CONT(wqkv_clamped-39 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.8865, 3.1135, 7.1135, ...], [4095.1135, 4099.1138, 4103.1138, ...], [8191.1138, 8195.1133, 8199.1133, ...], ], ] sum = 36892.023438 ggml_debug: Kcur-39 (reshaped) = (f32) RESHAPE(Kcur-39{1024, 3, 1, 1}, }) = {128, 8, 3, 1} [ [ [ -0.8865, 3.1135, 7.1135, ...], [511.1135, 515.1135, 519.1135, ...], [1023.1135, 1027.1135, 1031.1135, ...], ... ], [ [4095.1135, 4099.1138, 4103.1138, ...], [4607.1138, 4611.1138, 4615.1138, ...], [5119.1138, 5123.1138, 5127.1138, ...], ... ], [ [8191.1138, 8195.1133, 8199.1133, ...], [8703.1133, 8707.1133, 8711.1133, ...], [9215.1133, 9219.1133, 9223.1133, ...], ... ], ] sum = 124500.046875 ggml_debug: Kcur-39 = (f32) ROPE(Kcur-39 (reshaped){128, 8, 3, 1}, CUDA2#inp_pos#0{3, 1, 1, 1}}) = {128, 8, 3, 1} [ [ [ -0.8865, 3.1135, 7.1135, ...], [511.1135, 515.1135, 519.1135, ...], [1023.1135, 1027.1135, 1031.1135, ...], ... ], [ [4095.1135, 4099.1138, 4103.1138, ...], [4607.1138, 4611.1138, 4615.1138, ...], [5119.1138, 5123.1138, 5127.1138, ...], ... ], [ [8191.1138, 8195.1133, 8199.1133, ...], [8703.1133, 8707.1133, 8711.1133, ...], [9215.1133, 9219.1133, 9223.1133, ...], ... ], ] sum = 124500.046875 ggml_debug: wqkv_clamped-39 (view) = (f32) VIEW(wqkv_clamped-39{8192, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.1648, 3.8352, 7.8352, ...], [32767.8359, 32771.8359, 32775.8359, ...], [65535.8359, 65539.8359, 65543.8359, ...], ], ] sum = 294946.531250 ggml_debug: Vcur-39 = (f32) CONT(wqkv_clamped-39 (view){1024, 3, 1, 1}, }) = {1024, 3, 1, 1} [ [ [ -0.1648, 3.8352, 7.8352, ...], [4095.8352, 4099.8350, 4103.8350, ...], [8191.8350, 8195.8350, 8199.8350, ...], ], ] sum = 36898.515625 ggml_debug: k_cache_view-39 = (f16) VIEW(cache_k_l39{524288, 1, 1, 1}, }) = {3072, 1, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000, ...], ], ] sum = 0.000000 ggml_debug: k_cache_view-39 (copy of Kcur-39) = (f16) CPY(Kcur-39{128, 8, 3, 1}, k_cache_view-39{3072, 1, 1, 1}}) = {3072, 1, 1, 1} [ [ [ -0.8862, -0.8872, -0.8882, ...], ], ] sum = -2.661621 ggml_debug: v_cur_t-39 = (f32) TRANSPOSE(Vcur-39{1024, 3, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ -0.1648, 4095.8352, 8191.8350], [ 3.8352, 4099.8350, 8195.8350], [ 7.8352, 4103.8350, 8199.8350], ... ], ] sum = 36898.519531 ggml_debug: v_cache_view-39 = (f16) VIEW(cache_v_l39{524288, 1, 1, 1}, }) = {3, 1024, 1, 1} [ [ [ 0.0000, 0.0000, 0.0000], [ 0.0001, 0.0001, 0.0001], [ 0.0001, 0.0001, 0.0001], ... ], ] sum = 0.000551 ggml_debug: v_cache_view-39 (copy of v_cur_t-39) = (f16) CPY(v_cur_t-39{3, 1024, 1, 1}, v_cache_view-39{3, 1024, 1, 1}}) = {3, 1024, 1, 1} [ [ [ -0.1648, -0.1650, -0.1653], [ -0.3296, -0.3301, -0.3306], [ -0.6592, -0.6602, -0.6611], ... ], ] sum = -3.465820 ggml_debug: v-39 = (f16) VIEW(cache_v_l39{524288, 1, 1, 1}, }) = {32, 128, 8, 1} [ [ [ -0.1648, -0.1650, -0.1653, ...], [ -0.3296, -0.3301, -0.3306, ...], [ -0.6592, -0.6602, -0.6611, ...], ... ], [ [ -0.1648, -0.1650, -0.1653, ...], [ -0.3296, -0.3301, -0.3306, ...], [ -0.6592, -0.6602, -0.6611, ...], ... ], [ [ -0.1648, -0.1650, -0.1653, ...], [ -0.3296, -0.3301, -0.3306, ...], [ -0.6592, -0.6602, -0.6611, ...], ... ], ... ] sum = -10.397461 ggml_debug: k-39 = (f16) VIEW(cache_k_l39{524288, 1, 1, 1}, }) = {128, 32, 8, 1} [ [ [ -0.8862, -0.8872, -0.8882, ...], [ -3.5449, -3.5488, -3.5527, ...], [-14.1797, -14.1953, -14.2109, ...], ... ], [ [ -1.0225, -1.0244, -1.0264, ...], [ -4.0898, -4.0977, -4.1055, ...], [-16.3594, -16.3906, -16.4219, ...], ... ], [ [ -1.2725, -1.2744, -1.2764, ...], [ -5.0898, -5.0977, -5.1055, ...], [-20.3594, -20.3906, -20.4219, ...], ... ], ... ] sum = -200.720215 ggml_debug: q-39 = (f32) PERMUTE(Qcur-39{128, 48, 3, 1}, }) = {128, 3, 48, 1} [ [ [ -0.0918, 3.9082, 7.9082, ...], [24575.9082, 24579.9082, 24583.9082, ...], [49151.9062, 49155.9062, 49159.9062, ...], ], [ [511.9082, 515.9082, 519.9082, ...], [25087.9082, 25091.9082, 25095.9082, ...], [49663.9062, 49667.9062, 49671.9062, ...], ], [ [1023.9082, 1027.9082, 1031.9082, ...], [25599.9082, 25603.9082, 25607.9082, ...], [50175.9062, 50179.9062, 50183.9062, ...], ], ... ] sum = 677481.375000 ggml_debug: kq-39 = (f32) MUL_MAT(k-39{128, 32, 8, 1}, q-39{128, 3, 48, 1}}) = {32, 3, 48, 1} [ [ [ -6.7812, -2.7812, 1.2188, ...], [121.2188, 125.2188, 129.2188, ...], [249.2188, 253.2188, 257.2188, ...], ], [ [377.2188, 381.2188, 385.2188, ...], [505.2188, 509.2188, 513.2188, ...], [633.2188, 637.2188, 641.2188, ...], ], [ [761.2188, 765.2188, 769.2188, ...], [889.2188, 893.2188, 897.2188, ...], [1017.2188, 1021.2188, 1025.2188, ...], ], ... ] sum = 13748.906250 ggml_debug: kq_soft_max_ext-39 = (f32) SOFT_MAX(kq-39{32, 3, 48, 1}, CUDA2#KQ_mask#0{32, 3, 1, 1}}) = {32, 3, 48, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], [129.0000, 133.0000, 137.0000, ...], [257.0000, 261.0000, 265.0000, ...], ], [ [385.0000, 389.0000, 393.0000, ...], [513.0000, 517.0000, 521.0000, ...], [641.0000, 645.0000, 649.0000, ...], ], [ [769.0000, 773.0000, 777.0000, ...], [897.0000, 901.0000, 905.0000, ...], [1025.0000, 1029.0000, 1033.0000, ...], ], ... ] sum = 13959.000000 ggml_debug: kqv-39 = (f32) MUL_MAT(v-39{32, 128, 8, 1}, kq_soft_max_ext-39{32, 3, 48, 1}}) = {128, 3, 48, 1} [ [ [ -0.1648, 3.8352, 7.8352, ...], [511.8352, 515.8352, 519.8352, ...], [1023.8352, 1027.8352, 1031.8352, ...], ], [ [1535.8352, 1539.8352, 1543.8352, ...], [2047.8352, 2051.8352, 2055.8352, ...], [2559.8352, 2563.8352, 2567.8352, ...], ], [ [3071.8352, 3075.8352, 3079.8352, ...], [3583.8352, 3587.8352, 3591.8352, ...], [4095.8352, 4099.8350, 4103.8350, ...], ], ... ] sum = 55399.554688 ggml_debug: kqv_merged-39 = (f32) PERMUTE(kqv-39{128, 3, 48, 1}, }) = {128, 48, 3, 1} [ [ [ -0.1648, 3.8352, 7.8352, ...], [1535.8352, 1539.8352, 1543.8352, ...], [3071.8352, 3075.8352, 3079.8352, ...], ... ], [ [511.8352, 515.8352, 519.8352, ...], [2047.8352, 2051.8352, 2055.8352, ...], [3583.8352, 3587.8352, 3591.8352, ...], ... ], [ [1023.8352, 1027.8352, 1031.8352, ...], [2559.8352, 2563.8352, 2567.8352, ...], [4095.8352, 4099.8350, 4103.8350, ...], ... ], ] sum = 55399.558594 ggml_debug: kqv_merged_cont-39 = (f32) CONT(kqv_merged-39{128, 48, 3, 1}, }) = {6144, 3, 1, 1} [ [ [ -0.1648, 3.8352, 7.8352, ...], [24575.8359, 24579.8359, 24583.8359, ...], [49151.8359, 49155.8359, 49159.8359, ...], ], ] sum = 221218.531250 ggml_debug: kqv_out-39 = (f32) MUL_MAT(blk.39.attn_output.weight{6144, 6144, 1, 1}, kqv_merged_cont-39{6144, 3, 1, 1}}) = {6144, 3, 1, 1} [ [ [ 0.9227, 4.9227, 8.9227, ...], [24576.9219, 24580.9219, 24584.9219, ...], [49152.9219, 49156.9219, 49160.9219, ...], ], ] sum = 221228.296875 ggml_debug: node_2837 = (f32) GET_ROWS(kqv_out-39{6144, 3, 1, 1}, CUDA2#inp_out_ids#0{1, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ 1.1036, 5.1036, 9.1036, ...], ], ] sum = 15.310773 ggml_debug: norm-39 = (f32) NORM(node_2837{6144, 1, 1, 1}, }) = {6144, 1, 1, 1} [ [ [ 0.9503, 4.9503, 8.9503, ...], ], ] sum = 14.850772 ggml_debug: attn_out_norm-39 = (f32) MUL(norm-39{6144, 1, 1, 1}, blk.39.attn_output_norm.weight{6144, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ 0.2524, 4.2524, 8.2524, ...], ], ] sum = 12.757236 ggml_debug: ffn_moe_logits-39 = (f32) MUL_MAT(blk.39.ffn_gate_inp.weight{6144, 16, 1, 1}, attn_out_norm-39{6144, 1, 1, 1}}) = {16, 1, 1, 1} [ [ [ 0.3570, 4.3570, 8.3570, ...], ], ] sum = 13.070887 ggml_debug: ffn_moe_probs-39 = (f32) SOFT_MAX(ffn_moe_logits-39{16, 1, 1, 1}, }) = {16, 1, 1, 1} [ [ [ 0.0823, 4.0823, 8.0823, ...], ], ] sum = 12.246909 ggml_debug: ffn_moe_argsort-39 = (i32) ARGSORT(ffn_moe_probs-39{16, 1, 1, 1}, }) = {16, 1, 1, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], ], ] sum = 15.000000 ggml_debug: (view) = (i32) VIEW(ffn_moe_argsort-39{16, 1, 1, 1}, }) = {4, 1, 1, 1} [ [ [ 1.0000, 5.0000, 9.0000, ...], ], ] sum = 15.000000 ggml_debug: ffn_moe_up-39 = (f32) MUL_MAT_ID(blk.39.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-39{6144, 1, 1, 1}}) = {10752, 1, 1, 1} [ [ [ -0.3611, 3.6389, 7.6389, ...], ], ] sum = 10.916842 ggml_debug: ffn_moe_gate-39 = (f32) MUL_MAT_ID(blk.39.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-39{6144, 1, 1, 1}}) = {10752, 1, 1, 1} [ [ [ 0.2834, 4.2834, 8.2834, ...], ], ] sum = 12.850127 ggml_debug: ffn_moe_silu-39 = (f32) UNARY(ffn_moe_gate-39{10752, 1, 1, 1}, }) = {10752, 1, 1, 1} [ [ [ 0.1616, 4.1616, 8.1616, ...], ], ] sum = 12.484891 ggml_debug: ffn_moe_gate_par-39 = (f32) MUL(ffn_moe_up-39{10752, 1, 1, 1}, ffn_moe_silu-39{10752, 1, 1, 1}}) = {10752, 1, 1, 1} [ [ [ -0.0584, 3.9416, 7.9416, ...], ], ] sum = 11.824929 ggml_debug: ffn_moe_down-39 = (f32) MUL_MAT_ID(blk.39.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-39{10752, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ 0.6239, 4.6239, 8.6239, ...], ], ] sum = 13.871774 ggml_debug: ffn_moe_probs-39 (reshaped) = (f32) RESHAPE(ffn_moe_probs-39{16, 1, 1, 1}, }) = {1, 16, 1, 1} [ [ [ 0.0823], [ 4.0823], [ 8.0823], ... ], ] sum = 12.246909 ggml_debug: ffn_moe_weights-39 = (f32) GET_ROWS(ffn_moe_probs-39 (reshaped){1, 16, 1, 1}, (view){4, 1, 1, 1}}) = {1, 4, 1, 1} [ [ [ 0.1874], [ 4.1874], [ 8.1874], ... ], ] sum = 12.562168 ggml_debug: ffn_moe_weights-39 (reshaped) = (f32) RESHAPE(ffn_moe_weights-39{1, 4, 1, 1}, }) = {4, 1, 1, 1} [ [ [ 0.1874, 4.1874, 8.1874, ...], ], ] sum = 12.562168 ggml_debug: ffn_moe_weights_sum-39 = (f32) SUM_ROWS(ffn_moe_weights-39 (reshaped){4, 1, 1, 1}, }) = {1, 1, 1, 1} [ [ [ 0.4304], ], ] sum = 0.430443 ggml_debug: ffn_moe_weights_norm-39 = (f32) DIV(ffn_moe_weights-39 (reshaped){4, 1, 1, 1}, ffn_moe_weights_sum-39{1, 1, 1, 1}}) = {4, 1, 1, 1} [ [ [ 0.4353, 4.4353, 8.4353, ...], ], ] sum = 13.306021 ggml_debug: ffn_moe_weights_norm-39 (view) = (f32) VIEW(ffn_moe_weights_norm-39{4, 1, 1, 1}, }) = {1, 1, 1, 1} [ [ [ 0.4353], ], ] sum = 0.435340 ggml_debug: ffn_moe_weighted-39 = (f32) MUL(ffn_moe_down-39{6144, 1, 1, 1}, ffn_moe_weights_norm-39 (view){1, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ 0.2716, 4.2716, 8.2716, ...], ], ] sum = 12.814858 ggml_debug: ffn_moe_up-39 = (f32) MUL_MAT_ID(blk.39.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-39{6144, 1, 1, 1}}) = {10752, 1, 1, 1} [ [ [ -0.7513, 3.2487, 7.2487, ...], ], ] sum = 9.745995 ggml_debug: ffn_moe_gate-39 = (f32) MUL_MAT_ID(blk.39.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-39{6144, 1, 1, 1}}) = {10752, 1, 1, 1} [ [ [ 0.5736, 4.5736, 8.5736, ...], ], ] sum = 13.720694 ggml_debug: ffn_moe_silu-39 = (f32) UNARY(ffn_moe_gate-39{10752, 1, 1, 1}, }) = {10752, 1, 1, 1} [ [ [ 0.3668, 4.3668, 8.3668, ...], ], ] sum = 13.100530 ggml_debug: ffn_moe_gate_par-39 = (f32) MUL(ffn_moe_up-39{10752, 1, 1, 1}, ffn_moe_silu-39{10752, 1, 1, 1}}) = {10752, 1, 1, 1} [ [ [ -0.2756, 3.7244, 7.7244, ...], ], ] sum = 11.173133 ggml_debug: ffn_moe_down-39 = (f32) MUL_MAT_ID(blk.39.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-39{10752, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ 0.4760, 4.4760, 8.4760, ...], ], ] sum = 13.428031 ggml_debug: ffn_moe_weights_norm-39 (view) = (f32) VIEW(ffn_moe_weights_norm-39{4, 1, 1, 1}, }) = {1, 1, 1, 1} [ [ [ 0.2016], ], ] sum = 0.201589 ggml_debug: ffn_moe_weighted-39 = (f32) MUL(ffn_moe_down-39{6144, 1, 1, 1}, ffn_moe_weights_norm-39 (view){1, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ 0.0960, 4.0960, 8.0960, ...], ], ] sum = 12.287876 ggml_debug: ffn_moe_out-39 = (f32) ADD(ffn_moe_weighted-39{6144, 1, 1, 1}, ffn_moe_weighted-39{6144, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ 0.3676, 4.3676, 8.3676, ...], ], ] sum = 13.102734 ggml_debug: ffn_moe_up-39 = (f32) MUL_MAT_ID(blk.39.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-39{6144, 1, 1, 1}}) = {10752, 1, 1, 1} [ [ [ -0.2290, 3.7710, 7.7709, ...], ], ] sum = 11.312850 ggml_debug: ffn_moe_gate-39 = (f32) MUL_MAT_ID(blk.39.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-39{6144, 1, 1, 1}}) = {10752, 1, 1, 1} [ [ [ -0.2796, 3.7204, 7.7204, ...], ], ] sum = 11.161320 ggml_debug: ffn_moe_silu-39 = (f32) UNARY(ffn_moe_gate-39{10752, 1, 1, 1}, }) = {10752, 1, 1, 1} [ [ [ -0.1204, 3.8796, 7.8796, ...], ], ] sum = 11.638896 ggml_debug: ffn_moe_gate_par-39 = (f32) MUL(ffn_moe_up-39{10752, 1, 1, 1}, ffn_moe_silu-39{10752, 1, 1, 1}}) = {10752, 1, 1, 1} [ [ [ 0.0276, 4.0276, 8.0276, ...], ], ] sum = 12.082711 ggml_debug: ffn_moe_down-39 = (f32) MUL_MAT_ID(blk.39.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-39{10752, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ -0.1279, 3.8721, 7.8721, ...], ], ] sum = 11.616323 ggml_debug: ffn_moe_weights_norm-39 (view) = (f32) VIEW(ffn_moe_weights_norm-39{4, 1, 1, 1}, }) = {1, 1, 1, 1} [ [ [ 0.1912], ], ] sum = 0.191205 ggml_debug: ffn_moe_weighted-39 = (f32) MUL(ffn_moe_down-39{6144, 1, 1, 1}, ffn_moe_weights_norm-39 (view){1, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ -0.0245, 3.9755, 7.9755, ...], ], ] sum = 11.926640 ggml_debug: ffn_moe_out-39 = (f32) ADD(ffn_moe_out-39{6144, 1, 1, 1}, ffn_moe_weighted-39{6144, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ 0.3431, 4.3431, 8.3431, ...], ], ] sum = 13.029373 ggml_debug: ffn_moe_up-39 = (f32) MUL_MAT_ID(blk.39.ffn_up_exps.weight{6144, 10752, 16, 1}, attn_out_norm-39{6144, 1, 1, 1}}) = {10752, 1, 1, 1} [ [ [ 1.1885, 5.1885, 9.1885, ...], ], ] sum = 15.565513 ggml_debug: ffn_moe_gate-39 = (f32) MUL_MAT_ID(blk.39.ffn_gate_exps.weight{6144, 10752, 16, 1}, attn_out_norm-39{6144, 1, 1, 1}}) = {10752, 1, 1, 1} [ [ [ -0.2902, 3.7098, 7.7098, ...], ], ] sum = 11.129391 ggml_debug: ffn_moe_silu-39 = (f32) UNARY(ffn_moe_gate-39{10752, 1, 1, 1}, }) = {10752, 1, 1, 1} [ [ [ -0.1242, 3.8758, 7.8758, ...], ], ] sum = 11.627419 ggml_debug: ffn_moe_gate_par-39 = (f32) MUL(ffn_moe_up-39{10752, 1, 1, 1}, ffn_moe_silu-39{10752, 1, 1, 1}}) = {10752, 1, 1, 1} [ [ [ -0.1476, 3.8524, 7.8524, ...], ], ] sum = 11.557186 ggml_debug: ffn_moe_down-39 = (f32) MUL_MAT_ID(blk.39.ffn_down_exps.weight{10752, 6144, 16, 1}, ffn_moe_gate_par-39{10752, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ -0.2449, 3.7551, 7.7551, ...], ], ] sum = 11.265266 ggml_debug: ffn_moe_weights_norm-39 (view) = (f32) VIEW(ffn_moe_weights_norm-39{4, 1, 1, 1}, }) = {1, 1, 1, 1} [ [ [ 0.1719], ], ] sum = 0.171865 ggml_debug: ffn_moe_weighted-39 = (f32) MUL(ffn_moe_down-39{6144, 1, 1, 1}, ffn_moe_weights_norm-39 (view){1, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ -0.0421, 3.9579, 7.9579, ...], ], ] sum = 11.873724 ggml_debug: ffn_moe_out-39 = (f32) ADD(ffn_moe_out-39{6144, 1, 1, 1}, ffn_moe_weighted-39{6144, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ 0.3010, 4.3010, 8.3010, ...], ], ] sum = 12.903098 ggml_debug: node_2880 = (f32) GET_ROWS(l_out-38{6144, 3, 1, 1}, CUDA2#inp_out_ids#0{1, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ 2.0253, 6.0253, 10.0253, ...], ], ] sum = 18.075966 ggml_debug: ffn_inp-39 = (f32) ADD(node_2837{6144, 1, 1, 1}, node_2880{6144, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ 3.1289, 7.1289, 11.1289, ...], ], ] sum = 21.386742 ggml_debug: l_out-39 = (f32) ADD(ffn_moe_out-39{6144, 1, 1, 1}, ffn_inp-39{6144, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ 3.4299, 7.4299, 11.4299, ...], ], ] sum = 22.289837 ggml_debug: norm = (f32) NORM(l_out-39{6144, 1, 1, 1}, }) = {6144, 1, 1, 1} [ [ [ 0.8942, 4.8942, 8.8942, ...], ], ] sum = 14.682661 ggml_debug: result_norm = (f32) MUL(norm{6144, 1, 1, 1}, output_norm.weight{6144, 1, 1, 1}}) = {6144, 1, 1, 1} [ [ [ 0.4366, 4.4366, 8.4366, ...], ], ] sum = 13.309893 ggml_debug: result_output = (f32) MUL_MAT(output.weight{6144, 100352, 1, 1}, result_norm{6144, 1, 1, 1}}) = {100352, 1, 1, 1} [ [ [ 10.5669, 14.5669, 18.5669, ...], ], ] sum = 43.700581