program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.2.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] { func main(tensor encoder_hidden_states, tensor sample, tensor timestep) { tensor var_25 = const()[name = tensor("op_25"), val = tensor(-1)]; tensor var_42_axes_0 = const()[name = tensor("op_42_axes_0"), val = tensor([1])]; tensor var_42_cast_fp16 = expand_dims(axes = var_42_axes_0, x = timestep)[name = tensor("op_42_cast_fp16")]; tensor var_44_to_fp16 = const()[name = tensor("op_44_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; tensor emb_3_cast_fp16 = mul(x = var_42_cast_fp16, y = var_44_to_fp16)[name = tensor("emb_3_cast_fp16")]; tensor var_49_cast_fp16 = sin(x = emb_3_cast_fp16)[name = tensor("op_49_cast_fp16")]; tensor var_50_cast_fp16 = cos(x = emb_3_cast_fp16)[name = tensor("op_50_cast_fp16")]; tensor emb_interleave_0 = const()[name = tensor("emb_interleave_0"), val = tensor(false)]; tensor emb_cast_fp16 = concat(axis = var_25, interleave = emb_interleave_0, values = (var_49_cast_fp16, var_50_cast_fp16))[name = tensor("emb_cast_fp16")]; tensor var_54_begin_0 = const()[name = tensor("op_54_begin_0"), val = tensor([0, 160])]; tensor var_54_end_0 = const()[name = tensor("op_54_end_0"), val = tensor([2, 320])]; tensor var_54_end_mask_0 = const()[name = tensor("op_54_end_mask_0"), val = tensor([true, true])]; tensor var_54_cast_fp16 = slice_by_index(begin = var_54_begin_0, end = var_54_end_0, end_mask = var_54_end_mask_0, x = emb_cast_fp16)[name = tensor("op_54_cast_fp16")]; tensor var_56_begin_0 = const()[name = tensor("op_56_begin_0"), val = tensor([0, 0])]; tensor var_56_end_0 = const()[name = tensor("op_56_end_0"), val = tensor([2, 160])]; tensor var_56_end_mask_0 = const()[name = tensor("op_56_end_mask_0"), val = tensor([true, false])]; tensor var_56_cast_fp16 = slice_by_index(begin = var_56_begin_0, end = var_56_end_0, end_mask = var_56_end_mask_0, x = emb_cast_fp16)[name = tensor("op_56_cast_fp16")]; tensor sample_interleave_0 = const()[name = tensor("sample_interleave_0"), val = tensor(false)]; tensor sample_cast_fp16 = concat(axis = var_25, interleave = sample_interleave_0, values = (var_54_cast_fp16, var_56_cast_fp16))[name = tensor("sample_cast_fp16")]; tensor var_59 = const()[name = tensor("op_59"), val = tensor(1)]; tensor var_66_axes_0 = const()[name = tensor("op_66_axes_0"), val = tensor([-1])]; tensor var_66_cast_fp16 = expand_dims(axes = var_66_axes_0, x = sample_cast_fp16)[name = tensor("op_66_cast_fp16")]; tensor input_1_axes_0 = const()[name = tensor("input_1_axes_0"), val = tensor([-1])]; tensor input_1_cast_fp16 = expand_dims(axes = input_1_axes_0, x = var_66_cast_fp16)[name = tensor("input_1_cast_fp16")]; tensor var_70 = const()[name = tensor("op_70"), val = tensor([1, 1])]; tensor var_72 = const()[name = tensor("op_72"), val = tensor([1, 1])]; tensor input_3_pad_type_0 = const()[name = tensor("input_3_pad_type_0"), val = tensor("custom")]; tensor input_3_pad_0 = const()[name = tensor("input_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor time_embedding_linear_1_weight_to_fp16 = const()[name = tensor("time_embedding_linear_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(448)))]; tensor time_embedding_linear_1_bias_to_fp16 = const()[name = tensor("time_embedding_linear_1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(819712)))]; tensor input_3_cast_fp16 = conv(bias = time_embedding_linear_1_bias_to_fp16, dilations = var_72, groups = var_59, pad = input_3_pad_0, pad_type = input_3_pad_type_0, strides = var_70, weight = time_embedding_linear_1_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("input_3_cast_fp16")]; tensor input_5_cast_fp16 = silu(x = input_3_cast_fp16)[name = tensor("input_5_cast_fp16")]; tensor var_78 = const()[name = tensor("op_78"), val = tensor([1, 1])]; tensor var_80 = const()[name = tensor("op_80"), val = tensor([1, 1])]; tensor input_13_pad_type_0 = const()[name = tensor("input_13_pad_type_0"), val = tensor("custom")]; tensor input_13_pad_0 = const()[name = tensor("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor time_embedding_linear_2_weight_to_fp16 = const()[name = tensor("time_embedding_linear_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(822336)))]; tensor time_embedding_linear_2_bias_to_fp16 = const()[name = tensor("time_embedding_linear_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4099200)))]; tensor input_13_cast_fp16 = conv(bias = time_embedding_linear_2_bias_to_fp16, dilations = var_80, groups = var_59, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = var_78, weight = time_embedding_linear_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_13_cast_fp16")]; tensor var_86 = const()[name = tensor("op_86"), val = tensor(1)]; tensor var_89 = const()[name = tensor("op_89"), val = tensor([1, 1])]; tensor var_91 = const()[name = tensor("op_91"), val = tensor([1, 1])]; tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([1, 1, 1, 1])]; tensor conv_in_weight_to_fp16 = const()[name = tensor("conv_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4101824)))]; tensor conv_in_bias_to_fp16 = const()[name = tensor("conv_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4124928)))]; tensor input_7_cast_fp16 = conv(bias = conv_in_bias_to_fp16, dilations = var_91, groups = var_86, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_89, weight = conv_in_weight_to_fp16, x = sample)[name = tensor("input_7_cast_fp16")]; tensor var_95 = const()[name = tensor("op_95"), val = tensor(3)]; tensor var_123 = const()[name = tensor("op_123"), val = tensor(1)]; tensor reshape_0_shape_0 = const()[name = tensor("reshape_0_shape_0"), val = tensor([2, 32, 10, 64, 64])]; tensor reshape_0_cast_fp16 = reshape(shape = reshape_0_shape_0, x = input_7_cast_fp16)[name = tensor("reshape_0_cast_fp16")]; tensor reduce_mean_0_axes_0 = const()[name = tensor("reduce_mean_0_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_0_keep_dims_0 = const()[name = tensor("reduce_mean_0_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_0_cast_fp16 = reduce_mean(axes = reduce_mean_0_axes_0, keep_dims = reduce_mean_0_keep_dims_0, x = reshape_0_cast_fp16)[name = tensor("reduce_mean_0_cast_fp16")]; tensor sub_0_cast_fp16 = sub(x = reshape_0_cast_fp16, y = reduce_mean_0_cast_fp16)[name = tensor("sub_0_cast_fp16")]; tensor square_0_cast_fp16 = square(x = sub_0_cast_fp16)[name = tensor("square_0_cast_fp16")]; tensor reduce_mean_2_axes_0 = const()[name = tensor("reduce_mean_2_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_2_keep_dims_0 = const()[name = tensor("reduce_mean_2_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_2_cast_fp16 = reduce_mean(axes = reduce_mean_2_axes_0, keep_dims = reduce_mean_2_keep_dims_0, x = square_0_cast_fp16)[name = tensor("reduce_mean_2_cast_fp16")]; tensor add_0_y_0_to_fp16 = const()[name = tensor("add_0_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_0_cast_fp16 = add(x = reduce_mean_2_cast_fp16, y = add_0_y_0_to_fp16)[name = tensor("add_0_cast_fp16")]; tensor sqrt_0_cast_fp16 = sqrt(x = add_0_cast_fp16)[name = tensor("sqrt_0_cast_fp16")]; tensor real_div_0_cast_fp16 = real_div(x = sub_0_cast_fp16, y = sqrt_0_cast_fp16)[name = tensor("real_div_0_cast_fp16")]; tensor reshape_1_shape_0 = const()[name = tensor("reshape_1_shape_0"), val = tensor([2, 320, 64, 64])]; tensor reshape_1_cast_fp16 = reshape(shape = reshape_1_shape_0, x = real_div_0_cast_fp16)[name = tensor("reshape_1_cast_fp16")]; tensor add_1_mean_0_to_fp16 = const()[name = tensor("add_1_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4125632)))]; tensor add_1_variance_0_to_fp16 = const()[name = tensor("add_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4126336)))]; tensor add_1_gamma_0_to_fp16 = const()[name = tensor("add_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4127040)))]; tensor add_1_beta_0_to_fp16 = const()[name = tensor("add_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4127744)))]; tensor add_1_epsilon_0_to_fp16 = const()[name = tensor("add_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_1_cast_fp16 = batch_norm(beta = add_1_beta_0_to_fp16, epsilon = add_1_epsilon_0_to_fp16, gamma = add_1_gamma_0_to_fp16, mean = add_1_mean_0_to_fp16, variance = add_1_variance_0_to_fp16, x = reshape_1_cast_fp16)[name = tensor("add_1_cast_fp16")]; tensor input_11_cast_fp16 = silu(x = add_1_cast_fp16)[name = tensor("input_11_cast_fp16")]; tensor var_145 = const()[name = tensor("op_145"), val = tensor([1, 1])]; tensor var_147 = const()[name = tensor("op_147"), val = tensor([1, 1])]; tensor hidden_states_1_pad_type_0 = const()[name = tensor("hidden_states_1_pad_type_0"), val = tensor("custom")]; tensor hidden_states_1_pad_0 = const()[name = tensor("hidden_states_1_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_0_resnets_0_conv1_weight_to_fp16 = const()[name = tensor("down_blocks_0_resnets_0_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(4128448)))]; tensor down_blocks_0_resnets_0_conv1_bias_to_fp16 = const()[name = tensor("down_blocks_0_resnets_0_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5971712)))]; tensor hidden_states_1_cast_fp16 = conv(bias = down_blocks_0_resnets_0_conv1_bias_to_fp16, dilations = var_147, groups = var_123, pad = hidden_states_1_pad_0, pad_type = hidden_states_1_pad_type_0, strides = var_145, weight = down_blocks_0_resnets_0_conv1_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; tensor input_15_cast_fp16 = silu(x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; tensor var_153 = const()[name = tensor("op_153"), val = tensor([1, 1])]; tensor var_155 = const()[name = tensor("op_155"), val = tensor([1, 1])]; tensor temb_1_pad_type_0 = const()[name = tensor("temb_1_pad_type_0"), val = tensor("custom")]; tensor temb_1_pad_0 = const()[name = tensor("temb_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_resnets_0_time_emb_proj_weight_to_fp16 = const()[name = tensor("down_blocks_0_resnets_0_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(5972416)))]; tensor down_blocks_0_resnets_0_time_emb_proj_bias_to_fp16 = const()[name = tensor("down_blocks_0_resnets_0_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6791680)))]; tensor temb_1_cast_fp16 = conv(bias = down_blocks_0_resnets_0_time_emb_proj_bias_to_fp16, dilations = var_155, groups = var_123, pad = temb_1_pad_0, pad_type = temb_1_pad_type_0, strides = var_153, weight = down_blocks_0_resnets_0_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_1_cast_fp16")]; tensor input_17_cast_fp16 = add(x = hidden_states_1_cast_fp16, y = temb_1_cast_fp16)[name = tensor("input_17_cast_fp16")]; tensor reshape_4_shape_0 = const()[name = tensor("reshape_4_shape_0"), val = tensor([2, 32, 10, 64, 64])]; tensor reshape_4_cast_fp16 = reshape(shape = reshape_4_shape_0, x = input_17_cast_fp16)[name = tensor("reshape_4_cast_fp16")]; tensor reduce_mean_3_axes_0 = const()[name = tensor("reduce_mean_3_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_3_keep_dims_0 = const()[name = tensor("reduce_mean_3_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_3_cast_fp16 = reduce_mean(axes = reduce_mean_3_axes_0, keep_dims = reduce_mean_3_keep_dims_0, x = reshape_4_cast_fp16)[name = tensor("reduce_mean_3_cast_fp16")]; tensor sub_2_cast_fp16 = sub(x = reshape_4_cast_fp16, y = reduce_mean_3_cast_fp16)[name = tensor("sub_2_cast_fp16")]; tensor square_1_cast_fp16 = square(x = sub_2_cast_fp16)[name = tensor("square_1_cast_fp16")]; tensor reduce_mean_5_axes_0 = const()[name = tensor("reduce_mean_5_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_5_keep_dims_0 = const()[name = tensor("reduce_mean_5_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_5_cast_fp16 = reduce_mean(axes = reduce_mean_5_axes_0, keep_dims = reduce_mean_5_keep_dims_0, x = square_1_cast_fp16)[name = tensor("reduce_mean_5_cast_fp16")]; tensor add_2_y_0_to_fp16 = const()[name = tensor("add_2_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_2_cast_fp16 = add(x = reduce_mean_5_cast_fp16, y = add_2_y_0_to_fp16)[name = tensor("add_2_cast_fp16")]; tensor sqrt_1_cast_fp16 = sqrt(x = add_2_cast_fp16)[name = tensor("sqrt_1_cast_fp16")]; tensor real_div_1_cast_fp16 = real_div(x = sub_2_cast_fp16, y = sqrt_1_cast_fp16)[name = tensor("real_div_1_cast_fp16")]; tensor reshape_5_shape_0 = const()[name = tensor("reshape_5_shape_0"), val = tensor([2, 320, 64, 64])]; tensor reshape_5_cast_fp16 = reshape(shape = reshape_5_shape_0, x = real_div_1_cast_fp16)[name = tensor("reshape_5_cast_fp16")]; tensor add_3_gamma_0_to_fp16 = const()[name = tensor("add_3_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6792384)))]; tensor add_3_beta_0_to_fp16 = const()[name = tensor("add_3_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6793088)))]; tensor add_3_epsilon_0_to_fp16 = const()[name = tensor("add_3_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_3_cast_fp16 = batch_norm(beta = add_3_beta_0_to_fp16, epsilon = add_3_epsilon_0_to_fp16, gamma = add_3_gamma_0_to_fp16, mean = add_1_mean_0_to_fp16, variance = add_1_variance_0_to_fp16, x = reshape_5_cast_fp16)[name = tensor("add_3_cast_fp16")]; tensor input_21_cast_fp16 = silu(x = add_3_cast_fp16)[name = tensor("input_21_cast_fp16")]; tensor var_165 = const()[name = tensor("op_165"), val = tensor([1, 1])]; tensor var_167 = const()[name = tensor("op_167"), val = tensor([1, 1])]; tensor hidden_states_3_pad_type_0 = const()[name = tensor("hidden_states_3_pad_type_0"), val = tensor("custom")]; tensor hidden_states_3_pad_0 = const()[name = tensor("hidden_states_3_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_0_resnets_0_conv2_weight_to_fp16 = const()[name = tensor("down_blocks_0_resnets_0_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6793792)))]; tensor down_blocks_0_resnets_0_conv2_bias_to_fp16 = const()[name = tensor("down_blocks_0_resnets_0_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8637056)))]; tensor hidden_states_3_cast_fp16 = conv(bias = down_blocks_0_resnets_0_conv2_bias_to_fp16, dilations = var_167, groups = var_123, pad = hidden_states_3_pad_0, pad_type = hidden_states_3_pad_type_0, strides = var_165, weight = down_blocks_0_resnets_0_conv2_weight_to_fp16, x = input_21_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; tensor hidden_states_5_cast_fp16 = add(x = input_7_cast_fp16, y = hidden_states_3_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; tensor reshape_8_shape_0 = const()[name = tensor("reshape_8_shape_0"), val = tensor([2, 32, 10, 64, 64])]; tensor reshape_8_cast_fp16 = reshape(shape = reshape_8_shape_0, x = hidden_states_5_cast_fp16)[name = tensor("reshape_8_cast_fp16")]; tensor reduce_mean_6_axes_0 = const()[name = tensor("reduce_mean_6_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_6_keep_dims_0 = const()[name = tensor("reduce_mean_6_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_6_cast_fp16 = reduce_mean(axes = reduce_mean_6_axes_0, keep_dims = reduce_mean_6_keep_dims_0, x = reshape_8_cast_fp16)[name = tensor("reduce_mean_6_cast_fp16")]; tensor sub_4_cast_fp16 = sub(x = reshape_8_cast_fp16, y = reduce_mean_6_cast_fp16)[name = tensor("sub_4_cast_fp16")]; tensor square_2_cast_fp16 = square(x = sub_4_cast_fp16)[name = tensor("square_2_cast_fp16")]; tensor reduce_mean_8_axes_0 = const()[name = tensor("reduce_mean_8_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_8_keep_dims_0 = const()[name = tensor("reduce_mean_8_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_8_cast_fp16 = reduce_mean(axes = reduce_mean_8_axes_0, keep_dims = reduce_mean_8_keep_dims_0, x = square_2_cast_fp16)[name = tensor("reduce_mean_8_cast_fp16")]; tensor add_4_y_0_to_fp16 = const()[name = tensor("add_4_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_4_cast_fp16 = add(x = reduce_mean_8_cast_fp16, y = add_4_y_0_to_fp16)[name = tensor("add_4_cast_fp16")]; tensor sqrt_2_cast_fp16 = sqrt(x = add_4_cast_fp16)[name = tensor("sqrt_2_cast_fp16")]; tensor real_div_2_cast_fp16 = real_div(x = sub_4_cast_fp16, y = sqrt_2_cast_fp16)[name = tensor("real_div_2_cast_fp16")]; tensor reshape_9_shape_0 = const()[name = tensor("reshape_9_shape_0"), val = tensor([2, 320, 64, 64])]; tensor reshape_9_cast_fp16 = reshape(shape = reshape_9_shape_0, x = real_div_2_cast_fp16)[name = tensor("reshape_9_cast_fp16")]; tensor add_5_gamma_0_to_fp16 = const()[name = tensor("add_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8637760)))]; tensor add_5_beta_0_to_fp16 = const()[name = tensor("add_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8638464)))]; tensor add_5_epsilon_0_to_fp16 = const()[name = tensor("add_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_5_cast_fp16 = batch_norm(beta = add_5_beta_0_to_fp16, epsilon = add_5_epsilon_0_to_fp16, gamma = add_5_gamma_0_to_fp16, mean = add_1_mean_0_to_fp16, variance = add_1_variance_0_to_fp16, x = reshape_9_cast_fp16)[name = tensor("add_5_cast_fp16")]; tensor var_187 = const()[name = tensor("op_187"), val = tensor([1, 1])]; tensor var_189 = const()[name = tensor("op_189"), val = tensor([1, 1])]; tensor hidden_states_7_pad_type_0 = const()[name = tensor("hidden_states_7_pad_type_0"), val = tensor("custom")]; tensor hidden_states_7_pad_0 = const()[name = tensor("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_0_proj_in_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8639168)))]; tensor down_blocks_0_attentions_0_proj_in_bias_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8844032)))]; tensor hidden_states_7_cast_fp16 = conv(bias = down_blocks_0_attentions_0_proj_in_bias_to_fp16, dilations = var_189, groups = var_123, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = var_187, weight = down_blocks_0_attentions_0_proj_in_weight_to_fp16, x = add_5_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; tensor var_194 = const()[name = tensor("op_194"), val = tensor([2, 320, 1, 4096])]; tensor inputs_1_cast_fp16 = reshape(shape = var_194, x = hidden_states_7_cast_fp16)[name = tensor("inputs_1_cast_fp16")]; tensor hidden_states_9_axes_0 = const()[name = tensor("hidden_states_9_axes_0"), val = tensor([1])]; tensor hidden_states_9_gamma_0_to_fp16 = const()[name = tensor("hidden_states_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8844736)))]; tensor hidden_states_9_beta_0_to_fp16 = const()[name = tensor("hidden_states_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8845440)))]; tensor var_210_to_fp16 = const()[name = tensor("op_210_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_9_cast_fp16 = layer_norm(axes = hidden_states_9_axes_0, beta = hidden_states_9_beta_0_to_fp16, epsilon = var_210_to_fp16, gamma = hidden_states_9_gamma_0_to_fp16, x = inputs_1_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; tensor var_225 = const()[name = tensor("op_225"), val = tensor([1, 1])]; tensor var_227 = const()[name = tensor("op_227"), val = tensor([1, 1])]; tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8846144)))]; tensor q_1_cast_fp16 = conv(dilations = var_227, groups = var_123, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_225, weight = down_blocks_0_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = tensor("q_1_cast_fp16")]; tensor var_231 = const()[name = tensor("op_231"), val = tensor([1, 1])]; tensor var_233 = const()[name = tensor("op_233"), val = tensor([1, 1])]; tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9051008)))]; tensor k_1_cast_fp16 = conv(dilations = var_233, groups = var_123, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_231, weight = down_blocks_0_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = tensor("k_1_cast_fp16")]; tensor var_237 = const()[name = tensor("op_237"), val = tensor([1, 1])]; tensor var_239 = const()[name = tensor("op_239"), val = tensor([1, 1])]; tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9255872)))]; tensor v_1_cast_fp16 = conv(dilations = var_239, groups = var_123, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_237, weight = down_blocks_0_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_9_cast_fp16)[name = tensor("v_1_cast_fp16")]; tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_1_cast_fp16)[name = tensor("op_243_cast_fp16")]; tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([2, 80, 1, 4096])]; tensor var_247_end_mask_0 = const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_1_cast_fp16)[name = tensor("op_247_cast_fp16")]; tensor var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([2, 120, 1, 4096])]; tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_1_cast_fp16)[name = tensor("op_251_cast_fp16")]; tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([2, 160, 1, 4096])]; tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_1_cast_fp16)[name = tensor("op_255_cast_fp16")]; tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([2, 200, 1, 4096])]; tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_1_cast_fp16)[name = tensor("op_259_cast_fp16")]; tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([2, 240, 1, 4096])]; tensor var_263_end_mask_0 = const()[name = tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_1_cast_fp16)[name = tensor("op_263_cast_fp16")]; tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([2, 280, 1, 4096])]; tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_1_cast_fp16)[name = tensor("op_267_cast_fp16")]; tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([2, 320, 1, 4096])]; tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_1_cast_fp16)[name = tensor("op_271_cast_fp16")]; tensor var_274_begin_0 = const()[name = tensor("op_274_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_274_end_0 = const()[name = tensor("op_274_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_274_end_mask_0 = const()[name = tensor("op_274_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = var_243_cast_fp16)[name = tensor("op_274_cast_fp16")]; tensor var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = var_243_cast_fp16)[name = tensor("op_275_cast_fp16")]; tensor var_276_begin_0 = const()[name = tensor("op_276_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_276_end_0 = const()[name = tensor("op_276_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_276_end_mask_0 = const()[name = tensor("op_276_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_276_cast_fp16 = slice_by_index(begin = var_276_begin_0, end = var_276_end_0, end_mask = var_276_end_mask_0, x = var_243_cast_fp16)[name = tensor("op_276_cast_fp16")]; tensor var_277_begin_0 = const()[name = tensor("op_277_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_277_end_0 = const()[name = tensor("op_277_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_277_end_mask_0 = const()[name = tensor("op_277_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_277_cast_fp16 = slice_by_index(begin = var_277_begin_0, end = var_277_end_0, end_mask = var_277_end_mask_0, x = var_243_cast_fp16)[name = tensor("op_277_cast_fp16")]; tensor var_278_begin_0 = const()[name = tensor("op_278_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_278_end_0 = const()[name = tensor("op_278_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_278_end_mask_0 = const()[name = tensor("op_278_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_278_cast_fp16 = slice_by_index(begin = var_278_begin_0, end = var_278_end_0, end_mask = var_278_end_mask_0, x = var_243_cast_fp16)[name = tensor("op_278_cast_fp16")]; tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = var_243_cast_fp16)[name = tensor("op_279_cast_fp16")]; tensor var_280_begin_0 = const()[name = tensor("op_280_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_280_end_0 = const()[name = tensor("op_280_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_280_end_mask_0 = const()[name = tensor("op_280_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_280_cast_fp16 = slice_by_index(begin = var_280_begin_0, end = var_280_end_0, end_mask = var_280_end_mask_0, x = var_243_cast_fp16)[name = tensor("op_280_cast_fp16")]; tensor var_281_begin_0 = const()[name = tensor("op_281_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_281_end_0 = const()[name = tensor("op_281_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_281_end_mask_0 = const()[name = tensor("op_281_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_281_cast_fp16 = slice_by_index(begin = var_281_begin_0, end = var_281_end_0, end_mask = var_281_end_mask_0, x = var_243_cast_fp16)[name = tensor("op_281_cast_fp16")]; tensor var_282_begin_0 = const()[name = tensor("op_282_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_282_end_0 = const()[name = tensor("op_282_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_282_end_mask_0 = const()[name = tensor("op_282_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, x = var_247_cast_fp16)[name = tensor("op_282_cast_fp16")]; tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = var_247_cast_fp16)[name = tensor("op_283_cast_fp16")]; tensor var_284_begin_0 = const()[name = tensor("op_284_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_284_end_0 = const()[name = tensor("op_284_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_284_end_mask_0 = const()[name = tensor("op_284_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_284_cast_fp16 = slice_by_index(begin = var_284_begin_0, end = var_284_end_0, end_mask = var_284_end_mask_0, x = var_247_cast_fp16)[name = tensor("op_284_cast_fp16")]; tensor var_285_begin_0 = const()[name = tensor("op_285_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_285_end_0 = const()[name = tensor("op_285_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_285_end_mask_0 = const()[name = tensor("op_285_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_285_cast_fp16 = slice_by_index(begin = var_285_begin_0, end = var_285_end_0, end_mask = var_285_end_mask_0, x = var_247_cast_fp16)[name = tensor("op_285_cast_fp16")]; tensor var_286_begin_0 = const()[name = tensor("op_286_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_286_end_0 = const()[name = tensor("op_286_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_286_end_mask_0 = const()[name = tensor("op_286_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, x = var_247_cast_fp16)[name = tensor("op_286_cast_fp16")]; tensor var_287_begin_0 = const()[name = tensor("op_287_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_287_end_0 = const()[name = tensor("op_287_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_287_end_mask_0 = const()[name = tensor("op_287_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_287_cast_fp16 = slice_by_index(begin = var_287_begin_0, end = var_287_end_0, end_mask = var_287_end_mask_0, x = var_247_cast_fp16)[name = tensor("op_287_cast_fp16")]; tensor var_288_begin_0 = const()[name = tensor("op_288_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_288_end_0 = const()[name = tensor("op_288_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_288_end_mask_0 = const()[name = tensor("op_288_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_288_cast_fp16 = slice_by_index(begin = var_288_begin_0, end = var_288_end_0, end_mask = var_288_end_mask_0, x = var_247_cast_fp16)[name = tensor("op_288_cast_fp16")]; tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = var_247_cast_fp16)[name = tensor("op_289_cast_fp16")]; tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = var_251_cast_fp16)[name = tensor("op_290_cast_fp16")]; tensor var_291_begin_0 = const()[name = tensor("op_291_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_291_end_0 = const()[name = tensor("op_291_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_291_end_mask_0 = const()[name = tensor("op_291_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_291_cast_fp16 = slice_by_index(begin = var_291_begin_0, end = var_291_end_0, end_mask = var_291_end_mask_0, x = var_251_cast_fp16)[name = tensor("op_291_cast_fp16")]; tensor var_292_begin_0 = const()[name = tensor("op_292_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_292_end_0 = const()[name = tensor("op_292_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_292_end_mask_0 = const()[name = tensor("op_292_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_292_cast_fp16 = slice_by_index(begin = var_292_begin_0, end = var_292_end_0, end_mask = var_292_end_mask_0, x = var_251_cast_fp16)[name = tensor("op_292_cast_fp16")]; tensor var_293_begin_0 = const()[name = tensor("op_293_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_293_end_0 = const()[name = tensor("op_293_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_293_end_mask_0 = const()[name = tensor("op_293_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_293_cast_fp16 = slice_by_index(begin = var_293_begin_0, end = var_293_end_0, end_mask = var_293_end_mask_0, x = var_251_cast_fp16)[name = tensor("op_293_cast_fp16")]; tensor var_294_begin_0 = const()[name = tensor("op_294_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_294_end_0 = const()[name = tensor("op_294_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_294_end_mask_0 = const()[name = tensor("op_294_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = var_251_cast_fp16)[name = tensor("op_294_cast_fp16")]; tensor var_295_begin_0 = const()[name = tensor("op_295_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_295_end_0 = const()[name = tensor("op_295_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_295_end_mask_0 = const()[name = tensor("op_295_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_295_cast_fp16 = slice_by_index(begin = var_295_begin_0, end = var_295_end_0, end_mask = var_295_end_mask_0, x = var_251_cast_fp16)[name = tensor("op_295_cast_fp16")]; tensor var_296_begin_0 = const()[name = tensor("op_296_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_296_end_0 = const()[name = tensor("op_296_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_296_end_mask_0 = const()[name = tensor("op_296_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_296_cast_fp16 = slice_by_index(begin = var_296_begin_0, end = var_296_end_0, end_mask = var_296_end_mask_0, x = var_251_cast_fp16)[name = tensor("op_296_cast_fp16")]; tensor var_297_begin_0 = const()[name = tensor("op_297_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_297_end_0 = const()[name = tensor("op_297_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_297_end_mask_0 = const()[name = tensor("op_297_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_297_cast_fp16 = slice_by_index(begin = var_297_begin_0, end = var_297_end_0, end_mask = var_297_end_mask_0, x = var_251_cast_fp16)[name = tensor("op_297_cast_fp16")]; tensor var_298_begin_0 = const()[name = tensor("op_298_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_298_end_0 = const()[name = tensor("op_298_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_298_end_mask_0 = const()[name = tensor("op_298_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_298_cast_fp16 = slice_by_index(begin = var_298_begin_0, end = var_298_end_0, end_mask = var_298_end_mask_0, x = var_255_cast_fp16)[name = tensor("op_298_cast_fp16")]; tensor var_299_begin_0 = const()[name = tensor("op_299_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_299_end_0 = const()[name = tensor("op_299_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_299_end_mask_0 = const()[name = tensor("op_299_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_299_cast_fp16 = slice_by_index(begin = var_299_begin_0, end = var_299_end_0, end_mask = var_299_end_mask_0, x = var_255_cast_fp16)[name = tensor("op_299_cast_fp16")]; tensor var_300_begin_0 = const()[name = tensor("op_300_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_300_end_0 = const()[name = tensor("op_300_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_300_end_mask_0 = const()[name = tensor("op_300_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_300_cast_fp16 = slice_by_index(begin = var_300_begin_0, end = var_300_end_0, end_mask = var_300_end_mask_0, x = var_255_cast_fp16)[name = tensor("op_300_cast_fp16")]; tensor var_301_begin_0 = const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = var_255_cast_fp16)[name = tensor("op_301_cast_fp16")]; tensor var_302_begin_0 = const()[name = tensor("op_302_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_302_end_0 = const()[name = tensor("op_302_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_302_end_mask_0 = const()[name = tensor("op_302_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_302_cast_fp16 = slice_by_index(begin = var_302_begin_0, end = var_302_end_0, end_mask = var_302_end_mask_0, x = var_255_cast_fp16)[name = tensor("op_302_cast_fp16")]; tensor var_303_begin_0 = const()[name = tensor("op_303_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_303_end_0 = const()[name = tensor("op_303_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_303_end_mask_0 = const()[name = tensor("op_303_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_303_cast_fp16 = slice_by_index(begin = var_303_begin_0, end = var_303_end_0, end_mask = var_303_end_mask_0, x = var_255_cast_fp16)[name = tensor("op_303_cast_fp16")]; tensor var_304_begin_0 = const()[name = tensor("op_304_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_304_end_0 = const()[name = tensor("op_304_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_304_end_mask_0 = const()[name = tensor("op_304_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_304_cast_fp16 = slice_by_index(begin = var_304_begin_0, end = var_304_end_0, end_mask = var_304_end_mask_0, x = var_255_cast_fp16)[name = tensor("op_304_cast_fp16")]; tensor var_305_begin_0 = const()[name = tensor("op_305_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_305_end_0 = const()[name = tensor("op_305_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_305_end_mask_0 = const()[name = tensor("op_305_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_305_cast_fp16 = slice_by_index(begin = var_305_begin_0, end = var_305_end_0, end_mask = var_305_end_mask_0, x = var_255_cast_fp16)[name = tensor("op_305_cast_fp16")]; tensor var_306_begin_0 = const()[name = tensor("op_306_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_306_end_0 = const()[name = tensor("op_306_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_306_end_mask_0 = const()[name = tensor("op_306_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_306_cast_fp16 = slice_by_index(begin = var_306_begin_0, end = var_306_end_0, end_mask = var_306_end_mask_0, x = var_259_cast_fp16)[name = tensor("op_306_cast_fp16")]; tensor var_307_begin_0 = const()[name = tensor("op_307_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_307_end_0 = const()[name = tensor("op_307_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_307_end_mask_0 = const()[name = tensor("op_307_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_307_cast_fp16 = slice_by_index(begin = var_307_begin_0, end = var_307_end_0, end_mask = var_307_end_mask_0, x = var_259_cast_fp16)[name = tensor("op_307_cast_fp16")]; tensor var_308_begin_0 = const()[name = tensor("op_308_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_308_end_0 = const()[name = tensor("op_308_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_308_end_mask_0 = const()[name = tensor("op_308_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_308_cast_fp16 = slice_by_index(begin = var_308_begin_0, end = var_308_end_0, end_mask = var_308_end_mask_0, x = var_259_cast_fp16)[name = tensor("op_308_cast_fp16")]; tensor var_309_begin_0 = const()[name = tensor("op_309_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_309_end_0 = const()[name = tensor("op_309_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_309_end_mask_0 = const()[name = tensor("op_309_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_309_cast_fp16 = slice_by_index(begin = var_309_begin_0, end = var_309_end_0, end_mask = var_309_end_mask_0, x = var_259_cast_fp16)[name = tensor("op_309_cast_fp16")]; tensor var_310_begin_0 = const()[name = tensor("op_310_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_310_end_0 = const()[name = tensor("op_310_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_310_end_mask_0 = const()[name = tensor("op_310_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_310_cast_fp16 = slice_by_index(begin = var_310_begin_0, end = var_310_end_0, end_mask = var_310_end_mask_0, x = var_259_cast_fp16)[name = tensor("op_310_cast_fp16")]; tensor var_311_begin_0 = const()[name = tensor("op_311_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_311_end_0 = const()[name = tensor("op_311_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_311_end_mask_0 = const()[name = tensor("op_311_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_311_cast_fp16 = slice_by_index(begin = var_311_begin_0, end = var_311_end_0, end_mask = var_311_end_mask_0, x = var_259_cast_fp16)[name = tensor("op_311_cast_fp16")]; tensor var_312_begin_0 = const()[name = tensor("op_312_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_312_end_0 = const()[name = tensor("op_312_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_312_end_mask_0 = const()[name = tensor("op_312_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, x = var_259_cast_fp16)[name = tensor("op_312_cast_fp16")]; tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = var_259_cast_fp16)[name = tensor("op_313_cast_fp16")]; tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = var_263_cast_fp16)[name = tensor("op_314_cast_fp16")]; tensor var_315_begin_0 = const()[name = tensor("op_315_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_315_end_0 = const()[name = tensor("op_315_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_315_end_mask_0 = const()[name = tensor("op_315_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_315_cast_fp16 = slice_by_index(begin = var_315_begin_0, end = var_315_end_0, end_mask = var_315_end_mask_0, x = var_263_cast_fp16)[name = tensor("op_315_cast_fp16")]; tensor var_316_begin_0 = const()[name = tensor("op_316_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_316_end_0 = const()[name = tensor("op_316_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_316_end_mask_0 = const()[name = tensor("op_316_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_316_cast_fp16 = slice_by_index(begin = var_316_begin_0, end = var_316_end_0, end_mask = var_316_end_mask_0, x = var_263_cast_fp16)[name = tensor("op_316_cast_fp16")]; tensor var_317_begin_0 = const()[name = tensor("op_317_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_317_end_0 = const()[name = tensor("op_317_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_317_end_mask_0 = const()[name = tensor("op_317_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_317_cast_fp16 = slice_by_index(begin = var_317_begin_0, end = var_317_end_0, end_mask = var_317_end_mask_0, x = var_263_cast_fp16)[name = tensor("op_317_cast_fp16")]; tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = var_263_cast_fp16)[name = tensor("op_318_cast_fp16")]; tensor var_319_begin_0 = const()[name = tensor("op_319_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_319_end_0 = const()[name = tensor("op_319_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_319_end_mask_0 = const()[name = tensor("op_319_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_319_cast_fp16 = slice_by_index(begin = var_319_begin_0, end = var_319_end_0, end_mask = var_319_end_mask_0, x = var_263_cast_fp16)[name = tensor("op_319_cast_fp16")]; tensor var_320_begin_0 = const()[name = tensor("op_320_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_320_end_0 = const()[name = tensor("op_320_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_320_end_mask_0 = const()[name = tensor("op_320_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_320_cast_fp16 = slice_by_index(begin = var_320_begin_0, end = var_320_end_0, end_mask = var_320_end_mask_0, x = var_263_cast_fp16)[name = tensor("op_320_cast_fp16")]; tensor var_321_begin_0 = const()[name = tensor("op_321_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_321_end_0 = const()[name = tensor("op_321_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_321_end_mask_0 = const()[name = tensor("op_321_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_321_cast_fp16 = slice_by_index(begin = var_321_begin_0, end = var_321_end_0, end_mask = var_321_end_mask_0, x = var_263_cast_fp16)[name = tensor("op_321_cast_fp16")]; tensor var_322_begin_0 = const()[name = tensor("op_322_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_322_end_0 = const()[name = tensor("op_322_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_322_end_mask_0 = const()[name = tensor("op_322_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = var_267_cast_fp16)[name = tensor("op_322_cast_fp16")]; tensor var_323_begin_0 = const()[name = tensor("op_323_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_323_end_0 = const()[name = tensor("op_323_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_323_end_mask_0 = const()[name = tensor("op_323_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_323_cast_fp16 = slice_by_index(begin = var_323_begin_0, end = var_323_end_0, end_mask = var_323_end_mask_0, x = var_267_cast_fp16)[name = tensor("op_323_cast_fp16")]; tensor var_324_begin_0 = const()[name = tensor("op_324_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_324_end_0 = const()[name = tensor("op_324_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_324_end_mask_0 = const()[name = tensor("op_324_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_324_cast_fp16 = slice_by_index(begin = var_324_begin_0, end = var_324_end_0, end_mask = var_324_end_mask_0, x = var_267_cast_fp16)[name = tensor("op_324_cast_fp16")]; tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_325_end_0 = const()[name = tensor("op_325_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = var_267_cast_fp16)[name = tensor("op_325_cast_fp16")]; tensor var_326_begin_0 = const()[name = tensor("op_326_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_326_end_0 = const()[name = tensor("op_326_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_326_end_mask_0 = const()[name = tensor("op_326_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_326_cast_fp16 = slice_by_index(begin = var_326_begin_0, end = var_326_end_0, end_mask = var_326_end_mask_0, x = var_267_cast_fp16)[name = tensor("op_326_cast_fp16")]; tensor var_327_begin_0 = const()[name = tensor("op_327_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_327_end_0 = const()[name = tensor("op_327_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_327_end_mask_0 = const()[name = tensor("op_327_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_327_cast_fp16 = slice_by_index(begin = var_327_begin_0, end = var_327_end_0, end_mask = var_327_end_mask_0, x = var_267_cast_fp16)[name = tensor("op_327_cast_fp16")]; tensor var_328_begin_0 = const()[name = tensor("op_328_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_328_end_0 = const()[name = tensor("op_328_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_328_end_mask_0 = const()[name = tensor("op_328_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_328_cast_fp16 = slice_by_index(begin = var_328_begin_0, end = var_328_end_0, end_mask = var_328_end_mask_0, x = var_267_cast_fp16)[name = tensor("op_328_cast_fp16")]; tensor var_329_begin_0 = const()[name = tensor("op_329_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_329_end_0 = const()[name = tensor("op_329_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_329_end_mask_0 = const()[name = tensor("op_329_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_329_cast_fp16 = slice_by_index(begin = var_329_begin_0, end = var_329_end_0, end_mask = var_329_end_mask_0, x = var_267_cast_fp16)[name = tensor("op_329_cast_fp16")]; tensor var_330_begin_0 = const()[name = tensor("op_330_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_330_end_0 = const()[name = tensor("op_330_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_330_end_mask_0 = const()[name = tensor("op_330_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_330_cast_fp16 = slice_by_index(begin = var_330_begin_0, end = var_330_end_0, end_mask = var_330_end_mask_0, x = var_271_cast_fp16)[name = tensor("op_330_cast_fp16")]; tensor var_331_begin_0 = const()[name = tensor("op_331_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_331_end_0 = const()[name = tensor("op_331_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_331_end_mask_0 = const()[name = tensor("op_331_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_331_cast_fp16 = slice_by_index(begin = var_331_begin_0, end = var_331_end_0, end_mask = var_331_end_mask_0, x = var_271_cast_fp16)[name = tensor("op_331_cast_fp16")]; tensor var_332_begin_0 = const()[name = tensor("op_332_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_332_end_0 = const()[name = tensor("op_332_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_332_end_mask_0 = const()[name = tensor("op_332_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_332_cast_fp16 = slice_by_index(begin = var_332_begin_0, end = var_332_end_0, end_mask = var_332_end_mask_0, x = var_271_cast_fp16)[name = tensor("op_332_cast_fp16")]; tensor var_333_begin_0 = const()[name = tensor("op_333_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_333_end_0 = const()[name = tensor("op_333_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_333_end_mask_0 = const()[name = tensor("op_333_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_333_cast_fp16 = slice_by_index(begin = var_333_begin_0, end = var_333_end_0, end_mask = var_333_end_mask_0, x = var_271_cast_fp16)[name = tensor("op_333_cast_fp16")]; tensor var_334_begin_0 = const()[name = tensor("op_334_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_334_end_0 = const()[name = tensor("op_334_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_334_end_mask_0 = const()[name = tensor("op_334_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_334_cast_fp16 = slice_by_index(begin = var_334_begin_0, end = var_334_end_0, end_mask = var_334_end_mask_0, x = var_271_cast_fp16)[name = tensor("op_334_cast_fp16")]; tensor var_335_begin_0 = const()[name = tensor("op_335_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_335_end_0 = const()[name = tensor("op_335_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_335_end_mask_0 = const()[name = tensor("op_335_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_335_cast_fp16 = slice_by_index(begin = var_335_begin_0, end = var_335_end_0, end_mask = var_335_end_mask_0, x = var_271_cast_fp16)[name = tensor("op_335_cast_fp16")]; tensor var_336_begin_0 = const()[name = tensor("op_336_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_336_end_0 = const()[name = tensor("op_336_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_336_end_mask_0 = const()[name = tensor("op_336_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_336_cast_fp16 = slice_by_index(begin = var_336_begin_0, end = var_336_end_0, end_mask = var_336_end_mask_0, x = var_271_cast_fp16)[name = tensor("op_336_cast_fp16")]; tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = var_271_cast_fp16)[name = tensor("op_337_cast_fp16")]; tensor k_3_perm_0 = const()[name = tensor("k_3_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_342_begin_0 = const()[name = tensor("op_342_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_342_end_0 = const()[name = tensor("op_342_end_0"), val = tensor([2, 4096, 1, 40])]; tensor var_342_end_mask_0 = const()[name = tensor("op_342_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_31 = transpose(perm = k_3_perm_0, x = k_1_cast_fp16)[name = tensor("transpose_31")]; tensor var_342_cast_fp16 = slice_by_index(begin = var_342_begin_0, end = var_342_end_0, end_mask = var_342_end_mask_0, x = transpose_31)[name = tensor("op_342_cast_fp16")]; tensor var_346_begin_0 = const()[name = tensor("op_346_begin_0"), val = tensor([0, 0, 0, 40])]; tensor var_346_end_0 = const()[name = tensor("op_346_end_0"), val = tensor([2, 4096, 1, 80])]; tensor var_346_end_mask_0 = const()[name = tensor("op_346_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_346_cast_fp16 = slice_by_index(begin = var_346_begin_0, end = var_346_end_0, end_mask = var_346_end_mask_0, x = transpose_31)[name = tensor("op_346_cast_fp16")]; tensor var_350_begin_0 = const()[name = tensor("op_350_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_350_end_0 = const()[name = tensor("op_350_end_0"), val = tensor([2, 4096, 1, 120])]; tensor var_350_end_mask_0 = const()[name = tensor("op_350_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_350_cast_fp16 = slice_by_index(begin = var_350_begin_0, end = var_350_end_0, end_mask = var_350_end_mask_0, x = transpose_31)[name = tensor("op_350_cast_fp16")]; tensor var_354_begin_0 = const()[name = tensor("op_354_begin_0"), val = tensor([0, 0, 0, 120])]; tensor var_354_end_0 = const()[name = tensor("op_354_end_0"), val = tensor([2, 4096, 1, 160])]; tensor var_354_end_mask_0 = const()[name = tensor("op_354_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_354_cast_fp16 = slice_by_index(begin = var_354_begin_0, end = var_354_end_0, end_mask = var_354_end_mask_0, x = transpose_31)[name = tensor("op_354_cast_fp16")]; tensor var_358_begin_0 = const()[name = tensor("op_358_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_358_end_0 = const()[name = tensor("op_358_end_0"), val = tensor([2, 4096, 1, 200])]; tensor var_358_end_mask_0 = const()[name = tensor("op_358_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_358_cast_fp16 = slice_by_index(begin = var_358_begin_0, end = var_358_end_0, end_mask = var_358_end_mask_0, x = transpose_31)[name = tensor("op_358_cast_fp16")]; tensor var_362_begin_0 = const()[name = tensor("op_362_begin_0"), val = tensor([0, 0, 0, 200])]; tensor var_362_end_0 = const()[name = tensor("op_362_end_0"), val = tensor([2, 4096, 1, 240])]; tensor var_362_end_mask_0 = const()[name = tensor("op_362_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_362_cast_fp16 = slice_by_index(begin = var_362_begin_0, end = var_362_end_0, end_mask = var_362_end_mask_0, x = transpose_31)[name = tensor("op_362_cast_fp16")]; tensor var_366_begin_0 = const()[name = tensor("op_366_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_366_end_0 = const()[name = tensor("op_366_end_0"), val = tensor([2, 4096, 1, 280])]; tensor var_366_end_mask_0 = const()[name = tensor("op_366_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_366_cast_fp16 = slice_by_index(begin = var_366_begin_0, end = var_366_end_0, end_mask = var_366_end_mask_0, x = transpose_31)[name = tensor("op_366_cast_fp16")]; tensor var_370_begin_0 = const()[name = tensor("op_370_begin_0"), val = tensor([0, 0, 0, 280])]; tensor var_370_end_0 = const()[name = tensor("op_370_end_0"), val = tensor([2, 4096, 1, 320])]; tensor var_370_end_mask_0 = const()[name = tensor("op_370_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_370_cast_fp16 = slice_by_index(begin = var_370_begin_0, end = var_370_end_0, end_mask = var_370_end_mask_0, x = transpose_31)[name = tensor("op_370_cast_fp16")]; tensor var_372_begin_0 = const()[name = tensor("op_372_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_372_end_0 = const()[name = tensor("op_372_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_372_end_mask_0 = const()[name = tensor("op_372_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_372_cast_fp16 = slice_by_index(begin = var_372_begin_0, end = var_372_end_0, end_mask = var_372_end_mask_0, x = v_1_cast_fp16)[name = tensor("op_372_cast_fp16")]; tensor var_376_begin_0 = const()[name = tensor("op_376_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_376_end_0 = const()[name = tensor("op_376_end_0"), val = tensor([2, 80, 1, 4096])]; tensor var_376_end_mask_0 = const()[name = tensor("op_376_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_376_cast_fp16 = slice_by_index(begin = var_376_begin_0, end = var_376_end_0, end_mask = var_376_end_mask_0, x = v_1_cast_fp16)[name = tensor("op_376_cast_fp16")]; tensor var_380_begin_0 = const()[name = tensor("op_380_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_380_end_0 = const()[name = tensor("op_380_end_0"), val = tensor([2, 120, 1, 4096])]; tensor var_380_end_mask_0 = const()[name = tensor("op_380_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_380_cast_fp16 = slice_by_index(begin = var_380_begin_0, end = var_380_end_0, end_mask = var_380_end_mask_0, x = v_1_cast_fp16)[name = tensor("op_380_cast_fp16")]; tensor var_384_begin_0 = const()[name = tensor("op_384_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_384_end_0 = const()[name = tensor("op_384_end_0"), val = tensor([2, 160, 1, 4096])]; tensor var_384_end_mask_0 = const()[name = tensor("op_384_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_384_cast_fp16 = slice_by_index(begin = var_384_begin_0, end = var_384_end_0, end_mask = var_384_end_mask_0, x = v_1_cast_fp16)[name = tensor("op_384_cast_fp16")]; tensor var_388_begin_0 = const()[name = tensor("op_388_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_388_end_0 = const()[name = tensor("op_388_end_0"), val = tensor([2, 200, 1, 4096])]; tensor var_388_end_mask_0 = const()[name = tensor("op_388_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_388_cast_fp16 = slice_by_index(begin = var_388_begin_0, end = var_388_end_0, end_mask = var_388_end_mask_0, x = v_1_cast_fp16)[name = tensor("op_388_cast_fp16")]; tensor var_392_begin_0 = const()[name = tensor("op_392_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_392_end_0 = const()[name = tensor("op_392_end_0"), val = tensor([2, 240, 1, 4096])]; tensor var_392_end_mask_0 = const()[name = tensor("op_392_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_392_cast_fp16 = slice_by_index(begin = var_392_begin_0, end = var_392_end_0, end_mask = var_392_end_mask_0, x = v_1_cast_fp16)[name = tensor("op_392_cast_fp16")]; tensor var_396_begin_0 = const()[name = tensor("op_396_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_396_end_0 = const()[name = tensor("op_396_end_0"), val = tensor([2, 280, 1, 4096])]; tensor var_396_end_mask_0 = const()[name = tensor("op_396_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_396_cast_fp16 = slice_by_index(begin = var_396_begin_0, end = var_396_end_0, end_mask = var_396_end_mask_0, x = v_1_cast_fp16)[name = tensor("op_396_cast_fp16")]; tensor var_400_begin_0 = const()[name = tensor("op_400_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_400_end_0 = const()[name = tensor("op_400_end_0"), val = tensor([2, 320, 1, 4096])]; tensor var_400_end_mask_0 = const()[name = tensor("op_400_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_400_cast_fp16 = slice_by_index(begin = var_400_begin_0, end = var_400_end_0, end_mask = var_400_end_mask_0, x = v_1_cast_fp16)[name = tensor("op_400_cast_fp16")]; tensor var_404_equation_0 = const()[name = tensor("op_404_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_404_cast_fp16 = einsum(equation = var_404_equation_0, values = (var_342_cast_fp16, var_274_cast_fp16))[name = tensor("op_404_cast_fp16")]; tensor var_405_to_fp16 = const()[name = tensor("op_405_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1_cast_fp16 = mul(x = var_404_cast_fp16, y = var_405_to_fp16)[name = tensor("aw_chunk_1_cast_fp16")]; tensor var_408_equation_0 = const()[name = tensor("op_408_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_408_cast_fp16 = einsum(equation = var_408_equation_0, values = (var_342_cast_fp16, var_275_cast_fp16))[name = tensor("op_408_cast_fp16")]; tensor var_409_to_fp16 = const()[name = tensor("op_409_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_3_cast_fp16 = mul(x = var_408_cast_fp16, y = var_409_to_fp16)[name = tensor("aw_chunk_3_cast_fp16")]; tensor var_412_equation_0 = const()[name = tensor("op_412_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_412_cast_fp16 = einsum(equation = var_412_equation_0, values = (var_342_cast_fp16, var_276_cast_fp16))[name = tensor("op_412_cast_fp16")]; tensor var_413_to_fp16 = const()[name = tensor("op_413_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_5_cast_fp16 = mul(x = var_412_cast_fp16, y = var_413_to_fp16)[name = tensor("aw_chunk_5_cast_fp16")]; tensor var_416_equation_0 = const()[name = tensor("op_416_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_416_cast_fp16 = einsum(equation = var_416_equation_0, values = (var_342_cast_fp16, var_277_cast_fp16))[name = tensor("op_416_cast_fp16")]; tensor var_417_to_fp16 = const()[name = tensor("op_417_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_7_cast_fp16 = mul(x = var_416_cast_fp16, y = var_417_to_fp16)[name = tensor("aw_chunk_7_cast_fp16")]; tensor var_420_equation_0 = const()[name = tensor("op_420_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_420_cast_fp16 = einsum(equation = var_420_equation_0, values = (var_342_cast_fp16, var_278_cast_fp16))[name = tensor("op_420_cast_fp16")]; tensor var_421_to_fp16 = const()[name = tensor("op_421_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_9_cast_fp16 = mul(x = var_420_cast_fp16, y = var_421_to_fp16)[name = tensor("aw_chunk_9_cast_fp16")]; tensor var_424_equation_0 = const()[name = tensor("op_424_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_424_cast_fp16 = einsum(equation = var_424_equation_0, values = (var_342_cast_fp16, var_279_cast_fp16))[name = tensor("op_424_cast_fp16")]; tensor var_425_to_fp16 = const()[name = tensor("op_425_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_11_cast_fp16 = mul(x = var_424_cast_fp16, y = var_425_to_fp16)[name = tensor("aw_chunk_11_cast_fp16")]; tensor var_428_equation_0 = const()[name = tensor("op_428_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_428_cast_fp16 = einsum(equation = var_428_equation_0, values = (var_342_cast_fp16, var_280_cast_fp16))[name = tensor("op_428_cast_fp16")]; tensor var_429_to_fp16 = const()[name = tensor("op_429_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_13_cast_fp16 = mul(x = var_428_cast_fp16, y = var_429_to_fp16)[name = tensor("aw_chunk_13_cast_fp16")]; tensor var_432_equation_0 = const()[name = tensor("op_432_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_432_cast_fp16 = einsum(equation = var_432_equation_0, values = (var_342_cast_fp16, var_281_cast_fp16))[name = tensor("op_432_cast_fp16")]; tensor var_433_to_fp16 = const()[name = tensor("op_433_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_15_cast_fp16 = mul(x = var_432_cast_fp16, y = var_433_to_fp16)[name = tensor("aw_chunk_15_cast_fp16")]; tensor var_436_equation_0 = const()[name = tensor("op_436_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_436_cast_fp16 = einsum(equation = var_436_equation_0, values = (var_346_cast_fp16, var_282_cast_fp16))[name = tensor("op_436_cast_fp16")]; tensor var_437_to_fp16 = const()[name = tensor("op_437_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_17_cast_fp16 = mul(x = var_436_cast_fp16, y = var_437_to_fp16)[name = tensor("aw_chunk_17_cast_fp16")]; tensor var_440_equation_0 = const()[name = tensor("op_440_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_440_cast_fp16 = einsum(equation = var_440_equation_0, values = (var_346_cast_fp16, var_283_cast_fp16))[name = tensor("op_440_cast_fp16")]; tensor var_441_to_fp16 = const()[name = tensor("op_441_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_19_cast_fp16 = mul(x = var_440_cast_fp16, y = var_441_to_fp16)[name = tensor("aw_chunk_19_cast_fp16")]; tensor var_444_equation_0 = const()[name = tensor("op_444_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_444_cast_fp16 = einsum(equation = var_444_equation_0, values = (var_346_cast_fp16, var_284_cast_fp16))[name = tensor("op_444_cast_fp16")]; tensor var_445_to_fp16 = const()[name = tensor("op_445_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_21_cast_fp16 = mul(x = var_444_cast_fp16, y = var_445_to_fp16)[name = tensor("aw_chunk_21_cast_fp16")]; tensor var_448_equation_0 = const()[name = tensor("op_448_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_448_cast_fp16 = einsum(equation = var_448_equation_0, values = (var_346_cast_fp16, var_285_cast_fp16))[name = tensor("op_448_cast_fp16")]; tensor var_449_to_fp16 = const()[name = tensor("op_449_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_23_cast_fp16 = mul(x = var_448_cast_fp16, y = var_449_to_fp16)[name = tensor("aw_chunk_23_cast_fp16")]; tensor var_452_equation_0 = const()[name = tensor("op_452_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_452_cast_fp16 = einsum(equation = var_452_equation_0, values = (var_346_cast_fp16, var_286_cast_fp16))[name = tensor("op_452_cast_fp16")]; tensor var_453_to_fp16 = const()[name = tensor("op_453_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_25_cast_fp16 = mul(x = var_452_cast_fp16, y = var_453_to_fp16)[name = tensor("aw_chunk_25_cast_fp16")]; tensor var_456_equation_0 = const()[name = tensor("op_456_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_456_cast_fp16 = einsum(equation = var_456_equation_0, values = (var_346_cast_fp16, var_287_cast_fp16))[name = tensor("op_456_cast_fp16")]; tensor var_457_to_fp16 = const()[name = tensor("op_457_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_27_cast_fp16 = mul(x = var_456_cast_fp16, y = var_457_to_fp16)[name = tensor("aw_chunk_27_cast_fp16")]; tensor var_460_equation_0 = const()[name = tensor("op_460_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_460_cast_fp16 = einsum(equation = var_460_equation_0, values = (var_346_cast_fp16, var_288_cast_fp16))[name = tensor("op_460_cast_fp16")]; tensor var_461_to_fp16 = const()[name = tensor("op_461_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_29_cast_fp16 = mul(x = var_460_cast_fp16, y = var_461_to_fp16)[name = tensor("aw_chunk_29_cast_fp16")]; tensor var_464_equation_0 = const()[name = tensor("op_464_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_464_cast_fp16 = einsum(equation = var_464_equation_0, values = (var_346_cast_fp16, var_289_cast_fp16))[name = tensor("op_464_cast_fp16")]; tensor var_465_to_fp16 = const()[name = tensor("op_465_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_31_cast_fp16 = mul(x = var_464_cast_fp16, y = var_465_to_fp16)[name = tensor("aw_chunk_31_cast_fp16")]; tensor var_468_equation_0 = const()[name = tensor("op_468_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_468_cast_fp16 = einsum(equation = var_468_equation_0, values = (var_350_cast_fp16, var_290_cast_fp16))[name = tensor("op_468_cast_fp16")]; tensor var_469_to_fp16 = const()[name = tensor("op_469_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_33_cast_fp16 = mul(x = var_468_cast_fp16, y = var_469_to_fp16)[name = tensor("aw_chunk_33_cast_fp16")]; tensor var_472_equation_0 = const()[name = tensor("op_472_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_472_cast_fp16 = einsum(equation = var_472_equation_0, values = (var_350_cast_fp16, var_291_cast_fp16))[name = tensor("op_472_cast_fp16")]; tensor var_473_to_fp16 = const()[name = tensor("op_473_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_35_cast_fp16 = mul(x = var_472_cast_fp16, y = var_473_to_fp16)[name = tensor("aw_chunk_35_cast_fp16")]; tensor var_476_equation_0 = const()[name = tensor("op_476_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_476_cast_fp16 = einsum(equation = var_476_equation_0, values = (var_350_cast_fp16, var_292_cast_fp16))[name = tensor("op_476_cast_fp16")]; tensor var_477_to_fp16 = const()[name = tensor("op_477_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_37_cast_fp16 = mul(x = var_476_cast_fp16, y = var_477_to_fp16)[name = tensor("aw_chunk_37_cast_fp16")]; tensor var_480_equation_0 = const()[name = tensor("op_480_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_480_cast_fp16 = einsum(equation = var_480_equation_0, values = (var_350_cast_fp16, var_293_cast_fp16))[name = tensor("op_480_cast_fp16")]; tensor var_481_to_fp16 = const()[name = tensor("op_481_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_39_cast_fp16 = mul(x = var_480_cast_fp16, y = var_481_to_fp16)[name = tensor("aw_chunk_39_cast_fp16")]; tensor var_484_equation_0 = const()[name = tensor("op_484_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_484_cast_fp16 = einsum(equation = var_484_equation_0, values = (var_350_cast_fp16, var_294_cast_fp16))[name = tensor("op_484_cast_fp16")]; tensor var_485_to_fp16 = const()[name = tensor("op_485_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_41_cast_fp16 = mul(x = var_484_cast_fp16, y = var_485_to_fp16)[name = tensor("aw_chunk_41_cast_fp16")]; tensor var_488_equation_0 = const()[name = tensor("op_488_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_488_cast_fp16 = einsum(equation = var_488_equation_0, values = (var_350_cast_fp16, var_295_cast_fp16))[name = tensor("op_488_cast_fp16")]; tensor var_489_to_fp16 = const()[name = tensor("op_489_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_43_cast_fp16 = mul(x = var_488_cast_fp16, y = var_489_to_fp16)[name = tensor("aw_chunk_43_cast_fp16")]; tensor var_492_equation_0 = const()[name = tensor("op_492_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_492_cast_fp16 = einsum(equation = var_492_equation_0, values = (var_350_cast_fp16, var_296_cast_fp16))[name = tensor("op_492_cast_fp16")]; tensor var_493_to_fp16 = const()[name = tensor("op_493_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_45_cast_fp16 = mul(x = var_492_cast_fp16, y = var_493_to_fp16)[name = tensor("aw_chunk_45_cast_fp16")]; tensor var_496_equation_0 = const()[name = tensor("op_496_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_496_cast_fp16 = einsum(equation = var_496_equation_0, values = (var_350_cast_fp16, var_297_cast_fp16))[name = tensor("op_496_cast_fp16")]; tensor var_497_to_fp16 = const()[name = tensor("op_497_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_47_cast_fp16 = mul(x = var_496_cast_fp16, y = var_497_to_fp16)[name = tensor("aw_chunk_47_cast_fp16")]; tensor var_500_equation_0 = const()[name = tensor("op_500_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_500_cast_fp16 = einsum(equation = var_500_equation_0, values = (var_354_cast_fp16, var_298_cast_fp16))[name = tensor("op_500_cast_fp16")]; tensor var_501_to_fp16 = const()[name = tensor("op_501_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_49_cast_fp16 = mul(x = var_500_cast_fp16, y = var_501_to_fp16)[name = tensor("aw_chunk_49_cast_fp16")]; tensor var_504_equation_0 = const()[name = tensor("op_504_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_504_cast_fp16 = einsum(equation = var_504_equation_0, values = (var_354_cast_fp16, var_299_cast_fp16))[name = tensor("op_504_cast_fp16")]; tensor var_505_to_fp16 = const()[name = tensor("op_505_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_51_cast_fp16 = mul(x = var_504_cast_fp16, y = var_505_to_fp16)[name = tensor("aw_chunk_51_cast_fp16")]; tensor var_508_equation_0 = const()[name = tensor("op_508_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_508_cast_fp16 = einsum(equation = var_508_equation_0, values = (var_354_cast_fp16, var_300_cast_fp16))[name = tensor("op_508_cast_fp16")]; tensor var_509_to_fp16 = const()[name = tensor("op_509_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_53_cast_fp16 = mul(x = var_508_cast_fp16, y = var_509_to_fp16)[name = tensor("aw_chunk_53_cast_fp16")]; tensor var_512_equation_0 = const()[name = tensor("op_512_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_512_cast_fp16 = einsum(equation = var_512_equation_0, values = (var_354_cast_fp16, var_301_cast_fp16))[name = tensor("op_512_cast_fp16")]; tensor var_513_to_fp16 = const()[name = tensor("op_513_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_55_cast_fp16 = mul(x = var_512_cast_fp16, y = var_513_to_fp16)[name = tensor("aw_chunk_55_cast_fp16")]; tensor var_516_equation_0 = const()[name = tensor("op_516_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_516_cast_fp16 = einsum(equation = var_516_equation_0, values = (var_354_cast_fp16, var_302_cast_fp16))[name = tensor("op_516_cast_fp16")]; tensor var_517_to_fp16 = const()[name = tensor("op_517_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_57_cast_fp16 = mul(x = var_516_cast_fp16, y = var_517_to_fp16)[name = tensor("aw_chunk_57_cast_fp16")]; tensor var_520_equation_0 = const()[name = tensor("op_520_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_520_cast_fp16 = einsum(equation = var_520_equation_0, values = (var_354_cast_fp16, var_303_cast_fp16))[name = tensor("op_520_cast_fp16")]; tensor var_521_to_fp16 = const()[name = tensor("op_521_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_59_cast_fp16 = mul(x = var_520_cast_fp16, y = var_521_to_fp16)[name = tensor("aw_chunk_59_cast_fp16")]; tensor var_524_equation_0 = const()[name = tensor("op_524_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_524_cast_fp16 = einsum(equation = var_524_equation_0, values = (var_354_cast_fp16, var_304_cast_fp16))[name = tensor("op_524_cast_fp16")]; tensor var_525_to_fp16 = const()[name = tensor("op_525_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_61_cast_fp16 = mul(x = var_524_cast_fp16, y = var_525_to_fp16)[name = tensor("aw_chunk_61_cast_fp16")]; tensor var_528_equation_0 = const()[name = tensor("op_528_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_528_cast_fp16 = einsum(equation = var_528_equation_0, values = (var_354_cast_fp16, var_305_cast_fp16))[name = tensor("op_528_cast_fp16")]; tensor var_529_to_fp16 = const()[name = tensor("op_529_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_63_cast_fp16 = mul(x = var_528_cast_fp16, y = var_529_to_fp16)[name = tensor("aw_chunk_63_cast_fp16")]; tensor var_532_equation_0 = const()[name = tensor("op_532_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_532_cast_fp16 = einsum(equation = var_532_equation_0, values = (var_358_cast_fp16, var_306_cast_fp16))[name = tensor("op_532_cast_fp16")]; tensor var_533_to_fp16 = const()[name = tensor("op_533_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_65_cast_fp16 = mul(x = var_532_cast_fp16, y = var_533_to_fp16)[name = tensor("aw_chunk_65_cast_fp16")]; tensor var_536_equation_0 = const()[name = tensor("op_536_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_536_cast_fp16 = einsum(equation = var_536_equation_0, values = (var_358_cast_fp16, var_307_cast_fp16))[name = tensor("op_536_cast_fp16")]; tensor var_537_to_fp16 = const()[name = tensor("op_537_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_67_cast_fp16 = mul(x = var_536_cast_fp16, y = var_537_to_fp16)[name = tensor("aw_chunk_67_cast_fp16")]; tensor var_540_equation_0 = const()[name = tensor("op_540_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_540_cast_fp16 = einsum(equation = var_540_equation_0, values = (var_358_cast_fp16, var_308_cast_fp16))[name = tensor("op_540_cast_fp16")]; tensor var_541_to_fp16 = const()[name = tensor("op_541_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_69_cast_fp16 = mul(x = var_540_cast_fp16, y = var_541_to_fp16)[name = tensor("aw_chunk_69_cast_fp16")]; tensor var_544_equation_0 = const()[name = tensor("op_544_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_544_cast_fp16 = einsum(equation = var_544_equation_0, values = (var_358_cast_fp16, var_309_cast_fp16))[name = tensor("op_544_cast_fp16")]; tensor var_545_to_fp16 = const()[name = tensor("op_545_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_71_cast_fp16 = mul(x = var_544_cast_fp16, y = var_545_to_fp16)[name = tensor("aw_chunk_71_cast_fp16")]; tensor var_548_equation_0 = const()[name = tensor("op_548_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_548_cast_fp16 = einsum(equation = var_548_equation_0, values = (var_358_cast_fp16, var_310_cast_fp16))[name = tensor("op_548_cast_fp16")]; tensor var_549_to_fp16 = const()[name = tensor("op_549_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_73_cast_fp16 = mul(x = var_548_cast_fp16, y = var_549_to_fp16)[name = tensor("aw_chunk_73_cast_fp16")]; tensor var_552_equation_0 = const()[name = tensor("op_552_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_552_cast_fp16 = einsum(equation = var_552_equation_0, values = (var_358_cast_fp16, var_311_cast_fp16))[name = tensor("op_552_cast_fp16")]; tensor var_553_to_fp16 = const()[name = tensor("op_553_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_75_cast_fp16 = mul(x = var_552_cast_fp16, y = var_553_to_fp16)[name = tensor("aw_chunk_75_cast_fp16")]; tensor var_556_equation_0 = const()[name = tensor("op_556_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_556_cast_fp16 = einsum(equation = var_556_equation_0, values = (var_358_cast_fp16, var_312_cast_fp16))[name = tensor("op_556_cast_fp16")]; tensor var_557_to_fp16 = const()[name = tensor("op_557_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_77_cast_fp16 = mul(x = var_556_cast_fp16, y = var_557_to_fp16)[name = tensor("aw_chunk_77_cast_fp16")]; tensor var_560_equation_0 = const()[name = tensor("op_560_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_560_cast_fp16 = einsum(equation = var_560_equation_0, values = (var_358_cast_fp16, var_313_cast_fp16))[name = tensor("op_560_cast_fp16")]; tensor var_561_to_fp16 = const()[name = tensor("op_561_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_79_cast_fp16 = mul(x = var_560_cast_fp16, y = var_561_to_fp16)[name = tensor("aw_chunk_79_cast_fp16")]; tensor var_564_equation_0 = const()[name = tensor("op_564_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_564_cast_fp16 = einsum(equation = var_564_equation_0, values = (var_362_cast_fp16, var_314_cast_fp16))[name = tensor("op_564_cast_fp16")]; tensor var_565_to_fp16 = const()[name = tensor("op_565_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_81_cast_fp16 = mul(x = var_564_cast_fp16, y = var_565_to_fp16)[name = tensor("aw_chunk_81_cast_fp16")]; tensor var_568_equation_0 = const()[name = tensor("op_568_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_568_cast_fp16 = einsum(equation = var_568_equation_0, values = (var_362_cast_fp16, var_315_cast_fp16))[name = tensor("op_568_cast_fp16")]; tensor var_569_to_fp16 = const()[name = tensor("op_569_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_83_cast_fp16 = mul(x = var_568_cast_fp16, y = var_569_to_fp16)[name = tensor("aw_chunk_83_cast_fp16")]; tensor var_572_equation_0 = const()[name = tensor("op_572_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_572_cast_fp16 = einsum(equation = var_572_equation_0, values = (var_362_cast_fp16, var_316_cast_fp16))[name = tensor("op_572_cast_fp16")]; tensor var_573_to_fp16 = const()[name = tensor("op_573_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_85_cast_fp16 = mul(x = var_572_cast_fp16, y = var_573_to_fp16)[name = tensor("aw_chunk_85_cast_fp16")]; tensor var_576_equation_0 = const()[name = tensor("op_576_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_576_cast_fp16 = einsum(equation = var_576_equation_0, values = (var_362_cast_fp16, var_317_cast_fp16))[name = tensor("op_576_cast_fp16")]; tensor var_577_to_fp16 = const()[name = tensor("op_577_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_87_cast_fp16 = mul(x = var_576_cast_fp16, y = var_577_to_fp16)[name = tensor("aw_chunk_87_cast_fp16")]; tensor var_580_equation_0 = const()[name = tensor("op_580_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_580_cast_fp16 = einsum(equation = var_580_equation_0, values = (var_362_cast_fp16, var_318_cast_fp16))[name = tensor("op_580_cast_fp16")]; tensor var_581_to_fp16 = const()[name = tensor("op_581_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_89_cast_fp16 = mul(x = var_580_cast_fp16, y = var_581_to_fp16)[name = tensor("aw_chunk_89_cast_fp16")]; tensor var_584_equation_0 = const()[name = tensor("op_584_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_584_cast_fp16 = einsum(equation = var_584_equation_0, values = (var_362_cast_fp16, var_319_cast_fp16))[name = tensor("op_584_cast_fp16")]; tensor var_585_to_fp16 = const()[name = tensor("op_585_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_91_cast_fp16 = mul(x = var_584_cast_fp16, y = var_585_to_fp16)[name = tensor("aw_chunk_91_cast_fp16")]; tensor var_588_equation_0 = const()[name = tensor("op_588_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_588_cast_fp16 = einsum(equation = var_588_equation_0, values = (var_362_cast_fp16, var_320_cast_fp16))[name = tensor("op_588_cast_fp16")]; tensor var_589_to_fp16 = const()[name = tensor("op_589_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_93_cast_fp16 = mul(x = var_588_cast_fp16, y = var_589_to_fp16)[name = tensor("aw_chunk_93_cast_fp16")]; tensor var_592_equation_0 = const()[name = tensor("op_592_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_592_cast_fp16 = einsum(equation = var_592_equation_0, values = (var_362_cast_fp16, var_321_cast_fp16))[name = tensor("op_592_cast_fp16")]; tensor var_593_to_fp16 = const()[name = tensor("op_593_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_95_cast_fp16 = mul(x = var_592_cast_fp16, y = var_593_to_fp16)[name = tensor("aw_chunk_95_cast_fp16")]; tensor var_596_equation_0 = const()[name = tensor("op_596_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_596_cast_fp16 = einsum(equation = var_596_equation_0, values = (var_366_cast_fp16, var_322_cast_fp16))[name = tensor("op_596_cast_fp16")]; tensor var_597_to_fp16 = const()[name = tensor("op_597_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_97_cast_fp16 = mul(x = var_596_cast_fp16, y = var_597_to_fp16)[name = tensor("aw_chunk_97_cast_fp16")]; tensor var_600_equation_0 = const()[name = tensor("op_600_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_600_cast_fp16 = einsum(equation = var_600_equation_0, values = (var_366_cast_fp16, var_323_cast_fp16))[name = tensor("op_600_cast_fp16")]; tensor var_601_to_fp16 = const()[name = tensor("op_601_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_99_cast_fp16 = mul(x = var_600_cast_fp16, y = var_601_to_fp16)[name = tensor("aw_chunk_99_cast_fp16")]; tensor var_604_equation_0 = const()[name = tensor("op_604_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_604_cast_fp16 = einsum(equation = var_604_equation_0, values = (var_366_cast_fp16, var_324_cast_fp16))[name = tensor("op_604_cast_fp16")]; tensor var_605_to_fp16 = const()[name = tensor("op_605_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_101_cast_fp16 = mul(x = var_604_cast_fp16, y = var_605_to_fp16)[name = tensor("aw_chunk_101_cast_fp16")]; tensor var_608_equation_0 = const()[name = tensor("op_608_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_608_cast_fp16 = einsum(equation = var_608_equation_0, values = (var_366_cast_fp16, var_325_cast_fp16))[name = tensor("op_608_cast_fp16")]; tensor var_609_to_fp16 = const()[name = tensor("op_609_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_103_cast_fp16 = mul(x = var_608_cast_fp16, y = var_609_to_fp16)[name = tensor("aw_chunk_103_cast_fp16")]; tensor var_612_equation_0 = const()[name = tensor("op_612_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_612_cast_fp16 = einsum(equation = var_612_equation_0, values = (var_366_cast_fp16, var_326_cast_fp16))[name = tensor("op_612_cast_fp16")]; tensor var_613_to_fp16 = const()[name = tensor("op_613_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_105_cast_fp16 = mul(x = var_612_cast_fp16, y = var_613_to_fp16)[name = tensor("aw_chunk_105_cast_fp16")]; tensor var_616_equation_0 = const()[name = tensor("op_616_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_616_cast_fp16 = einsum(equation = var_616_equation_0, values = (var_366_cast_fp16, var_327_cast_fp16))[name = tensor("op_616_cast_fp16")]; tensor var_617_to_fp16 = const()[name = tensor("op_617_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_107_cast_fp16 = mul(x = var_616_cast_fp16, y = var_617_to_fp16)[name = tensor("aw_chunk_107_cast_fp16")]; tensor var_620_equation_0 = const()[name = tensor("op_620_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_620_cast_fp16 = einsum(equation = var_620_equation_0, values = (var_366_cast_fp16, var_328_cast_fp16))[name = tensor("op_620_cast_fp16")]; tensor var_621_to_fp16 = const()[name = tensor("op_621_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_109_cast_fp16 = mul(x = var_620_cast_fp16, y = var_621_to_fp16)[name = tensor("aw_chunk_109_cast_fp16")]; tensor var_624_equation_0 = const()[name = tensor("op_624_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_624_cast_fp16 = einsum(equation = var_624_equation_0, values = (var_366_cast_fp16, var_329_cast_fp16))[name = tensor("op_624_cast_fp16")]; tensor var_625_to_fp16 = const()[name = tensor("op_625_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_111_cast_fp16 = mul(x = var_624_cast_fp16, y = var_625_to_fp16)[name = tensor("aw_chunk_111_cast_fp16")]; tensor var_628_equation_0 = const()[name = tensor("op_628_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_628_cast_fp16 = einsum(equation = var_628_equation_0, values = (var_370_cast_fp16, var_330_cast_fp16))[name = tensor("op_628_cast_fp16")]; tensor var_629_to_fp16 = const()[name = tensor("op_629_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_113_cast_fp16 = mul(x = var_628_cast_fp16, y = var_629_to_fp16)[name = tensor("aw_chunk_113_cast_fp16")]; tensor var_632_equation_0 = const()[name = tensor("op_632_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_632_cast_fp16 = einsum(equation = var_632_equation_0, values = (var_370_cast_fp16, var_331_cast_fp16))[name = tensor("op_632_cast_fp16")]; tensor var_633_to_fp16 = const()[name = tensor("op_633_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_115_cast_fp16 = mul(x = var_632_cast_fp16, y = var_633_to_fp16)[name = tensor("aw_chunk_115_cast_fp16")]; tensor var_636_equation_0 = const()[name = tensor("op_636_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_636_cast_fp16 = einsum(equation = var_636_equation_0, values = (var_370_cast_fp16, var_332_cast_fp16))[name = tensor("op_636_cast_fp16")]; tensor var_637_to_fp16 = const()[name = tensor("op_637_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_117_cast_fp16 = mul(x = var_636_cast_fp16, y = var_637_to_fp16)[name = tensor("aw_chunk_117_cast_fp16")]; tensor var_640_equation_0 = const()[name = tensor("op_640_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_640_cast_fp16 = einsum(equation = var_640_equation_0, values = (var_370_cast_fp16, var_333_cast_fp16))[name = tensor("op_640_cast_fp16")]; tensor var_641_to_fp16 = const()[name = tensor("op_641_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_119_cast_fp16 = mul(x = var_640_cast_fp16, y = var_641_to_fp16)[name = tensor("aw_chunk_119_cast_fp16")]; tensor var_644_equation_0 = const()[name = tensor("op_644_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_644_cast_fp16 = einsum(equation = var_644_equation_0, values = (var_370_cast_fp16, var_334_cast_fp16))[name = tensor("op_644_cast_fp16")]; tensor var_645_to_fp16 = const()[name = tensor("op_645_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_121_cast_fp16 = mul(x = var_644_cast_fp16, y = var_645_to_fp16)[name = tensor("aw_chunk_121_cast_fp16")]; tensor var_648_equation_0 = const()[name = tensor("op_648_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_648_cast_fp16 = einsum(equation = var_648_equation_0, values = (var_370_cast_fp16, var_335_cast_fp16))[name = tensor("op_648_cast_fp16")]; tensor var_649_to_fp16 = const()[name = tensor("op_649_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_123_cast_fp16 = mul(x = var_648_cast_fp16, y = var_649_to_fp16)[name = tensor("aw_chunk_123_cast_fp16")]; tensor var_652_equation_0 = const()[name = tensor("op_652_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_652_cast_fp16 = einsum(equation = var_652_equation_0, values = (var_370_cast_fp16, var_336_cast_fp16))[name = tensor("op_652_cast_fp16")]; tensor var_653_to_fp16 = const()[name = tensor("op_653_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_125_cast_fp16 = mul(x = var_652_cast_fp16, y = var_653_to_fp16)[name = tensor("aw_chunk_125_cast_fp16")]; tensor var_656_equation_0 = const()[name = tensor("op_656_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_656_cast_fp16 = einsum(equation = var_656_equation_0, values = (var_370_cast_fp16, var_337_cast_fp16))[name = tensor("op_656_cast_fp16")]; tensor var_657_to_fp16 = const()[name = tensor("op_657_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_127_cast_fp16 = mul(x = var_656_cast_fp16, y = var_657_to_fp16)[name = tensor("aw_chunk_127_cast_fp16")]; tensor var_659_cast_fp16 = softmax(axis = var_123, x = aw_chunk_1_cast_fp16)[name = tensor("op_659_cast_fp16")]; tensor var_660_cast_fp16 = softmax(axis = var_123, x = aw_chunk_3_cast_fp16)[name = tensor("op_660_cast_fp16")]; tensor var_661_cast_fp16 = softmax(axis = var_123, x = aw_chunk_5_cast_fp16)[name = tensor("op_661_cast_fp16")]; tensor var_662_cast_fp16 = softmax(axis = var_123, x = aw_chunk_7_cast_fp16)[name = tensor("op_662_cast_fp16")]; tensor var_663_cast_fp16 = softmax(axis = var_123, x = aw_chunk_9_cast_fp16)[name = tensor("op_663_cast_fp16")]; tensor var_664_cast_fp16 = softmax(axis = var_123, x = aw_chunk_11_cast_fp16)[name = tensor("op_664_cast_fp16")]; tensor var_665_cast_fp16 = softmax(axis = var_123, x = aw_chunk_13_cast_fp16)[name = tensor("op_665_cast_fp16")]; tensor var_666_cast_fp16 = softmax(axis = var_123, x = aw_chunk_15_cast_fp16)[name = tensor("op_666_cast_fp16")]; tensor var_667_cast_fp16 = softmax(axis = var_123, x = aw_chunk_17_cast_fp16)[name = tensor("op_667_cast_fp16")]; tensor var_668_cast_fp16 = softmax(axis = var_123, x = aw_chunk_19_cast_fp16)[name = tensor("op_668_cast_fp16")]; tensor var_669_cast_fp16 = softmax(axis = var_123, x = aw_chunk_21_cast_fp16)[name = tensor("op_669_cast_fp16")]; tensor var_670_cast_fp16 = softmax(axis = var_123, x = aw_chunk_23_cast_fp16)[name = tensor("op_670_cast_fp16")]; tensor var_671_cast_fp16 = softmax(axis = var_123, x = aw_chunk_25_cast_fp16)[name = tensor("op_671_cast_fp16")]; tensor var_672_cast_fp16 = softmax(axis = var_123, x = aw_chunk_27_cast_fp16)[name = tensor("op_672_cast_fp16")]; tensor var_673_cast_fp16 = softmax(axis = var_123, x = aw_chunk_29_cast_fp16)[name = tensor("op_673_cast_fp16")]; tensor var_674_cast_fp16 = softmax(axis = var_123, x = aw_chunk_31_cast_fp16)[name = tensor("op_674_cast_fp16")]; tensor var_675_cast_fp16 = softmax(axis = var_123, x = aw_chunk_33_cast_fp16)[name = tensor("op_675_cast_fp16")]; tensor var_676_cast_fp16 = softmax(axis = var_123, x = aw_chunk_35_cast_fp16)[name = tensor("op_676_cast_fp16")]; tensor var_677_cast_fp16 = softmax(axis = var_123, x = aw_chunk_37_cast_fp16)[name = tensor("op_677_cast_fp16")]; tensor var_678_cast_fp16 = softmax(axis = var_123, x = aw_chunk_39_cast_fp16)[name = tensor("op_678_cast_fp16")]; tensor var_679_cast_fp16 = softmax(axis = var_123, x = aw_chunk_41_cast_fp16)[name = tensor("op_679_cast_fp16")]; tensor var_680_cast_fp16 = softmax(axis = var_123, x = aw_chunk_43_cast_fp16)[name = tensor("op_680_cast_fp16")]; tensor var_681_cast_fp16 = softmax(axis = var_123, x = aw_chunk_45_cast_fp16)[name = tensor("op_681_cast_fp16")]; tensor var_682_cast_fp16 = softmax(axis = var_123, x = aw_chunk_47_cast_fp16)[name = tensor("op_682_cast_fp16")]; tensor var_683_cast_fp16 = softmax(axis = var_123, x = aw_chunk_49_cast_fp16)[name = tensor("op_683_cast_fp16")]; tensor var_684_cast_fp16 = softmax(axis = var_123, x = aw_chunk_51_cast_fp16)[name = tensor("op_684_cast_fp16")]; tensor var_685_cast_fp16 = softmax(axis = var_123, x = aw_chunk_53_cast_fp16)[name = tensor("op_685_cast_fp16")]; tensor var_686_cast_fp16 = softmax(axis = var_123, x = aw_chunk_55_cast_fp16)[name = tensor("op_686_cast_fp16")]; tensor var_687_cast_fp16 = softmax(axis = var_123, x = aw_chunk_57_cast_fp16)[name = tensor("op_687_cast_fp16")]; tensor var_688_cast_fp16 = softmax(axis = var_123, x = aw_chunk_59_cast_fp16)[name = tensor("op_688_cast_fp16")]; tensor var_689_cast_fp16 = softmax(axis = var_123, x = aw_chunk_61_cast_fp16)[name = tensor("op_689_cast_fp16")]; tensor var_690_cast_fp16 = softmax(axis = var_123, x = aw_chunk_63_cast_fp16)[name = tensor("op_690_cast_fp16")]; tensor var_691_cast_fp16 = softmax(axis = var_123, x = aw_chunk_65_cast_fp16)[name = tensor("op_691_cast_fp16")]; tensor var_692_cast_fp16 = softmax(axis = var_123, x = aw_chunk_67_cast_fp16)[name = tensor("op_692_cast_fp16")]; tensor var_693_cast_fp16 = softmax(axis = var_123, x = aw_chunk_69_cast_fp16)[name = tensor("op_693_cast_fp16")]; tensor var_694_cast_fp16 = softmax(axis = var_123, x = aw_chunk_71_cast_fp16)[name = tensor("op_694_cast_fp16")]; tensor var_695_cast_fp16 = softmax(axis = var_123, x = aw_chunk_73_cast_fp16)[name = tensor("op_695_cast_fp16")]; tensor var_696_cast_fp16 = softmax(axis = var_123, x = aw_chunk_75_cast_fp16)[name = tensor("op_696_cast_fp16")]; tensor var_697_cast_fp16 = softmax(axis = var_123, x = aw_chunk_77_cast_fp16)[name = tensor("op_697_cast_fp16")]; tensor var_698_cast_fp16 = softmax(axis = var_123, x = aw_chunk_79_cast_fp16)[name = tensor("op_698_cast_fp16")]; tensor var_699_cast_fp16 = softmax(axis = var_123, x = aw_chunk_81_cast_fp16)[name = tensor("op_699_cast_fp16")]; tensor var_700_cast_fp16 = softmax(axis = var_123, x = aw_chunk_83_cast_fp16)[name = tensor("op_700_cast_fp16")]; tensor var_701_cast_fp16 = softmax(axis = var_123, x = aw_chunk_85_cast_fp16)[name = tensor("op_701_cast_fp16")]; tensor var_702_cast_fp16 = softmax(axis = var_123, x = aw_chunk_87_cast_fp16)[name = tensor("op_702_cast_fp16")]; tensor var_703_cast_fp16 = softmax(axis = var_123, x = aw_chunk_89_cast_fp16)[name = tensor("op_703_cast_fp16")]; tensor var_704_cast_fp16 = softmax(axis = var_123, x = aw_chunk_91_cast_fp16)[name = tensor("op_704_cast_fp16")]; tensor var_705_cast_fp16 = softmax(axis = var_123, x = aw_chunk_93_cast_fp16)[name = tensor("op_705_cast_fp16")]; tensor var_706_cast_fp16 = softmax(axis = var_123, x = aw_chunk_95_cast_fp16)[name = tensor("op_706_cast_fp16")]; tensor var_707_cast_fp16 = softmax(axis = var_123, x = aw_chunk_97_cast_fp16)[name = tensor("op_707_cast_fp16")]; tensor var_708_cast_fp16 = softmax(axis = var_123, x = aw_chunk_99_cast_fp16)[name = tensor("op_708_cast_fp16")]; tensor var_709_cast_fp16 = softmax(axis = var_123, x = aw_chunk_101_cast_fp16)[name = tensor("op_709_cast_fp16")]; tensor var_710_cast_fp16 = softmax(axis = var_123, x = aw_chunk_103_cast_fp16)[name = tensor("op_710_cast_fp16")]; tensor var_711_cast_fp16 = softmax(axis = var_123, x = aw_chunk_105_cast_fp16)[name = tensor("op_711_cast_fp16")]; tensor var_712_cast_fp16 = softmax(axis = var_123, x = aw_chunk_107_cast_fp16)[name = tensor("op_712_cast_fp16")]; tensor var_713_cast_fp16 = softmax(axis = var_123, x = aw_chunk_109_cast_fp16)[name = tensor("op_713_cast_fp16")]; tensor var_714_cast_fp16 = softmax(axis = var_123, x = aw_chunk_111_cast_fp16)[name = tensor("op_714_cast_fp16")]; tensor var_715_cast_fp16 = softmax(axis = var_123, x = aw_chunk_113_cast_fp16)[name = tensor("op_715_cast_fp16")]; tensor var_716_cast_fp16 = softmax(axis = var_123, x = aw_chunk_115_cast_fp16)[name = tensor("op_716_cast_fp16")]; tensor var_717_cast_fp16 = softmax(axis = var_123, x = aw_chunk_117_cast_fp16)[name = tensor("op_717_cast_fp16")]; tensor var_718_cast_fp16 = softmax(axis = var_123, x = aw_chunk_119_cast_fp16)[name = tensor("op_718_cast_fp16")]; tensor var_719_cast_fp16 = softmax(axis = var_123, x = aw_chunk_121_cast_fp16)[name = tensor("op_719_cast_fp16")]; tensor var_720_cast_fp16 = softmax(axis = var_123, x = aw_chunk_123_cast_fp16)[name = tensor("op_720_cast_fp16")]; tensor var_721_cast_fp16 = softmax(axis = var_123, x = aw_chunk_125_cast_fp16)[name = tensor("op_721_cast_fp16")]; tensor var_722_cast_fp16 = softmax(axis = var_123, x = aw_chunk_127_cast_fp16)[name = tensor("op_722_cast_fp16")]; tensor var_724_equation_0 = const()[name = tensor("op_724_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_724_cast_fp16 = einsum(equation = var_724_equation_0, values = (var_372_cast_fp16, var_659_cast_fp16))[name = tensor("op_724_cast_fp16")]; tensor var_726_equation_0 = const()[name = tensor("op_726_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_726_cast_fp16 = einsum(equation = var_726_equation_0, values = (var_372_cast_fp16, var_660_cast_fp16))[name = tensor("op_726_cast_fp16")]; tensor var_728_equation_0 = const()[name = tensor("op_728_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_728_cast_fp16 = einsum(equation = var_728_equation_0, values = (var_372_cast_fp16, var_661_cast_fp16))[name = tensor("op_728_cast_fp16")]; tensor var_730_equation_0 = const()[name = tensor("op_730_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_730_cast_fp16 = einsum(equation = var_730_equation_0, values = (var_372_cast_fp16, var_662_cast_fp16))[name = tensor("op_730_cast_fp16")]; tensor var_732_equation_0 = const()[name = tensor("op_732_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_732_cast_fp16 = einsum(equation = var_732_equation_0, values = (var_372_cast_fp16, var_663_cast_fp16))[name = tensor("op_732_cast_fp16")]; tensor var_734_equation_0 = const()[name = tensor("op_734_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_734_cast_fp16 = einsum(equation = var_734_equation_0, values = (var_372_cast_fp16, var_664_cast_fp16))[name = tensor("op_734_cast_fp16")]; tensor var_736_equation_0 = const()[name = tensor("op_736_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_736_cast_fp16 = einsum(equation = var_736_equation_0, values = (var_372_cast_fp16, var_665_cast_fp16))[name = tensor("op_736_cast_fp16")]; tensor var_738_equation_0 = const()[name = tensor("op_738_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_738_cast_fp16 = einsum(equation = var_738_equation_0, values = (var_372_cast_fp16, var_666_cast_fp16))[name = tensor("op_738_cast_fp16")]; tensor var_740_equation_0 = const()[name = tensor("op_740_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_740_cast_fp16 = einsum(equation = var_740_equation_0, values = (var_376_cast_fp16, var_667_cast_fp16))[name = tensor("op_740_cast_fp16")]; tensor var_742_equation_0 = const()[name = tensor("op_742_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_742_cast_fp16 = einsum(equation = var_742_equation_0, values = (var_376_cast_fp16, var_668_cast_fp16))[name = tensor("op_742_cast_fp16")]; tensor var_744_equation_0 = const()[name = tensor("op_744_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_744_cast_fp16 = einsum(equation = var_744_equation_0, values = (var_376_cast_fp16, var_669_cast_fp16))[name = tensor("op_744_cast_fp16")]; tensor var_746_equation_0 = const()[name = tensor("op_746_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_746_cast_fp16 = einsum(equation = var_746_equation_0, values = (var_376_cast_fp16, var_670_cast_fp16))[name = tensor("op_746_cast_fp16")]; tensor var_748_equation_0 = const()[name = tensor("op_748_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_748_cast_fp16 = einsum(equation = var_748_equation_0, values = (var_376_cast_fp16, var_671_cast_fp16))[name = tensor("op_748_cast_fp16")]; tensor var_750_equation_0 = const()[name = tensor("op_750_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_750_cast_fp16 = einsum(equation = var_750_equation_0, values = (var_376_cast_fp16, var_672_cast_fp16))[name = tensor("op_750_cast_fp16")]; tensor var_752_equation_0 = const()[name = tensor("op_752_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_752_cast_fp16 = einsum(equation = var_752_equation_0, values = (var_376_cast_fp16, var_673_cast_fp16))[name = tensor("op_752_cast_fp16")]; tensor var_754_equation_0 = const()[name = tensor("op_754_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_754_cast_fp16 = einsum(equation = var_754_equation_0, values = (var_376_cast_fp16, var_674_cast_fp16))[name = tensor("op_754_cast_fp16")]; tensor var_756_equation_0 = const()[name = tensor("op_756_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_756_cast_fp16 = einsum(equation = var_756_equation_0, values = (var_380_cast_fp16, var_675_cast_fp16))[name = tensor("op_756_cast_fp16")]; tensor var_758_equation_0 = const()[name = tensor("op_758_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_758_cast_fp16 = einsum(equation = var_758_equation_0, values = (var_380_cast_fp16, var_676_cast_fp16))[name = tensor("op_758_cast_fp16")]; tensor var_760_equation_0 = const()[name = tensor("op_760_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_760_cast_fp16 = einsum(equation = var_760_equation_0, values = (var_380_cast_fp16, var_677_cast_fp16))[name = tensor("op_760_cast_fp16")]; tensor var_762_equation_0 = const()[name = tensor("op_762_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_762_cast_fp16 = einsum(equation = var_762_equation_0, values = (var_380_cast_fp16, var_678_cast_fp16))[name = tensor("op_762_cast_fp16")]; tensor var_764_equation_0 = const()[name = tensor("op_764_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_764_cast_fp16 = einsum(equation = var_764_equation_0, values = (var_380_cast_fp16, var_679_cast_fp16))[name = tensor("op_764_cast_fp16")]; tensor var_766_equation_0 = const()[name = tensor("op_766_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_766_cast_fp16 = einsum(equation = var_766_equation_0, values = (var_380_cast_fp16, var_680_cast_fp16))[name = tensor("op_766_cast_fp16")]; tensor var_768_equation_0 = const()[name = tensor("op_768_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_768_cast_fp16 = einsum(equation = var_768_equation_0, values = (var_380_cast_fp16, var_681_cast_fp16))[name = tensor("op_768_cast_fp16")]; tensor var_770_equation_0 = const()[name = tensor("op_770_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_770_cast_fp16 = einsum(equation = var_770_equation_0, values = (var_380_cast_fp16, var_682_cast_fp16))[name = tensor("op_770_cast_fp16")]; tensor var_772_equation_0 = const()[name = tensor("op_772_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_772_cast_fp16 = einsum(equation = var_772_equation_0, values = (var_384_cast_fp16, var_683_cast_fp16))[name = tensor("op_772_cast_fp16")]; tensor var_774_equation_0 = const()[name = tensor("op_774_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_774_cast_fp16 = einsum(equation = var_774_equation_0, values = (var_384_cast_fp16, var_684_cast_fp16))[name = tensor("op_774_cast_fp16")]; tensor var_776_equation_0 = const()[name = tensor("op_776_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_776_cast_fp16 = einsum(equation = var_776_equation_0, values = (var_384_cast_fp16, var_685_cast_fp16))[name = tensor("op_776_cast_fp16")]; tensor var_778_equation_0 = const()[name = tensor("op_778_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_778_cast_fp16 = einsum(equation = var_778_equation_0, values = (var_384_cast_fp16, var_686_cast_fp16))[name = tensor("op_778_cast_fp16")]; tensor var_780_equation_0 = const()[name = tensor("op_780_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_780_cast_fp16 = einsum(equation = var_780_equation_0, values = (var_384_cast_fp16, var_687_cast_fp16))[name = tensor("op_780_cast_fp16")]; tensor var_782_equation_0 = const()[name = tensor("op_782_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_782_cast_fp16 = einsum(equation = var_782_equation_0, values = (var_384_cast_fp16, var_688_cast_fp16))[name = tensor("op_782_cast_fp16")]; tensor var_784_equation_0 = const()[name = tensor("op_784_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_784_cast_fp16 = einsum(equation = var_784_equation_0, values = (var_384_cast_fp16, var_689_cast_fp16))[name = tensor("op_784_cast_fp16")]; tensor var_786_equation_0 = const()[name = tensor("op_786_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_786_cast_fp16 = einsum(equation = var_786_equation_0, values = (var_384_cast_fp16, var_690_cast_fp16))[name = tensor("op_786_cast_fp16")]; tensor var_788_equation_0 = const()[name = tensor("op_788_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_788_cast_fp16 = einsum(equation = var_788_equation_0, values = (var_388_cast_fp16, var_691_cast_fp16))[name = tensor("op_788_cast_fp16")]; tensor var_790_equation_0 = const()[name = tensor("op_790_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_790_cast_fp16 = einsum(equation = var_790_equation_0, values = (var_388_cast_fp16, var_692_cast_fp16))[name = tensor("op_790_cast_fp16")]; tensor var_792_equation_0 = const()[name = tensor("op_792_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_792_cast_fp16 = einsum(equation = var_792_equation_0, values = (var_388_cast_fp16, var_693_cast_fp16))[name = tensor("op_792_cast_fp16")]; tensor var_794_equation_0 = const()[name = tensor("op_794_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_794_cast_fp16 = einsum(equation = var_794_equation_0, values = (var_388_cast_fp16, var_694_cast_fp16))[name = tensor("op_794_cast_fp16")]; tensor var_796_equation_0 = const()[name = tensor("op_796_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_796_cast_fp16 = einsum(equation = var_796_equation_0, values = (var_388_cast_fp16, var_695_cast_fp16))[name = tensor("op_796_cast_fp16")]; tensor var_798_equation_0 = const()[name = tensor("op_798_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_798_cast_fp16 = einsum(equation = var_798_equation_0, values = (var_388_cast_fp16, var_696_cast_fp16))[name = tensor("op_798_cast_fp16")]; tensor var_800_equation_0 = const()[name = tensor("op_800_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_800_cast_fp16 = einsum(equation = var_800_equation_0, values = (var_388_cast_fp16, var_697_cast_fp16))[name = tensor("op_800_cast_fp16")]; tensor var_802_equation_0 = const()[name = tensor("op_802_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_802_cast_fp16 = einsum(equation = var_802_equation_0, values = (var_388_cast_fp16, var_698_cast_fp16))[name = tensor("op_802_cast_fp16")]; tensor var_804_equation_0 = const()[name = tensor("op_804_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_804_cast_fp16 = einsum(equation = var_804_equation_0, values = (var_392_cast_fp16, var_699_cast_fp16))[name = tensor("op_804_cast_fp16")]; tensor var_806_equation_0 = const()[name = tensor("op_806_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_806_cast_fp16 = einsum(equation = var_806_equation_0, values = (var_392_cast_fp16, var_700_cast_fp16))[name = tensor("op_806_cast_fp16")]; tensor var_808_equation_0 = const()[name = tensor("op_808_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_808_cast_fp16 = einsum(equation = var_808_equation_0, values = (var_392_cast_fp16, var_701_cast_fp16))[name = tensor("op_808_cast_fp16")]; tensor var_810_equation_0 = const()[name = tensor("op_810_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_810_cast_fp16 = einsum(equation = var_810_equation_0, values = (var_392_cast_fp16, var_702_cast_fp16))[name = tensor("op_810_cast_fp16")]; tensor var_812_equation_0 = const()[name = tensor("op_812_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_812_cast_fp16 = einsum(equation = var_812_equation_0, values = (var_392_cast_fp16, var_703_cast_fp16))[name = tensor("op_812_cast_fp16")]; tensor var_814_equation_0 = const()[name = tensor("op_814_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_814_cast_fp16 = einsum(equation = var_814_equation_0, values = (var_392_cast_fp16, var_704_cast_fp16))[name = tensor("op_814_cast_fp16")]; tensor var_816_equation_0 = const()[name = tensor("op_816_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_816_cast_fp16 = einsum(equation = var_816_equation_0, values = (var_392_cast_fp16, var_705_cast_fp16))[name = tensor("op_816_cast_fp16")]; tensor var_818_equation_0 = const()[name = tensor("op_818_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_818_cast_fp16 = einsum(equation = var_818_equation_0, values = (var_392_cast_fp16, var_706_cast_fp16))[name = tensor("op_818_cast_fp16")]; tensor var_820_equation_0 = const()[name = tensor("op_820_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_820_cast_fp16 = einsum(equation = var_820_equation_0, values = (var_396_cast_fp16, var_707_cast_fp16))[name = tensor("op_820_cast_fp16")]; tensor var_822_equation_0 = const()[name = tensor("op_822_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_822_cast_fp16 = einsum(equation = var_822_equation_0, values = (var_396_cast_fp16, var_708_cast_fp16))[name = tensor("op_822_cast_fp16")]; tensor var_824_equation_0 = const()[name = tensor("op_824_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_824_cast_fp16 = einsum(equation = var_824_equation_0, values = (var_396_cast_fp16, var_709_cast_fp16))[name = tensor("op_824_cast_fp16")]; tensor var_826_equation_0 = const()[name = tensor("op_826_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_826_cast_fp16 = einsum(equation = var_826_equation_0, values = (var_396_cast_fp16, var_710_cast_fp16))[name = tensor("op_826_cast_fp16")]; tensor var_828_equation_0 = const()[name = tensor("op_828_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_828_cast_fp16 = einsum(equation = var_828_equation_0, values = (var_396_cast_fp16, var_711_cast_fp16))[name = tensor("op_828_cast_fp16")]; tensor var_830_equation_0 = const()[name = tensor("op_830_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_830_cast_fp16 = einsum(equation = var_830_equation_0, values = (var_396_cast_fp16, var_712_cast_fp16))[name = tensor("op_830_cast_fp16")]; tensor var_832_equation_0 = const()[name = tensor("op_832_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_832_cast_fp16 = einsum(equation = var_832_equation_0, values = (var_396_cast_fp16, var_713_cast_fp16))[name = tensor("op_832_cast_fp16")]; tensor var_834_equation_0 = const()[name = tensor("op_834_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_834_cast_fp16 = einsum(equation = var_834_equation_0, values = (var_396_cast_fp16, var_714_cast_fp16))[name = tensor("op_834_cast_fp16")]; tensor var_836_equation_0 = const()[name = tensor("op_836_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_836_cast_fp16 = einsum(equation = var_836_equation_0, values = (var_400_cast_fp16, var_715_cast_fp16))[name = tensor("op_836_cast_fp16")]; tensor var_838_equation_0 = const()[name = tensor("op_838_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_838_cast_fp16 = einsum(equation = var_838_equation_0, values = (var_400_cast_fp16, var_716_cast_fp16))[name = tensor("op_838_cast_fp16")]; tensor var_840_equation_0 = const()[name = tensor("op_840_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_840_cast_fp16 = einsum(equation = var_840_equation_0, values = (var_400_cast_fp16, var_717_cast_fp16))[name = tensor("op_840_cast_fp16")]; tensor var_842_equation_0 = const()[name = tensor("op_842_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_842_cast_fp16 = einsum(equation = var_842_equation_0, values = (var_400_cast_fp16, var_718_cast_fp16))[name = tensor("op_842_cast_fp16")]; tensor var_844_equation_0 = const()[name = tensor("op_844_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_844_cast_fp16 = einsum(equation = var_844_equation_0, values = (var_400_cast_fp16, var_719_cast_fp16))[name = tensor("op_844_cast_fp16")]; tensor var_846_equation_0 = const()[name = tensor("op_846_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_846_cast_fp16 = einsum(equation = var_846_equation_0, values = (var_400_cast_fp16, var_720_cast_fp16))[name = tensor("op_846_cast_fp16")]; tensor var_848_equation_0 = const()[name = tensor("op_848_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_848_cast_fp16 = einsum(equation = var_848_equation_0, values = (var_400_cast_fp16, var_721_cast_fp16))[name = tensor("op_848_cast_fp16")]; tensor var_850_equation_0 = const()[name = tensor("op_850_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_850_cast_fp16 = einsum(equation = var_850_equation_0, values = (var_400_cast_fp16, var_722_cast_fp16))[name = tensor("op_850_cast_fp16")]; tensor var_852_interleave_0 = const()[name = tensor("op_852_interleave_0"), val = tensor(false)]; tensor var_852_cast_fp16 = concat(axis = var_95, interleave = var_852_interleave_0, values = (var_724_cast_fp16, var_726_cast_fp16, var_728_cast_fp16, var_730_cast_fp16, var_732_cast_fp16, var_734_cast_fp16, var_736_cast_fp16, var_738_cast_fp16))[name = tensor("op_852_cast_fp16")]; tensor var_854_interleave_0 = const()[name = tensor("op_854_interleave_0"), val = tensor(false)]; tensor var_854_cast_fp16 = concat(axis = var_95, interleave = var_854_interleave_0, values = (var_740_cast_fp16, var_742_cast_fp16, var_744_cast_fp16, var_746_cast_fp16, var_748_cast_fp16, var_750_cast_fp16, var_752_cast_fp16, var_754_cast_fp16))[name = tensor("op_854_cast_fp16")]; tensor var_856_interleave_0 = const()[name = tensor("op_856_interleave_0"), val = tensor(false)]; tensor var_856_cast_fp16 = concat(axis = var_95, interleave = var_856_interleave_0, values = (var_756_cast_fp16, var_758_cast_fp16, var_760_cast_fp16, var_762_cast_fp16, var_764_cast_fp16, var_766_cast_fp16, var_768_cast_fp16, var_770_cast_fp16))[name = tensor("op_856_cast_fp16")]; tensor var_858_interleave_0 = const()[name = tensor("op_858_interleave_0"), val = tensor(false)]; tensor var_858_cast_fp16 = concat(axis = var_95, interleave = var_858_interleave_0, values = (var_772_cast_fp16, var_774_cast_fp16, var_776_cast_fp16, var_778_cast_fp16, var_780_cast_fp16, var_782_cast_fp16, var_784_cast_fp16, var_786_cast_fp16))[name = tensor("op_858_cast_fp16")]; tensor var_860_interleave_0 = const()[name = tensor("op_860_interleave_0"), val = tensor(false)]; tensor var_860_cast_fp16 = concat(axis = var_95, interleave = var_860_interleave_0, values = (var_788_cast_fp16, var_790_cast_fp16, var_792_cast_fp16, var_794_cast_fp16, var_796_cast_fp16, var_798_cast_fp16, var_800_cast_fp16, var_802_cast_fp16))[name = tensor("op_860_cast_fp16")]; tensor var_862_interleave_0 = const()[name = tensor("op_862_interleave_0"), val = tensor(false)]; tensor var_862_cast_fp16 = concat(axis = var_95, interleave = var_862_interleave_0, values = (var_804_cast_fp16, var_806_cast_fp16, var_808_cast_fp16, var_810_cast_fp16, var_812_cast_fp16, var_814_cast_fp16, var_816_cast_fp16, var_818_cast_fp16))[name = tensor("op_862_cast_fp16")]; tensor var_864_interleave_0 = const()[name = tensor("op_864_interleave_0"), val = tensor(false)]; tensor var_864_cast_fp16 = concat(axis = var_95, interleave = var_864_interleave_0, values = (var_820_cast_fp16, var_822_cast_fp16, var_824_cast_fp16, var_826_cast_fp16, var_828_cast_fp16, var_830_cast_fp16, var_832_cast_fp16, var_834_cast_fp16))[name = tensor("op_864_cast_fp16")]; tensor var_866_interleave_0 = const()[name = tensor("op_866_interleave_0"), val = tensor(false)]; tensor var_866_cast_fp16 = concat(axis = var_95, interleave = var_866_interleave_0, values = (var_836_cast_fp16, var_838_cast_fp16, var_840_cast_fp16, var_842_cast_fp16, var_844_cast_fp16, var_846_cast_fp16, var_848_cast_fp16, var_850_cast_fp16))[name = tensor("op_866_cast_fp16")]; tensor input_25_interleave_0 = const()[name = tensor("input_25_interleave_0"), val = tensor(false)]; tensor input_25_cast_fp16 = concat(axis = var_123, interleave = input_25_interleave_0, values = (var_852_cast_fp16, var_854_cast_fp16, var_856_cast_fp16, var_858_cast_fp16, var_860_cast_fp16, var_862_cast_fp16, var_864_cast_fp16, var_866_cast_fp16))[name = tensor("input_25_cast_fp16")]; tensor var_872 = const()[name = tensor("op_872"), val = tensor([1, 1])]; tensor var_874 = const()[name = tensor("op_874"), val = tensor([1, 1])]; tensor var_876_pad_type_0 = const()[name = tensor("op_876_pad_type_0"), val = tensor("custom")]; tensor var_876_pad_0 = const()[name = tensor("op_876_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9460736)))]; tensor down_blocks_0_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9665600)))]; tensor var_876_cast_fp16 = conv(bias = down_blocks_0_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_874, groups = var_123, pad = var_876_pad_0, pad_type = var_876_pad_type_0, strides = var_872, weight = down_blocks_0_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("op_876_cast_fp16")]; tensor inputs_3_cast_fp16 = add(x = var_876_cast_fp16, y = inputs_1_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; tensor hidden_states_11_axes_0 = const()[name = tensor("hidden_states_11_axes_0"), val = tensor([1])]; tensor hidden_states_11_gamma_0_to_fp16 = const()[name = tensor("hidden_states_11_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9666304)))]; tensor hidden_states_11_beta_0_to_fp16 = const()[name = tensor("hidden_states_11_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9667008)))]; tensor var_886_to_fp16 = const()[name = tensor("op_886_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_11_cast_fp16 = layer_norm(axes = hidden_states_11_axes_0, beta = hidden_states_11_beta_0_to_fp16, epsilon = var_886_to_fp16, gamma = hidden_states_11_gamma_0_to_fp16, x = inputs_3_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; tensor var_901 = const()[name = tensor("op_901"), val = tensor([1, 1])]; tensor var_903 = const()[name = tensor("op_903"), val = tensor([1, 1])]; tensor q_3_pad_type_0 = const()[name = tensor("q_3_pad_type_0"), val = tensor("custom")]; tensor q_3_pad_0 = const()[name = tensor("q_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9667712)))]; tensor q_3_cast_fp16 = conv(dilations = var_903, groups = var_123, pad = q_3_pad_0, pad_type = q_3_pad_type_0, strides = var_901, weight = down_blocks_0_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_11_cast_fp16)[name = tensor("q_3_cast_fp16")]; tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, 1])]; tensor var_909 = const()[name = tensor("op_909"), val = tensor([1, 1])]; tensor k_5_pad_type_0 = const()[name = tensor("k_5_pad_type_0"), val = tensor("custom")]; tensor k_5_pad_0 = const()[name = tensor("k_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9872576)))]; tensor k_5_cast_fp16 = conv(dilations = var_909, groups = var_123, pad = k_5_pad_0, pad_type = k_5_pad_type_0, strides = var_907, weight = down_blocks_0_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_5_cast_fp16")]; tensor var_913 = const()[name = tensor("op_913"), val = tensor([1, 1])]; tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 1])]; tensor v_3_pad_type_0 = const()[name = tensor("v_3_pad_type_0"), val = tensor("custom")]; tensor v_3_pad_0 = const()[name = tensor("v_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10364160)))]; tensor v_3_cast_fp16 = conv(dilations = var_915, groups = var_123, pad = v_3_pad_0, pad_type = v_3_pad_type_0, strides = var_913, weight = down_blocks_0_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_3_cast_fp16")]; tensor var_919_begin_0 = const()[name = tensor("op_919_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_919_end_0 = const()[name = tensor("op_919_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_919_end_mask_0 = const()[name = tensor("op_919_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_919_cast_fp16 = slice_by_index(begin = var_919_begin_0, end = var_919_end_0, end_mask = var_919_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_919_cast_fp16")]; tensor var_923_begin_0 = const()[name = tensor("op_923_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_923_end_0 = const()[name = tensor("op_923_end_0"), val = tensor([2, 80, 1, 4096])]; tensor var_923_end_mask_0 = const()[name = tensor("op_923_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_923_cast_fp16 = slice_by_index(begin = var_923_begin_0, end = var_923_end_0, end_mask = var_923_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_923_cast_fp16")]; tensor var_927_begin_0 = const()[name = tensor("op_927_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_927_end_0 = const()[name = tensor("op_927_end_0"), val = tensor([2, 120, 1, 4096])]; tensor var_927_end_mask_0 = const()[name = tensor("op_927_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_927_cast_fp16 = slice_by_index(begin = var_927_begin_0, end = var_927_end_0, end_mask = var_927_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_927_cast_fp16")]; tensor var_931_begin_0 = const()[name = tensor("op_931_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_931_end_0 = const()[name = tensor("op_931_end_0"), val = tensor([2, 160, 1, 4096])]; tensor var_931_end_mask_0 = const()[name = tensor("op_931_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_931_cast_fp16 = slice_by_index(begin = var_931_begin_0, end = var_931_end_0, end_mask = var_931_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_931_cast_fp16")]; tensor var_935_begin_0 = const()[name = tensor("op_935_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_935_end_0 = const()[name = tensor("op_935_end_0"), val = tensor([2, 200, 1, 4096])]; tensor var_935_end_mask_0 = const()[name = tensor("op_935_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_935_cast_fp16 = slice_by_index(begin = var_935_begin_0, end = var_935_end_0, end_mask = var_935_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_935_cast_fp16")]; tensor var_939_begin_0 = const()[name = tensor("op_939_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_939_end_0 = const()[name = tensor("op_939_end_0"), val = tensor([2, 240, 1, 4096])]; tensor var_939_end_mask_0 = const()[name = tensor("op_939_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_939_cast_fp16 = slice_by_index(begin = var_939_begin_0, end = var_939_end_0, end_mask = var_939_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_939_cast_fp16")]; tensor var_943_begin_0 = const()[name = tensor("op_943_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_943_end_0 = const()[name = tensor("op_943_end_0"), val = tensor([2, 280, 1, 4096])]; tensor var_943_end_mask_0 = const()[name = tensor("op_943_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_943_cast_fp16 = slice_by_index(begin = var_943_begin_0, end = var_943_end_0, end_mask = var_943_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_943_cast_fp16")]; tensor var_947_begin_0 = const()[name = tensor("op_947_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_947_end_0 = const()[name = tensor("op_947_end_0"), val = tensor([2, 320, 1, 4096])]; tensor var_947_end_mask_0 = const()[name = tensor("op_947_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_947_cast_fp16 = slice_by_index(begin = var_947_begin_0, end = var_947_end_0, end_mask = var_947_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_947_cast_fp16")]; tensor var_950_begin_0 = const()[name = tensor("op_950_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_950_end_0 = const()[name = tensor("op_950_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_950_end_mask_0 = const()[name = tensor("op_950_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_950_cast_fp16 = slice_by_index(begin = var_950_begin_0, end = var_950_end_0, end_mask = var_950_end_mask_0, x = var_919_cast_fp16)[name = tensor("op_950_cast_fp16")]; tensor var_951_begin_0 = const()[name = tensor("op_951_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_951_end_0 = const()[name = tensor("op_951_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_951_end_mask_0 = const()[name = tensor("op_951_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_951_cast_fp16 = slice_by_index(begin = var_951_begin_0, end = var_951_end_0, end_mask = var_951_end_mask_0, x = var_919_cast_fp16)[name = tensor("op_951_cast_fp16")]; tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = var_919_cast_fp16)[name = tensor("op_952_cast_fp16")]; tensor var_953_begin_0 = const()[name = tensor("op_953_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_953_end_0 = const()[name = tensor("op_953_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_953_end_mask_0 = const()[name = tensor("op_953_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_953_cast_fp16 = slice_by_index(begin = var_953_begin_0, end = var_953_end_0, end_mask = var_953_end_mask_0, x = var_919_cast_fp16)[name = tensor("op_953_cast_fp16")]; tensor var_954_begin_0 = const()[name = tensor("op_954_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_954_end_0 = const()[name = tensor("op_954_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_954_end_mask_0 = const()[name = tensor("op_954_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_954_cast_fp16 = slice_by_index(begin = var_954_begin_0, end = var_954_end_0, end_mask = var_954_end_mask_0, x = var_919_cast_fp16)[name = tensor("op_954_cast_fp16")]; tensor var_955_begin_0 = const()[name = tensor("op_955_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_955_end_0 = const()[name = tensor("op_955_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_955_end_mask_0 = const()[name = tensor("op_955_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_955_cast_fp16 = slice_by_index(begin = var_955_begin_0, end = var_955_end_0, end_mask = var_955_end_mask_0, x = var_919_cast_fp16)[name = tensor("op_955_cast_fp16")]; tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_956_end_0 = const()[name = tensor("op_956_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = var_919_cast_fp16)[name = tensor("op_956_cast_fp16")]; tensor var_957_begin_0 = const()[name = tensor("op_957_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_957_end_0 = const()[name = tensor("op_957_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_957_end_mask_0 = const()[name = tensor("op_957_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_957_cast_fp16 = slice_by_index(begin = var_957_begin_0, end = var_957_end_0, end_mask = var_957_end_mask_0, x = var_919_cast_fp16)[name = tensor("op_957_cast_fp16")]; tensor var_958_begin_0 = const()[name = tensor("op_958_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_958_end_0 = const()[name = tensor("op_958_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_958_end_mask_0 = const()[name = tensor("op_958_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_958_cast_fp16 = slice_by_index(begin = var_958_begin_0, end = var_958_end_0, end_mask = var_958_end_mask_0, x = var_923_cast_fp16)[name = tensor("op_958_cast_fp16")]; tensor var_959_begin_0 = const()[name = tensor("op_959_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_959_end_0 = const()[name = tensor("op_959_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_959_end_mask_0 = const()[name = tensor("op_959_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_959_cast_fp16 = slice_by_index(begin = var_959_begin_0, end = var_959_end_0, end_mask = var_959_end_mask_0, x = var_923_cast_fp16)[name = tensor("op_959_cast_fp16")]; tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = var_923_cast_fp16)[name = tensor("op_960_cast_fp16")]; tensor var_961_begin_0 = const()[name = tensor("op_961_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_961_end_0 = const()[name = tensor("op_961_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_961_end_mask_0 = const()[name = tensor("op_961_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_961_cast_fp16 = slice_by_index(begin = var_961_begin_0, end = var_961_end_0, end_mask = var_961_end_mask_0, x = var_923_cast_fp16)[name = tensor("op_961_cast_fp16")]; tensor var_962_begin_0 = const()[name = tensor("op_962_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_962_end_0 = const()[name = tensor("op_962_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_962_end_mask_0 = const()[name = tensor("op_962_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_962_cast_fp16 = slice_by_index(begin = var_962_begin_0, end = var_962_end_0, end_mask = var_962_end_mask_0, x = var_923_cast_fp16)[name = tensor("op_962_cast_fp16")]; tensor var_963_begin_0 = const()[name = tensor("op_963_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_963_end_0 = const()[name = tensor("op_963_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_963_end_mask_0 = const()[name = tensor("op_963_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_963_cast_fp16 = slice_by_index(begin = var_963_begin_0, end = var_963_end_0, end_mask = var_963_end_mask_0, x = var_923_cast_fp16)[name = tensor("op_963_cast_fp16")]; tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = var_923_cast_fp16)[name = tensor("op_964_cast_fp16")]; tensor var_965_begin_0 = const()[name = tensor("op_965_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_965_end_0 = const()[name = tensor("op_965_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_965_end_mask_0 = const()[name = tensor("op_965_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_965_cast_fp16 = slice_by_index(begin = var_965_begin_0, end = var_965_end_0, end_mask = var_965_end_mask_0, x = var_923_cast_fp16)[name = tensor("op_965_cast_fp16")]; tensor var_966_begin_0 = const()[name = tensor("op_966_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_966_end_0 = const()[name = tensor("op_966_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_966_end_mask_0 = const()[name = tensor("op_966_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_966_cast_fp16 = slice_by_index(begin = var_966_begin_0, end = var_966_end_0, end_mask = var_966_end_mask_0, x = var_927_cast_fp16)[name = tensor("op_966_cast_fp16")]; tensor var_967_begin_0 = const()[name = tensor("op_967_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_967_end_0 = const()[name = tensor("op_967_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_967_end_mask_0 = const()[name = tensor("op_967_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_967_cast_fp16 = slice_by_index(begin = var_967_begin_0, end = var_967_end_0, end_mask = var_967_end_mask_0, x = var_927_cast_fp16)[name = tensor("op_967_cast_fp16")]; tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = var_927_cast_fp16)[name = tensor("op_968_cast_fp16")]; tensor var_969_begin_0 = const()[name = tensor("op_969_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_969_end_0 = const()[name = tensor("op_969_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_969_end_mask_0 = const()[name = tensor("op_969_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_969_cast_fp16 = slice_by_index(begin = var_969_begin_0, end = var_969_end_0, end_mask = var_969_end_mask_0, x = var_927_cast_fp16)[name = tensor("op_969_cast_fp16")]; tensor var_970_begin_0 = const()[name = tensor("op_970_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_970_end_0 = const()[name = tensor("op_970_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_970_end_mask_0 = const()[name = tensor("op_970_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_970_cast_fp16 = slice_by_index(begin = var_970_begin_0, end = var_970_end_0, end_mask = var_970_end_mask_0, x = var_927_cast_fp16)[name = tensor("op_970_cast_fp16")]; tensor var_971_begin_0 = const()[name = tensor("op_971_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_971_end_0 = const()[name = tensor("op_971_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_971_end_mask_0 = const()[name = tensor("op_971_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_971_cast_fp16 = slice_by_index(begin = var_971_begin_0, end = var_971_end_0, end_mask = var_971_end_mask_0, x = var_927_cast_fp16)[name = tensor("op_971_cast_fp16")]; tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = var_927_cast_fp16)[name = tensor("op_972_cast_fp16")]; tensor var_973_begin_0 = const()[name = tensor("op_973_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_973_end_0 = const()[name = tensor("op_973_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_973_end_mask_0 = const()[name = tensor("op_973_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_973_cast_fp16 = slice_by_index(begin = var_973_begin_0, end = var_973_end_0, end_mask = var_973_end_mask_0, x = var_927_cast_fp16)[name = tensor("op_973_cast_fp16")]; tensor var_974_begin_0 = const()[name = tensor("op_974_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_974_end_0 = const()[name = tensor("op_974_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_974_end_mask_0 = const()[name = tensor("op_974_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_974_cast_fp16 = slice_by_index(begin = var_974_begin_0, end = var_974_end_0, end_mask = var_974_end_mask_0, x = var_931_cast_fp16)[name = tensor("op_974_cast_fp16")]; tensor var_975_begin_0 = const()[name = tensor("op_975_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_975_end_0 = const()[name = tensor("op_975_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_975_end_mask_0 = const()[name = tensor("op_975_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_975_cast_fp16 = slice_by_index(begin = var_975_begin_0, end = var_975_end_0, end_mask = var_975_end_mask_0, x = var_931_cast_fp16)[name = tensor("op_975_cast_fp16")]; tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = var_931_cast_fp16)[name = tensor("op_976_cast_fp16")]; tensor var_977_begin_0 = const()[name = tensor("op_977_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_977_end_0 = const()[name = tensor("op_977_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_977_end_mask_0 = const()[name = tensor("op_977_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_977_cast_fp16 = slice_by_index(begin = var_977_begin_0, end = var_977_end_0, end_mask = var_977_end_mask_0, x = var_931_cast_fp16)[name = tensor("op_977_cast_fp16")]; tensor var_978_begin_0 = const()[name = tensor("op_978_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_978_end_0 = const()[name = tensor("op_978_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_978_end_mask_0 = const()[name = tensor("op_978_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_978_cast_fp16 = slice_by_index(begin = var_978_begin_0, end = var_978_end_0, end_mask = var_978_end_mask_0, x = var_931_cast_fp16)[name = tensor("op_978_cast_fp16")]; tensor var_979_begin_0 = const()[name = tensor("op_979_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_979_end_0 = const()[name = tensor("op_979_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_979_end_mask_0 = const()[name = tensor("op_979_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_979_cast_fp16 = slice_by_index(begin = var_979_begin_0, end = var_979_end_0, end_mask = var_979_end_mask_0, x = var_931_cast_fp16)[name = tensor("op_979_cast_fp16")]; tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_980_end_0 = const()[name = tensor("op_980_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_980_cast_fp16 = slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = var_931_cast_fp16)[name = tensor("op_980_cast_fp16")]; tensor var_981_begin_0 = const()[name = tensor("op_981_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_981_end_0 = const()[name = tensor("op_981_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_981_end_mask_0 = const()[name = tensor("op_981_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_981_cast_fp16 = slice_by_index(begin = var_981_begin_0, end = var_981_end_0, end_mask = var_981_end_mask_0, x = var_931_cast_fp16)[name = tensor("op_981_cast_fp16")]; tensor var_982_begin_0 = const()[name = tensor("op_982_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_982_end_0 = const()[name = tensor("op_982_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_982_end_mask_0 = const()[name = tensor("op_982_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_982_cast_fp16 = slice_by_index(begin = var_982_begin_0, end = var_982_end_0, end_mask = var_982_end_mask_0, x = var_935_cast_fp16)[name = tensor("op_982_cast_fp16")]; tensor var_983_begin_0 = const()[name = tensor("op_983_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_983_end_0 = const()[name = tensor("op_983_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_983_end_mask_0 = const()[name = tensor("op_983_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_983_cast_fp16 = slice_by_index(begin = var_983_begin_0, end = var_983_end_0, end_mask = var_983_end_mask_0, x = var_935_cast_fp16)[name = tensor("op_983_cast_fp16")]; tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = var_935_cast_fp16)[name = tensor("op_984_cast_fp16")]; tensor var_985_begin_0 = const()[name = tensor("op_985_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_985_end_0 = const()[name = tensor("op_985_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_985_end_mask_0 = const()[name = tensor("op_985_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_985_cast_fp16 = slice_by_index(begin = var_985_begin_0, end = var_985_end_0, end_mask = var_985_end_mask_0, x = var_935_cast_fp16)[name = tensor("op_985_cast_fp16")]; tensor var_986_begin_0 = const()[name = tensor("op_986_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_986_end_0 = const()[name = tensor("op_986_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_986_end_mask_0 = const()[name = tensor("op_986_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_986_cast_fp16 = slice_by_index(begin = var_986_begin_0, end = var_986_end_0, end_mask = var_986_end_mask_0, x = var_935_cast_fp16)[name = tensor("op_986_cast_fp16")]; tensor var_987_begin_0 = const()[name = tensor("op_987_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_987_end_0 = const()[name = tensor("op_987_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_987_end_mask_0 = const()[name = tensor("op_987_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_987_cast_fp16 = slice_by_index(begin = var_987_begin_0, end = var_987_end_0, end_mask = var_987_end_mask_0, x = var_935_cast_fp16)[name = tensor("op_987_cast_fp16")]; tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_988_end_mask_0 = const()[name = tensor("op_988_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = var_988_end_mask_0, x = var_935_cast_fp16)[name = tensor("op_988_cast_fp16")]; tensor var_989_begin_0 = const()[name = tensor("op_989_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_989_end_0 = const()[name = tensor("op_989_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_989_end_mask_0 = const()[name = tensor("op_989_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_989_cast_fp16 = slice_by_index(begin = var_989_begin_0, end = var_989_end_0, end_mask = var_989_end_mask_0, x = var_935_cast_fp16)[name = tensor("op_989_cast_fp16")]; tensor var_990_begin_0 = const()[name = tensor("op_990_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_990_end_0 = const()[name = tensor("op_990_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_990_end_mask_0 = const()[name = tensor("op_990_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_990_cast_fp16 = slice_by_index(begin = var_990_begin_0, end = var_990_end_0, end_mask = var_990_end_mask_0, x = var_939_cast_fp16)[name = tensor("op_990_cast_fp16")]; tensor var_991_begin_0 = const()[name = tensor("op_991_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_991_end_0 = const()[name = tensor("op_991_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_991_end_mask_0 = const()[name = tensor("op_991_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_991_cast_fp16 = slice_by_index(begin = var_991_begin_0, end = var_991_end_0, end_mask = var_991_end_mask_0, x = var_939_cast_fp16)[name = tensor("op_991_cast_fp16")]; tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = var_939_cast_fp16)[name = tensor("op_992_cast_fp16")]; tensor var_993_begin_0 = const()[name = tensor("op_993_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_993_end_0 = const()[name = tensor("op_993_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_993_end_mask_0 = const()[name = tensor("op_993_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_993_cast_fp16 = slice_by_index(begin = var_993_begin_0, end = var_993_end_0, end_mask = var_993_end_mask_0, x = var_939_cast_fp16)[name = tensor("op_993_cast_fp16")]; tensor var_994_begin_0 = const()[name = tensor("op_994_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_994_end_0 = const()[name = tensor("op_994_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_994_end_mask_0 = const()[name = tensor("op_994_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_994_cast_fp16 = slice_by_index(begin = var_994_begin_0, end = var_994_end_0, end_mask = var_994_end_mask_0, x = var_939_cast_fp16)[name = tensor("op_994_cast_fp16")]; tensor var_995_begin_0 = const()[name = tensor("op_995_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_995_end_0 = const()[name = tensor("op_995_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_995_end_mask_0 = const()[name = tensor("op_995_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_995_cast_fp16 = slice_by_index(begin = var_995_begin_0, end = var_995_end_0, end_mask = var_995_end_mask_0, x = var_939_cast_fp16)[name = tensor("op_995_cast_fp16")]; tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = var_996_end_0, end_mask = var_996_end_mask_0, x = var_939_cast_fp16)[name = tensor("op_996_cast_fp16")]; tensor var_997_begin_0 = const()[name = tensor("op_997_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_997_end_0 = const()[name = tensor("op_997_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_997_end_mask_0 = const()[name = tensor("op_997_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_997_cast_fp16 = slice_by_index(begin = var_997_begin_0, end = var_997_end_0, end_mask = var_997_end_mask_0, x = var_939_cast_fp16)[name = tensor("op_997_cast_fp16")]; tensor var_998_begin_0 = const()[name = tensor("op_998_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_998_end_0 = const()[name = tensor("op_998_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_998_end_mask_0 = const()[name = tensor("op_998_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_998_cast_fp16 = slice_by_index(begin = var_998_begin_0, end = var_998_end_0, end_mask = var_998_end_mask_0, x = var_943_cast_fp16)[name = tensor("op_998_cast_fp16")]; tensor var_999_begin_0 = const()[name = tensor("op_999_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_999_end_0 = const()[name = tensor("op_999_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_999_end_mask_0 = const()[name = tensor("op_999_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_999_cast_fp16 = slice_by_index(begin = var_999_begin_0, end = var_999_end_0, end_mask = var_999_end_mask_0, x = var_943_cast_fp16)[name = tensor("op_999_cast_fp16")]; tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = var_943_cast_fp16)[name = tensor("op_1000_cast_fp16")]; tensor var_1001_begin_0 = const()[name = tensor("op_1001_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_1001_end_0 = const()[name = tensor("op_1001_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_1001_end_mask_0 = const()[name = tensor("op_1001_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1001_cast_fp16 = slice_by_index(begin = var_1001_begin_0, end = var_1001_end_0, end_mask = var_1001_end_mask_0, x = var_943_cast_fp16)[name = tensor("op_1001_cast_fp16")]; tensor var_1002_begin_0 = const()[name = tensor("op_1002_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_1002_end_0 = const()[name = tensor("op_1002_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_1002_end_mask_0 = const()[name = tensor("op_1002_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1002_cast_fp16 = slice_by_index(begin = var_1002_begin_0, end = var_1002_end_0, end_mask = var_1002_end_mask_0, x = var_943_cast_fp16)[name = tensor("op_1002_cast_fp16")]; tensor var_1003_begin_0 = const()[name = tensor("op_1003_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_1003_end_0 = const()[name = tensor("op_1003_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_1003_end_mask_0 = const()[name = tensor("op_1003_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1003_cast_fp16 = slice_by_index(begin = var_1003_begin_0, end = var_1003_end_0, end_mask = var_1003_end_mask_0, x = var_943_cast_fp16)[name = tensor("op_1003_cast_fp16")]; tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = var_943_cast_fp16)[name = tensor("op_1004_cast_fp16")]; tensor var_1005_begin_0 = const()[name = tensor("op_1005_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_1005_end_0 = const()[name = tensor("op_1005_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_1005_end_mask_0 = const()[name = tensor("op_1005_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1005_cast_fp16 = slice_by_index(begin = var_1005_begin_0, end = var_1005_end_0, end_mask = var_1005_end_mask_0, x = var_943_cast_fp16)[name = tensor("op_1005_cast_fp16")]; tensor var_1006_begin_0 = const()[name = tensor("op_1006_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1006_end_0 = const()[name = tensor("op_1006_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_1006_end_mask_0 = const()[name = tensor("op_1006_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1006_cast_fp16 = slice_by_index(begin = var_1006_begin_0, end = var_1006_end_0, end_mask = var_1006_end_mask_0, x = var_947_cast_fp16)[name = tensor("op_1006_cast_fp16")]; tensor var_1007_begin_0 = const()[name = tensor("op_1007_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1007_end_0 = const()[name = tensor("op_1007_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_1007_end_mask_0 = const()[name = tensor("op_1007_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = var_1007_end_0, end_mask = var_1007_end_mask_0, x = var_947_cast_fp16)[name = tensor("op_1007_cast_fp16")]; tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = var_947_cast_fp16)[name = tensor("op_1008_cast_fp16")]; tensor var_1009_begin_0 = const()[name = tensor("op_1009_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_1009_end_0 = const()[name = tensor("op_1009_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_1009_end_mask_0 = const()[name = tensor("op_1009_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1009_cast_fp16 = slice_by_index(begin = var_1009_begin_0, end = var_1009_end_0, end_mask = var_1009_end_mask_0, x = var_947_cast_fp16)[name = tensor("op_1009_cast_fp16")]; tensor var_1010_begin_0 = const()[name = tensor("op_1010_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_1010_end_0 = const()[name = tensor("op_1010_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_1010_end_mask_0 = const()[name = tensor("op_1010_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1010_cast_fp16 = slice_by_index(begin = var_1010_begin_0, end = var_1010_end_0, end_mask = var_1010_end_mask_0, x = var_947_cast_fp16)[name = tensor("op_1010_cast_fp16")]; tensor var_1011_begin_0 = const()[name = tensor("op_1011_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_1011_end_0 = const()[name = tensor("op_1011_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_1011_end_mask_0 = const()[name = tensor("op_1011_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1011_cast_fp16 = slice_by_index(begin = var_1011_begin_0, end = var_1011_end_0, end_mask = var_1011_end_mask_0, x = var_947_cast_fp16)[name = tensor("op_1011_cast_fp16")]; tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = var_1012_end_0, end_mask = var_1012_end_mask_0, x = var_947_cast_fp16)[name = tensor("op_1012_cast_fp16")]; tensor var_1013_begin_0 = const()[name = tensor("op_1013_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_1013_end_0 = const()[name = tensor("op_1013_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_1013_end_mask_0 = const()[name = tensor("op_1013_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1013_cast_fp16 = slice_by_index(begin = var_1013_begin_0, end = var_1013_end_0, end_mask = var_1013_end_mask_0, x = var_947_cast_fp16)[name = tensor("op_1013_cast_fp16")]; tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([2, 77, 1, 40])]; tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_30 = transpose(perm = k_7_perm_0, x = k_5_cast_fp16)[name = tensor("transpose_30")]; tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = transpose_30)[name = tensor("op_1018_cast_fp16")]; tensor var_1022_begin_0 = const()[name = tensor("op_1022_begin_0"), val = tensor([0, 0, 0, 40])]; tensor var_1022_end_0 = const()[name = tensor("op_1022_end_0"), val = tensor([2, 77, 1, 80])]; tensor var_1022_end_mask_0 = const()[name = tensor("op_1022_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1022_cast_fp16 = slice_by_index(begin = var_1022_begin_0, end = var_1022_end_0, end_mask = var_1022_end_mask_0, x = transpose_30)[name = tensor("op_1022_cast_fp16")]; tensor var_1026_begin_0 = const()[name = tensor("op_1026_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_1026_end_0 = const()[name = tensor("op_1026_end_0"), val = tensor([2, 77, 1, 120])]; tensor var_1026_end_mask_0 = const()[name = tensor("op_1026_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1026_cast_fp16 = slice_by_index(begin = var_1026_begin_0, end = var_1026_end_0, end_mask = var_1026_end_mask_0, x = transpose_30)[name = tensor("op_1026_cast_fp16")]; tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 120])]; tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = transpose_30)[name = tensor("op_1030_cast_fp16")]; tensor var_1034_begin_0 = const()[name = tensor("op_1034_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_1034_end_0 = const()[name = tensor("op_1034_end_0"), val = tensor([2, 77, 1, 200])]; tensor var_1034_end_mask_0 = const()[name = tensor("op_1034_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1034_cast_fp16 = slice_by_index(begin = var_1034_begin_0, end = var_1034_end_0, end_mask = var_1034_end_mask_0, x = transpose_30)[name = tensor("op_1034_cast_fp16")]; tensor var_1038_begin_0 = const()[name = tensor("op_1038_begin_0"), val = tensor([0, 0, 0, 200])]; tensor var_1038_end_0 = const()[name = tensor("op_1038_end_0"), val = tensor([2, 77, 1, 240])]; tensor var_1038_end_mask_0 = const()[name = tensor("op_1038_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1038_cast_fp16 = slice_by_index(begin = var_1038_begin_0, end = var_1038_end_0, end_mask = var_1038_end_mask_0, x = transpose_30)[name = tensor("op_1038_cast_fp16")]; tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([2, 77, 1, 280])]; tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = transpose_30)[name = tensor("op_1042_cast_fp16")]; tensor var_1046_begin_0 = const()[name = tensor("op_1046_begin_0"), val = tensor([0, 0, 0, 280])]; tensor var_1046_end_0 = const()[name = tensor("op_1046_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_1046_end_mask_0 = const()[name = tensor("op_1046_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1046_cast_fp16 = slice_by_index(begin = var_1046_begin_0, end = var_1046_end_0, end_mask = var_1046_end_mask_0, x = transpose_30)[name = tensor("op_1046_cast_fp16")]; tensor var_1048_begin_0 = const()[name = tensor("op_1048_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1048_end_0 = const()[name = tensor("op_1048_end_0"), val = tensor([2, 40, 1, 77])]; tensor var_1048_end_mask_0 = const()[name = tensor("op_1048_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1048_cast_fp16 = slice_by_index(begin = var_1048_begin_0, end = var_1048_end_0, end_mask = var_1048_end_mask_0, x = v_3_cast_fp16)[name = tensor("op_1048_cast_fp16")]; tensor var_1052_begin_0 = const()[name = tensor("op_1052_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_1052_end_0 = const()[name = tensor("op_1052_end_0"), val = tensor([2, 80, 1, 77])]; tensor var_1052_end_mask_0 = const()[name = tensor("op_1052_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1052_cast_fp16 = slice_by_index(begin = var_1052_begin_0, end = var_1052_end_0, end_mask = var_1052_end_mask_0, x = v_3_cast_fp16)[name = tensor("op_1052_cast_fp16")]; tensor var_1056_begin_0 = const()[name = tensor("op_1056_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_1056_end_0 = const()[name = tensor("op_1056_end_0"), val = tensor([2, 120, 1, 77])]; tensor var_1056_end_mask_0 = const()[name = tensor("op_1056_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1056_cast_fp16 = slice_by_index(begin = var_1056_begin_0, end = var_1056_end_0, end_mask = var_1056_end_mask_0, x = v_3_cast_fp16)[name = tensor("op_1056_cast_fp16")]; tensor var_1060_begin_0 = const()[name = tensor("op_1060_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_1060_end_0 = const()[name = tensor("op_1060_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_1060_end_mask_0 = const()[name = tensor("op_1060_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1060_cast_fp16 = slice_by_index(begin = var_1060_begin_0, end = var_1060_end_0, end_mask = var_1060_end_mask_0, x = v_3_cast_fp16)[name = tensor("op_1060_cast_fp16")]; tensor var_1064_begin_0 = const()[name = tensor("op_1064_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_1064_end_0 = const()[name = tensor("op_1064_end_0"), val = tensor([2, 200, 1, 77])]; tensor var_1064_end_mask_0 = const()[name = tensor("op_1064_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1064_cast_fp16 = slice_by_index(begin = var_1064_begin_0, end = var_1064_end_0, end_mask = var_1064_end_mask_0, x = v_3_cast_fp16)[name = tensor("op_1064_cast_fp16")]; tensor var_1068_begin_0 = const()[name = tensor("op_1068_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_1068_end_0 = const()[name = tensor("op_1068_end_0"), val = tensor([2, 240, 1, 77])]; tensor var_1068_end_mask_0 = const()[name = tensor("op_1068_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1068_cast_fp16 = slice_by_index(begin = var_1068_begin_0, end = var_1068_end_0, end_mask = var_1068_end_mask_0, x = v_3_cast_fp16)[name = tensor("op_1068_cast_fp16")]; tensor var_1072_begin_0 = const()[name = tensor("op_1072_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_1072_end_0 = const()[name = tensor("op_1072_end_0"), val = tensor([2, 280, 1, 77])]; tensor var_1072_end_mask_0 = const()[name = tensor("op_1072_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1072_cast_fp16 = slice_by_index(begin = var_1072_begin_0, end = var_1072_end_0, end_mask = var_1072_end_mask_0, x = v_3_cast_fp16)[name = tensor("op_1072_cast_fp16")]; tensor var_1076_begin_0 = const()[name = tensor("op_1076_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_1076_end_0 = const()[name = tensor("op_1076_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_1076_end_mask_0 = const()[name = tensor("op_1076_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1076_cast_fp16 = slice_by_index(begin = var_1076_begin_0, end = var_1076_end_0, end_mask = var_1076_end_mask_0, x = v_3_cast_fp16)[name = tensor("op_1076_cast_fp16")]; tensor var_1080_equation_0 = const()[name = tensor("op_1080_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1080_cast_fp16 = einsum(equation = var_1080_equation_0, values = (var_1018_cast_fp16, var_950_cast_fp16))[name = tensor("op_1080_cast_fp16")]; tensor var_1081_to_fp16 = const()[name = tensor("op_1081_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_129_cast_fp16 = mul(x = var_1080_cast_fp16, y = var_1081_to_fp16)[name = tensor("aw_chunk_129_cast_fp16")]; tensor var_1084_equation_0 = const()[name = tensor("op_1084_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1084_cast_fp16 = einsum(equation = var_1084_equation_0, values = (var_1018_cast_fp16, var_951_cast_fp16))[name = tensor("op_1084_cast_fp16")]; tensor var_1085_to_fp16 = const()[name = tensor("op_1085_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_131_cast_fp16 = mul(x = var_1084_cast_fp16, y = var_1085_to_fp16)[name = tensor("aw_chunk_131_cast_fp16")]; tensor var_1088_equation_0 = const()[name = tensor("op_1088_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1088_cast_fp16 = einsum(equation = var_1088_equation_0, values = (var_1018_cast_fp16, var_952_cast_fp16))[name = tensor("op_1088_cast_fp16")]; tensor var_1089_to_fp16 = const()[name = tensor("op_1089_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_133_cast_fp16 = mul(x = var_1088_cast_fp16, y = var_1089_to_fp16)[name = tensor("aw_chunk_133_cast_fp16")]; tensor var_1092_equation_0 = const()[name = tensor("op_1092_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1092_cast_fp16 = einsum(equation = var_1092_equation_0, values = (var_1018_cast_fp16, var_953_cast_fp16))[name = tensor("op_1092_cast_fp16")]; tensor var_1093_to_fp16 = const()[name = tensor("op_1093_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_135_cast_fp16 = mul(x = var_1092_cast_fp16, y = var_1093_to_fp16)[name = tensor("aw_chunk_135_cast_fp16")]; tensor var_1096_equation_0 = const()[name = tensor("op_1096_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1096_cast_fp16 = einsum(equation = var_1096_equation_0, values = (var_1018_cast_fp16, var_954_cast_fp16))[name = tensor("op_1096_cast_fp16")]; tensor var_1097_to_fp16 = const()[name = tensor("op_1097_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_137_cast_fp16 = mul(x = var_1096_cast_fp16, y = var_1097_to_fp16)[name = tensor("aw_chunk_137_cast_fp16")]; tensor var_1100_equation_0 = const()[name = tensor("op_1100_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1100_cast_fp16 = einsum(equation = var_1100_equation_0, values = (var_1018_cast_fp16, var_955_cast_fp16))[name = tensor("op_1100_cast_fp16")]; tensor var_1101_to_fp16 = const()[name = tensor("op_1101_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_139_cast_fp16 = mul(x = var_1100_cast_fp16, y = var_1101_to_fp16)[name = tensor("aw_chunk_139_cast_fp16")]; tensor var_1104_equation_0 = const()[name = tensor("op_1104_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1104_cast_fp16 = einsum(equation = var_1104_equation_0, values = (var_1018_cast_fp16, var_956_cast_fp16))[name = tensor("op_1104_cast_fp16")]; tensor var_1105_to_fp16 = const()[name = tensor("op_1105_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_141_cast_fp16 = mul(x = var_1104_cast_fp16, y = var_1105_to_fp16)[name = tensor("aw_chunk_141_cast_fp16")]; tensor var_1108_equation_0 = const()[name = tensor("op_1108_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1108_cast_fp16 = einsum(equation = var_1108_equation_0, values = (var_1018_cast_fp16, var_957_cast_fp16))[name = tensor("op_1108_cast_fp16")]; tensor var_1109_to_fp16 = const()[name = tensor("op_1109_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_143_cast_fp16 = mul(x = var_1108_cast_fp16, y = var_1109_to_fp16)[name = tensor("aw_chunk_143_cast_fp16")]; tensor var_1112_equation_0 = const()[name = tensor("op_1112_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1112_cast_fp16 = einsum(equation = var_1112_equation_0, values = (var_1022_cast_fp16, var_958_cast_fp16))[name = tensor("op_1112_cast_fp16")]; tensor var_1113_to_fp16 = const()[name = tensor("op_1113_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_145_cast_fp16 = mul(x = var_1112_cast_fp16, y = var_1113_to_fp16)[name = tensor("aw_chunk_145_cast_fp16")]; tensor var_1116_equation_0 = const()[name = tensor("op_1116_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1116_cast_fp16 = einsum(equation = var_1116_equation_0, values = (var_1022_cast_fp16, var_959_cast_fp16))[name = tensor("op_1116_cast_fp16")]; tensor var_1117_to_fp16 = const()[name = tensor("op_1117_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_147_cast_fp16 = mul(x = var_1116_cast_fp16, y = var_1117_to_fp16)[name = tensor("aw_chunk_147_cast_fp16")]; tensor var_1120_equation_0 = const()[name = tensor("op_1120_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1120_cast_fp16 = einsum(equation = var_1120_equation_0, values = (var_1022_cast_fp16, var_960_cast_fp16))[name = tensor("op_1120_cast_fp16")]; tensor var_1121_to_fp16 = const()[name = tensor("op_1121_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_149_cast_fp16 = mul(x = var_1120_cast_fp16, y = var_1121_to_fp16)[name = tensor("aw_chunk_149_cast_fp16")]; tensor var_1124_equation_0 = const()[name = tensor("op_1124_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1124_cast_fp16 = einsum(equation = var_1124_equation_0, values = (var_1022_cast_fp16, var_961_cast_fp16))[name = tensor("op_1124_cast_fp16")]; tensor var_1125_to_fp16 = const()[name = tensor("op_1125_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_151_cast_fp16 = mul(x = var_1124_cast_fp16, y = var_1125_to_fp16)[name = tensor("aw_chunk_151_cast_fp16")]; tensor var_1128_equation_0 = const()[name = tensor("op_1128_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1128_cast_fp16 = einsum(equation = var_1128_equation_0, values = (var_1022_cast_fp16, var_962_cast_fp16))[name = tensor("op_1128_cast_fp16")]; tensor var_1129_to_fp16 = const()[name = tensor("op_1129_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_153_cast_fp16 = mul(x = var_1128_cast_fp16, y = var_1129_to_fp16)[name = tensor("aw_chunk_153_cast_fp16")]; tensor var_1132_equation_0 = const()[name = tensor("op_1132_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1132_cast_fp16 = einsum(equation = var_1132_equation_0, values = (var_1022_cast_fp16, var_963_cast_fp16))[name = tensor("op_1132_cast_fp16")]; tensor var_1133_to_fp16 = const()[name = tensor("op_1133_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_155_cast_fp16 = mul(x = var_1132_cast_fp16, y = var_1133_to_fp16)[name = tensor("aw_chunk_155_cast_fp16")]; tensor var_1136_equation_0 = const()[name = tensor("op_1136_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1136_cast_fp16 = einsum(equation = var_1136_equation_0, values = (var_1022_cast_fp16, var_964_cast_fp16))[name = tensor("op_1136_cast_fp16")]; tensor var_1137_to_fp16 = const()[name = tensor("op_1137_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_157_cast_fp16 = mul(x = var_1136_cast_fp16, y = var_1137_to_fp16)[name = tensor("aw_chunk_157_cast_fp16")]; tensor var_1140_equation_0 = const()[name = tensor("op_1140_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1140_cast_fp16 = einsum(equation = var_1140_equation_0, values = (var_1022_cast_fp16, var_965_cast_fp16))[name = tensor("op_1140_cast_fp16")]; tensor var_1141_to_fp16 = const()[name = tensor("op_1141_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_159_cast_fp16 = mul(x = var_1140_cast_fp16, y = var_1141_to_fp16)[name = tensor("aw_chunk_159_cast_fp16")]; tensor var_1144_equation_0 = const()[name = tensor("op_1144_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1144_cast_fp16 = einsum(equation = var_1144_equation_0, values = (var_1026_cast_fp16, var_966_cast_fp16))[name = tensor("op_1144_cast_fp16")]; tensor var_1145_to_fp16 = const()[name = tensor("op_1145_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_161_cast_fp16 = mul(x = var_1144_cast_fp16, y = var_1145_to_fp16)[name = tensor("aw_chunk_161_cast_fp16")]; tensor var_1148_equation_0 = const()[name = tensor("op_1148_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1148_cast_fp16 = einsum(equation = var_1148_equation_0, values = (var_1026_cast_fp16, var_967_cast_fp16))[name = tensor("op_1148_cast_fp16")]; tensor var_1149_to_fp16 = const()[name = tensor("op_1149_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_163_cast_fp16 = mul(x = var_1148_cast_fp16, y = var_1149_to_fp16)[name = tensor("aw_chunk_163_cast_fp16")]; tensor var_1152_equation_0 = const()[name = tensor("op_1152_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1152_cast_fp16 = einsum(equation = var_1152_equation_0, values = (var_1026_cast_fp16, var_968_cast_fp16))[name = tensor("op_1152_cast_fp16")]; tensor var_1153_to_fp16 = const()[name = tensor("op_1153_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_165_cast_fp16 = mul(x = var_1152_cast_fp16, y = var_1153_to_fp16)[name = tensor("aw_chunk_165_cast_fp16")]; tensor var_1156_equation_0 = const()[name = tensor("op_1156_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1156_cast_fp16 = einsum(equation = var_1156_equation_0, values = (var_1026_cast_fp16, var_969_cast_fp16))[name = tensor("op_1156_cast_fp16")]; tensor var_1157_to_fp16 = const()[name = tensor("op_1157_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_167_cast_fp16 = mul(x = var_1156_cast_fp16, y = var_1157_to_fp16)[name = tensor("aw_chunk_167_cast_fp16")]; tensor var_1160_equation_0 = const()[name = tensor("op_1160_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1160_cast_fp16 = einsum(equation = var_1160_equation_0, values = (var_1026_cast_fp16, var_970_cast_fp16))[name = tensor("op_1160_cast_fp16")]; tensor var_1161_to_fp16 = const()[name = tensor("op_1161_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_169_cast_fp16 = mul(x = var_1160_cast_fp16, y = var_1161_to_fp16)[name = tensor("aw_chunk_169_cast_fp16")]; tensor var_1164_equation_0 = const()[name = tensor("op_1164_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1164_cast_fp16 = einsum(equation = var_1164_equation_0, values = (var_1026_cast_fp16, var_971_cast_fp16))[name = tensor("op_1164_cast_fp16")]; tensor var_1165_to_fp16 = const()[name = tensor("op_1165_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_171_cast_fp16 = mul(x = var_1164_cast_fp16, y = var_1165_to_fp16)[name = tensor("aw_chunk_171_cast_fp16")]; tensor var_1168_equation_0 = const()[name = tensor("op_1168_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1168_cast_fp16 = einsum(equation = var_1168_equation_0, values = (var_1026_cast_fp16, var_972_cast_fp16))[name = tensor("op_1168_cast_fp16")]; tensor var_1169_to_fp16 = const()[name = tensor("op_1169_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_173_cast_fp16 = mul(x = var_1168_cast_fp16, y = var_1169_to_fp16)[name = tensor("aw_chunk_173_cast_fp16")]; tensor var_1172_equation_0 = const()[name = tensor("op_1172_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1172_cast_fp16 = einsum(equation = var_1172_equation_0, values = (var_1026_cast_fp16, var_973_cast_fp16))[name = tensor("op_1172_cast_fp16")]; tensor var_1173_to_fp16 = const()[name = tensor("op_1173_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_175_cast_fp16 = mul(x = var_1172_cast_fp16, y = var_1173_to_fp16)[name = tensor("aw_chunk_175_cast_fp16")]; tensor var_1176_equation_0 = const()[name = tensor("op_1176_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1176_cast_fp16 = einsum(equation = var_1176_equation_0, values = (var_1030_cast_fp16, var_974_cast_fp16))[name = tensor("op_1176_cast_fp16")]; tensor var_1177_to_fp16 = const()[name = tensor("op_1177_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_177_cast_fp16 = mul(x = var_1176_cast_fp16, y = var_1177_to_fp16)[name = tensor("aw_chunk_177_cast_fp16")]; tensor var_1180_equation_0 = const()[name = tensor("op_1180_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1180_cast_fp16 = einsum(equation = var_1180_equation_0, values = (var_1030_cast_fp16, var_975_cast_fp16))[name = tensor("op_1180_cast_fp16")]; tensor var_1181_to_fp16 = const()[name = tensor("op_1181_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_179_cast_fp16 = mul(x = var_1180_cast_fp16, y = var_1181_to_fp16)[name = tensor("aw_chunk_179_cast_fp16")]; tensor var_1184_equation_0 = const()[name = tensor("op_1184_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1184_cast_fp16 = einsum(equation = var_1184_equation_0, values = (var_1030_cast_fp16, var_976_cast_fp16))[name = tensor("op_1184_cast_fp16")]; tensor var_1185_to_fp16 = const()[name = tensor("op_1185_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_181_cast_fp16 = mul(x = var_1184_cast_fp16, y = var_1185_to_fp16)[name = tensor("aw_chunk_181_cast_fp16")]; tensor var_1188_equation_0 = const()[name = tensor("op_1188_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1188_cast_fp16 = einsum(equation = var_1188_equation_0, values = (var_1030_cast_fp16, var_977_cast_fp16))[name = tensor("op_1188_cast_fp16")]; tensor var_1189_to_fp16 = const()[name = tensor("op_1189_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_183_cast_fp16 = mul(x = var_1188_cast_fp16, y = var_1189_to_fp16)[name = tensor("aw_chunk_183_cast_fp16")]; tensor var_1192_equation_0 = const()[name = tensor("op_1192_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1192_cast_fp16 = einsum(equation = var_1192_equation_0, values = (var_1030_cast_fp16, var_978_cast_fp16))[name = tensor("op_1192_cast_fp16")]; tensor var_1193_to_fp16 = const()[name = tensor("op_1193_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_185_cast_fp16 = mul(x = var_1192_cast_fp16, y = var_1193_to_fp16)[name = tensor("aw_chunk_185_cast_fp16")]; tensor var_1196_equation_0 = const()[name = tensor("op_1196_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1196_cast_fp16 = einsum(equation = var_1196_equation_0, values = (var_1030_cast_fp16, var_979_cast_fp16))[name = tensor("op_1196_cast_fp16")]; tensor var_1197_to_fp16 = const()[name = tensor("op_1197_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_187_cast_fp16 = mul(x = var_1196_cast_fp16, y = var_1197_to_fp16)[name = tensor("aw_chunk_187_cast_fp16")]; tensor var_1200_equation_0 = const()[name = tensor("op_1200_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1200_cast_fp16 = einsum(equation = var_1200_equation_0, values = (var_1030_cast_fp16, var_980_cast_fp16))[name = tensor("op_1200_cast_fp16")]; tensor var_1201_to_fp16 = const()[name = tensor("op_1201_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_189_cast_fp16 = mul(x = var_1200_cast_fp16, y = var_1201_to_fp16)[name = tensor("aw_chunk_189_cast_fp16")]; tensor var_1204_equation_0 = const()[name = tensor("op_1204_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1204_cast_fp16 = einsum(equation = var_1204_equation_0, values = (var_1030_cast_fp16, var_981_cast_fp16))[name = tensor("op_1204_cast_fp16")]; tensor var_1205_to_fp16 = const()[name = tensor("op_1205_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_191_cast_fp16 = mul(x = var_1204_cast_fp16, y = var_1205_to_fp16)[name = tensor("aw_chunk_191_cast_fp16")]; tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1034_cast_fp16, var_982_cast_fp16))[name = tensor("op_1208_cast_fp16")]; tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_193_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("aw_chunk_193_cast_fp16")]; tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1034_cast_fp16, var_983_cast_fp16))[name = tensor("op_1212_cast_fp16")]; tensor var_1213_to_fp16 = const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_195_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = tensor("aw_chunk_195_cast_fp16")]; tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1034_cast_fp16, var_984_cast_fp16))[name = tensor("op_1216_cast_fp16")]; tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_197_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("aw_chunk_197_cast_fp16")]; tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1034_cast_fp16, var_985_cast_fp16))[name = tensor("op_1220_cast_fp16")]; tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_199_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = tensor("aw_chunk_199_cast_fp16")]; tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1034_cast_fp16, var_986_cast_fp16))[name = tensor("op_1224_cast_fp16")]; tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_201_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("aw_chunk_201_cast_fp16")]; tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1034_cast_fp16, var_987_cast_fp16))[name = tensor("op_1228_cast_fp16")]; tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_203_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("aw_chunk_203_cast_fp16")]; tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1034_cast_fp16, var_988_cast_fp16))[name = tensor("op_1232_cast_fp16")]; tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_205_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("aw_chunk_205_cast_fp16")]; tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1034_cast_fp16, var_989_cast_fp16))[name = tensor("op_1236_cast_fp16")]; tensor var_1237_to_fp16 = const()[name = tensor("op_1237_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_207_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("aw_chunk_207_cast_fp16")]; tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1038_cast_fp16, var_990_cast_fp16))[name = tensor("op_1240_cast_fp16")]; tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_209_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("aw_chunk_209_cast_fp16")]; tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1038_cast_fp16, var_991_cast_fp16))[name = tensor("op_1244_cast_fp16")]; tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_211_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = tensor("aw_chunk_211_cast_fp16")]; tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1038_cast_fp16, var_992_cast_fp16))[name = tensor("op_1248_cast_fp16")]; tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_213_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("aw_chunk_213_cast_fp16")]; tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1038_cast_fp16, var_993_cast_fp16))[name = tensor("op_1252_cast_fp16")]; tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_215_cast_fp16 = mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("aw_chunk_215_cast_fp16")]; tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1038_cast_fp16, var_994_cast_fp16))[name = tensor("op_1256_cast_fp16")]; tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_217_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("aw_chunk_217_cast_fp16")]; tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1038_cast_fp16, var_995_cast_fp16))[name = tensor("op_1260_cast_fp16")]; tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_219_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("aw_chunk_219_cast_fp16")]; tensor var_1264_equation_0 = const()[name = tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1038_cast_fp16, var_996_cast_fp16))[name = tensor("op_1264_cast_fp16")]; tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_221_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("aw_chunk_221_cast_fp16")]; tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1038_cast_fp16, var_997_cast_fp16))[name = tensor("op_1268_cast_fp16")]; tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_223_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = tensor("aw_chunk_223_cast_fp16")]; tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1042_cast_fp16, var_998_cast_fp16))[name = tensor("op_1272_cast_fp16")]; tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_225_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("aw_chunk_225_cast_fp16")]; tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1042_cast_fp16, var_999_cast_fp16))[name = tensor("op_1276_cast_fp16")]; tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_227_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("aw_chunk_227_cast_fp16")]; tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1042_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1280_cast_fp16")]; tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_229_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("aw_chunk_229_cast_fp16")]; tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1042_cast_fp16, var_1001_cast_fp16))[name = tensor("op_1284_cast_fp16")]; tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_231_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("aw_chunk_231_cast_fp16")]; tensor var_1288_equation_0 = const()[name = tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1042_cast_fp16, var_1002_cast_fp16))[name = tensor("op_1288_cast_fp16")]; tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_233_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("aw_chunk_233_cast_fp16")]; tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1042_cast_fp16, var_1003_cast_fp16))[name = tensor("op_1292_cast_fp16")]; tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_235_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("aw_chunk_235_cast_fp16")]; tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = (var_1042_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1296_cast_fp16")]; tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_237_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("aw_chunk_237_cast_fp16")]; tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1042_cast_fp16, var_1005_cast_fp16))[name = tensor("op_1300_cast_fp16")]; tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_239_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("aw_chunk_239_cast_fp16")]; tensor var_1304_equation_0 = const()[name = tensor("op_1304_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1304_cast_fp16 = einsum(equation = var_1304_equation_0, values = (var_1046_cast_fp16, var_1006_cast_fp16))[name = tensor("op_1304_cast_fp16")]; tensor var_1305_to_fp16 = const()[name = tensor("op_1305_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_241_cast_fp16 = mul(x = var_1304_cast_fp16, y = var_1305_to_fp16)[name = tensor("aw_chunk_241_cast_fp16")]; tensor var_1308_equation_0 = const()[name = tensor("op_1308_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1308_cast_fp16 = einsum(equation = var_1308_equation_0, values = (var_1046_cast_fp16, var_1007_cast_fp16))[name = tensor("op_1308_cast_fp16")]; tensor var_1309_to_fp16 = const()[name = tensor("op_1309_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_243_cast_fp16 = mul(x = var_1308_cast_fp16, y = var_1309_to_fp16)[name = tensor("aw_chunk_243_cast_fp16")]; tensor var_1312_equation_0 = const()[name = tensor("op_1312_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1312_cast_fp16 = einsum(equation = var_1312_equation_0, values = (var_1046_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1312_cast_fp16")]; tensor var_1313_to_fp16 = const()[name = tensor("op_1313_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_245_cast_fp16 = mul(x = var_1312_cast_fp16, y = var_1313_to_fp16)[name = tensor("aw_chunk_245_cast_fp16")]; tensor var_1316_equation_0 = const()[name = tensor("op_1316_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1316_cast_fp16 = einsum(equation = var_1316_equation_0, values = (var_1046_cast_fp16, var_1009_cast_fp16))[name = tensor("op_1316_cast_fp16")]; tensor var_1317_to_fp16 = const()[name = tensor("op_1317_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_247_cast_fp16 = mul(x = var_1316_cast_fp16, y = var_1317_to_fp16)[name = tensor("aw_chunk_247_cast_fp16")]; tensor var_1320_equation_0 = const()[name = tensor("op_1320_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1320_cast_fp16 = einsum(equation = var_1320_equation_0, values = (var_1046_cast_fp16, var_1010_cast_fp16))[name = tensor("op_1320_cast_fp16")]; tensor var_1321_to_fp16 = const()[name = tensor("op_1321_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_249_cast_fp16 = mul(x = var_1320_cast_fp16, y = var_1321_to_fp16)[name = tensor("aw_chunk_249_cast_fp16")]; tensor var_1324_equation_0 = const()[name = tensor("op_1324_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1324_cast_fp16 = einsum(equation = var_1324_equation_0, values = (var_1046_cast_fp16, var_1011_cast_fp16))[name = tensor("op_1324_cast_fp16")]; tensor var_1325_to_fp16 = const()[name = tensor("op_1325_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_251_cast_fp16 = mul(x = var_1324_cast_fp16, y = var_1325_to_fp16)[name = tensor("aw_chunk_251_cast_fp16")]; tensor var_1328_equation_0 = const()[name = tensor("op_1328_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1328_cast_fp16 = einsum(equation = var_1328_equation_0, values = (var_1046_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1328_cast_fp16")]; tensor var_1329_to_fp16 = const()[name = tensor("op_1329_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_253_cast_fp16 = mul(x = var_1328_cast_fp16, y = var_1329_to_fp16)[name = tensor("aw_chunk_253_cast_fp16")]; tensor var_1332_equation_0 = const()[name = tensor("op_1332_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1332_cast_fp16 = einsum(equation = var_1332_equation_0, values = (var_1046_cast_fp16, var_1013_cast_fp16))[name = tensor("op_1332_cast_fp16")]; tensor var_1333_to_fp16 = const()[name = tensor("op_1333_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_255_cast_fp16 = mul(x = var_1332_cast_fp16, y = var_1333_to_fp16)[name = tensor("aw_chunk_255_cast_fp16")]; tensor var_1335_cast_fp16 = softmax(axis = var_123, x = aw_chunk_129_cast_fp16)[name = tensor("op_1335_cast_fp16")]; tensor var_1336_cast_fp16 = softmax(axis = var_123, x = aw_chunk_131_cast_fp16)[name = tensor("op_1336_cast_fp16")]; tensor var_1337_cast_fp16 = softmax(axis = var_123, x = aw_chunk_133_cast_fp16)[name = tensor("op_1337_cast_fp16")]; tensor var_1338_cast_fp16 = softmax(axis = var_123, x = aw_chunk_135_cast_fp16)[name = tensor("op_1338_cast_fp16")]; tensor var_1339_cast_fp16 = softmax(axis = var_123, x = aw_chunk_137_cast_fp16)[name = tensor("op_1339_cast_fp16")]; tensor var_1340_cast_fp16 = softmax(axis = var_123, x = aw_chunk_139_cast_fp16)[name = tensor("op_1340_cast_fp16")]; tensor var_1341_cast_fp16 = softmax(axis = var_123, x = aw_chunk_141_cast_fp16)[name = tensor("op_1341_cast_fp16")]; tensor var_1342_cast_fp16 = softmax(axis = var_123, x = aw_chunk_143_cast_fp16)[name = tensor("op_1342_cast_fp16")]; tensor var_1343_cast_fp16 = softmax(axis = var_123, x = aw_chunk_145_cast_fp16)[name = tensor("op_1343_cast_fp16")]; tensor var_1344_cast_fp16 = softmax(axis = var_123, x = aw_chunk_147_cast_fp16)[name = tensor("op_1344_cast_fp16")]; tensor var_1345_cast_fp16 = softmax(axis = var_123, x = aw_chunk_149_cast_fp16)[name = tensor("op_1345_cast_fp16")]; tensor var_1346_cast_fp16 = softmax(axis = var_123, x = aw_chunk_151_cast_fp16)[name = tensor("op_1346_cast_fp16")]; tensor var_1347_cast_fp16 = softmax(axis = var_123, x = aw_chunk_153_cast_fp16)[name = tensor("op_1347_cast_fp16")]; tensor var_1348_cast_fp16 = softmax(axis = var_123, x = aw_chunk_155_cast_fp16)[name = tensor("op_1348_cast_fp16")]; tensor var_1349_cast_fp16 = softmax(axis = var_123, x = aw_chunk_157_cast_fp16)[name = tensor("op_1349_cast_fp16")]; tensor var_1350_cast_fp16 = softmax(axis = var_123, x = aw_chunk_159_cast_fp16)[name = tensor("op_1350_cast_fp16")]; tensor var_1351_cast_fp16 = softmax(axis = var_123, x = aw_chunk_161_cast_fp16)[name = tensor("op_1351_cast_fp16")]; tensor var_1352_cast_fp16 = softmax(axis = var_123, x = aw_chunk_163_cast_fp16)[name = tensor("op_1352_cast_fp16")]; tensor var_1353_cast_fp16 = softmax(axis = var_123, x = aw_chunk_165_cast_fp16)[name = tensor("op_1353_cast_fp16")]; tensor var_1354_cast_fp16 = softmax(axis = var_123, x = aw_chunk_167_cast_fp16)[name = tensor("op_1354_cast_fp16")]; tensor var_1355_cast_fp16 = softmax(axis = var_123, x = aw_chunk_169_cast_fp16)[name = tensor("op_1355_cast_fp16")]; tensor var_1356_cast_fp16 = softmax(axis = var_123, x = aw_chunk_171_cast_fp16)[name = tensor("op_1356_cast_fp16")]; tensor var_1357_cast_fp16 = softmax(axis = var_123, x = aw_chunk_173_cast_fp16)[name = tensor("op_1357_cast_fp16")]; tensor var_1358_cast_fp16 = softmax(axis = var_123, x = aw_chunk_175_cast_fp16)[name = tensor("op_1358_cast_fp16")]; tensor var_1359_cast_fp16 = softmax(axis = var_123, x = aw_chunk_177_cast_fp16)[name = tensor("op_1359_cast_fp16")]; tensor var_1360_cast_fp16 = softmax(axis = var_123, x = aw_chunk_179_cast_fp16)[name = tensor("op_1360_cast_fp16")]; tensor var_1361_cast_fp16 = softmax(axis = var_123, x = aw_chunk_181_cast_fp16)[name = tensor("op_1361_cast_fp16")]; tensor var_1362_cast_fp16 = softmax(axis = var_123, x = aw_chunk_183_cast_fp16)[name = tensor("op_1362_cast_fp16")]; tensor var_1363_cast_fp16 = softmax(axis = var_123, x = aw_chunk_185_cast_fp16)[name = tensor("op_1363_cast_fp16")]; tensor var_1364_cast_fp16 = softmax(axis = var_123, x = aw_chunk_187_cast_fp16)[name = tensor("op_1364_cast_fp16")]; tensor var_1365_cast_fp16 = softmax(axis = var_123, x = aw_chunk_189_cast_fp16)[name = tensor("op_1365_cast_fp16")]; tensor var_1366_cast_fp16 = softmax(axis = var_123, x = aw_chunk_191_cast_fp16)[name = tensor("op_1366_cast_fp16")]; tensor var_1367_cast_fp16 = softmax(axis = var_123, x = aw_chunk_193_cast_fp16)[name = tensor("op_1367_cast_fp16")]; tensor var_1368_cast_fp16 = softmax(axis = var_123, x = aw_chunk_195_cast_fp16)[name = tensor("op_1368_cast_fp16")]; tensor var_1369_cast_fp16 = softmax(axis = var_123, x = aw_chunk_197_cast_fp16)[name = tensor("op_1369_cast_fp16")]; tensor var_1370_cast_fp16 = softmax(axis = var_123, x = aw_chunk_199_cast_fp16)[name = tensor("op_1370_cast_fp16")]; tensor var_1371_cast_fp16 = softmax(axis = var_123, x = aw_chunk_201_cast_fp16)[name = tensor("op_1371_cast_fp16")]; tensor var_1372_cast_fp16 = softmax(axis = var_123, x = aw_chunk_203_cast_fp16)[name = tensor("op_1372_cast_fp16")]; tensor var_1373_cast_fp16 = softmax(axis = var_123, x = aw_chunk_205_cast_fp16)[name = tensor("op_1373_cast_fp16")]; tensor var_1374_cast_fp16 = softmax(axis = var_123, x = aw_chunk_207_cast_fp16)[name = tensor("op_1374_cast_fp16")]; tensor var_1375_cast_fp16 = softmax(axis = var_123, x = aw_chunk_209_cast_fp16)[name = tensor("op_1375_cast_fp16")]; tensor var_1376_cast_fp16 = softmax(axis = var_123, x = aw_chunk_211_cast_fp16)[name = tensor("op_1376_cast_fp16")]; tensor var_1377_cast_fp16 = softmax(axis = var_123, x = aw_chunk_213_cast_fp16)[name = tensor("op_1377_cast_fp16")]; tensor var_1378_cast_fp16 = softmax(axis = var_123, x = aw_chunk_215_cast_fp16)[name = tensor("op_1378_cast_fp16")]; tensor var_1379_cast_fp16 = softmax(axis = var_123, x = aw_chunk_217_cast_fp16)[name = tensor("op_1379_cast_fp16")]; tensor var_1380_cast_fp16 = softmax(axis = var_123, x = aw_chunk_219_cast_fp16)[name = tensor("op_1380_cast_fp16")]; tensor var_1381_cast_fp16 = softmax(axis = var_123, x = aw_chunk_221_cast_fp16)[name = tensor("op_1381_cast_fp16")]; tensor var_1382_cast_fp16 = softmax(axis = var_123, x = aw_chunk_223_cast_fp16)[name = tensor("op_1382_cast_fp16")]; tensor var_1383_cast_fp16 = softmax(axis = var_123, x = aw_chunk_225_cast_fp16)[name = tensor("op_1383_cast_fp16")]; tensor var_1384_cast_fp16 = softmax(axis = var_123, x = aw_chunk_227_cast_fp16)[name = tensor("op_1384_cast_fp16")]; tensor var_1385_cast_fp16 = softmax(axis = var_123, x = aw_chunk_229_cast_fp16)[name = tensor("op_1385_cast_fp16")]; tensor var_1386_cast_fp16 = softmax(axis = var_123, x = aw_chunk_231_cast_fp16)[name = tensor("op_1386_cast_fp16")]; tensor var_1387_cast_fp16 = softmax(axis = var_123, x = aw_chunk_233_cast_fp16)[name = tensor("op_1387_cast_fp16")]; tensor var_1388_cast_fp16 = softmax(axis = var_123, x = aw_chunk_235_cast_fp16)[name = tensor("op_1388_cast_fp16")]; tensor var_1389_cast_fp16 = softmax(axis = var_123, x = aw_chunk_237_cast_fp16)[name = tensor("op_1389_cast_fp16")]; tensor var_1390_cast_fp16 = softmax(axis = var_123, x = aw_chunk_239_cast_fp16)[name = tensor("op_1390_cast_fp16")]; tensor var_1391_cast_fp16 = softmax(axis = var_123, x = aw_chunk_241_cast_fp16)[name = tensor("op_1391_cast_fp16")]; tensor var_1392_cast_fp16 = softmax(axis = var_123, x = aw_chunk_243_cast_fp16)[name = tensor("op_1392_cast_fp16")]; tensor var_1393_cast_fp16 = softmax(axis = var_123, x = aw_chunk_245_cast_fp16)[name = tensor("op_1393_cast_fp16")]; tensor var_1394_cast_fp16 = softmax(axis = var_123, x = aw_chunk_247_cast_fp16)[name = tensor("op_1394_cast_fp16")]; tensor var_1395_cast_fp16 = softmax(axis = var_123, x = aw_chunk_249_cast_fp16)[name = tensor("op_1395_cast_fp16")]; tensor var_1396_cast_fp16 = softmax(axis = var_123, x = aw_chunk_251_cast_fp16)[name = tensor("op_1396_cast_fp16")]; tensor var_1397_cast_fp16 = softmax(axis = var_123, x = aw_chunk_253_cast_fp16)[name = tensor("op_1397_cast_fp16")]; tensor var_1398_cast_fp16 = softmax(axis = var_123, x = aw_chunk_255_cast_fp16)[name = tensor("op_1398_cast_fp16")]; tensor var_1400_equation_0 = const()[name = tensor("op_1400_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1400_cast_fp16 = einsum(equation = var_1400_equation_0, values = (var_1048_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1400_cast_fp16")]; tensor var_1402_equation_0 = const()[name = tensor("op_1402_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1402_cast_fp16 = einsum(equation = var_1402_equation_0, values = (var_1048_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1402_cast_fp16")]; tensor var_1404_equation_0 = const()[name = tensor("op_1404_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1404_cast_fp16 = einsum(equation = var_1404_equation_0, values = (var_1048_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1404_cast_fp16")]; tensor var_1406_equation_0 = const()[name = tensor("op_1406_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1406_cast_fp16 = einsum(equation = var_1406_equation_0, values = (var_1048_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1406_cast_fp16")]; tensor var_1408_equation_0 = const()[name = tensor("op_1408_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1408_cast_fp16 = einsum(equation = var_1408_equation_0, values = (var_1048_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1408_cast_fp16")]; tensor var_1410_equation_0 = const()[name = tensor("op_1410_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1410_cast_fp16 = einsum(equation = var_1410_equation_0, values = (var_1048_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1410_cast_fp16")]; tensor var_1412_equation_0 = const()[name = tensor("op_1412_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1412_cast_fp16 = einsum(equation = var_1412_equation_0, values = (var_1048_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1412_cast_fp16")]; tensor var_1414_equation_0 = const()[name = tensor("op_1414_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1414_cast_fp16 = einsum(equation = var_1414_equation_0, values = (var_1048_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1414_cast_fp16")]; tensor var_1416_equation_0 = const()[name = tensor("op_1416_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1416_cast_fp16 = einsum(equation = var_1416_equation_0, values = (var_1052_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1416_cast_fp16")]; tensor var_1418_equation_0 = const()[name = tensor("op_1418_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1418_cast_fp16 = einsum(equation = var_1418_equation_0, values = (var_1052_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1418_cast_fp16")]; tensor var_1420_equation_0 = const()[name = tensor("op_1420_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1420_cast_fp16 = einsum(equation = var_1420_equation_0, values = (var_1052_cast_fp16, var_1345_cast_fp16))[name = tensor("op_1420_cast_fp16")]; tensor var_1422_equation_0 = const()[name = tensor("op_1422_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1422_cast_fp16 = einsum(equation = var_1422_equation_0, values = (var_1052_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1422_cast_fp16")]; tensor var_1424_equation_0 = const()[name = tensor("op_1424_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1424_cast_fp16 = einsum(equation = var_1424_equation_0, values = (var_1052_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1424_cast_fp16")]; tensor var_1426_equation_0 = const()[name = tensor("op_1426_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1426_cast_fp16 = einsum(equation = var_1426_equation_0, values = (var_1052_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1426_cast_fp16")]; tensor var_1428_equation_0 = const()[name = tensor("op_1428_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1428_cast_fp16 = einsum(equation = var_1428_equation_0, values = (var_1052_cast_fp16, var_1349_cast_fp16))[name = tensor("op_1428_cast_fp16")]; tensor var_1430_equation_0 = const()[name = tensor("op_1430_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1430_cast_fp16 = einsum(equation = var_1430_equation_0, values = (var_1052_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1430_cast_fp16")]; tensor var_1432_equation_0 = const()[name = tensor("op_1432_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1432_cast_fp16 = einsum(equation = var_1432_equation_0, values = (var_1056_cast_fp16, var_1351_cast_fp16))[name = tensor("op_1432_cast_fp16")]; tensor var_1434_equation_0 = const()[name = tensor("op_1434_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1434_cast_fp16 = einsum(equation = var_1434_equation_0, values = (var_1056_cast_fp16, var_1352_cast_fp16))[name = tensor("op_1434_cast_fp16")]; tensor var_1436_equation_0 = const()[name = tensor("op_1436_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1436_cast_fp16 = einsum(equation = var_1436_equation_0, values = (var_1056_cast_fp16, var_1353_cast_fp16))[name = tensor("op_1436_cast_fp16")]; tensor var_1438_equation_0 = const()[name = tensor("op_1438_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1438_cast_fp16 = einsum(equation = var_1438_equation_0, values = (var_1056_cast_fp16, var_1354_cast_fp16))[name = tensor("op_1438_cast_fp16")]; tensor var_1440_equation_0 = const()[name = tensor("op_1440_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1440_cast_fp16 = einsum(equation = var_1440_equation_0, values = (var_1056_cast_fp16, var_1355_cast_fp16))[name = tensor("op_1440_cast_fp16")]; tensor var_1442_equation_0 = const()[name = tensor("op_1442_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1442_cast_fp16 = einsum(equation = var_1442_equation_0, values = (var_1056_cast_fp16, var_1356_cast_fp16))[name = tensor("op_1442_cast_fp16")]; tensor var_1444_equation_0 = const()[name = tensor("op_1444_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1444_cast_fp16 = einsum(equation = var_1444_equation_0, values = (var_1056_cast_fp16, var_1357_cast_fp16))[name = tensor("op_1444_cast_fp16")]; tensor var_1446_equation_0 = const()[name = tensor("op_1446_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1446_cast_fp16 = einsum(equation = var_1446_equation_0, values = (var_1056_cast_fp16, var_1358_cast_fp16))[name = tensor("op_1446_cast_fp16")]; tensor var_1448_equation_0 = const()[name = tensor("op_1448_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1448_cast_fp16 = einsum(equation = var_1448_equation_0, values = (var_1060_cast_fp16, var_1359_cast_fp16))[name = tensor("op_1448_cast_fp16")]; tensor var_1450_equation_0 = const()[name = tensor("op_1450_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1450_cast_fp16 = einsum(equation = var_1450_equation_0, values = (var_1060_cast_fp16, var_1360_cast_fp16))[name = tensor("op_1450_cast_fp16")]; tensor var_1452_equation_0 = const()[name = tensor("op_1452_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1452_cast_fp16 = einsum(equation = var_1452_equation_0, values = (var_1060_cast_fp16, var_1361_cast_fp16))[name = tensor("op_1452_cast_fp16")]; tensor var_1454_equation_0 = const()[name = tensor("op_1454_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1454_cast_fp16 = einsum(equation = var_1454_equation_0, values = (var_1060_cast_fp16, var_1362_cast_fp16))[name = tensor("op_1454_cast_fp16")]; tensor var_1456_equation_0 = const()[name = tensor("op_1456_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1456_cast_fp16 = einsum(equation = var_1456_equation_0, values = (var_1060_cast_fp16, var_1363_cast_fp16))[name = tensor("op_1456_cast_fp16")]; tensor var_1458_equation_0 = const()[name = tensor("op_1458_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1458_cast_fp16 = einsum(equation = var_1458_equation_0, values = (var_1060_cast_fp16, var_1364_cast_fp16))[name = tensor("op_1458_cast_fp16")]; tensor var_1460_equation_0 = const()[name = tensor("op_1460_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1460_cast_fp16 = einsum(equation = var_1460_equation_0, values = (var_1060_cast_fp16, var_1365_cast_fp16))[name = tensor("op_1460_cast_fp16")]; tensor var_1462_equation_0 = const()[name = tensor("op_1462_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1462_cast_fp16 = einsum(equation = var_1462_equation_0, values = (var_1060_cast_fp16, var_1366_cast_fp16))[name = tensor("op_1462_cast_fp16")]; tensor var_1464_equation_0 = const()[name = tensor("op_1464_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1464_cast_fp16 = einsum(equation = var_1464_equation_0, values = (var_1064_cast_fp16, var_1367_cast_fp16))[name = tensor("op_1464_cast_fp16")]; tensor var_1466_equation_0 = const()[name = tensor("op_1466_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1466_cast_fp16 = einsum(equation = var_1466_equation_0, values = (var_1064_cast_fp16, var_1368_cast_fp16))[name = tensor("op_1466_cast_fp16")]; tensor var_1468_equation_0 = const()[name = tensor("op_1468_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1468_cast_fp16 = einsum(equation = var_1468_equation_0, values = (var_1064_cast_fp16, var_1369_cast_fp16))[name = tensor("op_1468_cast_fp16")]; tensor var_1470_equation_0 = const()[name = tensor("op_1470_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1470_cast_fp16 = einsum(equation = var_1470_equation_0, values = (var_1064_cast_fp16, var_1370_cast_fp16))[name = tensor("op_1470_cast_fp16")]; tensor var_1472_equation_0 = const()[name = tensor("op_1472_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1472_cast_fp16 = einsum(equation = var_1472_equation_0, values = (var_1064_cast_fp16, var_1371_cast_fp16))[name = tensor("op_1472_cast_fp16")]; tensor var_1474_equation_0 = const()[name = tensor("op_1474_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1474_cast_fp16 = einsum(equation = var_1474_equation_0, values = (var_1064_cast_fp16, var_1372_cast_fp16))[name = tensor("op_1474_cast_fp16")]; tensor var_1476_equation_0 = const()[name = tensor("op_1476_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1476_cast_fp16 = einsum(equation = var_1476_equation_0, values = (var_1064_cast_fp16, var_1373_cast_fp16))[name = tensor("op_1476_cast_fp16")]; tensor var_1478_equation_0 = const()[name = tensor("op_1478_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1478_cast_fp16 = einsum(equation = var_1478_equation_0, values = (var_1064_cast_fp16, var_1374_cast_fp16))[name = tensor("op_1478_cast_fp16")]; tensor var_1480_equation_0 = const()[name = tensor("op_1480_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1480_cast_fp16 = einsum(equation = var_1480_equation_0, values = (var_1068_cast_fp16, var_1375_cast_fp16))[name = tensor("op_1480_cast_fp16")]; tensor var_1482_equation_0 = const()[name = tensor("op_1482_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1482_cast_fp16 = einsum(equation = var_1482_equation_0, values = (var_1068_cast_fp16, var_1376_cast_fp16))[name = tensor("op_1482_cast_fp16")]; tensor var_1484_equation_0 = const()[name = tensor("op_1484_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1484_cast_fp16 = einsum(equation = var_1484_equation_0, values = (var_1068_cast_fp16, var_1377_cast_fp16))[name = tensor("op_1484_cast_fp16")]; tensor var_1486_equation_0 = const()[name = tensor("op_1486_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1486_cast_fp16 = einsum(equation = var_1486_equation_0, values = (var_1068_cast_fp16, var_1378_cast_fp16))[name = tensor("op_1486_cast_fp16")]; tensor var_1488_equation_0 = const()[name = tensor("op_1488_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1488_cast_fp16 = einsum(equation = var_1488_equation_0, values = (var_1068_cast_fp16, var_1379_cast_fp16))[name = tensor("op_1488_cast_fp16")]; tensor var_1490_equation_0 = const()[name = tensor("op_1490_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1490_cast_fp16 = einsum(equation = var_1490_equation_0, values = (var_1068_cast_fp16, var_1380_cast_fp16))[name = tensor("op_1490_cast_fp16")]; tensor var_1492_equation_0 = const()[name = tensor("op_1492_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1492_cast_fp16 = einsum(equation = var_1492_equation_0, values = (var_1068_cast_fp16, var_1381_cast_fp16))[name = tensor("op_1492_cast_fp16")]; tensor var_1494_equation_0 = const()[name = tensor("op_1494_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1494_cast_fp16 = einsum(equation = var_1494_equation_0, values = (var_1068_cast_fp16, var_1382_cast_fp16))[name = tensor("op_1494_cast_fp16")]; tensor var_1496_equation_0 = const()[name = tensor("op_1496_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1496_cast_fp16 = einsum(equation = var_1496_equation_0, values = (var_1072_cast_fp16, var_1383_cast_fp16))[name = tensor("op_1496_cast_fp16")]; tensor var_1498_equation_0 = const()[name = tensor("op_1498_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1498_cast_fp16 = einsum(equation = var_1498_equation_0, values = (var_1072_cast_fp16, var_1384_cast_fp16))[name = tensor("op_1498_cast_fp16")]; tensor var_1500_equation_0 = const()[name = tensor("op_1500_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1500_cast_fp16 = einsum(equation = var_1500_equation_0, values = (var_1072_cast_fp16, var_1385_cast_fp16))[name = tensor("op_1500_cast_fp16")]; tensor var_1502_equation_0 = const()[name = tensor("op_1502_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1502_cast_fp16 = einsum(equation = var_1502_equation_0, values = (var_1072_cast_fp16, var_1386_cast_fp16))[name = tensor("op_1502_cast_fp16")]; tensor var_1504_equation_0 = const()[name = tensor("op_1504_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1504_cast_fp16 = einsum(equation = var_1504_equation_0, values = (var_1072_cast_fp16, var_1387_cast_fp16))[name = tensor("op_1504_cast_fp16")]; tensor var_1506_equation_0 = const()[name = tensor("op_1506_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1506_cast_fp16 = einsum(equation = var_1506_equation_0, values = (var_1072_cast_fp16, var_1388_cast_fp16))[name = tensor("op_1506_cast_fp16")]; tensor var_1508_equation_0 = const()[name = tensor("op_1508_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1508_cast_fp16 = einsum(equation = var_1508_equation_0, values = (var_1072_cast_fp16, var_1389_cast_fp16))[name = tensor("op_1508_cast_fp16")]; tensor var_1510_equation_0 = const()[name = tensor("op_1510_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1510_cast_fp16 = einsum(equation = var_1510_equation_0, values = (var_1072_cast_fp16, var_1390_cast_fp16))[name = tensor("op_1510_cast_fp16")]; tensor var_1512_equation_0 = const()[name = tensor("op_1512_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1512_cast_fp16 = einsum(equation = var_1512_equation_0, values = (var_1076_cast_fp16, var_1391_cast_fp16))[name = tensor("op_1512_cast_fp16")]; tensor var_1514_equation_0 = const()[name = tensor("op_1514_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1076_cast_fp16, var_1392_cast_fp16))[name = tensor("op_1514_cast_fp16")]; tensor var_1516_equation_0 = const()[name = tensor("op_1516_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1516_cast_fp16 = einsum(equation = var_1516_equation_0, values = (var_1076_cast_fp16, var_1393_cast_fp16))[name = tensor("op_1516_cast_fp16")]; tensor var_1518_equation_0 = const()[name = tensor("op_1518_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1518_cast_fp16 = einsum(equation = var_1518_equation_0, values = (var_1076_cast_fp16, var_1394_cast_fp16))[name = tensor("op_1518_cast_fp16")]; tensor var_1520_equation_0 = const()[name = tensor("op_1520_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1520_cast_fp16 = einsum(equation = var_1520_equation_0, values = (var_1076_cast_fp16, var_1395_cast_fp16))[name = tensor("op_1520_cast_fp16")]; tensor var_1522_equation_0 = const()[name = tensor("op_1522_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1522_cast_fp16 = einsum(equation = var_1522_equation_0, values = (var_1076_cast_fp16, var_1396_cast_fp16))[name = tensor("op_1522_cast_fp16")]; tensor var_1524_equation_0 = const()[name = tensor("op_1524_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1524_cast_fp16 = einsum(equation = var_1524_equation_0, values = (var_1076_cast_fp16, var_1397_cast_fp16))[name = tensor("op_1524_cast_fp16")]; tensor var_1526_equation_0 = const()[name = tensor("op_1526_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1526_cast_fp16 = einsum(equation = var_1526_equation_0, values = (var_1076_cast_fp16, var_1398_cast_fp16))[name = tensor("op_1526_cast_fp16")]; tensor var_1528_interleave_0 = const()[name = tensor("op_1528_interleave_0"), val = tensor(false)]; tensor var_1528_cast_fp16 = concat(axis = var_95, interleave = var_1528_interleave_0, values = (var_1400_cast_fp16, var_1402_cast_fp16, var_1404_cast_fp16, var_1406_cast_fp16, var_1408_cast_fp16, var_1410_cast_fp16, var_1412_cast_fp16, var_1414_cast_fp16))[name = tensor("op_1528_cast_fp16")]; tensor var_1530_interleave_0 = const()[name = tensor("op_1530_interleave_0"), val = tensor(false)]; tensor var_1530_cast_fp16 = concat(axis = var_95, interleave = var_1530_interleave_0, values = (var_1416_cast_fp16, var_1418_cast_fp16, var_1420_cast_fp16, var_1422_cast_fp16, var_1424_cast_fp16, var_1426_cast_fp16, var_1428_cast_fp16, var_1430_cast_fp16))[name = tensor("op_1530_cast_fp16")]; tensor var_1532_interleave_0 = const()[name = tensor("op_1532_interleave_0"), val = tensor(false)]; tensor var_1532_cast_fp16 = concat(axis = var_95, interleave = var_1532_interleave_0, values = (var_1432_cast_fp16, var_1434_cast_fp16, var_1436_cast_fp16, var_1438_cast_fp16, var_1440_cast_fp16, var_1442_cast_fp16, var_1444_cast_fp16, var_1446_cast_fp16))[name = tensor("op_1532_cast_fp16")]; tensor var_1534_interleave_0 = const()[name = tensor("op_1534_interleave_0"), val = tensor(false)]; tensor var_1534_cast_fp16 = concat(axis = var_95, interleave = var_1534_interleave_0, values = (var_1448_cast_fp16, var_1450_cast_fp16, var_1452_cast_fp16, var_1454_cast_fp16, var_1456_cast_fp16, var_1458_cast_fp16, var_1460_cast_fp16, var_1462_cast_fp16))[name = tensor("op_1534_cast_fp16")]; tensor var_1536_interleave_0 = const()[name = tensor("op_1536_interleave_0"), val = tensor(false)]; tensor var_1536_cast_fp16 = concat(axis = var_95, interleave = var_1536_interleave_0, values = (var_1464_cast_fp16, var_1466_cast_fp16, var_1468_cast_fp16, var_1470_cast_fp16, var_1472_cast_fp16, var_1474_cast_fp16, var_1476_cast_fp16, var_1478_cast_fp16))[name = tensor("op_1536_cast_fp16")]; tensor var_1538_interleave_0 = const()[name = tensor("op_1538_interleave_0"), val = tensor(false)]; tensor var_1538_cast_fp16 = concat(axis = var_95, interleave = var_1538_interleave_0, values = (var_1480_cast_fp16, var_1482_cast_fp16, var_1484_cast_fp16, var_1486_cast_fp16, var_1488_cast_fp16, var_1490_cast_fp16, var_1492_cast_fp16, var_1494_cast_fp16))[name = tensor("op_1538_cast_fp16")]; tensor var_1540_interleave_0 = const()[name = tensor("op_1540_interleave_0"), val = tensor(false)]; tensor var_1540_cast_fp16 = concat(axis = var_95, interleave = var_1540_interleave_0, values = (var_1496_cast_fp16, var_1498_cast_fp16, var_1500_cast_fp16, var_1502_cast_fp16, var_1504_cast_fp16, var_1506_cast_fp16, var_1508_cast_fp16, var_1510_cast_fp16))[name = tensor("op_1540_cast_fp16")]; tensor var_1542_interleave_0 = const()[name = tensor("op_1542_interleave_0"), val = tensor(false)]; tensor var_1542_cast_fp16 = concat(axis = var_95, interleave = var_1542_interleave_0, values = (var_1512_cast_fp16, var_1514_cast_fp16, var_1516_cast_fp16, var_1518_cast_fp16, var_1520_cast_fp16, var_1522_cast_fp16, var_1524_cast_fp16, var_1526_cast_fp16))[name = tensor("op_1542_cast_fp16")]; tensor input_27_interleave_0 = const()[name = tensor("input_27_interleave_0"), val = tensor(false)]; tensor input_27_cast_fp16 = concat(axis = var_123, interleave = input_27_interleave_0, values = (var_1528_cast_fp16, var_1530_cast_fp16, var_1532_cast_fp16, var_1534_cast_fp16, var_1536_cast_fp16, var_1538_cast_fp16, var_1540_cast_fp16, var_1542_cast_fp16))[name = tensor("input_27_cast_fp16")]; tensor var_1548 = const()[name = tensor("op_1548"), val = tensor([1, 1])]; tensor var_1550 = const()[name = tensor("op_1550"), val = tensor([1, 1])]; tensor var_1552_pad_type_0 = const()[name = tensor("op_1552_pad_type_0"), val = tensor("custom")]; tensor var_1552_pad_0 = const()[name = tensor("op_1552_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10855744)))]; tensor down_blocks_0_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11060608)))]; tensor var_1552_cast_fp16 = conv(bias = down_blocks_0_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_1550, groups = var_123, pad = var_1552_pad_0, pad_type = var_1552_pad_type_0, strides = var_1548, weight = down_blocks_0_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_27_cast_fp16)[name = tensor("op_1552_cast_fp16")]; tensor inputs_5_cast_fp16 = add(x = var_1552_cast_fp16, y = inputs_3_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; tensor input_29_axes_0 = const()[name = tensor("input_29_axes_0"), val = tensor([1])]; tensor input_29_gamma_0_to_fp16 = const()[name = tensor("input_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11061312)))]; tensor input_29_beta_0_to_fp16 = const()[name = tensor("input_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11062016)))]; tensor var_1562_to_fp16 = const()[name = tensor("op_1562_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_29_cast_fp16 = layer_norm(axes = input_29_axes_0, beta = input_29_beta_0_to_fp16, epsilon = var_1562_to_fp16, gamma = input_29_gamma_0_to_fp16, x = inputs_5_cast_fp16)[name = tensor("input_29_cast_fp16")]; tensor var_1578 = const()[name = tensor("op_1578"), val = tensor([1, 1])]; tensor var_1580 = const()[name = tensor("op_1580"), val = tensor([1, 1])]; tensor var_1582_pad_type_0 = const()[name = tensor("op_1582_pad_type_0"), val = tensor("custom")]; tensor var_1582_pad_0 = const()[name = tensor("op_1582_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11062720)))]; tensor down_blocks_0_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12701184)))]; tensor var_1582_cast_fp16 = conv(bias = down_blocks_0_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_1580, groups = var_123, pad = var_1582_pad_0, pad_type = var_1582_pad_type_0, strides = var_1578, weight = down_blocks_0_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_29_cast_fp16)[name = tensor("op_1582_cast_fp16")]; tensor var_1583_split_sizes_0 = const()[name = tensor("op_1583_split_sizes_0"), val = tensor([1280, 1280])]; tensor var_1583_axis_0 = const()[name = tensor("op_1583_axis_0"), val = tensor(1)]; tensor var_1583_cast_fp16_0, tensor var_1583_cast_fp16_1 = split(axis = var_1583_axis_0, split_sizes = var_1583_split_sizes_0, x = var_1582_cast_fp16)[name = tensor("op_1583_cast_fp16")]; tensor var_1585_mode_0 = const()[name = tensor("op_1585_mode_0"), val = tensor("EXACT")]; tensor var_1585_cast_fp16 = gelu(mode = var_1585_mode_0, x = var_1583_cast_fp16_1)[name = tensor("op_1585_cast_fp16")]; tensor input_31_cast_fp16 = mul(x = var_1583_cast_fp16_0, y = var_1585_cast_fp16)[name = tensor("input_31_cast_fp16")]; tensor var_1589 = const()[name = tensor("op_1589"), val = tensor([1, 1])]; tensor var_1591 = const()[name = tensor("op_1591"), val = tensor([1, 1])]; tensor var_1593_pad_type_0 = const()[name = tensor("op_1593_pad_type_0"), val = tensor("custom")]; tensor var_1593_pad_0 = const()[name = tensor("op_1593_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(12706368)))]; tensor down_blocks_0_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13525632)))]; tensor var_1593_cast_fp16 = conv(bias = down_blocks_0_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_1591, groups = var_123, pad = var_1593_pad_0, pad_type = var_1593_pad_type_0, strides = var_1589, weight = down_blocks_0_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("op_1593_cast_fp16")]; tensor hidden_states_15_cast_fp16 = add(x = var_1593_cast_fp16, y = inputs_5_cast_fp16)[name = tensor("hidden_states_15_cast_fp16")]; tensor var_1595 = const()[name = tensor("op_1595"), val = tensor([2, 320, 64, 64])]; tensor input_33_cast_fp16 = reshape(shape = var_1595, x = hidden_states_15_cast_fp16)[name = tensor("input_33_cast_fp16")]; tensor var_1599 = const()[name = tensor("op_1599"), val = tensor([1, 1])]; tensor var_1601 = const()[name = tensor("op_1601"), val = tensor([1, 1])]; tensor hidden_states_17_pad_type_0 = const()[name = tensor("hidden_states_17_pad_type_0"), val = tensor("custom")]; tensor hidden_states_17_pad_0 = const()[name = tensor("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_0_proj_out_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13526336)))]; tensor down_blocks_0_attentions_0_proj_out_bias_to_fp16 = const()[name = tensor("down_blocks_0_attentions_0_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13731200)))]; tensor hidden_states_17_cast_fp16 = conv(bias = down_blocks_0_attentions_0_proj_out_bias_to_fp16, dilations = var_1601, groups = var_123, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = var_1599, weight = down_blocks_0_attentions_0_proj_out_weight_to_fp16, x = input_33_cast_fp16)[name = tensor("hidden_states_17_cast_fp16")]; tensor input_35_cast_fp16 = add(x = hidden_states_17_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("input_35_cast_fp16")]; tensor reshape_12_shape_0 = const()[name = tensor("reshape_12_shape_0"), val = tensor([2, 32, 10, 64, 64])]; tensor reshape_12_cast_fp16 = reshape(shape = reshape_12_shape_0, x = input_35_cast_fp16)[name = tensor("reshape_12_cast_fp16")]; tensor reduce_mean_9_axes_0 = const()[name = tensor("reduce_mean_9_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_9_keep_dims_0 = const()[name = tensor("reduce_mean_9_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_9_cast_fp16 = reduce_mean(axes = reduce_mean_9_axes_0, keep_dims = reduce_mean_9_keep_dims_0, x = reshape_12_cast_fp16)[name = tensor("reduce_mean_9_cast_fp16")]; tensor sub_6_cast_fp16 = sub(x = reshape_12_cast_fp16, y = reduce_mean_9_cast_fp16)[name = tensor("sub_6_cast_fp16")]; tensor square_3_cast_fp16 = square(x = sub_6_cast_fp16)[name = tensor("square_3_cast_fp16")]; tensor reduce_mean_11_axes_0 = const()[name = tensor("reduce_mean_11_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_11_keep_dims_0 = const()[name = tensor("reduce_mean_11_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_11_cast_fp16 = reduce_mean(axes = reduce_mean_11_axes_0, keep_dims = reduce_mean_11_keep_dims_0, x = square_3_cast_fp16)[name = tensor("reduce_mean_11_cast_fp16")]; tensor add_6_y_0_to_fp16 = const()[name = tensor("add_6_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_6_cast_fp16 = add(x = reduce_mean_11_cast_fp16, y = add_6_y_0_to_fp16)[name = tensor("add_6_cast_fp16")]; tensor sqrt_3_cast_fp16 = sqrt(x = add_6_cast_fp16)[name = tensor("sqrt_3_cast_fp16")]; tensor real_div_3_cast_fp16 = real_div(x = sub_6_cast_fp16, y = sqrt_3_cast_fp16)[name = tensor("real_div_3_cast_fp16")]; tensor reshape_13_shape_0 = const()[name = tensor("reshape_13_shape_0"), val = tensor([2, 320, 64, 64])]; tensor reshape_13_cast_fp16 = reshape(shape = reshape_13_shape_0, x = real_div_3_cast_fp16)[name = tensor("reshape_13_cast_fp16")]; tensor add_7_gamma_0_to_fp16 = const()[name = tensor("add_7_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13731904)))]; tensor add_7_beta_0_to_fp16 = const()[name = tensor("add_7_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13732608)))]; tensor add_7_epsilon_0_to_fp16 = const()[name = tensor("add_7_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_7_cast_fp16 = batch_norm(beta = add_7_beta_0_to_fp16, epsilon = add_7_epsilon_0_to_fp16, gamma = add_7_gamma_0_to_fp16, mean = add_1_mean_0_to_fp16, variance = add_1_variance_0_to_fp16, x = reshape_13_cast_fp16)[name = tensor("add_7_cast_fp16")]; tensor input_39_cast_fp16 = silu(x = add_7_cast_fp16)[name = tensor("input_39_cast_fp16")]; tensor var_1616 = const()[name = tensor("op_1616"), val = tensor([1, 1])]; tensor var_1618 = const()[name = tensor("op_1618"), val = tensor([1, 1])]; tensor hidden_states_19_pad_type_0 = const()[name = tensor("hidden_states_19_pad_type_0"), val = tensor("custom")]; tensor hidden_states_19_pad_0 = const()[name = tensor("hidden_states_19_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_0_resnets_1_conv1_weight_to_fp16 = const()[name = tensor("down_blocks_0_resnets_1_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13733312)))]; tensor down_blocks_0_resnets_1_conv1_bias_to_fp16 = const()[name = tensor("down_blocks_0_resnets_1_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15576576)))]; tensor hidden_states_19_cast_fp16 = conv(bias = down_blocks_0_resnets_1_conv1_bias_to_fp16, dilations = var_1618, groups = var_123, pad = hidden_states_19_pad_0, pad_type = hidden_states_19_pad_type_0, strides = var_1616, weight = down_blocks_0_resnets_1_conv1_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("hidden_states_19_cast_fp16")]; tensor var_1624 = const()[name = tensor("op_1624"), val = tensor([1, 1])]; tensor var_1626 = const()[name = tensor("op_1626"), val = tensor([1, 1])]; tensor temb_3_pad_type_0 = const()[name = tensor("temb_3_pad_type_0"), val = tensor("custom")]; tensor temb_3_pad_0 = const()[name = tensor("temb_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_resnets_1_time_emb_proj_weight_to_fp16 = const()[name = tensor("down_blocks_0_resnets_1_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15577280)))]; tensor down_blocks_0_resnets_1_time_emb_proj_bias_to_fp16 = const()[name = tensor("down_blocks_0_resnets_1_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16396544)))]; tensor temb_3_cast_fp16 = conv(bias = down_blocks_0_resnets_1_time_emb_proj_bias_to_fp16, dilations = var_1626, groups = var_123, pad = temb_3_pad_0, pad_type = temb_3_pad_type_0, strides = var_1624, weight = down_blocks_0_resnets_1_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_3_cast_fp16")]; tensor input_43_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = temb_3_cast_fp16)[name = tensor("input_43_cast_fp16")]; tensor reshape_16_shape_0 = const()[name = tensor("reshape_16_shape_0"), val = tensor([2, 32, 10, 64, 64])]; tensor reshape_16_cast_fp16 = reshape(shape = reshape_16_shape_0, x = input_43_cast_fp16)[name = tensor("reshape_16_cast_fp16")]; tensor reduce_mean_12_axes_0 = const()[name = tensor("reduce_mean_12_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_12_keep_dims_0 = const()[name = tensor("reduce_mean_12_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_12_cast_fp16 = reduce_mean(axes = reduce_mean_12_axes_0, keep_dims = reduce_mean_12_keep_dims_0, x = reshape_16_cast_fp16)[name = tensor("reduce_mean_12_cast_fp16")]; tensor sub_8_cast_fp16 = sub(x = reshape_16_cast_fp16, y = reduce_mean_12_cast_fp16)[name = tensor("sub_8_cast_fp16")]; tensor square_4_cast_fp16 = square(x = sub_8_cast_fp16)[name = tensor("square_4_cast_fp16")]; tensor reduce_mean_14_axes_0 = const()[name = tensor("reduce_mean_14_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_14_keep_dims_0 = const()[name = tensor("reduce_mean_14_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_14_cast_fp16 = reduce_mean(axes = reduce_mean_14_axes_0, keep_dims = reduce_mean_14_keep_dims_0, x = square_4_cast_fp16)[name = tensor("reduce_mean_14_cast_fp16")]; tensor add_8_y_0_to_fp16 = const()[name = tensor("add_8_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_8_cast_fp16 = add(x = reduce_mean_14_cast_fp16, y = add_8_y_0_to_fp16)[name = tensor("add_8_cast_fp16")]; tensor sqrt_4_cast_fp16 = sqrt(x = add_8_cast_fp16)[name = tensor("sqrt_4_cast_fp16")]; tensor real_div_4_cast_fp16 = real_div(x = sub_8_cast_fp16, y = sqrt_4_cast_fp16)[name = tensor("real_div_4_cast_fp16")]; tensor reshape_17_shape_0 = const()[name = tensor("reshape_17_shape_0"), val = tensor([2, 320, 64, 64])]; tensor reshape_17_cast_fp16 = reshape(shape = reshape_17_shape_0, x = real_div_4_cast_fp16)[name = tensor("reshape_17_cast_fp16")]; tensor add_9_gamma_0_to_fp16 = const()[name = tensor("add_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16397248)))]; tensor add_9_beta_0_to_fp16 = const()[name = tensor("add_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16397952)))]; tensor add_9_epsilon_0_to_fp16 = const()[name = tensor("add_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_9_cast_fp16 = batch_norm(beta = add_9_beta_0_to_fp16, epsilon = add_9_epsilon_0_to_fp16, gamma = add_9_gamma_0_to_fp16, mean = add_1_mean_0_to_fp16, variance = add_1_variance_0_to_fp16, x = reshape_17_cast_fp16)[name = tensor("add_9_cast_fp16")]; tensor input_47_cast_fp16 = silu(x = add_9_cast_fp16)[name = tensor("input_47_cast_fp16")]; tensor var_1636 = const()[name = tensor("op_1636"), val = tensor([1, 1])]; tensor var_1638 = const()[name = tensor("op_1638"), val = tensor([1, 1])]; tensor hidden_states_21_pad_type_0 = const()[name = tensor("hidden_states_21_pad_type_0"), val = tensor("custom")]; tensor hidden_states_21_pad_0 = const()[name = tensor("hidden_states_21_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_0_resnets_1_conv2_weight_to_fp16 = const()[name = tensor("down_blocks_0_resnets_1_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(16398656)))]; tensor down_blocks_0_resnets_1_conv2_bias_to_fp16 = const()[name = tensor("down_blocks_0_resnets_1_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18241920)))]; tensor hidden_states_21_cast_fp16 = conv(bias = down_blocks_0_resnets_1_conv2_bias_to_fp16, dilations = var_1638, groups = var_123, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = var_1636, weight = down_blocks_0_resnets_1_conv2_weight_to_fp16, x = input_47_cast_fp16)[name = tensor("hidden_states_21_cast_fp16")]; tensor hidden_states_23_cast_fp16 = add(x = input_35_cast_fp16, y = hidden_states_21_cast_fp16)[name = tensor("hidden_states_23_cast_fp16")]; tensor reshape_20_shape_0 = const()[name = tensor("reshape_20_shape_0"), val = tensor([2, 32, 10, 64, 64])]; tensor reshape_20_cast_fp16 = reshape(shape = reshape_20_shape_0, x = hidden_states_23_cast_fp16)[name = tensor("reshape_20_cast_fp16")]; tensor reduce_mean_15_axes_0 = const()[name = tensor("reduce_mean_15_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_15_keep_dims_0 = const()[name = tensor("reduce_mean_15_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_15_cast_fp16 = reduce_mean(axes = reduce_mean_15_axes_0, keep_dims = reduce_mean_15_keep_dims_0, x = reshape_20_cast_fp16)[name = tensor("reduce_mean_15_cast_fp16")]; tensor sub_10_cast_fp16 = sub(x = reshape_20_cast_fp16, y = reduce_mean_15_cast_fp16)[name = tensor("sub_10_cast_fp16")]; tensor square_5_cast_fp16 = square(x = sub_10_cast_fp16)[name = tensor("square_5_cast_fp16")]; tensor reduce_mean_17_axes_0 = const()[name = tensor("reduce_mean_17_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_17_keep_dims_0 = const()[name = tensor("reduce_mean_17_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_17_cast_fp16 = reduce_mean(axes = reduce_mean_17_axes_0, keep_dims = reduce_mean_17_keep_dims_0, x = square_5_cast_fp16)[name = tensor("reduce_mean_17_cast_fp16")]; tensor add_10_y_0_to_fp16 = const()[name = tensor("add_10_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_10_cast_fp16 = add(x = reduce_mean_17_cast_fp16, y = add_10_y_0_to_fp16)[name = tensor("add_10_cast_fp16")]; tensor sqrt_5_cast_fp16 = sqrt(x = add_10_cast_fp16)[name = tensor("sqrt_5_cast_fp16")]; tensor real_div_5_cast_fp16 = real_div(x = sub_10_cast_fp16, y = sqrt_5_cast_fp16)[name = tensor("real_div_5_cast_fp16")]; tensor reshape_21_shape_0 = const()[name = tensor("reshape_21_shape_0"), val = tensor([2, 320, 64, 64])]; tensor reshape_21_cast_fp16 = reshape(shape = reshape_21_shape_0, x = real_div_5_cast_fp16)[name = tensor("reshape_21_cast_fp16")]; tensor add_11_gamma_0_to_fp16 = const()[name = tensor("add_11_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18242624)))]; tensor add_11_beta_0_to_fp16 = const()[name = tensor("add_11_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18243328)))]; tensor add_11_epsilon_0_to_fp16 = const()[name = tensor("add_11_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_11_cast_fp16 = batch_norm(beta = add_11_beta_0_to_fp16, epsilon = add_11_epsilon_0_to_fp16, gamma = add_11_gamma_0_to_fp16, mean = add_1_mean_0_to_fp16, variance = add_1_variance_0_to_fp16, x = reshape_21_cast_fp16)[name = tensor("add_11_cast_fp16")]; tensor var_1658 = const()[name = tensor("op_1658"), val = tensor([1, 1])]; tensor var_1660 = const()[name = tensor("op_1660"), val = tensor([1, 1])]; tensor hidden_states_25_pad_type_0 = const()[name = tensor("hidden_states_25_pad_type_0"), val = tensor("custom")]; tensor hidden_states_25_pad_0 = const()[name = tensor("hidden_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_1_proj_in_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18244032)))]; tensor down_blocks_0_attentions_1_proj_in_bias_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18448896)))]; tensor hidden_states_25_cast_fp16 = conv(bias = down_blocks_0_attentions_1_proj_in_bias_to_fp16, dilations = var_1660, groups = var_123, pad = hidden_states_25_pad_0, pad_type = hidden_states_25_pad_type_0, strides = var_1658, weight = down_blocks_0_attentions_1_proj_in_weight_to_fp16, x = add_11_cast_fp16)[name = tensor("hidden_states_25_cast_fp16")]; tensor var_1665 = const()[name = tensor("op_1665"), val = tensor([2, 320, 1, 4096])]; tensor inputs_7_cast_fp16 = reshape(shape = var_1665, x = hidden_states_25_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; tensor hidden_states_27_axes_0 = const()[name = tensor("hidden_states_27_axes_0"), val = tensor([1])]; tensor hidden_states_27_gamma_0_to_fp16 = const()[name = tensor("hidden_states_27_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18449600)))]; tensor hidden_states_27_beta_0_to_fp16 = const()[name = tensor("hidden_states_27_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18450304)))]; tensor var_1681_to_fp16 = const()[name = tensor("op_1681_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_27_cast_fp16 = layer_norm(axes = hidden_states_27_axes_0, beta = hidden_states_27_beta_0_to_fp16, epsilon = var_1681_to_fp16, gamma = hidden_states_27_gamma_0_to_fp16, x = inputs_7_cast_fp16)[name = tensor("hidden_states_27_cast_fp16")]; tensor var_1696 = const()[name = tensor("op_1696"), val = tensor([1, 1])]; tensor var_1698 = const()[name = tensor("op_1698"), val = tensor([1, 1])]; tensor q_5_pad_type_0 = const()[name = tensor("q_5_pad_type_0"), val = tensor("custom")]; tensor q_5_pad_0 = const()[name = tensor("q_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18451008)))]; tensor q_5_cast_fp16 = conv(dilations = var_1698, groups = var_123, pad = q_5_pad_0, pad_type = q_5_pad_type_0, strides = var_1696, weight = down_blocks_0_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_27_cast_fp16)[name = tensor("q_5_cast_fp16")]; tensor var_1702 = const()[name = tensor("op_1702"), val = tensor([1, 1])]; tensor var_1704 = const()[name = tensor("op_1704"), val = tensor([1, 1])]; tensor k_9_pad_type_0 = const()[name = tensor("k_9_pad_type_0"), val = tensor("custom")]; tensor k_9_pad_0 = const()[name = tensor("k_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18655872)))]; tensor k_9_cast_fp16 = conv(dilations = var_1704, groups = var_123, pad = k_9_pad_0, pad_type = k_9_pad_type_0, strides = var_1702, weight = down_blocks_0_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_27_cast_fp16)[name = tensor("k_9_cast_fp16")]; tensor var_1708 = const()[name = tensor("op_1708"), val = tensor([1, 1])]; tensor var_1710 = const()[name = tensor("op_1710"), val = tensor([1, 1])]; tensor v_5_pad_type_0 = const()[name = tensor("v_5_pad_type_0"), val = tensor("custom")]; tensor v_5_pad_0 = const()[name = tensor("v_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18860736)))]; tensor v_5_cast_fp16 = conv(dilations = var_1710, groups = var_123, pad = v_5_pad_0, pad_type = v_5_pad_type_0, strides = var_1708, weight = down_blocks_0_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_27_cast_fp16)[name = tensor("v_5_cast_fp16")]; tensor var_1714_begin_0 = const()[name = tensor("op_1714_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1714_end_0 = const()[name = tensor("op_1714_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_1714_end_mask_0 = const()[name = tensor("op_1714_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1714_cast_fp16 = slice_by_index(begin = var_1714_begin_0, end = var_1714_end_0, end_mask = var_1714_end_mask_0, x = q_5_cast_fp16)[name = tensor("op_1714_cast_fp16")]; tensor var_1718_begin_0 = const()[name = tensor("op_1718_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_1718_end_0 = const()[name = tensor("op_1718_end_0"), val = tensor([2, 80, 1, 4096])]; tensor var_1718_end_mask_0 = const()[name = tensor("op_1718_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1718_cast_fp16 = slice_by_index(begin = var_1718_begin_0, end = var_1718_end_0, end_mask = var_1718_end_mask_0, x = q_5_cast_fp16)[name = tensor("op_1718_cast_fp16")]; tensor var_1722_begin_0 = const()[name = tensor("op_1722_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_1722_end_0 = const()[name = tensor("op_1722_end_0"), val = tensor([2, 120, 1, 4096])]; tensor var_1722_end_mask_0 = const()[name = tensor("op_1722_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1722_cast_fp16 = slice_by_index(begin = var_1722_begin_0, end = var_1722_end_0, end_mask = var_1722_end_mask_0, x = q_5_cast_fp16)[name = tensor("op_1722_cast_fp16")]; tensor var_1726_begin_0 = const()[name = tensor("op_1726_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_1726_end_0 = const()[name = tensor("op_1726_end_0"), val = tensor([2, 160, 1, 4096])]; tensor var_1726_end_mask_0 = const()[name = tensor("op_1726_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1726_cast_fp16 = slice_by_index(begin = var_1726_begin_0, end = var_1726_end_0, end_mask = var_1726_end_mask_0, x = q_5_cast_fp16)[name = tensor("op_1726_cast_fp16")]; tensor var_1730_begin_0 = const()[name = tensor("op_1730_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_1730_end_0 = const()[name = tensor("op_1730_end_0"), val = tensor([2, 200, 1, 4096])]; tensor var_1730_end_mask_0 = const()[name = tensor("op_1730_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1730_cast_fp16 = slice_by_index(begin = var_1730_begin_0, end = var_1730_end_0, end_mask = var_1730_end_mask_0, x = q_5_cast_fp16)[name = tensor("op_1730_cast_fp16")]; tensor var_1734_begin_0 = const()[name = tensor("op_1734_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_1734_end_0 = const()[name = tensor("op_1734_end_0"), val = tensor([2, 240, 1, 4096])]; tensor var_1734_end_mask_0 = const()[name = tensor("op_1734_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1734_cast_fp16 = slice_by_index(begin = var_1734_begin_0, end = var_1734_end_0, end_mask = var_1734_end_mask_0, x = q_5_cast_fp16)[name = tensor("op_1734_cast_fp16")]; tensor var_1738_begin_0 = const()[name = tensor("op_1738_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_1738_end_0 = const()[name = tensor("op_1738_end_0"), val = tensor([2, 280, 1, 4096])]; tensor var_1738_end_mask_0 = const()[name = tensor("op_1738_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1738_cast_fp16 = slice_by_index(begin = var_1738_begin_0, end = var_1738_end_0, end_mask = var_1738_end_mask_0, x = q_5_cast_fp16)[name = tensor("op_1738_cast_fp16")]; tensor var_1742_begin_0 = const()[name = tensor("op_1742_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_1742_end_0 = const()[name = tensor("op_1742_end_0"), val = tensor([2, 320, 1, 4096])]; tensor var_1742_end_mask_0 = const()[name = tensor("op_1742_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1742_cast_fp16 = slice_by_index(begin = var_1742_begin_0, end = var_1742_end_0, end_mask = var_1742_end_mask_0, x = q_5_cast_fp16)[name = tensor("op_1742_cast_fp16")]; tensor var_1745_begin_0 = const()[name = tensor("op_1745_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1745_end_0 = const()[name = tensor("op_1745_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_1745_end_mask_0 = const()[name = tensor("op_1745_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1745_cast_fp16 = slice_by_index(begin = var_1745_begin_0, end = var_1745_end_0, end_mask = var_1745_end_mask_0, x = var_1714_cast_fp16)[name = tensor("op_1745_cast_fp16")]; tensor var_1746_begin_0 = const()[name = tensor("op_1746_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1746_end_0 = const()[name = tensor("op_1746_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_1746_end_mask_0 = const()[name = tensor("op_1746_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1746_cast_fp16 = slice_by_index(begin = var_1746_begin_0, end = var_1746_end_0, end_mask = var_1746_end_mask_0, x = var_1714_cast_fp16)[name = tensor("op_1746_cast_fp16")]; tensor var_1747_begin_0 = const()[name = tensor("op_1747_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1747_end_0 = const()[name = tensor("op_1747_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_1747_end_mask_0 = const()[name = tensor("op_1747_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1747_cast_fp16 = slice_by_index(begin = var_1747_begin_0, end = var_1747_end_0, end_mask = var_1747_end_mask_0, x = var_1714_cast_fp16)[name = tensor("op_1747_cast_fp16")]; tensor var_1748_begin_0 = const()[name = tensor("op_1748_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_1748_end_0 = const()[name = tensor("op_1748_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_1748_end_mask_0 = const()[name = tensor("op_1748_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1748_cast_fp16 = slice_by_index(begin = var_1748_begin_0, end = var_1748_end_0, end_mask = var_1748_end_mask_0, x = var_1714_cast_fp16)[name = tensor("op_1748_cast_fp16")]; tensor var_1749_begin_0 = const()[name = tensor("op_1749_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_1749_end_0 = const()[name = tensor("op_1749_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_1749_end_mask_0 = const()[name = tensor("op_1749_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1749_cast_fp16 = slice_by_index(begin = var_1749_begin_0, end = var_1749_end_0, end_mask = var_1749_end_mask_0, x = var_1714_cast_fp16)[name = tensor("op_1749_cast_fp16")]; tensor var_1750_begin_0 = const()[name = tensor("op_1750_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_1750_end_0 = const()[name = tensor("op_1750_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_1750_end_mask_0 = const()[name = tensor("op_1750_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1750_cast_fp16 = slice_by_index(begin = var_1750_begin_0, end = var_1750_end_0, end_mask = var_1750_end_mask_0, x = var_1714_cast_fp16)[name = tensor("op_1750_cast_fp16")]; tensor var_1751_begin_0 = const()[name = tensor("op_1751_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_1751_end_0 = const()[name = tensor("op_1751_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_1751_end_mask_0 = const()[name = tensor("op_1751_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1751_cast_fp16 = slice_by_index(begin = var_1751_begin_0, end = var_1751_end_0, end_mask = var_1751_end_mask_0, x = var_1714_cast_fp16)[name = tensor("op_1751_cast_fp16")]; tensor var_1752_begin_0 = const()[name = tensor("op_1752_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_1752_end_0 = const()[name = tensor("op_1752_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_1752_end_mask_0 = const()[name = tensor("op_1752_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1752_cast_fp16 = slice_by_index(begin = var_1752_begin_0, end = var_1752_end_0, end_mask = var_1752_end_mask_0, x = var_1714_cast_fp16)[name = tensor("op_1752_cast_fp16")]; tensor var_1753_begin_0 = const()[name = tensor("op_1753_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1753_end_0 = const()[name = tensor("op_1753_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_1753_end_mask_0 = const()[name = tensor("op_1753_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1753_cast_fp16 = slice_by_index(begin = var_1753_begin_0, end = var_1753_end_0, end_mask = var_1753_end_mask_0, x = var_1718_cast_fp16)[name = tensor("op_1753_cast_fp16")]; tensor var_1754_begin_0 = const()[name = tensor("op_1754_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1754_end_0 = const()[name = tensor("op_1754_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_1754_end_mask_0 = const()[name = tensor("op_1754_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1754_cast_fp16 = slice_by_index(begin = var_1754_begin_0, end = var_1754_end_0, end_mask = var_1754_end_mask_0, x = var_1718_cast_fp16)[name = tensor("op_1754_cast_fp16")]; tensor var_1755_begin_0 = const()[name = tensor("op_1755_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1755_end_0 = const()[name = tensor("op_1755_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_1755_end_mask_0 = const()[name = tensor("op_1755_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1755_cast_fp16 = slice_by_index(begin = var_1755_begin_0, end = var_1755_end_0, end_mask = var_1755_end_mask_0, x = var_1718_cast_fp16)[name = tensor("op_1755_cast_fp16")]; tensor var_1756_begin_0 = const()[name = tensor("op_1756_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_1756_end_0 = const()[name = tensor("op_1756_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_1756_end_mask_0 = const()[name = tensor("op_1756_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1756_cast_fp16 = slice_by_index(begin = var_1756_begin_0, end = var_1756_end_0, end_mask = var_1756_end_mask_0, x = var_1718_cast_fp16)[name = tensor("op_1756_cast_fp16")]; tensor var_1757_begin_0 = const()[name = tensor("op_1757_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_1757_end_0 = const()[name = tensor("op_1757_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_1757_end_mask_0 = const()[name = tensor("op_1757_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1757_cast_fp16 = slice_by_index(begin = var_1757_begin_0, end = var_1757_end_0, end_mask = var_1757_end_mask_0, x = var_1718_cast_fp16)[name = tensor("op_1757_cast_fp16")]; tensor var_1758_begin_0 = const()[name = tensor("op_1758_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_1758_end_0 = const()[name = tensor("op_1758_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_1758_end_mask_0 = const()[name = tensor("op_1758_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1758_cast_fp16 = slice_by_index(begin = var_1758_begin_0, end = var_1758_end_0, end_mask = var_1758_end_mask_0, x = var_1718_cast_fp16)[name = tensor("op_1758_cast_fp16")]; tensor var_1759_begin_0 = const()[name = tensor("op_1759_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_1759_end_0 = const()[name = tensor("op_1759_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_1759_end_mask_0 = const()[name = tensor("op_1759_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1759_cast_fp16 = slice_by_index(begin = var_1759_begin_0, end = var_1759_end_0, end_mask = var_1759_end_mask_0, x = var_1718_cast_fp16)[name = tensor("op_1759_cast_fp16")]; tensor var_1760_begin_0 = const()[name = tensor("op_1760_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_1760_end_0 = const()[name = tensor("op_1760_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_1760_end_mask_0 = const()[name = tensor("op_1760_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1760_cast_fp16 = slice_by_index(begin = var_1760_begin_0, end = var_1760_end_0, end_mask = var_1760_end_mask_0, x = var_1718_cast_fp16)[name = tensor("op_1760_cast_fp16")]; tensor var_1761_begin_0 = const()[name = tensor("op_1761_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1761_end_0 = const()[name = tensor("op_1761_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_1761_end_mask_0 = const()[name = tensor("op_1761_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1761_cast_fp16 = slice_by_index(begin = var_1761_begin_0, end = var_1761_end_0, end_mask = var_1761_end_mask_0, x = var_1722_cast_fp16)[name = tensor("op_1761_cast_fp16")]; tensor var_1762_begin_0 = const()[name = tensor("op_1762_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1762_end_0 = const()[name = tensor("op_1762_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_1762_end_mask_0 = const()[name = tensor("op_1762_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1762_cast_fp16 = slice_by_index(begin = var_1762_begin_0, end = var_1762_end_0, end_mask = var_1762_end_mask_0, x = var_1722_cast_fp16)[name = tensor("op_1762_cast_fp16")]; tensor var_1763_begin_0 = const()[name = tensor("op_1763_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1763_end_0 = const()[name = tensor("op_1763_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_1763_end_mask_0 = const()[name = tensor("op_1763_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1763_cast_fp16 = slice_by_index(begin = var_1763_begin_0, end = var_1763_end_0, end_mask = var_1763_end_mask_0, x = var_1722_cast_fp16)[name = tensor("op_1763_cast_fp16")]; tensor var_1764_begin_0 = const()[name = tensor("op_1764_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_1764_end_0 = const()[name = tensor("op_1764_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_1764_end_mask_0 = const()[name = tensor("op_1764_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1764_cast_fp16 = slice_by_index(begin = var_1764_begin_0, end = var_1764_end_0, end_mask = var_1764_end_mask_0, x = var_1722_cast_fp16)[name = tensor("op_1764_cast_fp16")]; tensor var_1765_begin_0 = const()[name = tensor("op_1765_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_1765_end_0 = const()[name = tensor("op_1765_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_1765_end_mask_0 = const()[name = tensor("op_1765_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1765_cast_fp16 = slice_by_index(begin = var_1765_begin_0, end = var_1765_end_0, end_mask = var_1765_end_mask_0, x = var_1722_cast_fp16)[name = tensor("op_1765_cast_fp16")]; tensor var_1766_begin_0 = const()[name = tensor("op_1766_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_1766_end_0 = const()[name = tensor("op_1766_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_1766_end_mask_0 = const()[name = tensor("op_1766_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1766_cast_fp16 = slice_by_index(begin = var_1766_begin_0, end = var_1766_end_0, end_mask = var_1766_end_mask_0, x = var_1722_cast_fp16)[name = tensor("op_1766_cast_fp16")]; tensor var_1767_begin_0 = const()[name = tensor("op_1767_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_1767_end_0 = const()[name = tensor("op_1767_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_1767_end_mask_0 = const()[name = tensor("op_1767_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1767_cast_fp16 = slice_by_index(begin = var_1767_begin_0, end = var_1767_end_0, end_mask = var_1767_end_mask_0, x = var_1722_cast_fp16)[name = tensor("op_1767_cast_fp16")]; tensor var_1768_begin_0 = const()[name = tensor("op_1768_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_1768_end_0 = const()[name = tensor("op_1768_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_1768_end_mask_0 = const()[name = tensor("op_1768_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1768_cast_fp16 = slice_by_index(begin = var_1768_begin_0, end = var_1768_end_0, end_mask = var_1768_end_mask_0, x = var_1722_cast_fp16)[name = tensor("op_1768_cast_fp16")]; tensor var_1769_begin_0 = const()[name = tensor("op_1769_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1769_end_0 = const()[name = tensor("op_1769_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_1769_end_mask_0 = const()[name = tensor("op_1769_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1769_cast_fp16 = slice_by_index(begin = var_1769_begin_0, end = var_1769_end_0, end_mask = var_1769_end_mask_0, x = var_1726_cast_fp16)[name = tensor("op_1769_cast_fp16")]; tensor var_1770_begin_0 = const()[name = tensor("op_1770_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1770_end_0 = const()[name = tensor("op_1770_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_1770_end_mask_0 = const()[name = tensor("op_1770_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1770_cast_fp16 = slice_by_index(begin = var_1770_begin_0, end = var_1770_end_0, end_mask = var_1770_end_mask_0, x = var_1726_cast_fp16)[name = tensor("op_1770_cast_fp16")]; tensor var_1771_begin_0 = const()[name = tensor("op_1771_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1771_end_0 = const()[name = tensor("op_1771_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_1771_end_mask_0 = const()[name = tensor("op_1771_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1771_cast_fp16 = slice_by_index(begin = var_1771_begin_0, end = var_1771_end_0, end_mask = var_1771_end_mask_0, x = var_1726_cast_fp16)[name = tensor("op_1771_cast_fp16")]; tensor var_1772_begin_0 = const()[name = tensor("op_1772_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_1772_end_0 = const()[name = tensor("op_1772_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_1772_end_mask_0 = const()[name = tensor("op_1772_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1772_cast_fp16 = slice_by_index(begin = var_1772_begin_0, end = var_1772_end_0, end_mask = var_1772_end_mask_0, x = var_1726_cast_fp16)[name = tensor("op_1772_cast_fp16")]; tensor var_1773_begin_0 = const()[name = tensor("op_1773_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_1773_end_0 = const()[name = tensor("op_1773_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_1773_end_mask_0 = const()[name = tensor("op_1773_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1773_cast_fp16 = slice_by_index(begin = var_1773_begin_0, end = var_1773_end_0, end_mask = var_1773_end_mask_0, x = var_1726_cast_fp16)[name = tensor("op_1773_cast_fp16")]; tensor var_1774_begin_0 = const()[name = tensor("op_1774_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_1774_end_0 = const()[name = tensor("op_1774_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_1774_end_mask_0 = const()[name = tensor("op_1774_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1774_cast_fp16 = slice_by_index(begin = var_1774_begin_0, end = var_1774_end_0, end_mask = var_1774_end_mask_0, x = var_1726_cast_fp16)[name = tensor("op_1774_cast_fp16")]; tensor var_1775_begin_0 = const()[name = tensor("op_1775_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_1775_end_0 = const()[name = tensor("op_1775_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_1775_end_mask_0 = const()[name = tensor("op_1775_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1775_cast_fp16 = slice_by_index(begin = var_1775_begin_0, end = var_1775_end_0, end_mask = var_1775_end_mask_0, x = var_1726_cast_fp16)[name = tensor("op_1775_cast_fp16")]; tensor var_1776_begin_0 = const()[name = tensor("op_1776_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_1776_end_0 = const()[name = tensor("op_1776_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_1776_end_mask_0 = const()[name = tensor("op_1776_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1776_cast_fp16 = slice_by_index(begin = var_1776_begin_0, end = var_1776_end_0, end_mask = var_1776_end_mask_0, x = var_1726_cast_fp16)[name = tensor("op_1776_cast_fp16")]; tensor var_1777_begin_0 = const()[name = tensor("op_1777_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1777_end_0 = const()[name = tensor("op_1777_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_1777_end_mask_0 = const()[name = tensor("op_1777_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1777_cast_fp16 = slice_by_index(begin = var_1777_begin_0, end = var_1777_end_0, end_mask = var_1777_end_mask_0, x = var_1730_cast_fp16)[name = tensor("op_1777_cast_fp16")]; tensor var_1778_begin_0 = const()[name = tensor("op_1778_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1778_end_0 = const()[name = tensor("op_1778_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_1778_end_mask_0 = const()[name = tensor("op_1778_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1778_cast_fp16 = slice_by_index(begin = var_1778_begin_0, end = var_1778_end_0, end_mask = var_1778_end_mask_0, x = var_1730_cast_fp16)[name = tensor("op_1778_cast_fp16")]; tensor var_1779_begin_0 = const()[name = tensor("op_1779_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1779_end_0 = const()[name = tensor("op_1779_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_1779_end_mask_0 = const()[name = tensor("op_1779_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1779_cast_fp16 = slice_by_index(begin = var_1779_begin_0, end = var_1779_end_0, end_mask = var_1779_end_mask_0, x = var_1730_cast_fp16)[name = tensor("op_1779_cast_fp16")]; tensor var_1780_begin_0 = const()[name = tensor("op_1780_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_1780_end_0 = const()[name = tensor("op_1780_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_1780_end_mask_0 = const()[name = tensor("op_1780_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1780_cast_fp16 = slice_by_index(begin = var_1780_begin_0, end = var_1780_end_0, end_mask = var_1780_end_mask_0, x = var_1730_cast_fp16)[name = tensor("op_1780_cast_fp16")]; tensor var_1781_begin_0 = const()[name = tensor("op_1781_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_1781_end_0 = const()[name = tensor("op_1781_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_1781_end_mask_0 = const()[name = tensor("op_1781_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1781_cast_fp16 = slice_by_index(begin = var_1781_begin_0, end = var_1781_end_0, end_mask = var_1781_end_mask_0, x = var_1730_cast_fp16)[name = tensor("op_1781_cast_fp16")]; tensor var_1782_begin_0 = const()[name = tensor("op_1782_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_1782_end_0 = const()[name = tensor("op_1782_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_1782_end_mask_0 = const()[name = tensor("op_1782_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1782_cast_fp16 = slice_by_index(begin = var_1782_begin_0, end = var_1782_end_0, end_mask = var_1782_end_mask_0, x = var_1730_cast_fp16)[name = tensor("op_1782_cast_fp16")]; tensor var_1783_begin_0 = const()[name = tensor("op_1783_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_1783_end_0 = const()[name = tensor("op_1783_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_1783_end_mask_0 = const()[name = tensor("op_1783_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1783_cast_fp16 = slice_by_index(begin = var_1783_begin_0, end = var_1783_end_0, end_mask = var_1783_end_mask_0, x = var_1730_cast_fp16)[name = tensor("op_1783_cast_fp16")]; tensor var_1784_begin_0 = const()[name = tensor("op_1784_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_1784_end_0 = const()[name = tensor("op_1784_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_1784_end_mask_0 = const()[name = tensor("op_1784_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1784_cast_fp16 = slice_by_index(begin = var_1784_begin_0, end = var_1784_end_0, end_mask = var_1784_end_mask_0, x = var_1730_cast_fp16)[name = tensor("op_1784_cast_fp16")]; tensor var_1785_begin_0 = const()[name = tensor("op_1785_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1785_end_0 = const()[name = tensor("op_1785_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_1785_end_mask_0 = const()[name = tensor("op_1785_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1785_cast_fp16 = slice_by_index(begin = var_1785_begin_0, end = var_1785_end_0, end_mask = var_1785_end_mask_0, x = var_1734_cast_fp16)[name = tensor("op_1785_cast_fp16")]; tensor var_1786_begin_0 = const()[name = tensor("op_1786_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1786_end_0 = const()[name = tensor("op_1786_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_1786_end_mask_0 = const()[name = tensor("op_1786_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1786_cast_fp16 = slice_by_index(begin = var_1786_begin_0, end = var_1786_end_0, end_mask = var_1786_end_mask_0, x = var_1734_cast_fp16)[name = tensor("op_1786_cast_fp16")]; tensor var_1787_begin_0 = const()[name = tensor("op_1787_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1787_end_0 = const()[name = tensor("op_1787_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_1787_end_mask_0 = const()[name = tensor("op_1787_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1787_cast_fp16 = slice_by_index(begin = var_1787_begin_0, end = var_1787_end_0, end_mask = var_1787_end_mask_0, x = var_1734_cast_fp16)[name = tensor("op_1787_cast_fp16")]; tensor var_1788_begin_0 = const()[name = tensor("op_1788_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_1788_end_0 = const()[name = tensor("op_1788_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_1788_end_mask_0 = const()[name = tensor("op_1788_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1788_cast_fp16 = slice_by_index(begin = var_1788_begin_0, end = var_1788_end_0, end_mask = var_1788_end_mask_0, x = var_1734_cast_fp16)[name = tensor("op_1788_cast_fp16")]; tensor var_1789_begin_0 = const()[name = tensor("op_1789_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_1789_end_0 = const()[name = tensor("op_1789_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_1789_end_mask_0 = const()[name = tensor("op_1789_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1789_cast_fp16 = slice_by_index(begin = var_1789_begin_0, end = var_1789_end_0, end_mask = var_1789_end_mask_0, x = var_1734_cast_fp16)[name = tensor("op_1789_cast_fp16")]; tensor var_1790_begin_0 = const()[name = tensor("op_1790_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_1790_end_0 = const()[name = tensor("op_1790_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_1790_end_mask_0 = const()[name = tensor("op_1790_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1790_cast_fp16 = slice_by_index(begin = var_1790_begin_0, end = var_1790_end_0, end_mask = var_1790_end_mask_0, x = var_1734_cast_fp16)[name = tensor("op_1790_cast_fp16")]; tensor var_1791_begin_0 = const()[name = tensor("op_1791_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_1791_end_0 = const()[name = tensor("op_1791_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_1791_end_mask_0 = const()[name = tensor("op_1791_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1791_cast_fp16 = slice_by_index(begin = var_1791_begin_0, end = var_1791_end_0, end_mask = var_1791_end_mask_0, x = var_1734_cast_fp16)[name = tensor("op_1791_cast_fp16")]; tensor var_1792_begin_0 = const()[name = tensor("op_1792_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_1792_end_0 = const()[name = tensor("op_1792_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_1792_end_mask_0 = const()[name = tensor("op_1792_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1792_cast_fp16 = slice_by_index(begin = var_1792_begin_0, end = var_1792_end_0, end_mask = var_1792_end_mask_0, x = var_1734_cast_fp16)[name = tensor("op_1792_cast_fp16")]; tensor var_1793_begin_0 = const()[name = tensor("op_1793_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1793_end_0 = const()[name = tensor("op_1793_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_1793_end_mask_0 = const()[name = tensor("op_1793_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1793_cast_fp16 = slice_by_index(begin = var_1793_begin_0, end = var_1793_end_0, end_mask = var_1793_end_mask_0, x = var_1738_cast_fp16)[name = tensor("op_1793_cast_fp16")]; tensor var_1794_begin_0 = const()[name = tensor("op_1794_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1794_end_0 = const()[name = tensor("op_1794_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_1794_end_mask_0 = const()[name = tensor("op_1794_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1794_cast_fp16 = slice_by_index(begin = var_1794_begin_0, end = var_1794_end_0, end_mask = var_1794_end_mask_0, x = var_1738_cast_fp16)[name = tensor("op_1794_cast_fp16")]; tensor var_1795_begin_0 = const()[name = tensor("op_1795_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1795_end_0 = const()[name = tensor("op_1795_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_1795_end_mask_0 = const()[name = tensor("op_1795_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1795_cast_fp16 = slice_by_index(begin = var_1795_begin_0, end = var_1795_end_0, end_mask = var_1795_end_mask_0, x = var_1738_cast_fp16)[name = tensor("op_1795_cast_fp16")]; tensor var_1796_begin_0 = const()[name = tensor("op_1796_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_1796_end_0 = const()[name = tensor("op_1796_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_1796_end_mask_0 = const()[name = tensor("op_1796_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1796_cast_fp16 = slice_by_index(begin = var_1796_begin_0, end = var_1796_end_0, end_mask = var_1796_end_mask_0, x = var_1738_cast_fp16)[name = tensor("op_1796_cast_fp16")]; tensor var_1797_begin_0 = const()[name = tensor("op_1797_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_1797_end_0 = const()[name = tensor("op_1797_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_1797_end_mask_0 = const()[name = tensor("op_1797_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1797_cast_fp16 = slice_by_index(begin = var_1797_begin_0, end = var_1797_end_0, end_mask = var_1797_end_mask_0, x = var_1738_cast_fp16)[name = tensor("op_1797_cast_fp16")]; tensor var_1798_begin_0 = const()[name = tensor("op_1798_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_1798_end_0 = const()[name = tensor("op_1798_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_1798_end_mask_0 = const()[name = tensor("op_1798_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1798_cast_fp16 = slice_by_index(begin = var_1798_begin_0, end = var_1798_end_0, end_mask = var_1798_end_mask_0, x = var_1738_cast_fp16)[name = tensor("op_1798_cast_fp16")]; tensor var_1799_begin_0 = const()[name = tensor("op_1799_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_1799_end_0 = const()[name = tensor("op_1799_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_1799_end_mask_0 = const()[name = tensor("op_1799_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1799_cast_fp16 = slice_by_index(begin = var_1799_begin_0, end = var_1799_end_0, end_mask = var_1799_end_mask_0, x = var_1738_cast_fp16)[name = tensor("op_1799_cast_fp16")]; tensor var_1800_begin_0 = const()[name = tensor("op_1800_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_1800_end_0 = const()[name = tensor("op_1800_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_1800_end_mask_0 = const()[name = tensor("op_1800_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1800_cast_fp16 = slice_by_index(begin = var_1800_begin_0, end = var_1800_end_0, end_mask = var_1800_end_mask_0, x = var_1738_cast_fp16)[name = tensor("op_1800_cast_fp16")]; tensor var_1801_begin_0 = const()[name = tensor("op_1801_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1801_end_0 = const()[name = tensor("op_1801_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_1801_end_mask_0 = const()[name = tensor("op_1801_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1801_cast_fp16 = slice_by_index(begin = var_1801_begin_0, end = var_1801_end_0, end_mask = var_1801_end_mask_0, x = var_1742_cast_fp16)[name = tensor("op_1801_cast_fp16")]; tensor var_1802_begin_0 = const()[name = tensor("op_1802_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1802_end_0 = const()[name = tensor("op_1802_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_1802_end_mask_0 = const()[name = tensor("op_1802_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1802_cast_fp16 = slice_by_index(begin = var_1802_begin_0, end = var_1802_end_0, end_mask = var_1802_end_mask_0, x = var_1742_cast_fp16)[name = tensor("op_1802_cast_fp16")]; tensor var_1803_begin_0 = const()[name = tensor("op_1803_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1803_end_0 = const()[name = tensor("op_1803_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_1803_end_mask_0 = const()[name = tensor("op_1803_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1803_cast_fp16 = slice_by_index(begin = var_1803_begin_0, end = var_1803_end_0, end_mask = var_1803_end_mask_0, x = var_1742_cast_fp16)[name = tensor("op_1803_cast_fp16")]; tensor var_1804_begin_0 = const()[name = tensor("op_1804_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_1804_end_0 = const()[name = tensor("op_1804_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_1804_end_mask_0 = const()[name = tensor("op_1804_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1804_cast_fp16 = slice_by_index(begin = var_1804_begin_0, end = var_1804_end_0, end_mask = var_1804_end_mask_0, x = var_1742_cast_fp16)[name = tensor("op_1804_cast_fp16")]; tensor var_1805_begin_0 = const()[name = tensor("op_1805_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_1805_end_0 = const()[name = tensor("op_1805_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_1805_end_mask_0 = const()[name = tensor("op_1805_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1805_cast_fp16 = slice_by_index(begin = var_1805_begin_0, end = var_1805_end_0, end_mask = var_1805_end_mask_0, x = var_1742_cast_fp16)[name = tensor("op_1805_cast_fp16")]; tensor var_1806_begin_0 = const()[name = tensor("op_1806_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_1806_end_0 = const()[name = tensor("op_1806_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_1806_end_mask_0 = const()[name = tensor("op_1806_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1806_cast_fp16 = slice_by_index(begin = var_1806_begin_0, end = var_1806_end_0, end_mask = var_1806_end_mask_0, x = var_1742_cast_fp16)[name = tensor("op_1806_cast_fp16")]; tensor var_1807_begin_0 = const()[name = tensor("op_1807_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_1807_end_0 = const()[name = tensor("op_1807_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_1807_end_mask_0 = const()[name = tensor("op_1807_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1807_cast_fp16 = slice_by_index(begin = var_1807_begin_0, end = var_1807_end_0, end_mask = var_1807_end_mask_0, x = var_1742_cast_fp16)[name = tensor("op_1807_cast_fp16")]; tensor var_1808_begin_0 = const()[name = tensor("op_1808_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_1808_end_0 = const()[name = tensor("op_1808_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_1808_end_mask_0 = const()[name = tensor("op_1808_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1808_cast_fp16 = slice_by_index(begin = var_1808_begin_0, end = var_1808_end_0, end_mask = var_1808_end_mask_0, x = var_1742_cast_fp16)[name = tensor("op_1808_cast_fp16")]; tensor k_11_perm_0 = const()[name = tensor("k_11_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_1813_begin_0 = const()[name = tensor("op_1813_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1813_end_0 = const()[name = tensor("op_1813_end_0"), val = tensor([2, 4096, 1, 40])]; tensor var_1813_end_mask_0 = const()[name = tensor("op_1813_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_29 = transpose(perm = k_11_perm_0, x = k_9_cast_fp16)[name = tensor("transpose_29")]; tensor var_1813_cast_fp16 = slice_by_index(begin = var_1813_begin_0, end = var_1813_end_0, end_mask = var_1813_end_mask_0, x = transpose_29)[name = tensor("op_1813_cast_fp16")]; tensor var_1817_begin_0 = const()[name = tensor("op_1817_begin_0"), val = tensor([0, 0, 0, 40])]; tensor var_1817_end_0 = const()[name = tensor("op_1817_end_0"), val = tensor([2, 4096, 1, 80])]; tensor var_1817_end_mask_0 = const()[name = tensor("op_1817_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1817_cast_fp16 = slice_by_index(begin = var_1817_begin_0, end = var_1817_end_0, end_mask = var_1817_end_mask_0, x = transpose_29)[name = tensor("op_1817_cast_fp16")]; tensor var_1821_begin_0 = const()[name = tensor("op_1821_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_1821_end_0 = const()[name = tensor("op_1821_end_0"), val = tensor([2, 4096, 1, 120])]; tensor var_1821_end_mask_0 = const()[name = tensor("op_1821_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1821_cast_fp16 = slice_by_index(begin = var_1821_begin_0, end = var_1821_end_0, end_mask = var_1821_end_mask_0, x = transpose_29)[name = tensor("op_1821_cast_fp16")]; tensor var_1825_begin_0 = const()[name = tensor("op_1825_begin_0"), val = tensor([0, 0, 0, 120])]; tensor var_1825_end_0 = const()[name = tensor("op_1825_end_0"), val = tensor([2, 4096, 1, 160])]; tensor var_1825_end_mask_0 = const()[name = tensor("op_1825_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1825_cast_fp16 = slice_by_index(begin = var_1825_begin_0, end = var_1825_end_0, end_mask = var_1825_end_mask_0, x = transpose_29)[name = tensor("op_1825_cast_fp16")]; tensor var_1829_begin_0 = const()[name = tensor("op_1829_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_1829_end_0 = const()[name = tensor("op_1829_end_0"), val = tensor([2, 4096, 1, 200])]; tensor var_1829_end_mask_0 = const()[name = tensor("op_1829_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1829_cast_fp16 = slice_by_index(begin = var_1829_begin_0, end = var_1829_end_0, end_mask = var_1829_end_mask_0, x = transpose_29)[name = tensor("op_1829_cast_fp16")]; tensor var_1833_begin_0 = const()[name = tensor("op_1833_begin_0"), val = tensor([0, 0, 0, 200])]; tensor var_1833_end_0 = const()[name = tensor("op_1833_end_0"), val = tensor([2, 4096, 1, 240])]; tensor var_1833_end_mask_0 = const()[name = tensor("op_1833_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1833_cast_fp16 = slice_by_index(begin = var_1833_begin_0, end = var_1833_end_0, end_mask = var_1833_end_mask_0, x = transpose_29)[name = tensor("op_1833_cast_fp16")]; tensor var_1837_begin_0 = const()[name = tensor("op_1837_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_1837_end_0 = const()[name = tensor("op_1837_end_0"), val = tensor([2, 4096, 1, 280])]; tensor var_1837_end_mask_0 = const()[name = tensor("op_1837_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1837_cast_fp16 = slice_by_index(begin = var_1837_begin_0, end = var_1837_end_0, end_mask = var_1837_end_mask_0, x = transpose_29)[name = tensor("op_1837_cast_fp16")]; tensor var_1841_begin_0 = const()[name = tensor("op_1841_begin_0"), val = tensor([0, 0, 0, 280])]; tensor var_1841_end_0 = const()[name = tensor("op_1841_end_0"), val = tensor([2, 4096, 1, 320])]; tensor var_1841_end_mask_0 = const()[name = tensor("op_1841_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1841_cast_fp16 = slice_by_index(begin = var_1841_begin_0, end = var_1841_end_0, end_mask = var_1841_end_mask_0, x = transpose_29)[name = tensor("op_1841_cast_fp16")]; tensor var_1843_begin_0 = const()[name = tensor("op_1843_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1843_end_0 = const()[name = tensor("op_1843_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_1843_end_mask_0 = const()[name = tensor("op_1843_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1843_cast_fp16 = slice_by_index(begin = var_1843_begin_0, end = var_1843_end_0, end_mask = var_1843_end_mask_0, x = v_5_cast_fp16)[name = tensor("op_1843_cast_fp16")]; tensor var_1847_begin_0 = const()[name = tensor("op_1847_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_1847_end_0 = const()[name = tensor("op_1847_end_0"), val = tensor([2, 80, 1, 4096])]; tensor var_1847_end_mask_0 = const()[name = tensor("op_1847_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1847_cast_fp16 = slice_by_index(begin = var_1847_begin_0, end = var_1847_end_0, end_mask = var_1847_end_mask_0, x = v_5_cast_fp16)[name = tensor("op_1847_cast_fp16")]; tensor var_1851_begin_0 = const()[name = tensor("op_1851_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_1851_end_0 = const()[name = tensor("op_1851_end_0"), val = tensor([2, 120, 1, 4096])]; tensor var_1851_end_mask_0 = const()[name = tensor("op_1851_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1851_cast_fp16 = slice_by_index(begin = var_1851_begin_0, end = var_1851_end_0, end_mask = var_1851_end_mask_0, x = v_5_cast_fp16)[name = tensor("op_1851_cast_fp16")]; tensor var_1855_begin_0 = const()[name = tensor("op_1855_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_1855_end_0 = const()[name = tensor("op_1855_end_0"), val = tensor([2, 160, 1, 4096])]; tensor var_1855_end_mask_0 = const()[name = tensor("op_1855_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1855_cast_fp16 = slice_by_index(begin = var_1855_begin_0, end = var_1855_end_0, end_mask = var_1855_end_mask_0, x = v_5_cast_fp16)[name = tensor("op_1855_cast_fp16")]; tensor var_1859_begin_0 = const()[name = tensor("op_1859_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_1859_end_0 = const()[name = tensor("op_1859_end_0"), val = tensor([2, 200, 1, 4096])]; tensor var_1859_end_mask_0 = const()[name = tensor("op_1859_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1859_cast_fp16 = slice_by_index(begin = var_1859_begin_0, end = var_1859_end_0, end_mask = var_1859_end_mask_0, x = v_5_cast_fp16)[name = tensor("op_1859_cast_fp16")]; tensor var_1863_begin_0 = const()[name = tensor("op_1863_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_1863_end_0 = const()[name = tensor("op_1863_end_0"), val = tensor([2, 240, 1, 4096])]; tensor var_1863_end_mask_0 = const()[name = tensor("op_1863_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1863_cast_fp16 = slice_by_index(begin = var_1863_begin_0, end = var_1863_end_0, end_mask = var_1863_end_mask_0, x = v_5_cast_fp16)[name = tensor("op_1863_cast_fp16")]; tensor var_1867_begin_0 = const()[name = tensor("op_1867_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_1867_end_0 = const()[name = tensor("op_1867_end_0"), val = tensor([2, 280, 1, 4096])]; tensor var_1867_end_mask_0 = const()[name = tensor("op_1867_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1867_cast_fp16 = slice_by_index(begin = var_1867_begin_0, end = var_1867_end_0, end_mask = var_1867_end_mask_0, x = v_5_cast_fp16)[name = tensor("op_1867_cast_fp16")]; tensor var_1871_begin_0 = const()[name = tensor("op_1871_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_1871_end_0 = const()[name = tensor("op_1871_end_0"), val = tensor([2, 320, 1, 4096])]; tensor var_1871_end_mask_0 = const()[name = tensor("op_1871_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1871_cast_fp16 = slice_by_index(begin = var_1871_begin_0, end = var_1871_end_0, end_mask = var_1871_end_mask_0, x = v_5_cast_fp16)[name = tensor("op_1871_cast_fp16")]; tensor var_1875_equation_0 = const()[name = tensor("op_1875_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1875_cast_fp16 = einsum(equation = var_1875_equation_0, values = (var_1813_cast_fp16, var_1745_cast_fp16))[name = tensor("op_1875_cast_fp16")]; tensor var_1876_to_fp16 = const()[name = tensor("op_1876_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_257_cast_fp16 = mul(x = var_1875_cast_fp16, y = var_1876_to_fp16)[name = tensor("aw_chunk_257_cast_fp16")]; tensor var_1879_equation_0 = const()[name = tensor("op_1879_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1879_cast_fp16 = einsum(equation = var_1879_equation_0, values = (var_1813_cast_fp16, var_1746_cast_fp16))[name = tensor("op_1879_cast_fp16")]; tensor var_1880_to_fp16 = const()[name = tensor("op_1880_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_259_cast_fp16 = mul(x = var_1879_cast_fp16, y = var_1880_to_fp16)[name = tensor("aw_chunk_259_cast_fp16")]; tensor var_1883_equation_0 = const()[name = tensor("op_1883_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1883_cast_fp16 = einsum(equation = var_1883_equation_0, values = (var_1813_cast_fp16, var_1747_cast_fp16))[name = tensor("op_1883_cast_fp16")]; tensor var_1884_to_fp16 = const()[name = tensor("op_1884_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_261_cast_fp16 = mul(x = var_1883_cast_fp16, y = var_1884_to_fp16)[name = tensor("aw_chunk_261_cast_fp16")]; tensor var_1887_equation_0 = const()[name = tensor("op_1887_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1887_cast_fp16 = einsum(equation = var_1887_equation_0, values = (var_1813_cast_fp16, var_1748_cast_fp16))[name = tensor("op_1887_cast_fp16")]; tensor var_1888_to_fp16 = const()[name = tensor("op_1888_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_263_cast_fp16 = mul(x = var_1887_cast_fp16, y = var_1888_to_fp16)[name = tensor("aw_chunk_263_cast_fp16")]; tensor var_1891_equation_0 = const()[name = tensor("op_1891_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1891_cast_fp16 = einsum(equation = var_1891_equation_0, values = (var_1813_cast_fp16, var_1749_cast_fp16))[name = tensor("op_1891_cast_fp16")]; tensor var_1892_to_fp16 = const()[name = tensor("op_1892_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_265_cast_fp16 = mul(x = var_1891_cast_fp16, y = var_1892_to_fp16)[name = tensor("aw_chunk_265_cast_fp16")]; tensor var_1895_equation_0 = const()[name = tensor("op_1895_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1895_cast_fp16 = einsum(equation = var_1895_equation_0, values = (var_1813_cast_fp16, var_1750_cast_fp16))[name = tensor("op_1895_cast_fp16")]; tensor var_1896_to_fp16 = const()[name = tensor("op_1896_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_267_cast_fp16 = mul(x = var_1895_cast_fp16, y = var_1896_to_fp16)[name = tensor("aw_chunk_267_cast_fp16")]; tensor var_1899_equation_0 = const()[name = tensor("op_1899_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1899_cast_fp16 = einsum(equation = var_1899_equation_0, values = (var_1813_cast_fp16, var_1751_cast_fp16))[name = tensor("op_1899_cast_fp16")]; tensor var_1900_to_fp16 = const()[name = tensor("op_1900_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_269_cast_fp16 = mul(x = var_1899_cast_fp16, y = var_1900_to_fp16)[name = tensor("aw_chunk_269_cast_fp16")]; tensor var_1903_equation_0 = const()[name = tensor("op_1903_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1903_cast_fp16 = einsum(equation = var_1903_equation_0, values = (var_1813_cast_fp16, var_1752_cast_fp16))[name = tensor("op_1903_cast_fp16")]; tensor var_1904_to_fp16 = const()[name = tensor("op_1904_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_271_cast_fp16 = mul(x = var_1903_cast_fp16, y = var_1904_to_fp16)[name = tensor("aw_chunk_271_cast_fp16")]; tensor var_1907_equation_0 = const()[name = tensor("op_1907_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1907_cast_fp16 = einsum(equation = var_1907_equation_0, values = (var_1817_cast_fp16, var_1753_cast_fp16))[name = tensor("op_1907_cast_fp16")]; tensor var_1908_to_fp16 = const()[name = tensor("op_1908_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_273_cast_fp16 = mul(x = var_1907_cast_fp16, y = var_1908_to_fp16)[name = tensor("aw_chunk_273_cast_fp16")]; tensor var_1911_equation_0 = const()[name = tensor("op_1911_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1911_cast_fp16 = einsum(equation = var_1911_equation_0, values = (var_1817_cast_fp16, var_1754_cast_fp16))[name = tensor("op_1911_cast_fp16")]; tensor var_1912_to_fp16 = const()[name = tensor("op_1912_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_275_cast_fp16 = mul(x = var_1911_cast_fp16, y = var_1912_to_fp16)[name = tensor("aw_chunk_275_cast_fp16")]; tensor var_1915_equation_0 = const()[name = tensor("op_1915_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1915_cast_fp16 = einsum(equation = var_1915_equation_0, values = (var_1817_cast_fp16, var_1755_cast_fp16))[name = tensor("op_1915_cast_fp16")]; tensor var_1916_to_fp16 = const()[name = tensor("op_1916_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_277_cast_fp16 = mul(x = var_1915_cast_fp16, y = var_1916_to_fp16)[name = tensor("aw_chunk_277_cast_fp16")]; tensor var_1919_equation_0 = const()[name = tensor("op_1919_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1919_cast_fp16 = einsum(equation = var_1919_equation_0, values = (var_1817_cast_fp16, var_1756_cast_fp16))[name = tensor("op_1919_cast_fp16")]; tensor var_1920_to_fp16 = const()[name = tensor("op_1920_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_279_cast_fp16 = mul(x = var_1919_cast_fp16, y = var_1920_to_fp16)[name = tensor("aw_chunk_279_cast_fp16")]; tensor var_1923_equation_0 = const()[name = tensor("op_1923_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1923_cast_fp16 = einsum(equation = var_1923_equation_0, values = (var_1817_cast_fp16, var_1757_cast_fp16))[name = tensor("op_1923_cast_fp16")]; tensor var_1924_to_fp16 = const()[name = tensor("op_1924_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_281_cast_fp16 = mul(x = var_1923_cast_fp16, y = var_1924_to_fp16)[name = tensor("aw_chunk_281_cast_fp16")]; tensor var_1927_equation_0 = const()[name = tensor("op_1927_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1927_cast_fp16 = einsum(equation = var_1927_equation_0, values = (var_1817_cast_fp16, var_1758_cast_fp16))[name = tensor("op_1927_cast_fp16")]; tensor var_1928_to_fp16 = const()[name = tensor("op_1928_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_283_cast_fp16 = mul(x = var_1927_cast_fp16, y = var_1928_to_fp16)[name = tensor("aw_chunk_283_cast_fp16")]; tensor var_1931_equation_0 = const()[name = tensor("op_1931_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1931_cast_fp16 = einsum(equation = var_1931_equation_0, values = (var_1817_cast_fp16, var_1759_cast_fp16))[name = tensor("op_1931_cast_fp16")]; tensor var_1932_to_fp16 = const()[name = tensor("op_1932_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_285_cast_fp16 = mul(x = var_1931_cast_fp16, y = var_1932_to_fp16)[name = tensor("aw_chunk_285_cast_fp16")]; tensor var_1935_equation_0 = const()[name = tensor("op_1935_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1935_cast_fp16 = einsum(equation = var_1935_equation_0, values = (var_1817_cast_fp16, var_1760_cast_fp16))[name = tensor("op_1935_cast_fp16")]; tensor var_1936_to_fp16 = const()[name = tensor("op_1936_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_287_cast_fp16 = mul(x = var_1935_cast_fp16, y = var_1936_to_fp16)[name = tensor("aw_chunk_287_cast_fp16")]; tensor var_1939_equation_0 = const()[name = tensor("op_1939_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1939_cast_fp16 = einsum(equation = var_1939_equation_0, values = (var_1821_cast_fp16, var_1761_cast_fp16))[name = tensor("op_1939_cast_fp16")]; tensor var_1940_to_fp16 = const()[name = tensor("op_1940_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_289_cast_fp16 = mul(x = var_1939_cast_fp16, y = var_1940_to_fp16)[name = tensor("aw_chunk_289_cast_fp16")]; tensor var_1943_equation_0 = const()[name = tensor("op_1943_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1943_cast_fp16 = einsum(equation = var_1943_equation_0, values = (var_1821_cast_fp16, var_1762_cast_fp16))[name = tensor("op_1943_cast_fp16")]; tensor var_1944_to_fp16 = const()[name = tensor("op_1944_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_291_cast_fp16 = mul(x = var_1943_cast_fp16, y = var_1944_to_fp16)[name = tensor("aw_chunk_291_cast_fp16")]; tensor var_1947_equation_0 = const()[name = tensor("op_1947_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1947_cast_fp16 = einsum(equation = var_1947_equation_0, values = (var_1821_cast_fp16, var_1763_cast_fp16))[name = tensor("op_1947_cast_fp16")]; tensor var_1948_to_fp16 = const()[name = tensor("op_1948_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_293_cast_fp16 = mul(x = var_1947_cast_fp16, y = var_1948_to_fp16)[name = tensor("aw_chunk_293_cast_fp16")]; tensor var_1951_equation_0 = const()[name = tensor("op_1951_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1951_cast_fp16 = einsum(equation = var_1951_equation_0, values = (var_1821_cast_fp16, var_1764_cast_fp16))[name = tensor("op_1951_cast_fp16")]; tensor var_1952_to_fp16 = const()[name = tensor("op_1952_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_295_cast_fp16 = mul(x = var_1951_cast_fp16, y = var_1952_to_fp16)[name = tensor("aw_chunk_295_cast_fp16")]; tensor var_1955_equation_0 = const()[name = tensor("op_1955_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1955_cast_fp16 = einsum(equation = var_1955_equation_0, values = (var_1821_cast_fp16, var_1765_cast_fp16))[name = tensor("op_1955_cast_fp16")]; tensor var_1956_to_fp16 = const()[name = tensor("op_1956_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_297_cast_fp16 = mul(x = var_1955_cast_fp16, y = var_1956_to_fp16)[name = tensor("aw_chunk_297_cast_fp16")]; tensor var_1959_equation_0 = const()[name = tensor("op_1959_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1959_cast_fp16 = einsum(equation = var_1959_equation_0, values = (var_1821_cast_fp16, var_1766_cast_fp16))[name = tensor("op_1959_cast_fp16")]; tensor var_1960_to_fp16 = const()[name = tensor("op_1960_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_299_cast_fp16 = mul(x = var_1959_cast_fp16, y = var_1960_to_fp16)[name = tensor("aw_chunk_299_cast_fp16")]; tensor var_1963_equation_0 = const()[name = tensor("op_1963_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1963_cast_fp16 = einsum(equation = var_1963_equation_0, values = (var_1821_cast_fp16, var_1767_cast_fp16))[name = tensor("op_1963_cast_fp16")]; tensor var_1964_to_fp16 = const()[name = tensor("op_1964_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_301_cast_fp16 = mul(x = var_1963_cast_fp16, y = var_1964_to_fp16)[name = tensor("aw_chunk_301_cast_fp16")]; tensor var_1967_equation_0 = const()[name = tensor("op_1967_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1967_cast_fp16 = einsum(equation = var_1967_equation_0, values = (var_1821_cast_fp16, var_1768_cast_fp16))[name = tensor("op_1967_cast_fp16")]; tensor var_1968_to_fp16 = const()[name = tensor("op_1968_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_303_cast_fp16 = mul(x = var_1967_cast_fp16, y = var_1968_to_fp16)[name = tensor("aw_chunk_303_cast_fp16")]; tensor var_1971_equation_0 = const()[name = tensor("op_1971_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1971_cast_fp16 = einsum(equation = var_1971_equation_0, values = (var_1825_cast_fp16, var_1769_cast_fp16))[name = tensor("op_1971_cast_fp16")]; tensor var_1972_to_fp16 = const()[name = tensor("op_1972_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_305_cast_fp16 = mul(x = var_1971_cast_fp16, y = var_1972_to_fp16)[name = tensor("aw_chunk_305_cast_fp16")]; tensor var_1975_equation_0 = const()[name = tensor("op_1975_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1975_cast_fp16 = einsum(equation = var_1975_equation_0, values = (var_1825_cast_fp16, var_1770_cast_fp16))[name = tensor("op_1975_cast_fp16")]; tensor var_1976_to_fp16 = const()[name = tensor("op_1976_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_307_cast_fp16 = mul(x = var_1975_cast_fp16, y = var_1976_to_fp16)[name = tensor("aw_chunk_307_cast_fp16")]; tensor var_1979_equation_0 = const()[name = tensor("op_1979_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1979_cast_fp16 = einsum(equation = var_1979_equation_0, values = (var_1825_cast_fp16, var_1771_cast_fp16))[name = tensor("op_1979_cast_fp16")]; tensor var_1980_to_fp16 = const()[name = tensor("op_1980_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_309_cast_fp16 = mul(x = var_1979_cast_fp16, y = var_1980_to_fp16)[name = tensor("aw_chunk_309_cast_fp16")]; tensor var_1983_equation_0 = const()[name = tensor("op_1983_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1983_cast_fp16 = einsum(equation = var_1983_equation_0, values = (var_1825_cast_fp16, var_1772_cast_fp16))[name = tensor("op_1983_cast_fp16")]; tensor var_1984_to_fp16 = const()[name = tensor("op_1984_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_311_cast_fp16 = mul(x = var_1983_cast_fp16, y = var_1984_to_fp16)[name = tensor("aw_chunk_311_cast_fp16")]; tensor var_1987_equation_0 = const()[name = tensor("op_1987_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1987_cast_fp16 = einsum(equation = var_1987_equation_0, values = (var_1825_cast_fp16, var_1773_cast_fp16))[name = tensor("op_1987_cast_fp16")]; tensor var_1988_to_fp16 = const()[name = tensor("op_1988_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_313_cast_fp16 = mul(x = var_1987_cast_fp16, y = var_1988_to_fp16)[name = tensor("aw_chunk_313_cast_fp16")]; tensor var_1991_equation_0 = const()[name = tensor("op_1991_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1991_cast_fp16 = einsum(equation = var_1991_equation_0, values = (var_1825_cast_fp16, var_1774_cast_fp16))[name = tensor("op_1991_cast_fp16")]; tensor var_1992_to_fp16 = const()[name = tensor("op_1992_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_315_cast_fp16 = mul(x = var_1991_cast_fp16, y = var_1992_to_fp16)[name = tensor("aw_chunk_315_cast_fp16")]; tensor var_1995_equation_0 = const()[name = tensor("op_1995_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1995_cast_fp16 = einsum(equation = var_1995_equation_0, values = (var_1825_cast_fp16, var_1775_cast_fp16))[name = tensor("op_1995_cast_fp16")]; tensor var_1996_to_fp16 = const()[name = tensor("op_1996_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_317_cast_fp16 = mul(x = var_1995_cast_fp16, y = var_1996_to_fp16)[name = tensor("aw_chunk_317_cast_fp16")]; tensor var_1999_equation_0 = const()[name = tensor("op_1999_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_1999_cast_fp16 = einsum(equation = var_1999_equation_0, values = (var_1825_cast_fp16, var_1776_cast_fp16))[name = tensor("op_1999_cast_fp16")]; tensor var_2000_to_fp16 = const()[name = tensor("op_2000_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_319_cast_fp16 = mul(x = var_1999_cast_fp16, y = var_2000_to_fp16)[name = tensor("aw_chunk_319_cast_fp16")]; tensor var_2003_equation_0 = const()[name = tensor("op_2003_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2003_cast_fp16 = einsum(equation = var_2003_equation_0, values = (var_1829_cast_fp16, var_1777_cast_fp16))[name = tensor("op_2003_cast_fp16")]; tensor var_2004_to_fp16 = const()[name = tensor("op_2004_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_321_cast_fp16 = mul(x = var_2003_cast_fp16, y = var_2004_to_fp16)[name = tensor("aw_chunk_321_cast_fp16")]; tensor var_2007_equation_0 = const()[name = tensor("op_2007_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2007_cast_fp16 = einsum(equation = var_2007_equation_0, values = (var_1829_cast_fp16, var_1778_cast_fp16))[name = tensor("op_2007_cast_fp16")]; tensor var_2008_to_fp16 = const()[name = tensor("op_2008_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_323_cast_fp16 = mul(x = var_2007_cast_fp16, y = var_2008_to_fp16)[name = tensor("aw_chunk_323_cast_fp16")]; tensor var_2011_equation_0 = const()[name = tensor("op_2011_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2011_cast_fp16 = einsum(equation = var_2011_equation_0, values = (var_1829_cast_fp16, var_1779_cast_fp16))[name = tensor("op_2011_cast_fp16")]; tensor var_2012_to_fp16 = const()[name = tensor("op_2012_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_325_cast_fp16 = mul(x = var_2011_cast_fp16, y = var_2012_to_fp16)[name = tensor("aw_chunk_325_cast_fp16")]; tensor var_2015_equation_0 = const()[name = tensor("op_2015_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2015_cast_fp16 = einsum(equation = var_2015_equation_0, values = (var_1829_cast_fp16, var_1780_cast_fp16))[name = tensor("op_2015_cast_fp16")]; tensor var_2016_to_fp16 = const()[name = tensor("op_2016_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_327_cast_fp16 = mul(x = var_2015_cast_fp16, y = var_2016_to_fp16)[name = tensor("aw_chunk_327_cast_fp16")]; tensor var_2019_equation_0 = const()[name = tensor("op_2019_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2019_cast_fp16 = einsum(equation = var_2019_equation_0, values = (var_1829_cast_fp16, var_1781_cast_fp16))[name = tensor("op_2019_cast_fp16")]; tensor var_2020_to_fp16 = const()[name = tensor("op_2020_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_329_cast_fp16 = mul(x = var_2019_cast_fp16, y = var_2020_to_fp16)[name = tensor("aw_chunk_329_cast_fp16")]; tensor var_2023_equation_0 = const()[name = tensor("op_2023_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2023_cast_fp16 = einsum(equation = var_2023_equation_0, values = (var_1829_cast_fp16, var_1782_cast_fp16))[name = tensor("op_2023_cast_fp16")]; tensor var_2024_to_fp16 = const()[name = tensor("op_2024_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_331_cast_fp16 = mul(x = var_2023_cast_fp16, y = var_2024_to_fp16)[name = tensor("aw_chunk_331_cast_fp16")]; tensor var_2027_equation_0 = const()[name = tensor("op_2027_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2027_cast_fp16 = einsum(equation = var_2027_equation_0, values = (var_1829_cast_fp16, var_1783_cast_fp16))[name = tensor("op_2027_cast_fp16")]; tensor var_2028_to_fp16 = const()[name = tensor("op_2028_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_333_cast_fp16 = mul(x = var_2027_cast_fp16, y = var_2028_to_fp16)[name = tensor("aw_chunk_333_cast_fp16")]; tensor var_2031_equation_0 = const()[name = tensor("op_2031_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2031_cast_fp16 = einsum(equation = var_2031_equation_0, values = (var_1829_cast_fp16, var_1784_cast_fp16))[name = tensor("op_2031_cast_fp16")]; tensor var_2032_to_fp16 = const()[name = tensor("op_2032_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_335_cast_fp16 = mul(x = var_2031_cast_fp16, y = var_2032_to_fp16)[name = tensor("aw_chunk_335_cast_fp16")]; tensor var_2035_equation_0 = const()[name = tensor("op_2035_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2035_cast_fp16 = einsum(equation = var_2035_equation_0, values = (var_1833_cast_fp16, var_1785_cast_fp16))[name = tensor("op_2035_cast_fp16")]; tensor var_2036_to_fp16 = const()[name = tensor("op_2036_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_337_cast_fp16 = mul(x = var_2035_cast_fp16, y = var_2036_to_fp16)[name = tensor("aw_chunk_337_cast_fp16")]; tensor var_2039_equation_0 = const()[name = tensor("op_2039_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2039_cast_fp16 = einsum(equation = var_2039_equation_0, values = (var_1833_cast_fp16, var_1786_cast_fp16))[name = tensor("op_2039_cast_fp16")]; tensor var_2040_to_fp16 = const()[name = tensor("op_2040_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_339_cast_fp16 = mul(x = var_2039_cast_fp16, y = var_2040_to_fp16)[name = tensor("aw_chunk_339_cast_fp16")]; tensor var_2043_equation_0 = const()[name = tensor("op_2043_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2043_cast_fp16 = einsum(equation = var_2043_equation_0, values = (var_1833_cast_fp16, var_1787_cast_fp16))[name = tensor("op_2043_cast_fp16")]; tensor var_2044_to_fp16 = const()[name = tensor("op_2044_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_341_cast_fp16 = mul(x = var_2043_cast_fp16, y = var_2044_to_fp16)[name = tensor("aw_chunk_341_cast_fp16")]; tensor var_2047_equation_0 = const()[name = tensor("op_2047_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2047_cast_fp16 = einsum(equation = var_2047_equation_0, values = (var_1833_cast_fp16, var_1788_cast_fp16))[name = tensor("op_2047_cast_fp16")]; tensor var_2048_to_fp16 = const()[name = tensor("op_2048_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_343_cast_fp16 = mul(x = var_2047_cast_fp16, y = var_2048_to_fp16)[name = tensor("aw_chunk_343_cast_fp16")]; tensor var_2051_equation_0 = const()[name = tensor("op_2051_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2051_cast_fp16 = einsum(equation = var_2051_equation_0, values = (var_1833_cast_fp16, var_1789_cast_fp16))[name = tensor("op_2051_cast_fp16")]; tensor var_2052_to_fp16 = const()[name = tensor("op_2052_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_345_cast_fp16 = mul(x = var_2051_cast_fp16, y = var_2052_to_fp16)[name = tensor("aw_chunk_345_cast_fp16")]; tensor var_2055_equation_0 = const()[name = tensor("op_2055_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2055_cast_fp16 = einsum(equation = var_2055_equation_0, values = (var_1833_cast_fp16, var_1790_cast_fp16))[name = tensor("op_2055_cast_fp16")]; tensor var_2056_to_fp16 = const()[name = tensor("op_2056_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_347_cast_fp16 = mul(x = var_2055_cast_fp16, y = var_2056_to_fp16)[name = tensor("aw_chunk_347_cast_fp16")]; tensor var_2059_equation_0 = const()[name = tensor("op_2059_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2059_cast_fp16 = einsum(equation = var_2059_equation_0, values = (var_1833_cast_fp16, var_1791_cast_fp16))[name = tensor("op_2059_cast_fp16")]; tensor var_2060_to_fp16 = const()[name = tensor("op_2060_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_349_cast_fp16 = mul(x = var_2059_cast_fp16, y = var_2060_to_fp16)[name = tensor("aw_chunk_349_cast_fp16")]; tensor var_2063_equation_0 = const()[name = tensor("op_2063_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2063_cast_fp16 = einsum(equation = var_2063_equation_0, values = (var_1833_cast_fp16, var_1792_cast_fp16))[name = tensor("op_2063_cast_fp16")]; tensor var_2064_to_fp16 = const()[name = tensor("op_2064_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_351_cast_fp16 = mul(x = var_2063_cast_fp16, y = var_2064_to_fp16)[name = tensor("aw_chunk_351_cast_fp16")]; tensor var_2067_equation_0 = const()[name = tensor("op_2067_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2067_cast_fp16 = einsum(equation = var_2067_equation_0, values = (var_1837_cast_fp16, var_1793_cast_fp16))[name = tensor("op_2067_cast_fp16")]; tensor var_2068_to_fp16 = const()[name = tensor("op_2068_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_353_cast_fp16 = mul(x = var_2067_cast_fp16, y = var_2068_to_fp16)[name = tensor("aw_chunk_353_cast_fp16")]; tensor var_2071_equation_0 = const()[name = tensor("op_2071_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2071_cast_fp16 = einsum(equation = var_2071_equation_0, values = (var_1837_cast_fp16, var_1794_cast_fp16))[name = tensor("op_2071_cast_fp16")]; tensor var_2072_to_fp16 = const()[name = tensor("op_2072_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_355_cast_fp16 = mul(x = var_2071_cast_fp16, y = var_2072_to_fp16)[name = tensor("aw_chunk_355_cast_fp16")]; tensor var_2075_equation_0 = const()[name = tensor("op_2075_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2075_cast_fp16 = einsum(equation = var_2075_equation_0, values = (var_1837_cast_fp16, var_1795_cast_fp16))[name = tensor("op_2075_cast_fp16")]; tensor var_2076_to_fp16 = const()[name = tensor("op_2076_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_357_cast_fp16 = mul(x = var_2075_cast_fp16, y = var_2076_to_fp16)[name = tensor("aw_chunk_357_cast_fp16")]; tensor var_2079_equation_0 = const()[name = tensor("op_2079_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2079_cast_fp16 = einsum(equation = var_2079_equation_0, values = (var_1837_cast_fp16, var_1796_cast_fp16))[name = tensor("op_2079_cast_fp16")]; tensor var_2080_to_fp16 = const()[name = tensor("op_2080_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_359_cast_fp16 = mul(x = var_2079_cast_fp16, y = var_2080_to_fp16)[name = tensor("aw_chunk_359_cast_fp16")]; tensor var_2083_equation_0 = const()[name = tensor("op_2083_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2083_cast_fp16 = einsum(equation = var_2083_equation_0, values = (var_1837_cast_fp16, var_1797_cast_fp16))[name = tensor("op_2083_cast_fp16")]; tensor var_2084_to_fp16 = const()[name = tensor("op_2084_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_361_cast_fp16 = mul(x = var_2083_cast_fp16, y = var_2084_to_fp16)[name = tensor("aw_chunk_361_cast_fp16")]; tensor var_2087_equation_0 = const()[name = tensor("op_2087_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2087_cast_fp16 = einsum(equation = var_2087_equation_0, values = (var_1837_cast_fp16, var_1798_cast_fp16))[name = tensor("op_2087_cast_fp16")]; tensor var_2088_to_fp16 = const()[name = tensor("op_2088_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_363_cast_fp16 = mul(x = var_2087_cast_fp16, y = var_2088_to_fp16)[name = tensor("aw_chunk_363_cast_fp16")]; tensor var_2091_equation_0 = const()[name = tensor("op_2091_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2091_cast_fp16 = einsum(equation = var_2091_equation_0, values = (var_1837_cast_fp16, var_1799_cast_fp16))[name = tensor("op_2091_cast_fp16")]; tensor var_2092_to_fp16 = const()[name = tensor("op_2092_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_365_cast_fp16 = mul(x = var_2091_cast_fp16, y = var_2092_to_fp16)[name = tensor("aw_chunk_365_cast_fp16")]; tensor var_2095_equation_0 = const()[name = tensor("op_2095_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2095_cast_fp16 = einsum(equation = var_2095_equation_0, values = (var_1837_cast_fp16, var_1800_cast_fp16))[name = tensor("op_2095_cast_fp16")]; tensor var_2096_to_fp16 = const()[name = tensor("op_2096_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_367_cast_fp16 = mul(x = var_2095_cast_fp16, y = var_2096_to_fp16)[name = tensor("aw_chunk_367_cast_fp16")]; tensor var_2099_equation_0 = const()[name = tensor("op_2099_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2099_cast_fp16 = einsum(equation = var_2099_equation_0, values = (var_1841_cast_fp16, var_1801_cast_fp16))[name = tensor("op_2099_cast_fp16")]; tensor var_2100_to_fp16 = const()[name = tensor("op_2100_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_369_cast_fp16 = mul(x = var_2099_cast_fp16, y = var_2100_to_fp16)[name = tensor("aw_chunk_369_cast_fp16")]; tensor var_2103_equation_0 = const()[name = tensor("op_2103_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2103_cast_fp16 = einsum(equation = var_2103_equation_0, values = (var_1841_cast_fp16, var_1802_cast_fp16))[name = tensor("op_2103_cast_fp16")]; tensor var_2104_to_fp16 = const()[name = tensor("op_2104_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_371_cast_fp16 = mul(x = var_2103_cast_fp16, y = var_2104_to_fp16)[name = tensor("aw_chunk_371_cast_fp16")]; tensor var_2107_equation_0 = const()[name = tensor("op_2107_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2107_cast_fp16 = einsum(equation = var_2107_equation_0, values = (var_1841_cast_fp16, var_1803_cast_fp16))[name = tensor("op_2107_cast_fp16")]; tensor var_2108_to_fp16 = const()[name = tensor("op_2108_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_373_cast_fp16 = mul(x = var_2107_cast_fp16, y = var_2108_to_fp16)[name = tensor("aw_chunk_373_cast_fp16")]; tensor var_2111_equation_0 = const()[name = tensor("op_2111_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2111_cast_fp16 = einsum(equation = var_2111_equation_0, values = (var_1841_cast_fp16, var_1804_cast_fp16))[name = tensor("op_2111_cast_fp16")]; tensor var_2112_to_fp16 = const()[name = tensor("op_2112_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_375_cast_fp16 = mul(x = var_2111_cast_fp16, y = var_2112_to_fp16)[name = tensor("aw_chunk_375_cast_fp16")]; tensor var_2115_equation_0 = const()[name = tensor("op_2115_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2115_cast_fp16 = einsum(equation = var_2115_equation_0, values = (var_1841_cast_fp16, var_1805_cast_fp16))[name = tensor("op_2115_cast_fp16")]; tensor var_2116_to_fp16 = const()[name = tensor("op_2116_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_377_cast_fp16 = mul(x = var_2115_cast_fp16, y = var_2116_to_fp16)[name = tensor("aw_chunk_377_cast_fp16")]; tensor var_2119_equation_0 = const()[name = tensor("op_2119_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2119_cast_fp16 = einsum(equation = var_2119_equation_0, values = (var_1841_cast_fp16, var_1806_cast_fp16))[name = tensor("op_2119_cast_fp16")]; tensor var_2120_to_fp16 = const()[name = tensor("op_2120_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_379_cast_fp16 = mul(x = var_2119_cast_fp16, y = var_2120_to_fp16)[name = tensor("aw_chunk_379_cast_fp16")]; tensor var_2123_equation_0 = const()[name = tensor("op_2123_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2123_cast_fp16 = einsum(equation = var_2123_equation_0, values = (var_1841_cast_fp16, var_1807_cast_fp16))[name = tensor("op_2123_cast_fp16")]; tensor var_2124_to_fp16 = const()[name = tensor("op_2124_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_381_cast_fp16 = mul(x = var_2123_cast_fp16, y = var_2124_to_fp16)[name = tensor("aw_chunk_381_cast_fp16")]; tensor var_2127_equation_0 = const()[name = tensor("op_2127_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2127_cast_fp16 = einsum(equation = var_2127_equation_0, values = (var_1841_cast_fp16, var_1808_cast_fp16))[name = tensor("op_2127_cast_fp16")]; tensor var_2128_to_fp16 = const()[name = tensor("op_2128_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_383_cast_fp16 = mul(x = var_2127_cast_fp16, y = var_2128_to_fp16)[name = tensor("aw_chunk_383_cast_fp16")]; tensor var_2130_cast_fp16 = softmax(axis = var_123, x = aw_chunk_257_cast_fp16)[name = tensor("op_2130_cast_fp16")]; tensor var_2131_cast_fp16 = softmax(axis = var_123, x = aw_chunk_259_cast_fp16)[name = tensor("op_2131_cast_fp16")]; tensor var_2132_cast_fp16 = softmax(axis = var_123, x = aw_chunk_261_cast_fp16)[name = tensor("op_2132_cast_fp16")]; tensor var_2133_cast_fp16 = softmax(axis = var_123, x = aw_chunk_263_cast_fp16)[name = tensor("op_2133_cast_fp16")]; tensor var_2134_cast_fp16 = softmax(axis = var_123, x = aw_chunk_265_cast_fp16)[name = tensor("op_2134_cast_fp16")]; tensor var_2135_cast_fp16 = softmax(axis = var_123, x = aw_chunk_267_cast_fp16)[name = tensor("op_2135_cast_fp16")]; tensor var_2136_cast_fp16 = softmax(axis = var_123, x = aw_chunk_269_cast_fp16)[name = tensor("op_2136_cast_fp16")]; tensor var_2137_cast_fp16 = softmax(axis = var_123, x = aw_chunk_271_cast_fp16)[name = tensor("op_2137_cast_fp16")]; tensor var_2138_cast_fp16 = softmax(axis = var_123, x = aw_chunk_273_cast_fp16)[name = tensor("op_2138_cast_fp16")]; tensor var_2139_cast_fp16 = softmax(axis = var_123, x = aw_chunk_275_cast_fp16)[name = tensor("op_2139_cast_fp16")]; tensor var_2140_cast_fp16 = softmax(axis = var_123, x = aw_chunk_277_cast_fp16)[name = tensor("op_2140_cast_fp16")]; tensor var_2141_cast_fp16 = softmax(axis = var_123, x = aw_chunk_279_cast_fp16)[name = tensor("op_2141_cast_fp16")]; tensor var_2142_cast_fp16 = softmax(axis = var_123, x = aw_chunk_281_cast_fp16)[name = tensor("op_2142_cast_fp16")]; tensor var_2143_cast_fp16 = softmax(axis = var_123, x = aw_chunk_283_cast_fp16)[name = tensor("op_2143_cast_fp16")]; tensor var_2144_cast_fp16 = softmax(axis = var_123, x = aw_chunk_285_cast_fp16)[name = tensor("op_2144_cast_fp16")]; tensor var_2145_cast_fp16 = softmax(axis = var_123, x = aw_chunk_287_cast_fp16)[name = tensor("op_2145_cast_fp16")]; tensor var_2146_cast_fp16 = softmax(axis = var_123, x = aw_chunk_289_cast_fp16)[name = tensor("op_2146_cast_fp16")]; tensor var_2147_cast_fp16 = softmax(axis = var_123, x = aw_chunk_291_cast_fp16)[name = tensor("op_2147_cast_fp16")]; tensor var_2148_cast_fp16 = softmax(axis = var_123, x = aw_chunk_293_cast_fp16)[name = tensor("op_2148_cast_fp16")]; tensor var_2149_cast_fp16 = softmax(axis = var_123, x = aw_chunk_295_cast_fp16)[name = tensor("op_2149_cast_fp16")]; tensor var_2150_cast_fp16 = softmax(axis = var_123, x = aw_chunk_297_cast_fp16)[name = tensor("op_2150_cast_fp16")]; tensor var_2151_cast_fp16 = softmax(axis = var_123, x = aw_chunk_299_cast_fp16)[name = tensor("op_2151_cast_fp16")]; tensor var_2152_cast_fp16 = softmax(axis = var_123, x = aw_chunk_301_cast_fp16)[name = tensor("op_2152_cast_fp16")]; tensor var_2153_cast_fp16 = softmax(axis = var_123, x = aw_chunk_303_cast_fp16)[name = tensor("op_2153_cast_fp16")]; tensor var_2154_cast_fp16 = softmax(axis = var_123, x = aw_chunk_305_cast_fp16)[name = tensor("op_2154_cast_fp16")]; tensor var_2155_cast_fp16 = softmax(axis = var_123, x = aw_chunk_307_cast_fp16)[name = tensor("op_2155_cast_fp16")]; tensor var_2156_cast_fp16 = softmax(axis = var_123, x = aw_chunk_309_cast_fp16)[name = tensor("op_2156_cast_fp16")]; tensor var_2157_cast_fp16 = softmax(axis = var_123, x = aw_chunk_311_cast_fp16)[name = tensor("op_2157_cast_fp16")]; tensor var_2158_cast_fp16 = softmax(axis = var_123, x = aw_chunk_313_cast_fp16)[name = tensor("op_2158_cast_fp16")]; tensor var_2159_cast_fp16 = softmax(axis = var_123, x = aw_chunk_315_cast_fp16)[name = tensor("op_2159_cast_fp16")]; tensor var_2160_cast_fp16 = softmax(axis = var_123, x = aw_chunk_317_cast_fp16)[name = tensor("op_2160_cast_fp16")]; tensor var_2161_cast_fp16 = softmax(axis = var_123, x = aw_chunk_319_cast_fp16)[name = tensor("op_2161_cast_fp16")]; tensor var_2162_cast_fp16 = softmax(axis = var_123, x = aw_chunk_321_cast_fp16)[name = tensor("op_2162_cast_fp16")]; tensor var_2163_cast_fp16 = softmax(axis = var_123, x = aw_chunk_323_cast_fp16)[name = tensor("op_2163_cast_fp16")]; tensor var_2164_cast_fp16 = softmax(axis = var_123, x = aw_chunk_325_cast_fp16)[name = tensor("op_2164_cast_fp16")]; tensor var_2165_cast_fp16 = softmax(axis = var_123, x = aw_chunk_327_cast_fp16)[name = tensor("op_2165_cast_fp16")]; tensor var_2166_cast_fp16 = softmax(axis = var_123, x = aw_chunk_329_cast_fp16)[name = tensor("op_2166_cast_fp16")]; tensor var_2167_cast_fp16 = softmax(axis = var_123, x = aw_chunk_331_cast_fp16)[name = tensor("op_2167_cast_fp16")]; tensor var_2168_cast_fp16 = softmax(axis = var_123, x = aw_chunk_333_cast_fp16)[name = tensor("op_2168_cast_fp16")]; tensor var_2169_cast_fp16 = softmax(axis = var_123, x = aw_chunk_335_cast_fp16)[name = tensor("op_2169_cast_fp16")]; tensor var_2170_cast_fp16 = softmax(axis = var_123, x = aw_chunk_337_cast_fp16)[name = tensor("op_2170_cast_fp16")]; tensor var_2171_cast_fp16 = softmax(axis = var_123, x = aw_chunk_339_cast_fp16)[name = tensor("op_2171_cast_fp16")]; tensor var_2172_cast_fp16 = softmax(axis = var_123, x = aw_chunk_341_cast_fp16)[name = tensor("op_2172_cast_fp16")]; tensor var_2173_cast_fp16 = softmax(axis = var_123, x = aw_chunk_343_cast_fp16)[name = tensor("op_2173_cast_fp16")]; tensor var_2174_cast_fp16 = softmax(axis = var_123, x = aw_chunk_345_cast_fp16)[name = tensor("op_2174_cast_fp16")]; tensor var_2175_cast_fp16 = softmax(axis = var_123, x = aw_chunk_347_cast_fp16)[name = tensor("op_2175_cast_fp16")]; tensor var_2176_cast_fp16 = softmax(axis = var_123, x = aw_chunk_349_cast_fp16)[name = tensor("op_2176_cast_fp16")]; tensor var_2177_cast_fp16 = softmax(axis = var_123, x = aw_chunk_351_cast_fp16)[name = tensor("op_2177_cast_fp16")]; tensor var_2178_cast_fp16 = softmax(axis = var_123, x = aw_chunk_353_cast_fp16)[name = tensor("op_2178_cast_fp16")]; tensor var_2179_cast_fp16 = softmax(axis = var_123, x = aw_chunk_355_cast_fp16)[name = tensor("op_2179_cast_fp16")]; tensor var_2180_cast_fp16 = softmax(axis = var_123, x = aw_chunk_357_cast_fp16)[name = tensor("op_2180_cast_fp16")]; tensor var_2181_cast_fp16 = softmax(axis = var_123, x = aw_chunk_359_cast_fp16)[name = tensor("op_2181_cast_fp16")]; tensor var_2182_cast_fp16 = softmax(axis = var_123, x = aw_chunk_361_cast_fp16)[name = tensor("op_2182_cast_fp16")]; tensor var_2183_cast_fp16 = softmax(axis = var_123, x = aw_chunk_363_cast_fp16)[name = tensor("op_2183_cast_fp16")]; tensor var_2184_cast_fp16 = softmax(axis = var_123, x = aw_chunk_365_cast_fp16)[name = tensor("op_2184_cast_fp16")]; tensor var_2185_cast_fp16 = softmax(axis = var_123, x = aw_chunk_367_cast_fp16)[name = tensor("op_2185_cast_fp16")]; tensor var_2186_cast_fp16 = softmax(axis = var_123, x = aw_chunk_369_cast_fp16)[name = tensor("op_2186_cast_fp16")]; tensor var_2187_cast_fp16 = softmax(axis = var_123, x = aw_chunk_371_cast_fp16)[name = tensor("op_2187_cast_fp16")]; tensor var_2188_cast_fp16 = softmax(axis = var_123, x = aw_chunk_373_cast_fp16)[name = tensor("op_2188_cast_fp16")]; tensor var_2189_cast_fp16 = softmax(axis = var_123, x = aw_chunk_375_cast_fp16)[name = tensor("op_2189_cast_fp16")]; tensor var_2190_cast_fp16 = softmax(axis = var_123, x = aw_chunk_377_cast_fp16)[name = tensor("op_2190_cast_fp16")]; tensor var_2191_cast_fp16 = softmax(axis = var_123, x = aw_chunk_379_cast_fp16)[name = tensor("op_2191_cast_fp16")]; tensor var_2192_cast_fp16 = softmax(axis = var_123, x = aw_chunk_381_cast_fp16)[name = tensor("op_2192_cast_fp16")]; tensor var_2193_cast_fp16 = softmax(axis = var_123, x = aw_chunk_383_cast_fp16)[name = tensor("op_2193_cast_fp16")]; tensor var_2195_equation_0 = const()[name = tensor("op_2195_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2195_cast_fp16 = einsum(equation = var_2195_equation_0, values = (var_1843_cast_fp16, var_2130_cast_fp16))[name = tensor("op_2195_cast_fp16")]; tensor var_2197_equation_0 = const()[name = tensor("op_2197_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2197_cast_fp16 = einsum(equation = var_2197_equation_0, values = (var_1843_cast_fp16, var_2131_cast_fp16))[name = tensor("op_2197_cast_fp16")]; tensor var_2199_equation_0 = const()[name = tensor("op_2199_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2199_cast_fp16 = einsum(equation = var_2199_equation_0, values = (var_1843_cast_fp16, var_2132_cast_fp16))[name = tensor("op_2199_cast_fp16")]; tensor var_2201_equation_0 = const()[name = tensor("op_2201_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2201_cast_fp16 = einsum(equation = var_2201_equation_0, values = (var_1843_cast_fp16, var_2133_cast_fp16))[name = tensor("op_2201_cast_fp16")]; tensor var_2203_equation_0 = const()[name = tensor("op_2203_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2203_cast_fp16 = einsum(equation = var_2203_equation_0, values = (var_1843_cast_fp16, var_2134_cast_fp16))[name = tensor("op_2203_cast_fp16")]; tensor var_2205_equation_0 = const()[name = tensor("op_2205_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2205_cast_fp16 = einsum(equation = var_2205_equation_0, values = (var_1843_cast_fp16, var_2135_cast_fp16))[name = tensor("op_2205_cast_fp16")]; tensor var_2207_equation_0 = const()[name = tensor("op_2207_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2207_cast_fp16 = einsum(equation = var_2207_equation_0, values = (var_1843_cast_fp16, var_2136_cast_fp16))[name = tensor("op_2207_cast_fp16")]; tensor var_2209_equation_0 = const()[name = tensor("op_2209_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2209_cast_fp16 = einsum(equation = var_2209_equation_0, values = (var_1843_cast_fp16, var_2137_cast_fp16))[name = tensor("op_2209_cast_fp16")]; tensor var_2211_equation_0 = const()[name = tensor("op_2211_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2211_cast_fp16 = einsum(equation = var_2211_equation_0, values = (var_1847_cast_fp16, var_2138_cast_fp16))[name = tensor("op_2211_cast_fp16")]; tensor var_2213_equation_0 = const()[name = tensor("op_2213_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2213_cast_fp16 = einsum(equation = var_2213_equation_0, values = (var_1847_cast_fp16, var_2139_cast_fp16))[name = tensor("op_2213_cast_fp16")]; tensor var_2215_equation_0 = const()[name = tensor("op_2215_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2215_cast_fp16 = einsum(equation = var_2215_equation_0, values = (var_1847_cast_fp16, var_2140_cast_fp16))[name = tensor("op_2215_cast_fp16")]; tensor var_2217_equation_0 = const()[name = tensor("op_2217_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2217_cast_fp16 = einsum(equation = var_2217_equation_0, values = (var_1847_cast_fp16, var_2141_cast_fp16))[name = tensor("op_2217_cast_fp16")]; tensor var_2219_equation_0 = const()[name = tensor("op_2219_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2219_cast_fp16 = einsum(equation = var_2219_equation_0, values = (var_1847_cast_fp16, var_2142_cast_fp16))[name = tensor("op_2219_cast_fp16")]; tensor var_2221_equation_0 = const()[name = tensor("op_2221_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2221_cast_fp16 = einsum(equation = var_2221_equation_0, values = (var_1847_cast_fp16, var_2143_cast_fp16))[name = tensor("op_2221_cast_fp16")]; tensor var_2223_equation_0 = const()[name = tensor("op_2223_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2223_cast_fp16 = einsum(equation = var_2223_equation_0, values = (var_1847_cast_fp16, var_2144_cast_fp16))[name = tensor("op_2223_cast_fp16")]; tensor var_2225_equation_0 = const()[name = tensor("op_2225_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2225_cast_fp16 = einsum(equation = var_2225_equation_0, values = (var_1847_cast_fp16, var_2145_cast_fp16))[name = tensor("op_2225_cast_fp16")]; tensor var_2227_equation_0 = const()[name = tensor("op_2227_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2227_cast_fp16 = einsum(equation = var_2227_equation_0, values = (var_1851_cast_fp16, var_2146_cast_fp16))[name = tensor("op_2227_cast_fp16")]; tensor var_2229_equation_0 = const()[name = tensor("op_2229_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2229_cast_fp16 = einsum(equation = var_2229_equation_0, values = (var_1851_cast_fp16, var_2147_cast_fp16))[name = tensor("op_2229_cast_fp16")]; tensor var_2231_equation_0 = const()[name = tensor("op_2231_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2231_cast_fp16 = einsum(equation = var_2231_equation_0, values = (var_1851_cast_fp16, var_2148_cast_fp16))[name = tensor("op_2231_cast_fp16")]; tensor var_2233_equation_0 = const()[name = tensor("op_2233_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2233_cast_fp16 = einsum(equation = var_2233_equation_0, values = (var_1851_cast_fp16, var_2149_cast_fp16))[name = tensor("op_2233_cast_fp16")]; tensor var_2235_equation_0 = const()[name = tensor("op_2235_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2235_cast_fp16 = einsum(equation = var_2235_equation_0, values = (var_1851_cast_fp16, var_2150_cast_fp16))[name = tensor("op_2235_cast_fp16")]; tensor var_2237_equation_0 = const()[name = tensor("op_2237_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2237_cast_fp16 = einsum(equation = var_2237_equation_0, values = (var_1851_cast_fp16, var_2151_cast_fp16))[name = tensor("op_2237_cast_fp16")]; tensor var_2239_equation_0 = const()[name = tensor("op_2239_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2239_cast_fp16 = einsum(equation = var_2239_equation_0, values = (var_1851_cast_fp16, var_2152_cast_fp16))[name = tensor("op_2239_cast_fp16")]; tensor var_2241_equation_0 = const()[name = tensor("op_2241_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2241_cast_fp16 = einsum(equation = var_2241_equation_0, values = (var_1851_cast_fp16, var_2153_cast_fp16))[name = tensor("op_2241_cast_fp16")]; tensor var_2243_equation_0 = const()[name = tensor("op_2243_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2243_cast_fp16 = einsum(equation = var_2243_equation_0, values = (var_1855_cast_fp16, var_2154_cast_fp16))[name = tensor("op_2243_cast_fp16")]; tensor var_2245_equation_0 = const()[name = tensor("op_2245_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2245_cast_fp16 = einsum(equation = var_2245_equation_0, values = (var_1855_cast_fp16, var_2155_cast_fp16))[name = tensor("op_2245_cast_fp16")]; tensor var_2247_equation_0 = const()[name = tensor("op_2247_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2247_cast_fp16 = einsum(equation = var_2247_equation_0, values = (var_1855_cast_fp16, var_2156_cast_fp16))[name = tensor("op_2247_cast_fp16")]; tensor var_2249_equation_0 = const()[name = tensor("op_2249_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2249_cast_fp16 = einsum(equation = var_2249_equation_0, values = (var_1855_cast_fp16, var_2157_cast_fp16))[name = tensor("op_2249_cast_fp16")]; tensor var_2251_equation_0 = const()[name = tensor("op_2251_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2251_cast_fp16 = einsum(equation = var_2251_equation_0, values = (var_1855_cast_fp16, var_2158_cast_fp16))[name = tensor("op_2251_cast_fp16")]; tensor var_2253_equation_0 = const()[name = tensor("op_2253_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2253_cast_fp16 = einsum(equation = var_2253_equation_0, values = (var_1855_cast_fp16, var_2159_cast_fp16))[name = tensor("op_2253_cast_fp16")]; tensor var_2255_equation_0 = const()[name = tensor("op_2255_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2255_cast_fp16 = einsum(equation = var_2255_equation_0, values = (var_1855_cast_fp16, var_2160_cast_fp16))[name = tensor("op_2255_cast_fp16")]; tensor var_2257_equation_0 = const()[name = tensor("op_2257_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2257_cast_fp16 = einsum(equation = var_2257_equation_0, values = (var_1855_cast_fp16, var_2161_cast_fp16))[name = tensor("op_2257_cast_fp16")]; tensor var_2259_equation_0 = const()[name = tensor("op_2259_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2259_cast_fp16 = einsum(equation = var_2259_equation_0, values = (var_1859_cast_fp16, var_2162_cast_fp16))[name = tensor("op_2259_cast_fp16")]; tensor var_2261_equation_0 = const()[name = tensor("op_2261_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2261_cast_fp16 = einsum(equation = var_2261_equation_0, values = (var_1859_cast_fp16, var_2163_cast_fp16))[name = tensor("op_2261_cast_fp16")]; tensor var_2263_equation_0 = const()[name = tensor("op_2263_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2263_cast_fp16 = einsum(equation = var_2263_equation_0, values = (var_1859_cast_fp16, var_2164_cast_fp16))[name = tensor("op_2263_cast_fp16")]; tensor var_2265_equation_0 = const()[name = tensor("op_2265_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2265_cast_fp16 = einsum(equation = var_2265_equation_0, values = (var_1859_cast_fp16, var_2165_cast_fp16))[name = tensor("op_2265_cast_fp16")]; tensor var_2267_equation_0 = const()[name = tensor("op_2267_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2267_cast_fp16 = einsum(equation = var_2267_equation_0, values = (var_1859_cast_fp16, var_2166_cast_fp16))[name = tensor("op_2267_cast_fp16")]; tensor var_2269_equation_0 = const()[name = tensor("op_2269_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2269_cast_fp16 = einsum(equation = var_2269_equation_0, values = (var_1859_cast_fp16, var_2167_cast_fp16))[name = tensor("op_2269_cast_fp16")]; tensor var_2271_equation_0 = const()[name = tensor("op_2271_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2271_cast_fp16 = einsum(equation = var_2271_equation_0, values = (var_1859_cast_fp16, var_2168_cast_fp16))[name = tensor("op_2271_cast_fp16")]; tensor var_2273_equation_0 = const()[name = tensor("op_2273_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2273_cast_fp16 = einsum(equation = var_2273_equation_0, values = (var_1859_cast_fp16, var_2169_cast_fp16))[name = tensor("op_2273_cast_fp16")]; tensor var_2275_equation_0 = const()[name = tensor("op_2275_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2275_cast_fp16 = einsum(equation = var_2275_equation_0, values = (var_1863_cast_fp16, var_2170_cast_fp16))[name = tensor("op_2275_cast_fp16")]; tensor var_2277_equation_0 = const()[name = tensor("op_2277_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2277_cast_fp16 = einsum(equation = var_2277_equation_0, values = (var_1863_cast_fp16, var_2171_cast_fp16))[name = tensor("op_2277_cast_fp16")]; tensor var_2279_equation_0 = const()[name = tensor("op_2279_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2279_cast_fp16 = einsum(equation = var_2279_equation_0, values = (var_1863_cast_fp16, var_2172_cast_fp16))[name = tensor("op_2279_cast_fp16")]; tensor var_2281_equation_0 = const()[name = tensor("op_2281_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2281_cast_fp16 = einsum(equation = var_2281_equation_0, values = (var_1863_cast_fp16, var_2173_cast_fp16))[name = tensor("op_2281_cast_fp16")]; tensor var_2283_equation_0 = const()[name = tensor("op_2283_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2283_cast_fp16 = einsum(equation = var_2283_equation_0, values = (var_1863_cast_fp16, var_2174_cast_fp16))[name = tensor("op_2283_cast_fp16")]; tensor var_2285_equation_0 = const()[name = tensor("op_2285_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2285_cast_fp16 = einsum(equation = var_2285_equation_0, values = (var_1863_cast_fp16, var_2175_cast_fp16))[name = tensor("op_2285_cast_fp16")]; tensor var_2287_equation_0 = const()[name = tensor("op_2287_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2287_cast_fp16 = einsum(equation = var_2287_equation_0, values = (var_1863_cast_fp16, var_2176_cast_fp16))[name = tensor("op_2287_cast_fp16")]; tensor var_2289_equation_0 = const()[name = tensor("op_2289_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2289_cast_fp16 = einsum(equation = var_2289_equation_0, values = (var_1863_cast_fp16, var_2177_cast_fp16))[name = tensor("op_2289_cast_fp16")]; tensor var_2291_equation_0 = const()[name = tensor("op_2291_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2291_cast_fp16 = einsum(equation = var_2291_equation_0, values = (var_1867_cast_fp16, var_2178_cast_fp16))[name = tensor("op_2291_cast_fp16")]; tensor var_2293_equation_0 = const()[name = tensor("op_2293_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2293_cast_fp16 = einsum(equation = var_2293_equation_0, values = (var_1867_cast_fp16, var_2179_cast_fp16))[name = tensor("op_2293_cast_fp16")]; tensor var_2295_equation_0 = const()[name = tensor("op_2295_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2295_cast_fp16 = einsum(equation = var_2295_equation_0, values = (var_1867_cast_fp16, var_2180_cast_fp16))[name = tensor("op_2295_cast_fp16")]; tensor var_2297_equation_0 = const()[name = tensor("op_2297_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2297_cast_fp16 = einsum(equation = var_2297_equation_0, values = (var_1867_cast_fp16, var_2181_cast_fp16))[name = tensor("op_2297_cast_fp16")]; tensor var_2299_equation_0 = const()[name = tensor("op_2299_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2299_cast_fp16 = einsum(equation = var_2299_equation_0, values = (var_1867_cast_fp16, var_2182_cast_fp16))[name = tensor("op_2299_cast_fp16")]; tensor var_2301_equation_0 = const()[name = tensor("op_2301_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2301_cast_fp16 = einsum(equation = var_2301_equation_0, values = (var_1867_cast_fp16, var_2183_cast_fp16))[name = tensor("op_2301_cast_fp16")]; tensor var_2303_equation_0 = const()[name = tensor("op_2303_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2303_cast_fp16 = einsum(equation = var_2303_equation_0, values = (var_1867_cast_fp16, var_2184_cast_fp16))[name = tensor("op_2303_cast_fp16")]; tensor var_2305_equation_0 = const()[name = tensor("op_2305_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2305_cast_fp16 = einsum(equation = var_2305_equation_0, values = (var_1867_cast_fp16, var_2185_cast_fp16))[name = tensor("op_2305_cast_fp16")]; tensor var_2307_equation_0 = const()[name = tensor("op_2307_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2307_cast_fp16 = einsum(equation = var_2307_equation_0, values = (var_1871_cast_fp16, var_2186_cast_fp16))[name = tensor("op_2307_cast_fp16")]; tensor var_2309_equation_0 = const()[name = tensor("op_2309_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2309_cast_fp16 = einsum(equation = var_2309_equation_0, values = (var_1871_cast_fp16, var_2187_cast_fp16))[name = tensor("op_2309_cast_fp16")]; tensor var_2311_equation_0 = const()[name = tensor("op_2311_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2311_cast_fp16 = einsum(equation = var_2311_equation_0, values = (var_1871_cast_fp16, var_2188_cast_fp16))[name = tensor("op_2311_cast_fp16")]; tensor var_2313_equation_0 = const()[name = tensor("op_2313_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2313_cast_fp16 = einsum(equation = var_2313_equation_0, values = (var_1871_cast_fp16, var_2189_cast_fp16))[name = tensor("op_2313_cast_fp16")]; tensor var_2315_equation_0 = const()[name = tensor("op_2315_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2315_cast_fp16 = einsum(equation = var_2315_equation_0, values = (var_1871_cast_fp16, var_2190_cast_fp16))[name = tensor("op_2315_cast_fp16")]; tensor var_2317_equation_0 = const()[name = tensor("op_2317_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2317_cast_fp16 = einsum(equation = var_2317_equation_0, values = (var_1871_cast_fp16, var_2191_cast_fp16))[name = tensor("op_2317_cast_fp16")]; tensor var_2319_equation_0 = const()[name = tensor("op_2319_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2319_cast_fp16 = einsum(equation = var_2319_equation_0, values = (var_1871_cast_fp16, var_2192_cast_fp16))[name = tensor("op_2319_cast_fp16")]; tensor var_2321_equation_0 = const()[name = tensor("op_2321_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2321_cast_fp16 = einsum(equation = var_2321_equation_0, values = (var_1871_cast_fp16, var_2193_cast_fp16))[name = tensor("op_2321_cast_fp16")]; tensor var_2323_interleave_0 = const()[name = tensor("op_2323_interleave_0"), val = tensor(false)]; tensor var_2323_cast_fp16 = concat(axis = var_95, interleave = var_2323_interleave_0, values = (var_2195_cast_fp16, var_2197_cast_fp16, var_2199_cast_fp16, var_2201_cast_fp16, var_2203_cast_fp16, var_2205_cast_fp16, var_2207_cast_fp16, var_2209_cast_fp16))[name = tensor("op_2323_cast_fp16")]; tensor var_2325_interleave_0 = const()[name = tensor("op_2325_interleave_0"), val = tensor(false)]; tensor var_2325_cast_fp16 = concat(axis = var_95, interleave = var_2325_interleave_0, values = (var_2211_cast_fp16, var_2213_cast_fp16, var_2215_cast_fp16, var_2217_cast_fp16, var_2219_cast_fp16, var_2221_cast_fp16, var_2223_cast_fp16, var_2225_cast_fp16))[name = tensor("op_2325_cast_fp16")]; tensor var_2327_interleave_0 = const()[name = tensor("op_2327_interleave_0"), val = tensor(false)]; tensor var_2327_cast_fp16 = concat(axis = var_95, interleave = var_2327_interleave_0, values = (var_2227_cast_fp16, var_2229_cast_fp16, var_2231_cast_fp16, var_2233_cast_fp16, var_2235_cast_fp16, var_2237_cast_fp16, var_2239_cast_fp16, var_2241_cast_fp16))[name = tensor("op_2327_cast_fp16")]; tensor var_2329_interleave_0 = const()[name = tensor("op_2329_interleave_0"), val = tensor(false)]; tensor var_2329_cast_fp16 = concat(axis = var_95, interleave = var_2329_interleave_0, values = (var_2243_cast_fp16, var_2245_cast_fp16, var_2247_cast_fp16, var_2249_cast_fp16, var_2251_cast_fp16, var_2253_cast_fp16, var_2255_cast_fp16, var_2257_cast_fp16))[name = tensor("op_2329_cast_fp16")]; tensor var_2331_interleave_0 = const()[name = tensor("op_2331_interleave_0"), val = tensor(false)]; tensor var_2331_cast_fp16 = concat(axis = var_95, interleave = var_2331_interleave_0, values = (var_2259_cast_fp16, var_2261_cast_fp16, var_2263_cast_fp16, var_2265_cast_fp16, var_2267_cast_fp16, var_2269_cast_fp16, var_2271_cast_fp16, var_2273_cast_fp16))[name = tensor("op_2331_cast_fp16")]; tensor var_2333_interleave_0 = const()[name = tensor("op_2333_interleave_0"), val = tensor(false)]; tensor var_2333_cast_fp16 = concat(axis = var_95, interleave = var_2333_interleave_0, values = (var_2275_cast_fp16, var_2277_cast_fp16, var_2279_cast_fp16, var_2281_cast_fp16, var_2283_cast_fp16, var_2285_cast_fp16, var_2287_cast_fp16, var_2289_cast_fp16))[name = tensor("op_2333_cast_fp16")]; tensor var_2335_interleave_0 = const()[name = tensor("op_2335_interleave_0"), val = tensor(false)]; tensor var_2335_cast_fp16 = concat(axis = var_95, interleave = var_2335_interleave_0, values = (var_2291_cast_fp16, var_2293_cast_fp16, var_2295_cast_fp16, var_2297_cast_fp16, var_2299_cast_fp16, var_2301_cast_fp16, var_2303_cast_fp16, var_2305_cast_fp16))[name = tensor("op_2335_cast_fp16")]; tensor var_2337_interleave_0 = const()[name = tensor("op_2337_interleave_0"), val = tensor(false)]; tensor var_2337_cast_fp16 = concat(axis = var_95, interleave = var_2337_interleave_0, values = (var_2307_cast_fp16, var_2309_cast_fp16, var_2311_cast_fp16, var_2313_cast_fp16, var_2315_cast_fp16, var_2317_cast_fp16, var_2319_cast_fp16, var_2321_cast_fp16))[name = tensor("op_2337_cast_fp16")]; tensor input_51_interleave_0 = const()[name = tensor("input_51_interleave_0"), val = tensor(false)]; tensor input_51_cast_fp16 = concat(axis = var_123, interleave = input_51_interleave_0, values = (var_2323_cast_fp16, var_2325_cast_fp16, var_2327_cast_fp16, var_2329_cast_fp16, var_2331_cast_fp16, var_2333_cast_fp16, var_2335_cast_fp16, var_2337_cast_fp16))[name = tensor("input_51_cast_fp16")]; tensor var_2343 = const()[name = tensor("op_2343"), val = tensor([1, 1])]; tensor var_2345 = const()[name = tensor("op_2345"), val = tensor([1, 1])]; tensor var_2347_pad_type_0 = const()[name = tensor("op_2347_pad_type_0"), val = tensor("custom")]; tensor var_2347_pad_0 = const()[name = tensor("op_2347_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19065600)))]; tensor down_blocks_0_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19270464)))]; tensor var_2347_cast_fp16 = conv(bias = down_blocks_0_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_2345, groups = var_123, pad = var_2347_pad_0, pad_type = var_2347_pad_type_0, strides = var_2343, weight = down_blocks_0_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_51_cast_fp16)[name = tensor("op_2347_cast_fp16")]; tensor inputs_9_cast_fp16 = add(x = var_2347_cast_fp16, y = inputs_7_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; tensor hidden_states_29_axes_0 = const()[name = tensor("hidden_states_29_axes_0"), val = tensor([1])]; tensor hidden_states_29_gamma_0_to_fp16 = const()[name = tensor("hidden_states_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19271168)))]; tensor hidden_states_29_beta_0_to_fp16 = const()[name = tensor("hidden_states_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19271872)))]; tensor var_2357_to_fp16 = const()[name = tensor("op_2357_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_29_cast_fp16 = layer_norm(axes = hidden_states_29_axes_0, beta = hidden_states_29_beta_0_to_fp16, epsilon = var_2357_to_fp16, gamma = hidden_states_29_gamma_0_to_fp16, x = inputs_9_cast_fp16)[name = tensor("hidden_states_29_cast_fp16")]; tensor var_2372 = const()[name = tensor("op_2372"), val = tensor([1, 1])]; tensor var_2374 = const()[name = tensor("op_2374"), val = tensor([1, 1])]; tensor q_7_pad_type_0 = const()[name = tensor("q_7_pad_type_0"), val = tensor("custom")]; tensor q_7_pad_0 = const()[name = tensor("q_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19272576)))]; tensor q_7_cast_fp16 = conv(dilations = var_2374, groups = var_123, pad = q_7_pad_0, pad_type = q_7_pad_type_0, strides = var_2372, weight = down_blocks_0_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_29_cast_fp16)[name = tensor("q_7_cast_fp16")]; tensor var_2378 = const()[name = tensor("op_2378"), val = tensor([1, 1])]; tensor var_2380 = const()[name = tensor("op_2380"), val = tensor([1, 1])]; tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; tensor k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19477440)))]; tensor k_13_cast_fp16 = conv(dilations = var_2380, groups = var_123, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_2378, weight = down_blocks_0_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_13_cast_fp16")]; tensor var_2384 = const()[name = tensor("op_2384"), val = tensor([1, 1])]; tensor var_2386 = const()[name = tensor("op_2386"), val = tensor([1, 1])]; tensor v_7_pad_type_0 = const()[name = tensor("v_7_pad_type_0"), val = tensor("custom")]; tensor v_7_pad_0 = const()[name = tensor("v_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19969024)))]; tensor v_7_cast_fp16 = conv(dilations = var_2386, groups = var_123, pad = v_7_pad_0, pad_type = v_7_pad_type_0, strides = var_2384, weight = down_blocks_0_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_7_cast_fp16")]; tensor var_2390_begin_0 = const()[name = tensor("op_2390_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2390_end_0 = const()[name = tensor("op_2390_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_2390_end_mask_0 = const()[name = tensor("op_2390_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2390_cast_fp16 = slice_by_index(begin = var_2390_begin_0, end = var_2390_end_0, end_mask = var_2390_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_2390_cast_fp16")]; tensor var_2394_begin_0 = const()[name = tensor("op_2394_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_2394_end_0 = const()[name = tensor("op_2394_end_0"), val = tensor([2, 80, 1, 4096])]; tensor var_2394_end_mask_0 = const()[name = tensor("op_2394_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2394_cast_fp16 = slice_by_index(begin = var_2394_begin_0, end = var_2394_end_0, end_mask = var_2394_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_2394_cast_fp16")]; tensor var_2398_begin_0 = const()[name = tensor("op_2398_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_2398_end_0 = const()[name = tensor("op_2398_end_0"), val = tensor([2, 120, 1, 4096])]; tensor var_2398_end_mask_0 = const()[name = tensor("op_2398_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2398_cast_fp16 = slice_by_index(begin = var_2398_begin_0, end = var_2398_end_0, end_mask = var_2398_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_2398_cast_fp16")]; tensor var_2402_begin_0 = const()[name = tensor("op_2402_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_2402_end_0 = const()[name = tensor("op_2402_end_0"), val = tensor([2, 160, 1, 4096])]; tensor var_2402_end_mask_0 = const()[name = tensor("op_2402_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2402_cast_fp16 = slice_by_index(begin = var_2402_begin_0, end = var_2402_end_0, end_mask = var_2402_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_2402_cast_fp16")]; tensor var_2406_begin_0 = const()[name = tensor("op_2406_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_2406_end_0 = const()[name = tensor("op_2406_end_0"), val = tensor([2, 200, 1, 4096])]; tensor var_2406_end_mask_0 = const()[name = tensor("op_2406_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2406_cast_fp16 = slice_by_index(begin = var_2406_begin_0, end = var_2406_end_0, end_mask = var_2406_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_2406_cast_fp16")]; tensor var_2410_begin_0 = const()[name = tensor("op_2410_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_2410_end_0 = const()[name = tensor("op_2410_end_0"), val = tensor([2, 240, 1, 4096])]; tensor var_2410_end_mask_0 = const()[name = tensor("op_2410_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2410_cast_fp16 = slice_by_index(begin = var_2410_begin_0, end = var_2410_end_0, end_mask = var_2410_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_2410_cast_fp16")]; tensor var_2414_begin_0 = const()[name = tensor("op_2414_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_2414_end_0 = const()[name = tensor("op_2414_end_0"), val = tensor([2, 280, 1, 4096])]; tensor var_2414_end_mask_0 = const()[name = tensor("op_2414_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2414_cast_fp16 = slice_by_index(begin = var_2414_begin_0, end = var_2414_end_0, end_mask = var_2414_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_2414_cast_fp16")]; tensor var_2418_begin_0 = const()[name = tensor("op_2418_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_2418_end_0 = const()[name = tensor("op_2418_end_0"), val = tensor([2, 320, 1, 4096])]; tensor var_2418_end_mask_0 = const()[name = tensor("op_2418_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2418_cast_fp16 = slice_by_index(begin = var_2418_begin_0, end = var_2418_end_0, end_mask = var_2418_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_2418_cast_fp16")]; tensor var_2421_begin_0 = const()[name = tensor("op_2421_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2421_end_0 = const()[name = tensor("op_2421_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_2421_end_mask_0 = const()[name = tensor("op_2421_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2421_cast_fp16 = slice_by_index(begin = var_2421_begin_0, end = var_2421_end_0, end_mask = var_2421_end_mask_0, x = var_2390_cast_fp16)[name = tensor("op_2421_cast_fp16")]; tensor var_2422_begin_0 = const()[name = tensor("op_2422_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2422_end_0 = const()[name = tensor("op_2422_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_2422_end_mask_0 = const()[name = tensor("op_2422_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2422_cast_fp16 = slice_by_index(begin = var_2422_begin_0, end = var_2422_end_0, end_mask = var_2422_end_mask_0, x = var_2390_cast_fp16)[name = tensor("op_2422_cast_fp16")]; tensor var_2423_begin_0 = const()[name = tensor("op_2423_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2423_end_0 = const()[name = tensor("op_2423_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_2423_end_mask_0 = const()[name = tensor("op_2423_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2423_cast_fp16 = slice_by_index(begin = var_2423_begin_0, end = var_2423_end_0, end_mask = var_2423_end_mask_0, x = var_2390_cast_fp16)[name = tensor("op_2423_cast_fp16")]; tensor var_2424_begin_0 = const()[name = tensor("op_2424_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_2424_end_0 = const()[name = tensor("op_2424_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_2424_end_mask_0 = const()[name = tensor("op_2424_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2424_cast_fp16 = slice_by_index(begin = var_2424_begin_0, end = var_2424_end_0, end_mask = var_2424_end_mask_0, x = var_2390_cast_fp16)[name = tensor("op_2424_cast_fp16")]; tensor var_2425_begin_0 = const()[name = tensor("op_2425_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_2425_end_0 = const()[name = tensor("op_2425_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_2425_end_mask_0 = const()[name = tensor("op_2425_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2425_cast_fp16 = slice_by_index(begin = var_2425_begin_0, end = var_2425_end_0, end_mask = var_2425_end_mask_0, x = var_2390_cast_fp16)[name = tensor("op_2425_cast_fp16")]; tensor var_2426_begin_0 = const()[name = tensor("op_2426_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_2426_end_0 = const()[name = tensor("op_2426_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_2426_end_mask_0 = const()[name = tensor("op_2426_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2426_cast_fp16 = slice_by_index(begin = var_2426_begin_0, end = var_2426_end_0, end_mask = var_2426_end_mask_0, x = var_2390_cast_fp16)[name = tensor("op_2426_cast_fp16")]; tensor var_2427_begin_0 = const()[name = tensor("op_2427_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_2427_end_0 = const()[name = tensor("op_2427_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_2427_end_mask_0 = const()[name = tensor("op_2427_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2427_cast_fp16 = slice_by_index(begin = var_2427_begin_0, end = var_2427_end_0, end_mask = var_2427_end_mask_0, x = var_2390_cast_fp16)[name = tensor("op_2427_cast_fp16")]; tensor var_2428_begin_0 = const()[name = tensor("op_2428_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_2428_end_0 = const()[name = tensor("op_2428_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_2428_end_mask_0 = const()[name = tensor("op_2428_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2428_cast_fp16 = slice_by_index(begin = var_2428_begin_0, end = var_2428_end_0, end_mask = var_2428_end_mask_0, x = var_2390_cast_fp16)[name = tensor("op_2428_cast_fp16")]; tensor var_2429_begin_0 = const()[name = tensor("op_2429_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2429_end_0 = const()[name = tensor("op_2429_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_2429_end_mask_0 = const()[name = tensor("op_2429_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2429_cast_fp16 = slice_by_index(begin = var_2429_begin_0, end = var_2429_end_0, end_mask = var_2429_end_mask_0, x = var_2394_cast_fp16)[name = tensor("op_2429_cast_fp16")]; tensor var_2430_begin_0 = const()[name = tensor("op_2430_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2430_end_0 = const()[name = tensor("op_2430_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_2430_end_mask_0 = const()[name = tensor("op_2430_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2430_cast_fp16 = slice_by_index(begin = var_2430_begin_0, end = var_2430_end_0, end_mask = var_2430_end_mask_0, x = var_2394_cast_fp16)[name = tensor("op_2430_cast_fp16")]; tensor var_2431_begin_0 = const()[name = tensor("op_2431_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2431_end_0 = const()[name = tensor("op_2431_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_2431_end_mask_0 = const()[name = tensor("op_2431_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2431_cast_fp16 = slice_by_index(begin = var_2431_begin_0, end = var_2431_end_0, end_mask = var_2431_end_mask_0, x = var_2394_cast_fp16)[name = tensor("op_2431_cast_fp16")]; tensor var_2432_begin_0 = const()[name = tensor("op_2432_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_2432_end_0 = const()[name = tensor("op_2432_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_2432_end_mask_0 = const()[name = tensor("op_2432_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2432_cast_fp16 = slice_by_index(begin = var_2432_begin_0, end = var_2432_end_0, end_mask = var_2432_end_mask_0, x = var_2394_cast_fp16)[name = tensor("op_2432_cast_fp16")]; tensor var_2433_begin_0 = const()[name = tensor("op_2433_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_2433_end_0 = const()[name = tensor("op_2433_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_2433_end_mask_0 = const()[name = tensor("op_2433_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2433_cast_fp16 = slice_by_index(begin = var_2433_begin_0, end = var_2433_end_0, end_mask = var_2433_end_mask_0, x = var_2394_cast_fp16)[name = tensor("op_2433_cast_fp16")]; tensor var_2434_begin_0 = const()[name = tensor("op_2434_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_2434_end_0 = const()[name = tensor("op_2434_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_2434_end_mask_0 = const()[name = tensor("op_2434_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2434_cast_fp16 = slice_by_index(begin = var_2434_begin_0, end = var_2434_end_0, end_mask = var_2434_end_mask_0, x = var_2394_cast_fp16)[name = tensor("op_2434_cast_fp16")]; tensor var_2435_begin_0 = const()[name = tensor("op_2435_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_2435_end_0 = const()[name = tensor("op_2435_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_2435_end_mask_0 = const()[name = tensor("op_2435_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2435_cast_fp16 = slice_by_index(begin = var_2435_begin_0, end = var_2435_end_0, end_mask = var_2435_end_mask_0, x = var_2394_cast_fp16)[name = tensor("op_2435_cast_fp16")]; tensor var_2436_begin_0 = const()[name = tensor("op_2436_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_2436_end_0 = const()[name = tensor("op_2436_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_2436_end_mask_0 = const()[name = tensor("op_2436_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2436_cast_fp16 = slice_by_index(begin = var_2436_begin_0, end = var_2436_end_0, end_mask = var_2436_end_mask_0, x = var_2394_cast_fp16)[name = tensor("op_2436_cast_fp16")]; tensor var_2437_begin_0 = const()[name = tensor("op_2437_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2437_end_0 = const()[name = tensor("op_2437_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_2437_end_mask_0 = const()[name = tensor("op_2437_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2437_cast_fp16 = slice_by_index(begin = var_2437_begin_0, end = var_2437_end_0, end_mask = var_2437_end_mask_0, x = var_2398_cast_fp16)[name = tensor("op_2437_cast_fp16")]; tensor var_2438_begin_0 = const()[name = tensor("op_2438_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2438_end_0 = const()[name = tensor("op_2438_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_2438_end_mask_0 = const()[name = tensor("op_2438_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2438_cast_fp16 = slice_by_index(begin = var_2438_begin_0, end = var_2438_end_0, end_mask = var_2438_end_mask_0, x = var_2398_cast_fp16)[name = tensor("op_2438_cast_fp16")]; tensor var_2439_begin_0 = const()[name = tensor("op_2439_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2439_end_0 = const()[name = tensor("op_2439_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_2439_end_mask_0 = const()[name = tensor("op_2439_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2439_cast_fp16 = slice_by_index(begin = var_2439_begin_0, end = var_2439_end_0, end_mask = var_2439_end_mask_0, x = var_2398_cast_fp16)[name = tensor("op_2439_cast_fp16")]; tensor var_2440_begin_0 = const()[name = tensor("op_2440_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_2440_end_0 = const()[name = tensor("op_2440_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_2440_end_mask_0 = const()[name = tensor("op_2440_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2440_cast_fp16 = slice_by_index(begin = var_2440_begin_0, end = var_2440_end_0, end_mask = var_2440_end_mask_0, x = var_2398_cast_fp16)[name = tensor("op_2440_cast_fp16")]; tensor var_2441_begin_0 = const()[name = tensor("op_2441_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_2441_end_0 = const()[name = tensor("op_2441_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_2441_end_mask_0 = const()[name = tensor("op_2441_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2441_cast_fp16 = slice_by_index(begin = var_2441_begin_0, end = var_2441_end_0, end_mask = var_2441_end_mask_0, x = var_2398_cast_fp16)[name = tensor("op_2441_cast_fp16")]; tensor var_2442_begin_0 = const()[name = tensor("op_2442_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_2442_end_0 = const()[name = tensor("op_2442_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_2442_end_mask_0 = const()[name = tensor("op_2442_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2442_cast_fp16 = slice_by_index(begin = var_2442_begin_0, end = var_2442_end_0, end_mask = var_2442_end_mask_0, x = var_2398_cast_fp16)[name = tensor("op_2442_cast_fp16")]; tensor var_2443_begin_0 = const()[name = tensor("op_2443_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_2443_end_0 = const()[name = tensor("op_2443_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_2443_end_mask_0 = const()[name = tensor("op_2443_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2443_cast_fp16 = slice_by_index(begin = var_2443_begin_0, end = var_2443_end_0, end_mask = var_2443_end_mask_0, x = var_2398_cast_fp16)[name = tensor("op_2443_cast_fp16")]; tensor var_2444_begin_0 = const()[name = tensor("op_2444_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_2444_end_0 = const()[name = tensor("op_2444_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_2444_end_mask_0 = const()[name = tensor("op_2444_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2444_cast_fp16 = slice_by_index(begin = var_2444_begin_0, end = var_2444_end_0, end_mask = var_2444_end_mask_0, x = var_2398_cast_fp16)[name = tensor("op_2444_cast_fp16")]; tensor var_2445_begin_0 = const()[name = tensor("op_2445_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2445_end_0 = const()[name = tensor("op_2445_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_2445_end_mask_0 = const()[name = tensor("op_2445_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2445_cast_fp16 = slice_by_index(begin = var_2445_begin_0, end = var_2445_end_0, end_mask = var_2445_end_mask_0, x = var_2402_cast_fp16)[name = tensor("op_2445_cast_fp16")]; tensor var_2446_begin_0 = const()[name = tensor("op_2446_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2446_end_0 = const()[name = tensor("op_2446_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_2446_end_mask_0 = const()[name = tensor("op_2446_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2446_cast_fp16 = slice_by_index(begin = var_2446_begin_0, end = var_2446_end_0, end_mask = var_2446_end_mask_0, x = var_2402_cast_fp16)[name = tensor("op_2446_cast_fp16")]; tensor var_2447_begin_0 = const()[name = tensor("op_2447_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2447_end_0 = const()[name = tensor("op_2447_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_2447_end_mask_0 = const()[name = tensor("op_2447_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2447_cast_fp16 = slice_by_index(begin = var_2447_begin_0, end = var_2447_end_0, end_mask = var_2447_end_mask_0, x = var_2402_cast_fp16)[name = tensor("op_2447_cast_fp16")]; tensor var_2448_begin_0 = const()[name = tensor("op_2448_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_2448_end_0 = const()[name = tensor("op_2448_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_2448_end_mask_0 = const()[name = tensor("op_2448_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2448_cast_fp16 = slice_by_index(begin = var_2448_begin_0, end = var_2448_end_0, end_mask = var_2448_end_mask_0, x = var_2402_cast_fp16)[name = tensor("op_2448_cast_fp16")]; tensor var_2449_begin_0 = const()[name = tensor("op_2449_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_2449_end_0 = const()[name = tensor("op_2449_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_2449_end_mask_0 = const()[name = tensor("op_2449_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2449_cast_fp16 = slice_by_index(begin = var_2449_begin_0, end = var_2449_end_0, end_mask = var_2449_end_mask_0, x = var_2402_cast_fp16)[name = tensor("op_2449_cast_fp16")]; tensor var_2450_begin_0 = const()[name = tensor("op_2450_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_2450_end_0 = const()[name = tensor("op_2450_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_2450_end_mask_0 = const()[name = tensor("op_2450_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2450_cast_fp16 = slice_by_index(begin = var_2450_begin_0, end = var_2450_end_0, end_mask = var_2450_end_mask_0, x = var_2402_cast_fp16)[name = tensor("op_2450_cast_fp16")]; tensor var_2451_begin_0 = const()[name = tensor("op_2451_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_2451_end_0 = const()[name = tensor("op_2451_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_2451_end_mask_0 = const()[name = tensor("op_2451_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2451_cast_fp16 = slice_by_index(begin = var_2451_begin_0, end = var_2451_end_0, end_mask = var_2451_end_mask_0, x = var_2402_cast_fp16)[name = tensor("op_2451_cast_fp16")]; tensor var_2452_begin_0 = const()[name = tensor("op_2452_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_2452_end_0 = const()[name = tensor("op_2452_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_2452_end_mask_0 = const()[name = tensor("op_2452_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2452_cast_fp16 = slice_by_index(begin = var_2452_begin_0, end = var_2452_end_0, end_mask = var_2452_end_mask_0, x = var_2402_cast_fp16)[name = tensor("op_2452_cast_fp16")]; tensor var_2453_begin_0 = const()[name = tensor("op_2453_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2453_end_0 = const()[name = tensor("op_2453_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_2453_end_mask_0 = const()[name = tensor("op_2453_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2453_cast_fp16 = slice_by_index(begin = var_2453_begin_0, end = var_2453_end_0, end_mask = var_2453_end_mask_0, x = var_2406_cast_fp16)[name = tensor("op_2453_cast_fp16")]; tensor var_2454_begin_0 = const()[name = tensor("op_2454_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2454_end_0 = const()[name = tensor("op_2454_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_2454_end_mask_0 = const()[name = tensor("op_2454_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2454_cast_fp16 = slice_by_index(begin = var_2454_begin_0, end = var_2454_end_0, end_mask = var_2454_end_mask_0, x = var_2406_cast_fp16)[name = tensor("op_2454_cast_fp16")]; tensor var_2455_begin_0 = const()[name = tensor("op_2455_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2455_end_0 = const()[name = tensor("op_2455_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_2455_end_mask_0 = const()[name = tensor("op_2455_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2455_cast_fp16 = slice_by_index(begin = var_2455_begin_0, end = var_2455_end_0, end_mask = var_2455_end_mask_0, x = var_2406_cast_fp16)[name = tensor("op_2455_cast_fp16")]; tensor var_2456_begin_0 = const()[name = tensor("op_2456_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_2456_end_0 = const()[name = tensor("op_2456_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_2456_end_mask_0 = const()[name = tensor("op_2456_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2456_cast_fp16 = slice_by_index(begin = var_2456_begin_0, end = var_2456_end_0, end_mask = var_2456_end_mask_0, x = var_2406_cast_fp16)[name = tensor("op_2456_cast_fp16")]; tensor var_2457_begin_0 = const()[name = tensor("op_2457_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_2457_end_0 = const()[name = tensor("op_2457_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_2457_end_mask_0 = const()[name = tensor("op_2457_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2457_cast_fp16 = slice_by_index(begin = var_2457_begin_0, end = var_2457_end_0, end_mask = var_2457_end_mask_0, x = var_2406_cast_fp16)[name = tensor("op_2457_cast_fp16")]; tensor var_2458_begin_0 = const()[name = tensor("op_2458_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_2458_end_0 = const()[name = tensor("op_2458_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_2458_end_mask_0 = const()[name = tensor("op_2458_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2458_cast_fp16 = slice_by_index(begin = var_2458_begin_0, end = var_2458_end_0, end_mask = var_2458_end_mask_0, x = var_2406_cast_fp16)[name = tensor("op_2458_cast_fp16")]; tensor var_2459_begin_0 = const()[name = tensor("op_2459_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_2459_end_0 = const()[name = tensor("op_2459_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_2459_end_mask_0 = const()[name = tensor("op_2459_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2459_cast_fp16 = slice_by_index(begin = var_2459_begin_0, end = var_2459_end_0, end_mask = var_2459_end_mask_0, x = var_2406_cast_fp16)[name = tensor("op_2459_cast_fp16")]; tensor var_2460_begin_0 = const()[name = tensor("op_2460_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_2460_end_0 = const()[name = tensor("op_2460_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_2460_end_mask_0 = const()[name = tensor("op_2460_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2460_cast_fp16 = slice_by_index(begin = var_2460_begin_0, end = var_2460_end_0, end_mask = var_2460_end_mask_0, x = var_2406_cast_fp16)[name = tensor("op_2460_cast_fp16")]; tensor var_2461_begin_0 = const()[name = tensor("op_2461_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2461_end_0 = const()[name = tensor("op_2461_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_2461_end_mask_0 = const()[name = tensor("op_2461_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2461_cast_fp16 = slice_by_index(begin = var_2461_begin_0, end = var_2461_end_0, end_mask = var_2461_end_mask_0, x = var_2410_cast_fp16)[name = tensor("op_2461_cast_fp16")]; tensor var_2462_begin_0 = const()[name = tensor("op_2462_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2462_end_0 = const()[name = tensor("op_2462_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_2462_end_mask_0 = const()[name = tensor("op_2462_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2462_cast_fp16 = slice_by_index(begin = var_2462_begin_0, end = var_2462_end_0, end_mask = var_2462_end_mask_0, x = var_2410_cast_fp16)[name = tensor("op_2462_cast_fp16")]; tensor var_2463_begin_0 = const()[name = tensor("op_2463_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2463_end_0 = const()[name = tensor("op_2463_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_2463_end_mask_0 = const()[name = tensor("op_2463_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2463_cast_fp16 = slice_by_index(begin = var_2463_begin_0, end = var_2463_end_0, end_mask = var_2463_end_mask_0, x = var_2410_cast_fp16)[name = tensor("op_2463_cast_fp16")]; tensor var_2464_begin_0 = const()[name = tensor("op_2464_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_2464_end_0 = const()[name = tensor("op_2464_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_2464_end_mask_0 = const()[name = tensor("op_2464_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2464_cast_fp16 = slice_by_index(begin = var_2464_begin_0, end = var_2464_end_0, end_mask = var_2464_end_mask_0, x = var_2410_cast_fp16)[name = tensor("op_2464_cast_fp16")]; tensor var_2465_begin_0 = const()[name = tensor("op_2465_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_2465_end_0 = const()[name = tensor("op_2465_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_2465_end_mask_0 = const()[name = tensor("op_2465_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2465_cast_fp16 = slice_by_index(begin = var_2465_begin_0, end = var_2465_end_0, end_mask = var_2465_end_mask_0, x = var_2410_cast_fp16)[name = tensor("op_2465_cast_fp16")]; tensor var_2466_begin_0 = const()[name = tensor("op_2466_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_2466_end_0 = const()[name = tensor("op_2466_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_2466_end_mask_0 = const()[name = tensor("op_2466_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2466_cast_fp16 = slice_by_index(begin = var_2466_begin_0, end = var_2466_end_0, end_mask = var_2466_end_mask_0, x = var_2410_cast_fp16)[name = tensor("op_2466_cast_fp16")]; tensor var_2467_begin_0 = const()[name = tensor("op_2467_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_2467_end_0 = const()[name = tensor("op_2467_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_2467_end_mask_0 = const()[name = tensor("op_2467_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2467_cast_fp16 = slice_by_index(begin = var_2467_begin_0, end = var_2467_end_0, end_mask = var_2467_end_mask_0, x = var_2410_cast_fp16)[name = tensor("op_2467_cast_fp16")]; tensor var_2468_begin_0 = const()[name = tensor("op_2468_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_2468_end_0 = const()[name = tensor("op_2468_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_2468_end_mask_0 = const()[name = tensor("op_2468_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2468_cast_fp16 = slice_by_index(begin = var_2468_begin_0, end = var_2468_end_0, end_mask = var_2468_end_mask_0, x = var_2410_cast_fp16)[name = tensor("op_2468_cast_fp16")]; tensor var_2469_begin_0 = const()[name = tensor("op_2469_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2469_end_0 = const()[name = tensor("op_2469_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_2469_end_mask_0 = const()[name = tensor("op_2469_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2469_cast_fp16 = slice_by_index(begin = var_2469_begin_0, end = var_2469_end_0, end_mask = var_2469_end_mask_0, x = var_2414_cast_fp16)[name = tensor("op_2469_cast_fp16")]; tensor var_2470_begin_0 = const()[name = tensor("op_2470_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2470_end_0 = const()[name = tensor("op_2470_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_2470_end_mask_0 = const()[name = tensor("op_2470_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2470_cast_fp16 = slice_by_index(begin = var_2470_begin_0, end = var_2470_end_0, end_mask = var_2470_end_mask_0, x = var_2414_cast_fp16)[name = tensor("op_2470_cast_fp16")]; tensor var_2471_begin_0 = const()[name = tensor("op_2471_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2471_end_0 = const()[name = tensor("op_2471_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_2471_end_mask_0 = const()[name = tensor("op_2471_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2471_cast_fp16 = slice_by_index(begin = var_2471_begin_0, end = var_2471_end_0, end_mask = var_2471_end_mask_0, x = var_2414_cast_fp16)[name = tensor("op_2471_cast_fp16")]; tensor var_2472_begin_0 = const()[name = tensor("op_2472_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_2472_end_0 = const()[name = tensor("op_2472_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_2472_end_mask_0 = const()[name = tensor("op_2472_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2472_cast_fp16 = slice_by_index(begin = var_2472_begin_0, end = var_2472_end_0, end_mask = var_2472_end_mask_0, x = var_2414_cast_fp16)[name = tensor("op_2472_cast_fp16")]; tensor var_2473_begin_0 = const()[name = tensor("op_2473_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_2473_end_0 = const()[name = tensor("op_2473_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_2473_end_mask_0 = const()[name = tensor("op_2473_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2473_cast_fp16 = slice_by_index(begin = var_2473_begin_0, end = var_2473_end_0, end_mask = var_2473_end_mask_0, x = var_2414_cast_fp16)[name = tensor("op_2473_cast_fp16")]; tensor var_2474_begin_0 = const()[name = tensor("op_2474_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_2474_end_0 = const()[name = tensor("op_2474_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_2474_end_mask_0 = const()[name = tensor("op_2474_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2474_cast_fp16 = slice_by_index(begin = var_2474_begin_0, end = var_2474_end_0, end_mask = var_2474_end_mask_0, x = var_2414_cast_fp16)[name = tensor("op_2474_cast_fp16")]; tensor var_2475_begin_0 = const()[name = tensor("op_2475_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_2475_end_0 = const()[name = tensor("op_2475_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_2475_end_mask_0 = const()[name = tensor("op_2475_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2475_cast_fp16 = slice_by_index(begin = var_2475_begin_0, end = var_2475_end_0, end_mask = var_2475_end_mask_0, x = var_2414_cast_fp16)[name = tensor("op_2475_cast_fp16")]; tensor var_2476_begin_0 = const()[name = tensor("op_2476_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_2476_end_0 = const()[name = tensor("op_2476_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_2476_end_mask_0 = const()[name = tensor("op_2476_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2476_cast_fp16 = slice_by_index(begin = var_2476_begin_0, end = var_2476_end_0, end_mask = var_2476_end_mask_0, x = var_2414_cast_fp16)[name = tensor("op_2476_cast_fp16")]; tensor var_2477_begin_0 = const()[name = tensor("op_2477_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2477_end_0 = const()[name = tensor("op_2477_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_2477_end_mask_0 = const()[name = tensor("op_2477_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2477_cast_fp16 = slice_by_index(begin = var_2477_begin_0, end = var_2477_end_0, end_mask = var_2477_end_mask_0, x = var_2418_cast_fp16)[name = tensor("op_2477_cast_fp16")]; tensor var_2478_begin_0 = const()[name = tensor("op_2478_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2478_end_0 = const()[name = tensor("op_2478_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_2478_end_mask_0 = const()[name = tensor("op_2478_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2478_cast_fp16 = slice_by_index(begin = var_2478_begin_0, end = var_2478_end_0, end_mask = var_2478_end_mask_0, x = var_2418_cast_fp16)[name = tensor("op_2478_cast_fp16")]; tensor var_2479_begin_0 = const()[name = tensor("op_2479_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2479_end_0 = const()[name = tensor("op_2479_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_2479_end_mask_0 = const()[name = tensor("op_2479_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2479_cast_fp16 = slice_by_index(begin = var_2479_begin_0, end = var_2479_end_0, end_mask = var_2479_end_mask_0, x = var_2418_cast_fp16)[name = tensor("op_2479_cast_fp16")]; tensor var_2480_begin_0 = const()[name = tensor("op_2480_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_2480_end_0 = const()[name = tensor("op_2480_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_2480_end_mask_0 = const()[name = tensor("op_2480_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2480_cast_fp16 = slice_by_index(begin = var_2480_begin_0, end = var_2480_end_0, end_mask = var_2480_end_mask_0, x = var_2418_cast_fp16)[name = tensor("op_2480_cast_fp16")]; tensor var_2481_begin_0 = const()[name = tensor("op_2481_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_2481_end_0 = const()[name = tensor("op_2481_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_2481_end_mask_0 = const()[name = tensor("op_2481_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2481_cast_fp16 = slice_by_index(begin = var_2481_begin_0, end = var_2481_end_0, end_mask = var_2481_end_mask_0, x = var_2418_cast_fp16)[name = tensor("op_2481_cast_fp16")]; tensor var_2482_begin_0 = const()[name = tensor("op_2482_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_2482_end_0 = const()[name = tensor("op_2482_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_2482_end_mask_0 = const()[name = tensor("op_2482_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2482_cast_fp16 = slice_by_index(begin = var_2482_begin_0, end = var_2482_end_0, end_mask = var_2482_end_mask_0, x = var_2418_cast_fp16)[name = tensor("op_2482_cast_fp16")]; tensor var_2483_begin_0 = const()[name = tensor("op_2483_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_2483_end_0 = const()[name = tensor("op_2483_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_2483_end_mask_0 = const()[name = tensor("op_2483_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2483_cast_fp16 = slice_by_index(begin = var_2483_begin_0, end = var_2483_end_0, end_mask = var_2483_end_mask_0, x = var_2418_cast_fp16)[name = tensor("op_2483_cast_fp16")]; tensor var_2484_begin_0 = const()[name = tensor("op_2484_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_2484_end_0 = const()[name = tensor("op_2484_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_2484_end_mask_0 = const()[name = tensor("op_2484_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2484_cast_fp16 = slice_by_index(begin = var_2484_begin_0, end = var_2484_end_0, end_mask = var_2484_end_mask_0, x = var_2418_cast_fp16)[name = tensor("op_2484_cast_fp16")]; tensor k_15_perm_0 = const()[name = tensor("k_15_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_2489_begin_0 = const()[name = tensor("op_2489_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2489_end_0 = const()[name = tensor("op_2489_end_0"), val = tensor([2, 77, 1, 40])]; tensor var_2489_end_mask_0 = const()[name = tensor("op_2489_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_28 = transpose(perm = k_15_perm_0, x = k_13_cast_fp16)[name = tensor("transpose_28")]; tensor var_2489_cast_fp16 = slice_by_index(begin = var_2489_begin_0, end = var_2489_end_0, end_mask = var_2489_end_mask_0, x = transpose_28)[name = tensor("op_2489_cast_fp16")]; tensor var_2493_begin_0 = const()[name = tensor("op_2493_begin_0"), val = tensor([0, 0, 0, 40])]; tensor var_2493_end_0 = const()[name = tensor("op_2493_end_0"), val = tensor([2, 77, 1, 80])]; tensor var_2493_end_mask_0 = const()[name = tensor("op_2493_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2493_cast_fp16 = slice_by_index(begin = var_2493_begin_0, end = var_2493_end_0, end_mask = var_2493_end_mask_0, x = transpose_28)[name = tensor("op_2493_cast_fp16")]; tensor var_2497_begin_0 = const()[name = tensor("op_2497_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_2497_end_0 = const()[name = tensor("op_2497_end_0"), val = tensor([2, 77, 1, 120])]; tensor var_2497_end_mask_0 = const()[name = tensor("op_2497_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2497_cast_fp16 = slice_by_index(begin = var_2497_begin_0, end = var_2497_end_0, end_mask = var_2497_end_mask_0, x = transpose_28)[name = tensor("op_2497_cast_fp16")]; tensor var_2501_begin_0 = const()[name = tensor("op_2501_begin_0"), val = tensor([0, 0, 0, 120])]; tensor var_2501_end_0 = const()[name = tensor("op_2501_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_2501_end_mask_0 = const()[name = tensor("op_2501_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2501_cast_fp16 = slice_by_index(begin = var_2501_begin_0, end = var_2501_end_0, end_mask = var_2501_end_mask_0, x = transpose_28)[name = tensor("op_2501_cast_fp16")]; tensor var_2505_begin_0 = const()[name = tensor("op_2505_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_2505_end_0 = const()[name = tensor("op_2505_end_0"), val = tensor([2, 77, 1, 200])]; tensor var_2505_end_mask_0 = const()[name = tensor("op_2505_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2505_cast_fp16 = slice_by_index(begin = var_2505_begin_0, end = var_2505_end_0, end_mask = var_2505_end_mask_0, x = transpose_28)[name = tensor("op_2505_cast_fp16")]; tensor var_2509_begin_0 = const()[name = tensor("op_2509_begin_0"), val = tensor([0, 0, 0, 200])]; tensor var_2509_end_0 = const()[name = tensor("op_2509_end_0"), val = tensor([2, 77, 1, 240])]; tensor var_2509_end_mask_0 = const()[name = tensor("op_2509_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2509_cast_fp16 = slice_by_index(begin = var_2509_begin_0, end = var_2509_end_0, end_mask = var_2509_end_mask_0, x = transpose_28)[name = tensor("op_2509_cast_fp16")]; tensor var_2513_begin_0 = const()[name = tensor("op_2513_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_2513_end_0 = const()[name = tensor("op_2513_end_0"), val = tensor([2, 77, 1, 280])]; tensor var_2513_end_mask_0 = const()[name = tensor("op_2513_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2513_cast_fp16 = slice_by_index(begin = var_2513_begin_0, end = var_2513_end_0, end_mask = var_2513_end_mask_0, x = transpose_28)[name = tensor("op_2513_cast_fp16")]; tensor var_2517_begin_0 = const()[name = tensor("op_2517_begin_0"), val = tensor([0, 0, 0, 280])]; tensor var_2517_end_0 = const()[name = tensor("op_2517_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_2517_end_mask_0 = const()[name = tensor("op_2517_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2517_cast_fp16 = slice_by_index(begin = var_2517_begin_0, end = var_2517_end_0, end_mask = var_2517_end_mask_0, x = transpose_28)[name = tensor("op_2517_cast_fp16")]; tensor var_2519_begin_0 = const()[name = tensor("op_2519_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2519_end_0 = const()[name = tensor("op_2519_end_0"), val = tensor([2, 40, 1, 77])]; tensor var_2519_end_mask_0 = const()[name = tensor("op_2519_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2519_cast_fp16 = slice_by_index(begin = var_2519_begin_0, end = var_2519_end_0, end_mask = var_2519_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_2519_cast_fp16")]; tensor var_2523_begin_0 = const()[name = tensor("op_2523_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_2523_end_0 = const()[name = tensor("op_2523_end_0"), val = tensor([2, 80, 1, 77])]; tensor var_2523_end_mask_0 = const()[name = tensor("op_2523_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2523_cast_fp16 = slice_by_index(begin = var_2523_begin_0, end = var_2523_end_0, end_mask = var_2523_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_2523_cast_fp16")]; tensor var_2527_begin_0 = const()[name = tensor("op_2527_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_2527_end_0 = const()[name = tensor("op_2527_end_0"), val = tensor([2, 120, 1, 77])]; tensor var_2527_end_mask_0 = const()[name = tensor("op_2527_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2527_cast_fp16 = slice_by_index(begin = var_2527_begin_0, end = var_2527_end_0, end_mask = var_2527_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_2527_cast_fp16")]; tensor var_2531_begin_0 = const()[name = tensor("op_2531_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_2531_end_0 = const()[name = tensor("op_2531_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_2531_end_mask_0 = const()[name = tensor("op_2531_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2531_cast_fp16 = slice_by_index(begin = var_2531_begin_0, end = var_2531_end_0, end_mask = var_2531_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_2531_cast_fp16")]; tensor var_2535_begin_0 = const()[name = tensor("op_2535_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_2535_end_0 = const()[name = tensor("op_2535_end_0"), val = tensor([2, 200, 1, 77])]; tensor var_2535_end_mask_0 = const()[name = tensor("op_2535_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2535_cast_fp16 = slice_by_index(begin = var_2535_begin_0, end = var_2535_end_0, end_mask = var_2535_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_2535_cast_fp16")]; tensor var_2539_begin_0 = const()[name = tensor("op_2539_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_2539_end_0 = const()[name = tensor("op_2539_end_0"), val = tensor([2, 240, 1, 77])]; tensor var_2539_end_mask_0 = const()[name = tensor("op_2539_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2539_cast_fp16 = slice_by_index(begin = var_2539_begin_0, end = var_2539_end_0, end_mask = var_2539_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_2539_cast_fp16")]; tensor var_2543_begin_0 = const()[name = tensor("op_2543_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_2543_end_0 = const()[name = tensor("op_2543_end_0"), val = tensor([2, 280, 1, 77])]; tensor var_2543_end_mask_0 = const()[name = tensor("op_2543_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2543_cast_fp16 = slice_by_index(begin = var_2543_begin_0, end = var_2543_end_0, end_mask = var_2543_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_2543_cast_fp16")]; tensor var_2547_begin_0 = const()[name = tensor("op_2547_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_2547_end_0 = const()[name = tensor("op_2547_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_2547_end_mask_0 = const()[name = tensor("op_2547_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2547_cast_fp16 = slice_by_index(begin = var_2547_begin_0, end = var_2547_end_0, end_mask = var_2547_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_2547_cast_fp16")]; tensor var_2551_equation_0 = const()[name = tensor("op_2551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2551_cast_fp16 = einsum(equation = var_2551_equation_0, values = (var_2489_cast_fp16, var_2421_cast_fp16))[name = tensor("op_2551_cast_fp16")]; tensor var_2552_to_fp16 = const()[name = tensor("op_2552_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_385_cast_fp16 = mul(x = var_2551_cast_fp16, y = var_2552_to_fp16)[name = tensor("aw_chunk_385_cast_fp16")]; tensor var_2555_equation_0 = const()[name = tensor("op_2555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2555_cast_fp16 = einsum(equation = var_2555_equation_0, values = (var_2489_cast_fp16, var_2422_cast_fp16))[name = tensor("op_2555_cast_fp16")]; tensor var_2556_to_fp16 = const()[name = tensor("op_2556_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_387_cast_fp16 = mul(x = var_2555_cast_fp16, y = var_2556_to_fp16)[name = tensor("aw_chunk_387_cast_fp16")]; tensor var_2559_equation_0 = const()[name = tensor("op_2559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2559_cast_fp16 = einsum(equation = var_2559_equation_0, values = (var_2489_cast_fp16, var_2423_cast_fp16))[name = tensor("op_2559_cast_fp16")]; tensor var_2560_to_fp16 = const()[name = tensor("op_2560_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_389_cast_fp16 = mul(x = var_2559_cast_fp16, y = var_2560_to_fp16)[name = tensor("aw_chunk_389_cast_fp16")]; tensor var_2563_equation_0 = const()[name = tensor("op_2563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2563_cast_fp16 = einsum(equation = var_2563_equation_0, values = (var_2489_cast_fp16, var_2424_cast_fp16))[name = tensor("op_2563_cast_fp16")]; tensor var_2564_to_fp16 = const()[name = tensor("op_2564_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_391_cast_fp16 = mul(x = var_2563_cast_fp16, y = var_2564_to_fp16)[name = tensor("aw_chunk_391_cast_fp16")]; tensor var_2567_equation_0 = const()[name = tensor("op_2567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2567_cast_fp16 = einsum(equation = var_2567_equation_0, values = (var_2489_cast_fp16, var_2425_cast_fp16))[name = tensor("op_2567_cast_fp16")]; tensor var_2568_to_fp16 = const()[name = tensor("op_2568_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_393_cast_fp16 = mul(x = var_2567_cast_fp16, y = var_2568_to_fp16)[name = tensor("aw_chunk_393_cast_fp16")]; tensor var_2571_equation_0 = const()[name = tensor("op_2571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2571_cast_fp16 = einsum(equation = var_2571_equation_0, values = (var_2489_cast_fp16, var_2426_cast_fp16))[name = tensor("op_2571_cast_fp16")]; tensor var_2572_to_fp16 = const()[name = tensor("op_2572_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_395_cast_fp16 = mul(x = var_2571_cast_fp16, y = var_2572_to_fp16)[name = tensor("aw_chunk_395_cast_fp16")]; tensor var_2575_equation_0 = const()[name = tensor("op_2575_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2575_cast_fp16 = einsum(equation = var_2575_equation_0, values = (var_2489_cast_fp16, var_2427_cast_fp16))[name = tensor("op_2575_cast_fp16")]; tensor var_2576_to_fp16 = const()[name = tensor("op_2576_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_397_cast_fp16 = mul(x = var_2575_cast_fp16, y = var_2576_to_fp16)[name = tensor("aw_chunk_397_cast_fp16")]; tensor var_2579_equation_0 = const()[name = tensor("op_2579_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2579_cast_fp16 = einsum(equation = var_2579_equation_0, values = (var_2489_cast_fp16, var_2428_cast_fp16))[name = tensor("op_2579_cast_fp16")]; tensor var_2580_to_fp16 = const()[name = tensor("op_2580_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_399_cast_fp16 = mul(x = var_2579_cast_fp16, y = var_2580_to_fp16)[name = tensor("aw_chunk_399_cast_fp16")]; tensor var_2583_equation_0 = const()[name = tensor("op_2583_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2583_cast_fp16 = einsum(equation = var_2583_equation_0, values = (var_2493_cast_fp16, var_2429_cast_fp16))[name = tensor("op_2583_cast_fp16")]; tensor var_2584_to_fp16 = const()[name = tensor("op_2584_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_401_cast_fp16 = mul(x = var_2583_cast_fp16, y = var_2584_to_fp16)[name = tensor("aw_chunk_401_cast_fp16")]; tensor var_2587_equation_0 = const()[name = tensor("op_2587_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2587_cast_fp16 = einsum(equation = var_2587_equation_0, values = (var_2493_cast_fp16, var_2430_cast_fp16))[name = tensor("op_2587_cast_fp16")]; tensor var_2588_to_fp16 = const()[name = tensor("op_2588_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_403_cast_fp16 = mul(x = var_2587_cast_fp16, y = var_2588_to_fp16)[name = tensor("aw_chunk_403_cast_fp16")]; tensor var_2591_equation_0 = const()[name = tensor("op_2591_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2591_cast_fp16 = einsum(equation = var_2591_equation_0, values = (var_2493_cast_fp16, var_2431_cast_fp16))[name = tensor("op_2591_cast_fp16")]; tensor var_2592_to_fp16 = const()[name = tensor("op_2592_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_405_cast_fp16 = mul(x = var_2591_cast_fp16, y = var_2592_to_fp16)[name = tensor("aw_chunk_405_cast_fp16")]; tensor var_2595_equation_0 = const()[name = tensor("op_2595_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2595_cast_fp16 = einsum(equation = var_2595_equation_0, values = (var_2493_cast_fp16, var_2432_cast_fp16))[name = tensor("op_2595_cast_fp16")]; tensor var_2596_to_fp16 = const()[name = tensor("op_2596_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_407_cast_fp16 = mul(x = var_2595_cast_fp16, y = var_2596_to_fp16)[name = tensor("aw_chunk_407_cast_fp16")]; tensor var_2599_equation_0 = const()[name = tensor("op_2599_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2599_cast_fp16 = einsum(equation = var_2599_equation_0, values = (var_2493_cast_fp16, var_2433_cast_fp16))[name = tensor("op_2599_cast_fp16")]; tensor var_2600_to_fp16 = const()[name = tensor("op_2600_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_409_cast_fp16 = mul(x = var_2599_cast_fp16, y = var_2600_to_fp16)[name = tensor("aw_chunk_409_cast_fp16")]; tensor var_2603_equation_0 = const()[name = tensor("op_2603_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2603_cast_fp16 = einsum(equation = var_2603_equation_0, values = (var_2493_cast_fp16, var_2434_cast_fp16))[name = tensor("op_2603_cast_fp16")]; tensor var_2604_to_fp16 = const()[name = tensor("op_2604_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_411_cast_fp16 = mul(x = var_2603_cast_fp16, y = var_2604_to_fp16)[name = tensor("aw_chunk_411_cast_fp16")]; tensor var_2607_equation_0 = const()[name = tensor("op_2607_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2607_cast_fp16 = einsum(equation = var_2607_equation_0, values = (var_2493_cast_fp16, var_2435_cast_fp16))[name = tensor("op_2607_cast_fp16")]; tensor var_2608_to_fp16 = const()[name = tensor("op_2608_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_413_cast_fp16 = mul(x = var_2607_cast_fp16, y = var_2608_to_fp16)[name = tensor("aw_chunk_413_cast_fp16")]; tensor var_2611_equation_0 = const()[name = tensor("op_2611_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2611_cast_fp16 = einsum(equation = var_2611_equation_0, values = (var_2493_cast_fp16, var_2436_cast_fp16))[name = tensor("op_2611_cast_fp16")]; tensor var_2612_to_fp16 = const()[name = tensor("op_2612_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_415_cast_fp16 = mul(x = var_2611_cast_fp16, y = var_2612_to_fp16)[name = tensor("aw_chunk_415_cast_fp16")]; tensor var_2615_equation_0 = const()[name = tensor("op_2615_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2615_cast_fp16 = einsum(equation = var_2615_equation_0, values = (var_2497_cast_fp16, var_2437_cast_fp16))[name = tensor("op_2615_cast_fp16")]; tensor var_2616_to_fp16 = const()[name = tensor("op_2616_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_417_cast_fp16 = mul(x = var_2615_cast_fp16, y = var_2616_to_fp16)[name = tensor("aw_chunk_417_cast_fp16")]; tensor var_2619_equation_0 = const()[name = tensor("op_2619_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2619_cast_fp16 = einsum(equation = var_2619_equation_0, values = (var_2497_cast_fp16, var_2438_cast_fp16))[name = tensor("op_2619_cast_fp16")]; tensor var_2620_to_fp16 = const()[name = tensor("op_2620_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_419_cast_fp16 = mul(x = var_2619_cast_fp16, y = var_2620_to_fp16)[name = tensor("aw_chunk_419_cast_fp16")]; tensor var_2623_equation_0 = const()[name = tensor("op_2623_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2623_cast_fp16 = einsum(equation = var_2623_equation_0, values = (var_2497_cast_fp16, var_2439_cast_fp16))[name = tensor("op_2623_cast_fp16")]; tensor var_2624_to_fp16 = const()[name = tensor("op_2624_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_421_cast_fp16 = mul(x = var_2623_cast_fp16, y = var_2624_to_fp16)[name = tensor("aw_chunk_421_cast_fp16")]; tensor var_2627_equation_0 = const()[name = tensor("op_2627_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2627_cast_fp16 = einsum(equation = var_2627_equation_0, values = (var_2497_cast_fp16, var_2440_cast_fp16))[name = tensor("op_2627_cast_fp16")]; tensor var_2628_to_fp16 = const()[name = tensor("op_2628_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_423_cast_fp16 = mul(x = var_2627_cast_fp16, y = var_2628_to_fp16)[name = tensor("aw_chunk_423_cast_fp16")]; tensor var_2631_equation_0 = const()[name = tensor("op_2631_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2631_cast_fp16 = einsum(equation = var_2631_equation_0, values = (var_2497_cast_fp16, var_2441_cast_fp16))[name = tensor("op_2631_cast_fp16")]; tensor var_2632_to_fp16 = const()[name = tensor("op_2632_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_425_cast_fp16 = mul(x = var_2631_cast_fp16, y = var_2632_to_fp16)[name = tensor("aw_chunk_425_cast_fp16")]; tensor var_2635_equation_0 = const()[name = tensor("op_2635_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2635_cast_fp16 = einsum(equation = var_2635_equation_0, values = (var_2497_cast_fp16, var_2442_cast_fp16))[name = tensor("op_2635_cast_fp16")]; tensor var_2636_to_fp16 = const()[name = tensor("op_2636_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_427_cast_fp16 = mul(x = var_2635_cast_fp16, y = var_2636_to_fp16)[name = tensor("aw_chunk_427_cast_fp16")]; tensor var_2639_equation_0 = const()[name = tensor("op_2639_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2639_cast_fp16 = einsum(equation = var_2639_equation_0, values = (var_2497_cast_fp16, var_2443_cast_fp16))[name = tensor("op_2639_cast_fp16")]; tensor var_2640_to_fp16 = const()[name = tensor("op_2640_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_429_cast_fp16 = mul(x = var_2639_cast_fp16, y = var_2640_to_fp16)[name = tensor("aw_chunk_429_cast_fp16")]; tensor var_2643_equation_0 = const()[name = tensor("op_2643_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2643_cast_fp16 = einsum(equation = var_2643_equation_0, values = (var_2497_cast_fp16, var_2444_cast_fp16))[name = tensor("op_2643_cast_fp16")]; tensor var_2644_to_fp16 = const()[name = tensor("op_2644_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_431_cast_fp16 = mul(x = var_2643_cast_fp16, y = var_2644_to_fp16)[name = tensor("aw_chunk_431_cast_fp16")]; tensor var_2647_equation_0 = const()[name = tensor("op_2647_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2647_cast_fp16 = einsum(equation = var_2647_equation_0, values = (var_2501_cast_fp16, var_2445_cast_fp16))[name = tensor("op_2647_cast_fp16")]; tensor var_2648_to_fp16 = const()[name = tensor("op_2648_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_433_cast_fp16 = mul(x = var_2647_cast_fp16, y = var_2648_to_fp16)[name = tensor("aw_chunk_433_cast_fp16")]; tensor var_2651_equation_0 = const()[name = tensor("op_2651_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2651_cast_fp16 = einsum(equation = var_2651_equation_0, values = (var_2501_cast_fp16, var_2446_cast_fp16))[name = tensor("op_2651_cast_fp16")]; tensor var_2652_to_fp16 = const()[name = tensor("op_2652_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_435_cast_fp16 = mul(x = var_2651_cast_fp16, y = var_2652_to_fp16)[name = tensor("aw_chunk_435_cast_fp16")]; tensor var_2655_equation_0 = const()[name = tensor("op_2655_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2655_cast_fp16 = einsum(equation = var_2655_equation_0, values = (var_2501_cast_fp16, var_2447_cast_fp16))[name = tensor("op_2655_cast_fp16")]; tensor var_2656_to_fp16 = const()[name = tensor("op_2656_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_437_cast_fp16 = mul(x = var_2655_cast_fp16, y = var_2656_to_fp16)[name = tensor("aw_chunk_437_cast_fp16")]; tensor var_2659_equation_0 = const()[name = tensor("op_2659_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2659_cast_fp16 = einsum(equation = var_2659_equation_0, values = (var_2501_cast_fp16, var_2448_cast_fp16))[name = tensor("op_2659_cast_fp16")]; tensor var_2660_to_fp16 = const()[name = tensor("op_2660_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_439_cast_fp16 = mul(x = var_2659_cast_fp16, y = var_2660_to_fp16)[name = tensor("aw_chunk_439_cast_fp16")]; tensor var_2663_equation_0 = const()[name = tensor("op_2663_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2663_cast_fp16 = einsum(equation = var_2663_equation_0, values = (var_2501_cast_fp16, var_2449_cast_fp16))[name = tensor("op_2663_cast_fp16")]; tensor var_2664_to_fp16 = const()[name = tensor("op_2664_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_441_cast_fp16 = mul(x = var_2663_cast_fp16, y = var_2664_to_fp16)[name = tensor("aw_chunk_441_cast_fp16")]; tensor var_2667_equation_0 = const()[name = tensor("op_2667_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2667_cast_fp16 = einsum(equation = var_2667_equation_0, values = (var_2501_cast_fp16, var_2450_cast_fp16))[name = tensor("op_2667_cast_fp16")]; tensor var_2668_to_fp16 = const()[name = tensor("op_2668_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_443_cast_fp16 = mul(x = var_2667_cast_fp16, y = var_2668_to_fp16)[name = tensor("aw_chunk_443_cast_fp16")]; tensor var_2671_equation_0 = const()[name = tensor("op_2671_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2671_cast_fp16 = einsum(equation = var_2671_equation_0, values = (var_2501_cast_fp16, var_2451_cast_fp16))[name = tensor("op_2671_cast_fp16")]; tensor var_2672_to_fp16 = const()[name = tensor("op_2672_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_445_cast_fp16 = mul(x = var_2671_cast_fp16, y = var_2672_to_fp16)[name = tensor("aw_chunk_445_cast_fp16")]; tensor var_2675_equation_0 = const()[name = tensor("op_2675_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2675_cast_fp16 = einsum(equation = var_2675_equation_0, values = (var_2501_cast_fp16, var_2452_cast_fp16))[name = tensor("op_2675_cast_fp16")]; tensor var_2676_to_fp16 = const()[name = tensor("op_2676_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_447_cast_fp16 = mul(x = var_2675_cast_fp16, y = var_2676_to_fp16)[name = tensor("aw_chunk_447_cast_fp16")]; tensor var_2679_equation_0 = const()[name = tensor("op_2679_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2679_cast_fp16 = einsum(equation = var_2679_equation_0, values = (var_2505_cast_fp16, var_2453_cast_fp16))[name = tensor("op_2679_cast_fp16")]; tensor var_2680_to_fp16 = const()[name = tensor("op_2680_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_449_cast_fp16 = mul(x = var_2679_cast_fp16, y = var_2680_to_fp16)[name = tensor("aw_chunk_449_cast_fp16")]; tensor var_2683_equation_0 = const()[name = tensor("op_2683_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2683_cast_fp16 = einsum(equation = var_2683_equation_0, values = (var_2505_cast_fp16, var_2454_cast_fp16))[name = tensor("op_2683_cast_fp16")]; tensor var_2684_to_fp16 = const()[name = tensor("op_2684_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_451_cast_fp16 = mul(x = var_2683_cast_fp16, y = var_2684_to_fp16)[name = tensor("aw_chunk_451_cast_fp16")]; tensor var_2687_equation_0 = const()[name = tensor("op_2687_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2687_cast_fp16 = einsum(equation = var_2687_equation_0, values = (var_2505_cast_fp16, var_2455_cast_fp16))[name = tensor("op_2687_cast_fp16")]; tensor var_2688_to_fp16 = const()[name = tensor("op_2688_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_453_cast_fp16 = mul(x = var_2687_cast_fp16, y = var_2688_to_fp16)[name = tensor("aw_chunk_453_cast_fp16")]; tensor var_2691_equation_0 = const()[name = tensor("op_2691_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2691_cast_fp16 = einsum(equation = var_2691_equation_0, values = (var_2505_cast_fp16, var_2456_cast_fp16))[name = tensor("op_2691_cast_fp16")]; tensor var_2692_to_fp16 = const()[name = tensor("op_2692_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_455_cast_fp16 = mul(x = var_2691_cast_fp16, y = var_2692_to_fp16)[name = tensor("aw_chunk_455_cast_fp16")]; tensor var_2695_equation_0 = const()[name = tensor("op_2695_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2695_cast_fp16 = einsum(equation = var_2695_equation_0, values = (var_2505_cast_fp16, var_2457_cast_fp16))[name = tensor("op_2695_cast_fp16")]; tensor var_2696_to_fp16 = const()[name = tensor("op_2696_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_457_cast_fp16 = mul(x = var_2695_cast_fp16, y = var_2696_to_fp16)[name = tensor("aw_chunk_457_cast_fp16")]; tensor var_2699_equation_0 = const()[name = tensor("op_2699_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2699_cast_fp16 = einsum(equation = var_2699_equation_0, values = (var_2505_cast_fp16, var_2458_cast_fp16))[name = tensor("op_2699_cast_fp16")]; tensor var_2700_to_fp16 = const()[name = tensor("op_2700_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_459_cast_fp16 = mul(x = var_2699_cast_fp16, y = var_2700_to_fp16)[name = tensor("aw_chunk_459_cast_fp16")]; tensor var_2703_equation_0 = const()[name = tensor("op_2703_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2703_cast_fp16 = einsum(equation = var_2703_equation_0, values = (var_2505_cast_fp16, var_2459_cast_fp16))[name = tensor("op_2703_cast_fp16")]; tensor var_2704_to_fp16 = const()[name = tensor("op_2704_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_461_cast_fp16 = mul(x = var_2703_cast_fp16, y = var_2704_to_fp16)[name = tensor("aw_chunk_461_cast_fp16")]; tensor var_2707_equation_0 = const()[name = tensor("op_2707_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2707_cast_fp16 = einsum(equation = var_2707_equation_0, values = (var_2505_cast_fp16, var_2460_cast_fp16))[name = tensor("op_2707_cast_fp16")]; tensor var_2708_to_fp16 = const()[name = tensor("op_2708_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_463_cast_fp16 = mul(x = var_2707_cast_fp16, y = var_2708_to_fp16)[name = tensor("aw_chunk_463_cast_fp16")]; tensor var_2711_equation_0 = const()[name = tensor("op_2711_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2711_cast_fp16 = einsum(equation = var_2711_equation_0, values = (var_2509_cast_fp16, var_2461_cast_fp16))[name = tensor("op_2711_cast_fp16")]; tensor var_2712_to_fp16 = const()[name = tensor("op_2712_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_465_cast_fp16 = mul(x = var_2711_cast_fp16, y = var_2712_to_fp16)[name = tensor("aw_chunk_465_cast_fp16")]; tensor var_2715_equation_0 = const()[name = tensor("op_2715_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2715_cast_fp16 = einsum(equation = var_2715_equation_0, values = (var_2509_cast_fp16, var_2462_cast_fp16))[name = tensor("op_2715_cast_fp16")]; tensor var_2716_to_fp16 = const()[name = tensor("op_2716_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_467_cast_fp16 = mul(x = var_2715_cast_fp16, y = var_2716_to_fp16)[name = tensor("aw_chunk_467_cast_fp16")]; tensor var_2719_equation_0 = const()[name = tensor("op_2719_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2719_cast_fp16 = einsum(equation = var_2719_equation_0, values = (var_2509_cast_fp16, var_2463_cast_fp16))[name = tensor("op_2719_cast_fp16")]; tensor var_2720_to_fp16 = const()[name = tensor("op_2720_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_469_cast_fp16 = mul(x = var_2719_cast_fp16, y = var_2720_to_fp16)[name = tensor("aw_chunk_469_cast_fp16")]; tensor var_2723_equation_0 = const()[name = tensor("op_2723_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2723_cast_fp16 = einsum(equation = var_2723_equation_0, values = (var_2509_cast_fp16, var_2464_cast_fp16))[name = tensor("op_2723_cast_fp16")]; tensor var_2724_to_fp16 = const()[name = tensor("op_2724_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_471_cast_fp16 = mul(x = var_2723_cast_fp16, y = var_2724_to_fp16)[name = tensor("aw_chunk_471_cast_fp16")]; tensor var_2727_equation_0 = const()[name = tensor("op_2727_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2727_cast_fp16 = einsum(equation = var_2727_equation_0, values = (var_2509_cast_fp16, var_2465_cast_fp16))[name = tensor("op_2727_cast_fp16")]; tensor var_2728_to_fp16 = const()[name = tensor("op_2728_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_473_cast_fp16 = mul(x = var_2727_cast_fp16, y = var_2728_to_fp16)[name = tensor("aw_chunk_473_cast_fp16")]; tensor var_2731_equation_0 = const()[name = tensor("op_2731_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2731_cast_fp16 = einsum(equation = var_2731_equation_0, values = (var_2509_cast_fp16, var_2466_cast_fp16))[name = tensor("op_2731_cast_fp16")]; tensor var_2732_to_fp16 = const()[name = tensor("op_2732_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_475_cast_fp16 = mul(x = var_2731_cast_fp16, y = var_2732_to_fp16)[name = tensor("aw_chunk_475_cast_fp16")]; tensor var_2735_equation_0 = const()[name = tensor("op_2735_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2735_cast_fp16 = einsum(equation = var_2735_equation_0, values = (var_2509_cast_fp16, var_2467_cast_fp16))[name = tensor("op_2735_cast_fp16")]; tensor var_2736_to_fp16 = const()[name = tensor("op_2736_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_477_cast_fp16 = mul(x = var_2735_cast_fp16, y = var_2736_to_fp16)[name = tensor("aw_chunk_477_cast_fp16")]; tensor var_2739_equation_0 = const()[name = tensor("op_2739_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2739_cast_fp16 = einsum(equation = var_2739_equation_0, values = (var_2509_cast_fp16, var_2468_cast_fp16))[name = tensor("op_2739_cast_fp16")]; tensor var_2740_to_fp16 = const()[name = tensor("op_2740_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_479_cast_fp16 = mul(x = var_2739_cast_fp16, y = var_2740_to_fp16)[name = tensor("aw_chunk_479_cast_fp16")]; tensor var_2743_equation_0 = const()[name = tensor("op_2743_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2743_cast_fp16 = einsum(equation = var_2743_equation_0, values = (var_2513_cast_fp16, var_2469_cast_fp16))[name = tensor("op_2743_cast_fp16")]; tensor var_2744_to_fp16 = const()[name = tensor("op_2744_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_481_cast_fp16 = mul(x = var_2743_cast_fp16, y = var_2744_to_fp16)[name = tensor("aw_chunk_481_cast_fp16")]; tensor var_2747_equation_0 = const()[name = tensor("op_2747_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2747_cast_fp16 = einsum(equation = var_2747_equation_0, values = (var_2513_cast_fp16, var_2470_cast_fp16))[name = tensor("op_2747_cast_fp16")]; tensor var_2748_to_fp16 = const()[name = tensor("op_2748_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_483_cast_fp16 = mul(x = var_2747_cast_fp16, y = var_2748_to_fp16)[name = tensor("aw_chunk_483_cast_fp16")]; tensor var_2751_equation_0 = const()[name = tensor("op_2751_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2751_cast_fp16 = einsum(equation = var_2751_equation_0, values = (var_2513_cast_fp16, var_2471_cast_fp16))[name = tensor("op_2751_cast_fp16")]; tensor var_2752_to_fp16 = const()[name = tensor("op_2752_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_485_cast_fp16 = mul(x = var_2751_cast_fp16, y = var_2752_to_fp16)[name = tensor("aw_chunk_485_cast_fp16")]; tensor var_2755_equation_0 = const()[name = tensor("op_2755_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2755_cast_fp16 = einsum(equation = var_2755_equation_0, values = (var_2513_cast_fp16, var_2472_cast_fp16))[name = tensor("op_2755_cast_fp16")]; tensor var_2756_to_fp16 = const()[name = tensor("op_2756_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_487_cast_fp16 = mul(x = var_2755_cast_fp16, y = var_2756_to_fp16)[name = tensor("aw_chunk_487_cast_fp16")]; tensor var_2759_equation_0 = const()[name = tensor("op_2759_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2759_cast_fp16 = einsum(equation = var_2759_equation_0, values = (var_2513_cast_fp16, var_2473_cast_fp16))[name = tensor("op_2759_cast_fp16")]; tensor var_2760_to_fp16 = const()[name = tensor("op_2760_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_489_cast_fp16 = mul(x = var_2759_cast_fp16, y = var_2760_to_fp16)[name = tensor("aw_chunk_489_cast_fp16")]; tensor var_2763_equation_0 = const()[name = tensor("op_2763_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2763_cast_fp16 = einsum(equation = var_2763_equation_0, values = (var_2513_cast_fp16, var_2474_cast_fp16))[name = tensor("op_2763_cast_fp16")]; tensor var_2764_to_fp16 = const()[name = tensor("op_2764_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_491_cast_fp16 = mul(x = var_2763_cast_fp16, y = var_2764_to_fp16)[name = tensor("aw_chunk_491_cast_fp16")]; tensor var_2767_equation_0 = const()[name = tensor("op_2767_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2767_cast_fp16 = einsum(equation = var_2767_equation_0, values = (var_2513_cast_fp16, var_2475_cast_fp16))[name = tensor("op_2767_cast_fp16")]; tensor var_2768_to_fp16 = const()[name = tensor("op_2768_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_493_cast_fp16 = mul(x = var_2767_cast_fp16, y = var_2768_to_fp16)[name = tensor("aw_chunk_493_cast_fp16")]; tensor var_2771_equation_0 = const()[name = tensor("op_2771_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2771_cast_fp16 = einsum(equation = var_2771_equation_0, values = (var_2513_cast_fp16, var_2476_cast_fp16))[name = tensor("op_2771_cast_fp16")]; tensor var_2772_to_fp16 = const()[name = tensor("op_2772_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_495_cast_fp16 = mul(x = var_2771_cast_fp16, y = var_2772_to_fp16)[name = tensor("aw_chunk_495_cast_fp16")]; tensor var_2775_equation_0 = const()[name = tensor("op_2775_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2775_cast_fp16 = einsum(equation = var_2775_equation_0, values = (var_2517_cast_fp16, var_2477_cast_fp16))[name = tensor("op_2775_cast_fp16")]; tensor var_2776_to_fp16 = const()[name = tensor("op_2776_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_497_cast_fp16 = mul(x = var_2775_cast_fp16, y = var_2776_to_fp16)[name = tensor("aw_chunk_497_cast_fp16")]; tensor var_2779_equation_0 = const()[name = tensor("op_2779_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2779_cast_fp16 = einsum(equation = var_2779_equation_0, values = (var_2517_cast_fp16, var_2478_cast_fp16))[name = tensor("op_2779_cast_fp16")]; tensor var_2780_to_fp16 = const()[name = tensor("op_2780_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_499_cast_fp16 = mul(x = var_2779_cast_fp16, y = var_2780_to_fp16)[name = tensor("aw_chunk_499_cast_fp16")]; tensor var_2783_equation_0 = const()[name = tensor("op_2783_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2783_cast_fp16 = einsum(equation = var_2783_equation_0, values = (var_2517_cast_fp16, var_2479_cast_fp16))[name = tensor("op_2783_cast_fp16")]; tensor var_2784_to_fp16 = const()[name = tensor("op_2784_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_501_cast_fp16 = mul(x = var_2783_cast_fp16, y = var_2784_to_fp16)[name = tensor("aw_chunk_501_cast_fp16")]; tensor var_2787_equation_0 = const()[name = tensor("op_2787_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2787_cast_fp16 = einsum(equation = var_2787_equation_0, values = (var_2517_cast_fp16, var_2480_cast_fp16))[name = tensor("op_2787_cast_fp16")]; tensor var_2788_to_fp16 = const()[name = tensor("op_2788_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_503_cast_fp16 = mul(x = var_2787_cast_fp16, y = var_2788_to_fp16)[name = tensor("aw_chunk_503_cast_fp16")]; tensor var_2791_equation_0 = const()[name = tensor("op_2791_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2791_cast_fp16 = einsum(equation = var_2791_equation_0, values = (var_2517_cast_fp16, var_2481_cast_fp16))[name = tensor("op_2791_cast_fp16")]; tensor var_2792_to_fp16 = const()[name = tensor("op_2792_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_505_cast_fp16 = mul(x = var_2791_cast_fp16, y = var_2792_to_fp16)[name = tensor("aw_chunk_505_cast_fp16")]; tensor var_2795_equation_0 = const()[name = tensor("op_2795_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2795_cast_fp16 = einsum(equation = var_2795_equation_0, values = (var_2517_cast_fp16, var_2482_cast_fp16))[name = tensor("op_2795_cast_fp16")]; tensor var_2796_to_fp16 = const()[name = tensor("op_2796_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_507_cast_fp16 = mul(x = var_2795_cast_fp16, y = var_2796_to_fp16)[name = tensor("aw_chunk_507_cast_fp16")]; tensor var_2799_equation_0 = const()[name = tensor("op_2799_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2799_cast_fp16 = einsum(equation = var_2799_equation_0, values = (var_2517_cast_fp16, var_2483_cast_fp16))[name = tensor("op_2799_cast_fp16")]; tensor var_2800_to_fp16 = const()[name = tensor("op_2800_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_509_cast_fp16 = mul(x = var_2799_cast_fp16, y = var_2800_to_fp16)[name = tensor("aw_chunk_509_cast_fp16")]; tensor var_2803_equation_0 = const()[name = tensor("op_2803_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_2803_cast_fp16 = einsum(equation = var_2803_equation_0, values = (var_2517_cast_fp16, var_2484_cast_fp16))[name = tensor("op_2803_cast_fp16")]; tensor var_2804_to_fp16 = const()[name = tensor("op_2804_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_511_cast_fp16 = mul(x = var_2803_cast_fp16, y = var_2804_to_fp16)[name = tensor("aw_chunk_511_cast_fp16")]; tensor var_2806_cast_fp16 = softmax(axis = var_123, x = aw_chunk_385_cast_fp16)[name = tensor("op_2806_cast_fp16")]; tensor var_2807_cast_fp16 = softmax(axis = var_123, x = aw_chunk_387_cast_fp16)[name = tensor("op_2807_cast_fp16")]; tensor var_2808_cast_fp16 = softmax(axis = var_123, x = aw_chunk_389_cast_fp16)[name = tensor("op_2808_cast_fp16")]; tensor var_2809_cast_fp16 = softmax(axis = var_123, x = aw_chunk_391_cast_fp16)[name = tensor("op_2809_cast_fp16")]; tensor var_2810_cast_fp16 = softmax(axis = var_123, x = aw_chunk_393_cast_fp16)[name = tensor("op_2810_cast_fp16")]; tensor var_2811_cast_fp16 = softmax(axis = var_123, x = aw_chunk_395_cast_fp16)[name = tensor("op_2811_cast_fp16")]; tensor var_2812_cast_fp16 = softmax(axis = var_123, x = aw_chunk_397_cast_fp16)[name = tensor("op_2812_cast_fp16")]; tensor var_2813_cast_fp16 = softmax(axis = var_123, x = aw_chunk_399_cast_fp16)[name = tensor("op_2813_cast_fp16")]; tensor var_2814_cast_fp16 = softmax(axis = var_123, x = aw_chunk_401_cast_fp16)[name = tensor("op_2814_cast_fp16")]; tensor var_2815_cast_fp16 = softmax(axis = var_123, x = aw_chunk_403_cast_fp16)[name = tensor("op_2815_cast_fp16")]; tensor var_2816_cast_fp16 = softmax(axis = var_123, x = aw_chunk_405_cast_fp16)[name = tensor("op_2816_cast_fp16")]; tensor var_2817_cast_fp16 = softmax(axis = var_123, x = aw_chunk_407_cast_fp16)[name = tensor("op_2817_cast_fp16")]; tensor var_2818_cast_fp16 = softmax(axis = var_123, x = aw_chunk_409_cast_fp16)[name = tensor("op_2818_cast_fp16")]; tensor var_2819_cast_fp16 = softmax(axis = var_123, x = aw_chunk_411_cast_fp16)[name = tensor("op_2819_cast_fp16")]; tensor var_2820_cast_fp16 = softmax(axis = var_123, x = aw_chunk_413_cast_fp16)[name = tensor("op_2820_cast_fp16")]; tensor var_2821_cast_fp16 = softmax(axis = var_123, x = aw_chunk_415_cast_fp16)[name = tensor("op_2821_cast_fp16")]; tensor var_2822_cast_fp16 = softmax(axis = var_123, x = aw_chunk_417_cast_fp16)[name = tensor("op_2822_cast_fp16")]; tensor var_2823_cast_fp16 = softmax(axis = var_123, x = aw_chunk_419_cast_fp16)[name = tensor("op_2823_cast_fp16")]; tensor var_2824_cast_fp16 = softmax(axis = var_123, x = aw_chunk_421_cast_fp16)[name = tensor("op_2824_cast_fp16")]; tensor var_2825_cast_fp16 = softmax(axis = var_123, x = aw_chunk_423_cast_fp16)[name = tensor("op_2825_cast_fp16")]; tensor var_2826_cast_fp16 = softmax(axis = var_123, x = aw_chunk_425_cast_fp16)[name = tensor("op_2826_cast_fp16")]; tensor var_2827_cast_fp16 = softmax(axis = var_123, x = aw_chunk_427_cast_fp16)[name = tensor("op_2827_cast_fp16")]; tensor var_2828_cast_fp16 = softmax(axis = var_123, x = aw_chunk_429_cast_fp16)[name = tensor("op_2828_cast_fp16")]; tensor var_2829_cast_fp16 = softmax(axis = var_123, x = aw_chunk_431_cast_fp16)[name = tensor("op_2829_cast_fp16")]; tensor var_2830_cast_fp16 = softmax(axis = var_123, x = aw_chunk_433_cast_fp16)[name = tensor("op_2830_cast_fp16")]; tensor var_2831_cast_fp16 = softmax(axis = var_123, x = aw_chunk_435_cast_fp16)[name = tensor("op_2831_cast_fp16")]; tensor var_2832_cast_fp16 = softmax(axis = var_123, x = aw_chunk_437_cast_fp16)[name = tensor("op_2832_cast_fp16")]; tensor var_2833_cast_fp16 = softmax(axis = var_123, x = aw_chunk_439_cast_fp16)[name = tensor("op_2833_cast_fp16")]; tensor var_2834_cast_fp16 = softmax(axis = var_123, x = aw_chunk_441_cast_fp16)[name = tensor("op_2834_cast_fp16")]; tensor var_2835_cast_fp16 = softmax(axis = var_123, x = aw_chunk_443_cast_fp16)[name = tensor("op_2835_cast_fp16")]; tensor var_2836_cast_fp16 = softmax(axis = var_123, x = aw_chunk_445_cast_fp16)[name = tensor("op_2836_cast_fp16")]; tensor var_2837_cast_fp16 = softmax(axis = var_123, x = aw_chunk_447_cast_fp16)[name = tensor("op_2837_cast_fp16")]; tensor var_2838_cast_fp16 = softmax(axis = var_123, x = aw_chunk_449_cast_fp16)[name = tensor("op_2838_cast_fp16")]; tensor var_2839_cast_fp16 = softmax(axis = var_123, x = aw_chunk_451_cast_fp16)[name = tensor("op_2839_cast_fp16")]; tensor var_2840_cast_fp16 = softmax(axis = var_123, x = aw_chunk_453_cast_fp16)[name = tensor("op_2840_cast_fp16")]; tensor var_2841_cast_fp16 = softmax(axis = var_123, x = aw_chunk_455_cast_fp16)[name = tensor("op_2841_cast_fp16")]; tensor var_2842_cast_fp16 = softmax(axis = var_123, x = aw_chunk_457_cast_fp16)[name = tensor("op_2842_cast_fp16")]; tensor var_2843_cast_fp16 = softmax(axis = var_123, x = aw_chunk_459_cast_fp16)[name = tensor("op_2843_cast_fp16")]; tensor var_2844_cast_fp16 = softmax(axis = var_123, x = aw_chunk_461_cast_fp16)[name = tensor("op_2844_cast_fp16")]; tensor var_2845_cast_fp16 = softmax(axis = var_123, x = aw_chunk_463_cast_fp16)[name = tensor("op_2845_cast_fp16")]; tensor var_2846_cast_fp16 = softmax(axis = var_123, x = aw_chunk_465_cast_fp16)[name = tensor("op_2846_cast_fp16")]; tensor var_2847_cast_fp16 = softmax(axis = var_123, x = aw_chunk_467_cast_fp16)[name = tensor("op_2847_cast_fp16")]; tensor var_2848_cast_fp16 = softmax(axis = var_123, x = aw_chunk_469_cast_fp16)[name = tensor("op_2848_cast_fp16")]; tensor var_2849_cast_fp16 = softmax(axis = var_123, x = aw_chunk_471_cast_fp16)[name = tensor("op_2849_cast_fp16")]; tensor var_2850_cast_fp16 = softmax(axis = var_123, x = aw_chunk_473_cast_fp16)[name = tensor("op_2850_cast_fp16")]; tensor var_2851_cast_fp16 = softmax(axis = var_123, x = aw_chunk_475_cast_fp16)[name = tensor("op_2851_cast_fp16")]; tensor var_2852_cast_fp16 = softmax(axis = var_123, x = aw_chunk_477_cast_fp16)[name = tensor("op_2852_cast_fp16")]; tensor var_2853_cast_fp16 = softmax(axis = var_123, x = aw_chunk_479_cast_fp16)[name = tensor("op_2853_cast_fp16")]; tensor var_2854_cast_fp16 = softmax(axis = var_123, x = aw_chunk_481_cast_fp16)[name = tensor("op_2854_cast_fp16")]; tensor var_2855_cast_fp16 = softmax(axis = var_123, x = aw_chunk_483_cast_fp16)[name = tensor("op_2855_cast_fp16")]; tensor var_2856_cast_fp16 = softmax(axis = var_123, x = aw_chunk_485_cast_fp16)[name = tensor("op_2856_cast_fp16")]; tensor var_2857_cast_fp16 = softmax(axis = var_123, x = aw_chunk_487_cast_fp16)[name = tensor("op_2857_cast_fp16")]; tensor var_2858_cast_fp16 = softmax(axis = var_123, x = aw_chunk_489_cast_fp16)[name = tensor("op_2858_cast_fp16")]; tensor var_2859_cast_fp16 = softmax(axis = var_123, x = aw_chunk_491_cast_fp16)[name = tensor("op_2859_cast_fp16")]; tensor var_2860_cast_fp16 = softmax(axis = var_123, x = aw_chunk_493_cast_fp16)[name = tensor("op_2860_cast_fp16")]; tensor var_2861_cast_fp16 = softmax(axis = var_123, x = aw_chunk_495_cast_fp16)[name = tensor("op_2861_cast_fp16")]; tensor var_2862_cast_fp16 = softmax(axis = var_123, x = aw_chunk_497_cast_fp16)[name = tensor("op_2862_cast_fp16")]; tensor var_2863_cast_fp16 = softmax(axis = var_123, x = aw_chunk_499_cast_fp16)[name = tensor("op_2863_cast_fp16")]; tensor var_2864_cast_fp16 = softmax(axis = var_123, x = aw_chunk_501_cast_fp16)[name = tensor("op_2864_cast_fp16")]; tensor var_2865_cast_fp16 = softmax(axis = var_123, x = aw_chunk_503_cast_fp16)[name = tensor("op_2865_cast_fp16")]; tensor var_2866_cast_fp16 = softmax(axis = var_123, x = aw_chunk_505_cast_fp16)[name = tensor("op_2866_cast_fp16")]; tensor var_2867_cast_fp16 = softmax(axis = var_123, x = aw_chunk_507_cast_fp16)[name = tensor("op_2867_cast_fp16")]; tensor var_2868_cast_fp16 = softmax(axis = var_123, x = aw_chunk_509_cast_fp16)[name = tensor("op_2868_cast_fp16")]; tensor var_2869_cast_fp16 = softmax(axis = var_123, x = aw_chunk_511_cast_fp16)[name = tensor("op_2869_cast_fp16")]; tensor var_2871_equation_0 = const()[name = tensor("op_2871_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2871_cast_fp16 = einsum(equation = var_2871_equation_0, values = (var_2519_cast_fp16, var_2806_cast_fp16))[name = tensor("op_2871_cast_fp16")]; tensor var_2873_equation_0 = const()[name = tensor("op_2873_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2873_cast_fp16 = einsum(equation = var_2873_equation_0, values = (var_2519_cast_fp16, var_2807_cast_fp16))[name = tensor("op_2873_cast_fp16")]; tensor var_2875_equation_0 = const()[name = tensor("op_2875_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2875_cast_fp16 = einsum(equation = var_2875_equation_0, values = (var_2519_cast_fp16, var_2808_cast_fp16))[name = tensor("op_2875_cast_fp16")]; tensor var_2877_equation_0 = const()[name = tensor("op_2877_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2877_cast_fp16 = einsum(equation = var_2877_equation_0, values = (var_2519_cast_fp16, var_2809_cast_fp16))[name = tensor("op_2877_cast_fp16")]; tensor var_2879_equation_0 = const()[name = tensor("op_2879_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2879_cast_fp16 = einsum(equation = var_2879_equation_0, values = (var_2519_cast_fp16, var_2810_cast_fp16))[name = tensor("op_2879_cast_fp16")]; tensor var_2881_equation_0 = const()[name = tensor("op_2881_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2881_cast_fp16 = einsum(equation = var_2881_equation_0, values = (var_2519_cast_fp16, var_2811_cast_fp16))[name = tensor("op_2881_cast_fp16")]; tensor var_2883_equation_0 = const()[name = tensor("op_2883_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2883_cast_fp16 = einsum(equation = var_2883_equation_0, values = (var_2519_cast_fp16, var_2812_cast_fp16))[name = tensor("op_2883_cast_fp16")]; tensor var_2885_equation_0 = const()[name = tensor("op_2885_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2885_cast_fp16 = einsum(equation = var_2885_equation_0, values = (var_2519_cast_fp16, var_2813_cast_fp16))[name = tensor("op_2885_cast_fp16")]; tensor var_2887_equation_0 = const()[name = tensor("op_2887_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2887_cast_fp16 = einsum(equation = var_2887_equation_0, values = (var_2523_cast_fp16, var_2814_cast_fp16))[name = tensor("op_2887_cast_fp16")]; tensor var_2889_equation_0 = const()[name = tensor("op_2889_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2889_cast_fp16 = einsum(equation = var_2889_equation_0, values = (var_2523_cast_fp16, var_2815_cast_fp16))[name = tensor("op_2889_cast_fp16")]; tensor var_2891_equation_0 = const()[name = tensor("op_2891_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2891_cast_fp16 = einsum(equation = var_2891_equation_0, values = (var_2523_cast_fp16, var_2816_cast_fp16))[name = tensor("op_2891_cast_fp16")]; tensor var_2893_equation_0 = const()[name = tensor("op_2893_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2893_cast_fp16 = einsum(equation = var_2893_equation_0, values = (var_2523_cast_fp16, var_2817_cast_fp16))[name = tensor("op_2893_cast_fp16")]; tensor var_2895_equation_0 = const()[name = tensor("op_2895_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2895_cast_fp16 = einsum(equation = var_2895_equation_0, values = (var_2523_cast_fp16, var_2818_cast_fp16))[name = tensor("op_2895_cast_fp16")]; tensor var_2897_equation_0 = const()[name = tensor("op_2897_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2897_cast_fp16 = einsum(equation = var_2897_equation_0, values = (var_2523_cast_fp16, var_2819_cast_fp16))[name = tensor("op_2897_cast_fp16")]; tensor var_2899_equation_0 = const()[name = tensor("op_2899_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2899_cast_fp16 = einsum(equation = var_2899_equation_0, values = (var_2523_cast_fp16, var_2820_cast_fp16))[name = tensor("op_2899_cast_fp16")]; tensor var_2901_equation_0 = const()[name = tensor("op_2901_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2901_cast_fp16 = einsum(equation = var_2901_equation_0, values = (var_2523_cast_fp16, var_2821_cast_fp16))[name = tensor("op_2901_cast_fp16")]; tensor var_2903_equation_0 = const()[name = tensor("op_2903_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2903_cast_fp16 = einsum(equation = var_2903_equation_0, values = (var_2527_cast_fp16, var_2822_cast_fp16))[name = tensor("op_2903_cast_fp16")]; tensor var_2905_equation_0 = const()[name = tensor("op_2905_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2905_cast_fp16 = einsum(equation = var_2905_equation_0, values = (var_2527_cast_fp16, var_2823_cast_fp16))[name = tensor("op_2905_cast_fp16")]; tensor var_2907_equation_0 = const()[name = tensor("op_2907_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2907_cast_fp16 = einsum(equation = var_2907_equation_0, values = (var_2527_cast_fp16, var_2824_cast_fp16))[name = tensor("op_2907_cast_fp16")]; tensor var_2909_equation_0 = const()[name = tensor("op_2909_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2909_cast_fp16 = einsum(equation = var_2909_equation_0, values = (var_2527_cast_fp16, var_2825_cast_fp16))[name = tensor("op_2909_cast_fp16")]; tensor var_2911_equation_0 = const()[name = tensor("op_2911_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2911_cast_fp16 = einsum(equation = var_2911_equation_0, values = (var_2527_cast_fp16, var_2826_cast_fp16))[name = tensor("op_2911_cast_fp16")]; tensor var_2913_equation_0 = const()[name = tensor("op_2913_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2913_cast_fp16 = einsum(equation = var_2913_equation_0, values = (var_2527_cast_fp16, var_2827_cast_fp16))[name = tensor("op_2913_cast_fp16")]; tensor var_2915_equation_0 = const()[name = tensor("op_2915_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2915_cast_fp16 = einsum(equation = var_2915_equation_0, values = (var_2527_cast_fp16, var_2828_cast_fp16))[name = tensor("op_2915_cast_fp16")]; tensor var_2917_equation_0 = const()[name = tensor("op_2917_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2917_cast_fp16 = einsum(equation = var_2917_equation_0, values = (var_2527_cast_fp16, var_2829_cast_fp16))[name = tensor("op_2917_cast_fp16")]; tensor var_2919_equation_0 = const()[name = tensor("op_2919_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2919_cast_fp16 = einsum(equation = var_2919_equation_0, values = (var_2531_cast_fp16, var_2830_cast_fp16))[name = tensor("op_2919_cast_fp16")]; tensor var_2921_equation_0 = const()[name = tensor("op_2921_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2921_cast_fp16 = einsum(equation = var_2921_equation_0, values = (var_2531_cast_fp16, var_2831_cast_fp16))[name = tensor("op_2921_cast_fp16")]; tensor var_2923_equation_0 = const()[name = tensor("op_2923_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2923_cast_fp16 = einsum(equation = var_2923_equation_0, values = (var_2531_cast_fp16, var_2832_cast_fp16))[name = tensor("op_2923_cast_fp16")]; tensor var_2925_equation_0 = const()[name = tensor("op_2925_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2925_cast_fp16 = einsum(equation = var_2925_equation_0, values = (var_2531_cast_fp16, var_2833_cast_fp16))[name = tensor("op_2925_cast_fp16")]; tensor var_2927_equation_0 = const()[name = tensor("op_2927_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2927_cast_fp16 = einsum(equation = var_2927_equation_0, values = (var_2531_cast_fp16, var_2834_cast_fp16))[name = tensor("op_2927_cast_fp16")]; tensor var_2929_equation_0 = const()[name = tensor("op_2929_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2929_cast_fp16 = einsum(equation = var_2929_equation_0, values = (var_2531_cast_fp16, var_2835_cast_fp16))[name = tensor("op_2929_cast_fp16")]; tensor var_2931_equation_0 = const()[name = tensor("op_2931_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2931_cast_fp16 = einsum(equation = var_2931_equation_0, values = (var_2531_cast_fp16, var_2836_cast_fp16))[name = tensor("op_2931_cast_fp16")]; tensor var_2933_equation_0 = const()[name = tensor("op_2933_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2933_cast_fp16 = einsum(equation = var_2933_equation_0, values = (var_2531_cast_fp16, var_2837_cast_fp16))[name = tensor("op_2933_cast_fp16")]; tensor var_2935_equation_0 = const()[name = tensor("op_2935_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2935_cast_fp16 = einsum(equation = var_2935_equation_0, values = (var_2535_cast_fp16, var_2838_cast_fp16))[name = tensor("op_2935_cast_fp16")]; tensor var_2937_equation_0 = const()[name = tensor("op_2937_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2937_cast_fp16 = einsum(equation = var_2937_equation_0, values = (var_2535_cast_fp16, var_2839_cast_fp16))[name = tensor("op_2937_cast_fp16")]; tensor var_2939_equation_0 = const()[name = tensor("op_2939_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2939_cast_fp16 = einsum(equation = var_2939_equation_0, values = (var_2535_cast_fp16, var_2840_cast_fp16))[name = tensor("op_2939_cast_fp16")]; tensor var_2941_equation_0 = const()[name = tensor("op_2941_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2941_cast_fp16 = einsum(equation = var_2941_equation_0, values = (var_2535_cast_fp16, var_2841_cast_fp16))[name = tensor("op_2941_cast_fp16")]; tensor var_2943_equation_0 = const()[name = tensor("op_2943_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2943_cast_fp16 = einsum(equation = var_2943_equation_0, values = (var_2535_cast_fp16, var_2842_cast_fp16))[name = tensor("op_2943_cast_fp16")]; tensor var_2945_equation_0 = const()[name = tensor("op_2945_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2945_cast_fp16 = einsum(equation = var_2945_equation_0, values = (var_2535_cast_fp16, var_2843_cast_fp16))[name = tensor("op_2945_cast_fp16")]; tensor var_2947_equation_0 = const()[name = tensor("op_2947_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2947_cast_fp16 = einsum(equation = var_2947_equation_0, values = (var_2535_cast_fp16, var_2844_cast_fp16))[name = tensor("op_2947_cast_fp16")]; tensor var_2949_equation_0 = const()[name = tensor("op_2949_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2949_cast_fp16 = einsum(equation = var_2949_equation_0, values = (var_2535_cast_fp16, var_2845_cast_fp16))[name = tensor("op_2949_cast_fp16")]; tensor var_2951_equation_0 = const()[name = tensor("op_2951_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2951_cast_fp16 = einsum(equation = var_2951_equation_0, values = (var_2539_cast_fp16, var_2846_cast_fp16))[name = tensor("op_2951_cast_fp16")]; tensor var_2953_equation_0 = const()[name = tensor("op_2953_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2953_cast_fp16 = einsum(equation = var_2953_equation_0, values = (var_2539_cast_fp16, var_2847_cast_fp16))[name = tensor("op_2953_cast_fp16")]; tensor var_2955_equation_0 = const()[name = tensor("op_2955_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2955_cast_fp16 = einsum(equation = var_2955_equation_0, values = (var_2539_cast_fp16, var_2848_cast_fp16))[name = tensor("op_2955_cast_fp16")]; tensor var_2957_equation_0 = const()[name = tensor("op_2957_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2957_cast_fp16 = einsum(equation = var_2957_equation_0, values = (var_2539_cast_fp16, var_2849_cast_fp16))[name = tensor("op_2957_cast_fp16")]; tensor var_2959_equation_0 = const()[name = tensor("op_2959_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2959_cast_fp16 = einsum(equation = var_2959_equation_0, values = (var_2539_cast_fp16, var_2850_cast_fp16))[name = tensor("op_2959_cast_fp16")]; tensor var_2961_equation_0 = const()[name = tensor("op_2961_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2961_cast_fp16 = einsum(equation = var_2961_equation_0, values = (var_2539_cast_fp16, var_2851_cast_fp16))[name = tensor("op_2961_cast_fp16")]; tensor var_2963_equation_0 = const()[name = tensor("op_2963_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2963_cast_fp16 = einsum(equation = var_2963_equation_0, values = (var_2539_cast_fp16, var_2852_cast_fp16))[name = tensor("op_2963_cast_fp16")]; tensor var_2965_equation_0 = const()[name = tensor("op_2965_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2965_cast_fp16 = einsum(equation = var_2965_equation_0, values = (var_2539_cast_fp16, var_2853_cast_fp16))[name = tensor("op_2965_cast_fp16")]; tensor var_2967_equation_0 = const()[name = tensor("op_2967_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2967_cast_fp16 = einsum(equation = var_2967_equation_0, values = (var_2543_cast_fp16, var_2854_cast_fp16))[name = tensor("op_2967_cast_fp16")]; tensor var_2969_equation_0 = const()[name = tensor("op_2969_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2969_cast_fp16 = einsum(equation = var_2969_equation_0, values = (var_2543_cast_fp16, var_2855_cast_fp16))[name = tensor("op_2969_cast_fp16")]; tensor var_2971_equation_0 = const()[name = tensor("op_2971_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2971_cast_fp16 = einsum(equation = var_2971_equation_0, values = (var_2543_cast_fp16, var_2856_cast_fp16))[name = tensor("op_2971_cast_fp16")]; tensor var_2973_equation_0 = const()[name = tensor("op_2973_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2973_cast_fp16 = einsum(equation = var_2973_equation_0, values = (var_2543_cast_fp16, var_2857_cast_fp16))[name = tensor("op_2973_cast_fp16")]; tensor var_2975_equation_0 = const()[name = tensor("op_2975_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2975_cast_fp16 = einsum(equation = var_2975_equation_0, values = (var_2543_cast_fp16, var_2858_cast_fp16))[name = tensor("op_2975_cast_fp16")]; tensor var_2977_equation_0 = const()[name = tensor("op_2977_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2977_cast_fp16 = einsum(equation = var_2977_equation_0, values = (var_2543_cast_fp16, var_2859_cast_fp16))[name = tensor("op_2977_cast_fp16")]; tensor var_2979_equation_0 = const()[name = tensor("op_2979_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2979_cast_fp16 = einsum(equation = var_2979_equation_0, values = (var_2543_cast_fp16, var_2860_cast_fp16))[name = tensor("op_2979_cast_fp16")]; tensor var_2981_equation_0 = const()[name = tensor("op_2981_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2981_cast_fp16 = einsum(equation = var_2981_equation_0, values = (var_2543_cast_fp16, var_2861_cast_fp16))[name = tensor("op_2981_cast_fp16")]; tensor var_2983_equation_0 = const()[name = tensor("op_2983_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2983_cast_fp16 = einsum(equation = var_2983_equation_0, values = (var_2547_cast_fp16, var_2862_cast_fp16))[name = tensor("op_2983_cast_fp16")]; tensor var_2985_equation_0 = const()[name = tensor("op_2985_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2985_cast_fp16 = einsum(equation = var_2985_equation_0, values = (var_2547_cast_fp16, var_2863_cast_fp16))[name = tensor("op_2985_cast_fp16")]; tensor var_2987_equation_0 = const()[name = tensor("op_2987_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2987_cast_fp16 = einsum(equation = var_2987_equation_0, values = (var_2547_cast_fp16, var_2864_cast_fp16))[name = tensor("op_2987_cast_fp16")]; tensor var_2989_equation_0 = const()[name = tensor("op_2989_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2989_cast_fp16 = einsum(equation = var_2989_equation_0, values = (var_2547_cast_fp16, var_2865_cast_fp16))[name = tensor("op_2989_cast_fp16")]; tensor var_2991_equation_0 = const()[name = tensor("op_2991_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2991_cast_fp16 = einsum(equation = var_2991_equation_0, values = (var_2547_cast_fp16, var_2866_cast_fp16))[name = tensor("op_2991_cast_fp16")]; tensor var_2993_equation_0 = const()[name = tensor("op_2993_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2993_cast_fp16 = einsum(equation = var_2993_equation_0, values = (var_2547_cast_fp16, var_2867_cast_fp16))[name = tensor("op_2993_cast_fp16")]; tensor var_2995_equation_0 = const()[name = tensor("op_2995_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2995_cast_fp16 = einsum(equation = var_2995_equation_0, values = (var_2547_cast_fp16, var_2868_cast_fp16))[name = tensor("op_2995_cast_fp16")]; tensor var_2997_equation_0 = const()[name = tensor("op_2997_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_2997_cast_fp16 = einsum(equation = var_2997_equation_0, values = (var_2547_cast_fp16, var_2869_cast_fp16))[name = tensor("op_2997_cast_fp16")]; tensor var_2999_interleave_0 = const()[name = tensor("op_2999_interleave_0"), val = tensor(false)]; tensor var_2999_cast_fp16 = concat(axis = var_95, interleave = var_2999_interleave_0, values = (var_2871_cast_fp16, var_2873_cast_fp16, var_2875_cast_fp16, var_2877_cast_fp16, var_2879_cast_fp16, var_2881_cast_fp16, var_2883_cast_fp16, var_2885_cast_fp16))[name = tensor("op_2999_cast_fp16")]; tensor var_3001_interleave_0 = const()[name = tensor("op_3001_interleave_0"), val = tensor(false)]; tensor var_3001_cast_fp16 = concat(axis = var_95, interleave = var_3001_interleave_0, values = (var_2887_cast_fp16, var_2889_cast_fp16, var_2891_cast_fp16, var_2893_cast_fp16, var_2895_cast_fp16, var_2897_cast_fp16, var_2899_cast_fp16, var_2901_cast_fp16))[name = tensor("op_3001_cast_fp16")]; tensor var_3003_interleave_0 = const()[name = tensor("op_3003_interleave_0"), val = tensor(false)]; tensor var_3003_cast_fp16 = concat(axis = var_95, interleave = var_3003_interleave_0, values = (var_2903_cast_fp16, var_2905_cast_fp16, var_2907_cast_fp16, var_2909_cast_fp16, var_2911_cast_fp16, var_2913_cast_fp16, var_2915_cast_fp16, var_2917_cast_fp16))[name = tensor("op_3003_cast_fp16")]; tensor var_3005_interleave_0 = const()[name = tensor("op_3005_interleave_0"), val = tensor(false)]; tensor var_3005_cast_fp16 = concat(axis = var_95, interleave = var_3005_interleave_0, values = (var_2919_cast_fp16, var_2921_cast_fp16, var_2923_cast_fp16, var_2925_cast_fp16, var_2927_cast_fp16, var_2929_cast_fp16, var_2931_cast_fp16, var_2933_cast_fp16))[name = tensor("op_3005_cast_fp16")]; tensor var_3007_interleave_0 = const()[name = tensor("op_3007_interleave_0"), val = tensor(false)]; tensor var_3007_cast_fp16 = concat(axis = var_95, interleave = var_3007_interleave_0, values = (var_2935_cast_fp16, var_2937_cast_fp16, var_2939_cast_fp16, var_2941_cast_fp16, var_2943_cast_fp16, var_2945_cast_fp16, var_2947_cast_fp16, var_2949_cast_fp16))[name = tensor("op_3007_cast_fp16")]; tensor var_3009_interleave_0 = const()[name = tensor("op_3009_interleave_0"), val = tensor(false)]; tensor var_3009_cast_fp16 = concat(axis = var_95, interleave = var_3009_interleave_0, values = (var_2951_cast_fp16, var_2953_cast_fp16, var_2955_cast_fp16, var_2957_cast_fp16, var_2959_cast_fp16, var_2961_cast_fp16, var_2963_cast_fp16, var_2965_cast_fp16))[name = tensor("op_3009_cast_fp16")]; tensor var_3011_interleave_0 = const()[name = tensor("op_3011_interleave_0"), val = tensor(false)]; tensor var_3011_cast_fp16 = concat(axis = var_95, interleave = var_3011_interleave_0, values = (var_2967_cast_fp16, var_2969_cast_fp16, var_2971_cast_fp16, var_2973_cast_fp16, var_2975_cast_fp16, var_2977_cast_fp16, var_2979_cast_fp16, var_2981_cast_fp16))[name = tensor("op_3011_cast_fp16")]; tensor var_3013_interleave_0 = const()[name = tensor("op_3013_interleave_0"), val = tensor(false)]; tensor var_3013_cast_fp16 = concat(axis = var_95, interleave = var_3013_interleave_0, values = (var_2983_cast_fp16, var_2985_cast_fp16, var_2987_cast_fp16, var_2989_cast_fp16, var_2991_cast_fp16, var_2993_cast_fp16, var_2995_cast_fp16, var_2997_cast_fp16))[name = tensor("op_3013_cast_fp16")]; tensor input_53_interleave_0 = const()[name = tensor("input_53_interleave_0"), val = tensor(false)]; tensor input_53_cast_fp16 = concat(axis = var_123, interleave = input_53_interleave_0, values = (var_2999_cast_fp16, var_3001_cast_fp16, var_3003_cast_fp16, var_3005_cast_fp16, var_3007_cast_fp16, var_3009_cast_fp16, var_3011_cast_fp16, var_3013_cast_fp16))[name = tensor("input_53_cast_fp16")]; tensor var_3019 = const()[name = tensor("op_3019"), val = tensor([1, 1])]; tensor var_3021 = const()[name = tensor("op_3021"), val = tensor([1, 1])]; tensor var_3023_pad_type_0 = const()[name = tensor("op_3023_pad_type_0"), val = tensor("custom")]; tensor var_3023_pad_0 = const()[name = tensor("op_3023_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20460608)))]; tensor down_blocks_0_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20665472)))]; tensor var_3023_cast_fp16 = conv(bias = down_blocks_0_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_3021, groups = var_123, pad = var_3023_pad_0, pad_type = var_3023_pad_type_0, strides = var_3019, weight = down_blocks_0_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_53_cast_fp16)[name = tensor("op_3023_cast_fp16")]; tensor inputs_11_cast_fp16 = add(x = var_3023_cast_fp16, y = inputs_9_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; tensor input_55_axes_0 = const()[name = tensor("input_55_axes_0"), val = tensor([1])]; tensor input_55_gamma_0_to_fp16 = const()[name = tensor("input_55_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20666176)))]; tensor input_55_beta_0_to_fp16 = const()[name = tensor("input_55_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20666880)))]; tensor var_3033_to_fp16 = const()[name = tensor("op_3033_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_55_cast_fp16 = layer_norm(axes = input_55_axes_0, beta = input_55_beta_0_to_fp16, epsilon = var_3033_to_fp16, gamma = input_55_gamma_0_to_fp16, x = inputs_11_cast_fp16)[name = tensor("input_55_cast_fp16")]; tensor var_3049 = const()[name = tensor("op_3049"), val = tensor([1, 1])]; tensor var_3051 = const()[name = tensor("op_3051"), val = tensor([1, 1])]; tensor var_3053_pad_type_0 = const()[name = tensor("op_3053_pad_type_0"), val = tensor("custom")]; tensor var_3053_pad_0 = const()[name = tensor("op_3053_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20667584)))]; tensor down_blocks_0_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22306048)))]; tensor var_3053_cast_fp16 = conv(bias = down_blocks_0_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_3051, groups = var_123, pad = var_3053_pad_0, pad_type = var_3053_pad_type_0, strides = var_3049, weight = down_blocks_0_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_55_cast_fp16)[name = tensor("op_3053_cast_fp16")]; tensor var_3054_split_sizes_0 = const()[name = tensor("op_3054_split_sizes_0"), val = tensor([1280, 1280])]; tensor var_3054_axis_0 = const()[name = tensor("op_3054_axis_0"), val = tensor(1)]; tensor var_3054_cast_fp16_0, tensor var_3054_cast_fp16_1 = split(axis = var_3054_axis_0, split_sizes = var_3054_split_sizes_0, x = var_3053_cast_fp16)[name = tensor("op_3054_cast_fp16")]; tensor var_3056_mode_0 = const()[name = tensor("op_3056_mode_0"), val = tensor("EXACT")]; tensor var_3056_cast_fp16 = gelu(mode = var_3056_mode_0, x = var_3054_cast_fp16_1)[name = tensor("op_3056_cast_fp16")]; tensor input_57_cast_fp16 = mul(x = var_3054_cast_fp16_0, y = var_3056_cast_fp16)[name = tensor("input_57_cast_fp16")]; tensor var_3060 = const()[name = tensor("op_3060"), val = tensor([1, 1])]; tensor var_3062 = const()[name = tensor("op_3062"), val = tensor([1, 1])]; tensor var_3064_pad_type_0 = const()[name = tensor("op_3064_pad_type_0"), val = tensor("custom")]; tensor var_3064_pad_0 = const()[name = tensor("op_3064_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22311232)))]; tensor down_blocks_0_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23130496)))]; tensor var_3064_cast_fp16 = conv(bias = down_blocks_0_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_3062, groups = var_123, pad = var_3064_pad_0, pad_type = var_3064_pad_type_0, strides = var_3060, weight = down_blocks_0_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_57_cast_fp16)[name = tensor("op_3064_cast_fp16")]; tensor hidden_states_33_cast_fp16 = add(x = var_3064_cast_fp16, y = inputs_11_cast_fp16)[name = tensor("hidden_states_33_cast_fp16")]; tensor var_3066 = const()[name = tensor("op_3066"), val = tensor([2, 320, 64, 64])]; tensor input_59_cast_fp16 = reshape(shape = var_3066, x = hidden_states_33_cast_fp16)[name = tensor("input_59_cast_fp16")]; tensor var_3070 = const()[name = tensor("op_3070"), val = tensor([1, 1])]; tensor var_3072 = const()[name = tensor("op_3072"), val = tensor([1, 1])]; tensor hidden_states_35_pad_type_0 = const()[name = tensor("hidden_states_35_pad_type_0"), val = tensor("custom")]; tensor hidden_states_35_pad_0 = const()[name = tensor("hidden_states_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_0_attentions_1_proj_out_weight_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23131200)))]; tensor down_blocks_0_attentions_1_proj_out_bias_to_fp16 = const()[name = tensor("down_blocks_0_attentions_1_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23336064)))]; tensor hidden_states_35_cast_fp16 = conv(bias = down_blocks_0_attentions_1_proj_out_bias_to_fp16, dilations = var_3072, groups = var_123, pad = hidden_states_35_pad_0, pad_type = hidden_states_35_pad_type_0, strides = var_3070, weight = down_blocks_0_attentions_1_proj_out_weight_to_fp16, x = input_59_cast_fp16)[name = tensor("hidden_states_35_cast_fp16")]; tensor input_61_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = hidden_states_23_cast_fp16)[name = tensor("input_61_cast_fp16")]; tensor var_3079 = const()[name = tensor("op_3079"), val = tensor([2, 2])]; tensor var_3081 = const()[name = tensor("op_3081"), val = tensor([1, 1])]; tensor input_63_pad_type_0 = const()[name = tensor("input_63_pad_type_0"), val = tensor("custom")]; tensor input_63_pad_0 = const()[name = tensor("input_63_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_0_downsamplers_0_conv_weight_to_fp16 = const()[name = tensor("down_blocks_0_downsamplers_0_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23336768)))]; tensor down_blocks_0_downsamplers_0_conv_bias_to_fp16 = const()[name = tensor("down_blocks_0_downsamplers_0_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25180032)))]; tensor input_63_cast_fp16 = conv(bias = down_blocks_0_downsamplers_0_conv_bias_to_fp16, dilations = var_3081, groups = var_123, pad = input_63_pad_0, pad_type = input_63_pad_type_0, strides = var_3079, weight = down_blocks_0_downsamplers_0_conv_weight_to_fp16, x = input_61_cast_fp16)[name = tensor("input_63_cast_fp16")]; tensor var_3089 = const()[name = tensor("op_3089"), val = tensor(3)]; tensor var_3111 = const()[name = tensor("op_3111"), val = tensor(1)]; tensor reshape_24_shape_0 = const()[name = tensor("reshape_24_shape_0"), val = tensor([2, 32, 10, 32, 32])]; tensor reshape_24_cast_fp16 = reshape(shape = reshape_24_shape_0, x = input_63_cast_fp16)[name = tensor("reshape_24_cast_fp16")]; tensor reduce_mean_18_axes_0 = const()[name = tensor("reduce_mean_18_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_18_keep_dims_0 = const()[name = tensor("reduce_mean_18_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_18_cast_fp16 = reduce_mean(axes = reduce_mean_18_axes_0, keep_dims = reduce_mean_18_keep_dims_0, x = reshape_24_cast_fp16)[name = tensor("reduce_mean_18_cast_fp16")]; tensor sub_12_cast_fp16 = sub(x = reshape_24_cast_fp16, y = reduce_mean_18_cast_fp16)[name = tensor("sub_12_cast_fp16")]; tensor square_6_cast_fp16 = square(x = sub_12_cast_fp16)[name = tensor("square_6_cast_fp16")]; tensor reduce_mean_20_axes_0 = const()[name = tensor("reduce_mean_20_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_20_keep_dims_0 = const()[name = tensor("reduce_mean_20_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_20_cast_fp16 = reduce_mean(axes = reduce_mean_20_axes_0, keep_dims = reduce_mean_20_keep_dims_0, x = square_6_cast_fp16)[name = tensor("reduce_mean_20_cast_fp16")]; tensor add_12_y_0_to_fp16 = const()[name = tensor("add_12_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_12_cast_fp16 = add(x = reduce_mean_20_cast_fp16, y = add_12_y_0_to_fp16)[name = tensor("add_12_cast_fp16")]; tensor sqrt_6_cast_fp16 = sqrt(x = add_12_cast_fp16)[name = tensor("sqrt_6_cast_fp16")]; tensor real_div_6_cast_fp16 = real_div(x = sub_12_cast_fp16, y = sqrt_6_cast_fp16)[name = tensor("real_div_6_cast_fp16")]; tensor reshape_25_shape_0 = const()[name = tensor("reshape_25_shape_0"), val = tensor([2, 320, 32, 32])]; tensor reshape_25_cast_fp16 = reshape(shape = reshape_25_shape_0, x = real_div_6_cast_fp16)[name = tensor("reshape_25_cast_fp16")]; tensor add_13_gamma_0_to_fp16 = const()[name = tensor("add_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25180736)))]; tensor add_13_beta_0_to_fp16 = const()[name = tensor("add_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25181440)))]; tensor add_13_epsilon_0_to_fp16 = const()[name = tensor("add_13_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_13_cast_fp16 = batch_norm(beta = add_13_beta_0_to_fp16, epsilon = add_13_epsilon_0_to_fp16, gamma = add_13_gamma_0_to_fp16, mean = add_1_mean_0_to_fp16, variance = add_1_variance_0_to_fp16, x = reshape_25_cast_fp16)[name = tensor("add_13_cast_fp16")]; tensor input_67_cast_fp16 = silu(x = add_13_cast_fp16)[name = tensor("input_67_cast_fp16")]; tensor var_3134 = const()[name = tensor("op_3134"), val = tensor([1, 1])]; tensor var_3136 = const()[name = tensor("op_3136"), val = tensor([1, 1])]; tensor hidden_states_37_pad_type_0 = const()[name = tensor("hidden_states_37_pad_type_0"), val = tensor("custom")]; tensor hidden_states_37_pad_0 = const()[name = tensor("hidden_states_37_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_1_resnets_0_conv1_weight_to_fp16 = const()[name = tensor("down_blocks_1_resnets_0_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25182144)))]; tensor down_blocks_1_resnets_0_conv1_bias_to_fp16 = const()[name = tensor("down_blocks_1_resnets_0_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28868608)))]; tensor hidden_states_37_cast_fp16 = conv(bias = down_blocks_1_resnets_0_conv1_bias_to_fp16, dilations = var_3136, groups = var_3111, pad = hidden_states_37_pad_0, pad_type = hidden_states_37_pad_type_0, strides = var_3134, weight = down_blocks_1_resnets_0_conv1_weight_to_fp16, x = input_67_cast_fp16)[name = tensor("hidden_states_37_cast_fp16")]; tensor var_3142 = const()[name = tensor("op_3142"), val = tensor([1, 1])]; tensor var_3144 = const()[name = tensor("op_3144"), val = tensor([1, 1])]; tensor temb_5_pad_type_0 = const()[name = tensor("temb_5_pad_type_0"), val = tensor("custom")]; tensor temb_5_pad_0 = const()[name = tensor("temb_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_resnets_0_time_emb_proj_weight_to_fp16 = const()[name = tensor("down_blocks_1_resnets_0_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(28869952)))]; tensor down_blocks_1_resnets_0_time_emb_proj_bias_to_fp16 = const()[name = tensor("down_blocks_1_resnets_0_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30508416)))]; tensor temb_5_cast_fp16 = conv(bias = down_blocks_1_resnets_0_time_emb_proj_bias_to_fp16, dilations = var_3144, groups = var_3111, pad = temb_5_pad_0, pad_type = temb_5_pad_type_0, strides = var_3142, weight = down_blocks_1_resnets_0_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_5_cast_fp16")]; tensor input_71_cast_fp16 = add(x = hidden_states_37_cast_fp16, y = temb_5_cast_fp16)[name = tensor("input_71_cast_fp16")]; tensor reshape_28_shape_0 = const()[name = tensor("reshape_28_shape_0"), val = tensor([2, 32, 20, 32, 32])]; tensor reshape_28_cast_fp16 = reshape(shape = reshape_28_shape_0, x = input_71_cast_fp16)[name = tensor("reshape_28_cast_fp16")]; tensor reduce_mean_21_axes_0 = const()[name = tensor("reduce_mean_21_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_21_keep_dims_0 = const()[name = tensor("reduce_mean_21_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_21_cast_fp16 = reduce_mean(axes = reduce_mean_21_axes_0, keep_dims = reduce_mean_21_keep_dims_0, x = reshape_28_cast_fp16)[name = tensor("reduce_mean_21_cast_fp16")]; tensor sub_14_cast_fp16 = sub(x = reshape_28_cast_fp16, y = reduce_mean_21_cast_fp16)[name = tensor("sub_14_cast_fp16")]; tensor square_7_cast_fp16 = square(x = sub_14_cast_fp16)[name = tensor("square_7_cast_fp16")]; tensor reduce_mean_23_axes_0 = const()[name = tensor("reduce_mean_23_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_23_keep_dims_0 = const()[name = tensor("reduce_mean_23_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_23_cast_fp16 = reduce_mean(axes = reduce_mean_23_axes_0, keep_dims = reduce_mean_23_keep_dims_0, x = square_7_cast_fp16)[name = tensor("reduce_mean_23_cast_fp16")]; tensor add_14_y_0_to_fp16 = const()[name = tensor("add_14_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_14_cast_fp16 = add(x = reduce_mean_23_cast_fp16, y = add_14_y_0_to_fp16)[name = tensor("add_14_cast_fp16")]; tensor sqrt_7_cast_fp16 = sqrt(x = add_14_cast_fp16)[name = tensor("sqrt_7_cast_fp16")]; tensor real_div_7_cast_fp16 = real_div(x = sub_14_cast_fp16, y = sqrt_7_cast_fp16)[name = tensor("real_div_7_cast_fp16")]; tensor reshape_29_shape_0 = const()[name = tensor("reshape_29_shape_0"), val = tensor([2, 640, 32, 32])]; tensor reshape_29_cast_fp16 = reshape(shape = reshape_29_shape_0, x = real_div_7_cast_fp16)[name = tensor("reshape_29_cast_fp16")]; tensor add_15_mean_0_to_fp16 = const()[name = tensor("add_15_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30509760)))]; tensor add_15_variance_0_to_fp16 = const()[name = tensor("add_15_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30511104)))]; tensor add_15_gamma_0_to_fp16 = const()[name = tensor("add_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30512448)))]; tensor add_15_beta_0_to_fp16 = const()[name = tensor("add_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30513792)))]; tensor add_15_epsilon_0_to_fp16 = const()[name = tensor("add_15_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_15_cast_fp16 = batch_norm(beta = add_15_beta_0_to_fp16, epsilon = add_15_epsilon_0_to_fp16, gamma = add_15_gamma_0_to_fp16, mean = add_15_mean_0_to_fp16, variance = add_15_variance_0_to_fp16, x = reshape_29_cast_fp16)[name = tensor("add_15_cast_fp16")]; tensor input_75_cast_fp16 = silu(x = add_15_cast_fp16)[name = tensor("input_75_cast_fp16")]; tensor var_3154 = const()[name = tensor("op_3154"), val = tensor([1, 1])]; tensor var_3156 = const()[name = tensor("op_3156"), val = tensor([1, 1])]; tensor hidden_states_39_pad_type_0 = const()[name = tensor("hidden_states_39_pad_type_0"), val = tensor("custom")]; tensor hidden_states_39_pad_0 = const()[name = tensor("hidden_states_39_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_1_resnets_0_conv2_weight_to_fp16 = const()[name = tensor("down_blocks_1_resnets_0_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30515136)))]; tensor down_blocks_1_resnets_0_conv2_bias_to_fp16 = const()[name = tensor("down_blocks_1_resnets_0_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37888000)))]; tensor hidden_states_39_cast_fp16 = conv(bias = down_blocks_1_resnets_0_conv2_bias_to_fp16, dilations = var_3156, groups = var_3111, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = var_3154, weight = down_blocks_1_resnets_0_conv2_weight_to_fp16, x = input_75_cast_fp16)[name = tensor("hidden_states_39_cast_fp16")]; tensor var_3161 = const()[name = tensor("op_3161"), val = tensor([1, 1])]; tensor var_3163 = const()[name = tensor("op_3163"), val = tensor([1, 1])]; tensor x_1_pad_type_0 = const()[name = tensor("x_1_pad_type_0"), val = tensor("custom")]; tensor x_1_pad_0 = const()[name = tensor("x_1_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_resnets_0_conv_shortcut_weight_to_fp16 = const()[name = tensor("down_blocks_1_resnets_0_conv_shortcut_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(37889344)))]; tensor down_blocks_1_resnets_0_conv_shortcut_bias_to_fp16 = const()[name = tensor("down_blocks_1_resnets_0_conv_shortcut_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38299008)))]; tensor x_1_cast_fp16 = conv(bias = down_blocks_1_resnets_0_conv_shortcut_bias_to_fp16, dilations = var_3163, groups = var_3111, pad = x_1_pad_0, pad_type = x_1_pad_type_0, strides = var_3161, weight = down_blocks_1_resnets_0_conv_shortcut_weight_to_fp16, x = input_63_cast_fp16)[name = tensor("x_1_cast_fp16")]; tensor hidden_states_41_cast_fp16 = add(x = x_1_cast_fp16, y = hidden_states_39_cast_fp16)[name = tensor("hidden_states_41_cast_fp16")]; tensor reshape_32_shape_0 = const()[name = tensor("reshape_32_shape_0"), val = tensor([2, 32, 20, 32, 32])]; tensor reshape_32_cast_fp16 = reshape(shape = reshape_32_shape_0, x = hidden_states_41_cast_fp16)[name = tensor("reshape_32_cast_fp16")]; tensor reduce_mean_24_axes_0 = const()[name = tensor("reduce_mean_24_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_24_keep_dims_0 = const()[name = tensor("reduce_mean_24_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_24_cast_fp16 = reduce_mean(axes = reduce_mean_24_axes_0, keep_dims = reduce_mean_24_keep_dims_0, x = reshape_32_cast_fp16)[name = tensor("reduce_mean_24_cast_fp16")]; tensor sub_16_cast_fp16 = sub(x = reshape_32_cast_fp16, y = reduce_mean_24_cast_fp16)[name = tensor("sub_16_cast_fp16")]; tensor square_8_cast_fp16 = square(x = sub_16_cast_fp16)[name = tensor("square_8_cast_fp16")]; tensor reduce_mean_26_axes_0 = const()[name = tensor("reduce_mean_26_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_26_keep_dims_0 = const()[name = tensor("reduce_mean_26_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_26_cast_fp16 = reduce_mean(axes = reduce_mean_26_axes_0, keep_dims = reduce_mean_26_keep_dims_0, x = square_8_cast_fp16)[name = tensor("reduce_mean_26_cast_fp16")]; tensor add_16_y_0_to_fp16 = const()[name = tensor("add_16_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_16_cast_fp16 = add(x = reduce_mean_26_cast_fp16, y = add_16_y_0_to_fp16)[name = tensor("add_16_cast_fp16")]; tensor sqrt_8_cast_fp16 = sqrt(x = add_16_cast_fp16)[name = tensor("sqrt_8_cast_fp16")]; tensor real_div_8_cast_fp16 = real_div(x = sub_16_cast_fp16, y = sqrt_8_cast_fp16)[name = tensor("real_div_8_cast_fp16")]; tensor reshape_33_shape_0 = const()[name = tensor("reshape_33_shape_0"), val = tensor([2, 640, 32, 32])]; tensor reshape_33_cast_fp16 = reshape(shape = reshape_33_shape_0, x = real_div_8_cast_fp16)[name = tensor("reshape_33_cast_fp16")]; tensor add_17_gamma_0_to_fp16 = const()[name = tensor("add_17_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38300352)))]; tensor add_17_beta_0_to_fp16 = const()[name = tensor("add_17_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38301696)))]; tensor add_17_epsilon_0_to_fp16 = const()[name = tensor("add_17_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_17_cast_fp16 = batch_norm(beta = add_17_beta_0_to_fp16, epsilon = add_17_epsilon_0_to_fp16, gamma = add_17_gamma_0_to_fp16, mean = add_15_mean_0_to_fp16, variance = add_15_variance_0_to_fp16, x = reshape_33_cast_fp16)[name = tensor("add_17_cast_fp16")]; tensor var_3183 = const()[name = tensor("op_3183"), val = tensor([1, 1])]; tensor var_3185 = const()[name = tensor("op_3185"), val = tensor([1, 1])]; tensor hidden_states_43_pad_type_0 = const()[name = tensor("hidden_states_43_pad_type_0"), val = tensor("custom")]; tensor hidden_states_43_pad_0 = const()[name = tensor("hidden_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_0_proj_in_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38303040)))]; tensor down_blocks_1_attentions_0_proj_in_bias_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39122304)))]; tensor hidden_states_43_cast_fp16 = conv(bias = down_blocks_1_attentions_0_proj_in_bias_to_fp16, dilations = var_3185, groups = var_3111, pad = hidden_states_43_pad_0, pad_type = hidden_states_43_pad_type_0, strides = var_3183, weight = down_blocks_1_attentions_0_proj_in_weight_to_fp16, x = add_17_cast_fp16)[name = tensor("hidden_states_43_cast_fp16")]; tensor var_3190 = const()[name = tensor("op_3190"), val = tensor([2, 640, 1, 1024])]; tensor inputs_13_cast_fp16 = reshape(shape = var_3190, x = hidden_states_43_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; tensor hidden_states_45_axes_0 = const()[name = tensor("hidden_states_45_axes_0"), val = tensor([1])]; tensor hidden_states_45_gamma_0_to_fp16 = const()[name = tensor("hidden_states_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39123648)))]; tensor hidden_states_45_beta_0_to_fp16 = const()[name = tensor("hidden_states_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39124992)))]; tensor var_3206_to_fp16 = const()[name = tensor("op_3206_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_45_cast_fp16 = layer_norm(axes = hidden_states_45_axes_0, beta = hidden_states_45_beta_0_to_fp16, epsilon = var_3206_to_fp16, gamma = hidden_states_45_gamma_0_to_fp16, x = inputs_13_cast_fp16)[name = tensor("hidden_states_45_cast_fp16")]; tensor var_3221 = const()[name = tensor("op_3221"), val = tensor([1, 1])]; tensor var_3223 = const()[name = tensor("op_3223"), val = tensor([1, 1])]; tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39126336)))]; tensor q_9_cast_fp16 = conv(dilations = var_3223, groups = var_3111, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_3221, weight = down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_45_cast_fp16)[name = tensor("q_9_cast_fp16")]; tensor var_3227 = const()[name = tensor("op_3227"), val = tensor([1, 1])]; tensor var_3229 = const()[name = tensor("op_3229"), val = tensor([1, 1])]; tensor k_17_pad_type_0 = const()[name = tensor("k_17_pad_type_0"), val = tensor("custom")]; tensor k_17_pad_0 = const()[name = tensor("k_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39945600)))]; tensor k_17_cast_fp16 = conv(dilations = var_3229, groups = var_3111, pad = k_17_pad_0, pad_type = k_17_pad_type_0, strides = var_3227, weight = down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_45_cast_fp16)[name = tensor("k_17_cast_fp16")]; tensor var_3233 = const()[name = tensor("op_3233"), val = tensor([1, 1])]; tensor var_3235 = const()[name = tensor("op_3235"), val = tensor([1, 1])]; tensor v_9_pad_type_0 = const()[name = tensor("v_9_pad_type_0"), val = tensor("custom")]; tensor v_9_pad_0 = const()[name = tensor("v_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40764864)))]; tensor v_9_cast_fp16 = conv(dilations = var_3235, groups = var_3111, pad = v_9_pad_0, pad_type = v_9_pad_type_0, strides = var_3233, weight = down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_45_cast_fp16)[name = tensor("v_9_cast_fp16")]; tensor var_3239_begin_0 = const()[name = tensor("op_3239_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3239_end_0 = const()[name = tensor("op_3239_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3239_end_mask_0 = const()[name = tensor("op_3239_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3239_cast_fp16 = slice_by_index(begin = var_3239_begin_0, end = var_3239_end_0, end_mask = var_3239_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_3239_cast_fp16")]; tensor var_3243_begin_0 = const()[name = tensor("op_3243_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_3243_end_0 = const()[name = tensor("op_3243_end_0"), val = tensor([2, 160, 1, 1024])]; tensor var_3243_end_mask_0 = const()[name = tensor("op_3243_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3243_cast_fp16 = slice_by_index(begin = var_3243_begin_0, end = var_3243_end_0, end_mask = var_3243_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_3243_cast_fp16")]; tensor var_3247_begin_0 = const()[name = tensor("op_3247_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_3247_end_0 = const()[name = tensor("op_3247_end_0"), val = tensor([2, 240, 1, 1024])]; tensor var_3247_end_mask_0 = const()[name = tensor("op_3247_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3247_cast_fp16 = slice_by_index(begin = var_3247_begin_0, end = var_3247_end_0, end_mask = var_3247_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_3247_cast_fp16")]; tensor var_3251_begin_0 = const()[name = tensor("op_3251_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_3251_end_0 = const()[name = tensor("op_3251_end_0"), val = tensor([2, 320, 1, 1024])]; tensor var_3251_end_mask_0 = const()[name = tensor("op_3251_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3251_cast_fp16 = slice_by_index(begin = var_3251_begin_0, end = var_3251_end_0, end_mask = var_3251_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_3251_cast_fp16")]; tensor var_3255_begin_0 = const()[name = tensor("op_3255_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_3255_end_0 = const()[name = tensor("op_3255_end_0"), val = tensor([2, 400, 1, 1024])]; tensor var_3255_end_mask_0 = const()[name = tensor("op_3255_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3255_cast_fp16 = slice_by_index(begin = var_3255_begin_0, end = var_3255_end_0, end_mask = var_3255_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_3255_cast_fp16")]; tensor var_3259_begin_0 = const()[name = tensor("op_3259_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_3259_end_0 = const()[name = tensor("op_3259_end_0"), val = tensor([2, 480, 1, 1024])]; tensor var_3259_end_mask_0 = const()[name = tensor("op_3259_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3259_cast_fp16 = slice_by_index(begin = var_3259_begin_0, end = var_3259_end_0, end_mask = var_3259_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_3259_cast_fp16")]; tensor var_3263_begin_0 = const()[name = tensor("op_3263_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_3263_end_0 = const()[name = tensor("op_3263_end_0"), val = tensor([2, 560, 1, 1024])]; tensor var_3263_end_mask_0 = const()[name = tensor("op_3263_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3263_cast_fp16 = slice_by_index(begin = var_3263_begin_0, end = var_3263_end_0, end_mask = var_3263_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_3263_cast_fp16")]; tensor var_3267_begin_0 = const()[name = tensor("op_3267_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_3267_end_0 = const()[name = tensor("op_3267_end_0"), val = tensor([2, 640, 1, 1024])]; tensor var_3267_end_mask_0 = const()[name = tensor("op_3267_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3267_cast_fp16 = slice_by_index(begin = var_3267_begin_0, end = var_3267_end_0, end_mask = var_3267_end_mask_0, x = q_9_cast_fp16)[name = tensor("op_3267_cast_fp16")]; tensor var_3270_begin_0 = const()[name = tensor("op_3270_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3270_end_0 = const()[name = tensor("op_3270_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3270_end_mask_0 = const()[name = tensor("op_3270_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3270_cast_fp16 = slice_by_index(begin = var_3270_begin_0, end = var_3270_end_0, end_mask = var_3270_end_mask_0, x = var_3239_cast_fp16)[name = tensor("op_3270_cast_fp16")]; tensor var_3271_begin_0 = const()[name = tensor("op_3271_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3271_end_0 = const()[name = tensor("op_3271_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3271_end_mask_0 = const()[name = tensor("op_3271_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3271_cast_fp16 = slice_by_index(begin = var_3271_begin_0, end = var_3271_end_0, end_mask = var_3271_end_mask_0, x = var_3239_cast_fp16)[name = tensor("op_3271_cast_fp16")]; tensor var_3272_begin_0 = const()[name = tensor("op_3272_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3272_end_0 = const()[name = tensor("op_3272_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3272_end_mask_0 = const()[name = tensor("op_3272_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3272_cast_fp16 = slice_by_index(begin = var_3272_begin_0, end = var_3272_end_0, end_mask = var_3272_end_mask_0, x = var_3243_cast_fp16)[name = tensor("op_3272_cast_fp16")]; tensor var_3273_begin_0 = const()[name = tensor("op_3273_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3273_end_0 = const()[name = tensor("op_3273_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3273_end_mask_0 = const()[name = tensor("op_3273_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3273_cast_fp16 = slice_by_index(begin = var_3273_begin_0, end = var_3273_end_0, end_mask = var_3273_end_mask_0, x = var_3243_cast_fp16)[name = tensor("op_3273_cast_fp16")]; tensor var_3274_begin_0 = const()[name = tensor("op_3274_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3274_end_0 = const()[name = tensor("op_3274_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3274_end_mask_0 = const()[name = tensor("op_3274_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3274_cast_fp16 = slice_by_index(begin = var_3274_begin_0, end = var_3274_end_0, end_mask = var_3274_end_mask_0, x = var_3247_cast_fp16)[name = tensor("op_3274_cast_fp16")]; tensor var_3275_begin_0 = const()[name = tensor("op_3275_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3275_end_0 = const()[name = tensor("op_3275_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3275_end_mask_0 = const()[name = tensor("op_3275_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3275_cast_fp16 = slice_by_index(begin = var_3275_begin_0, end = var_3275_end_0, end_mask = var_3275_end_mask_0, x = var_3247_cast_fp16)[name = tensor("op_3275_cast_fp16")]; tensor var_3276_begin_0 = const()[name = tensor("op_3276_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3276_end_0 = const()[name = tensor("op_3276_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3276_end_mask_0 = const()[name = tensor("op_3276_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3276_cast_fp16 = slice_by_index(begin = var_3276_begin_0, end = var_3276_end_0, end_mask = var_3276_end_mask_0, x = var_3251_cast_fp16)[name = tensor("op_3276_cast_fp16")]; tensor var_3277_begin_0 = const()[name = tensor("op_3277_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3277_end_0 = const()[name = tensor("op_3277_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3277_end_mask_0 = const()[name = tensor("op_3277_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3277_cast_fp16 = slice_by_index(begin = var_3277_begin_0, end = var_3277_end_0, end_mask = var_3277_end_mask_0, x = var_3251_cast_fp16)[name = tensor("op_3277_cast_fp16")]; tensor var_3278_begin_0 = const()[name = tensor("op_3278_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3278_end_0 = const()[name = tensor("op_3278_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3278_end_mask_0 = const()[name = tensor("op_3278_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3278_cast_fp16 = slice_by_index(begin = var_3278_begin_0, end = var_3278_end_0, end_mask = var_3278_end_mask_0, x = var_3255_cast_fp16)[name = tensor("op_3278_cast_fp16")]; tensor var_3279_begin_0 = const()[name = tensor("op_3279_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3279_end_0 = const()[name = tensor("op_3279_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3279_end_mask_0 = const()[name = tensor("op_3279_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3279_cast_fp16 = slice_by_index(begin = var_3279_begin_0, end = var_3279_end_0, end_mask = var_3279_end_mask_0, x = var_3255_cast_fp16)[name = tensor("op_3279_cast_fp16")]; tensor var_3280_begin_0 = const()[name = tensor("op_3280_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3280_end_0 = const()[name = tensor("op_3280_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3280_end_mask_0 = const()[name = tensor("op_3280_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3280_cast_fp16 = slice_by_index(begin = var_3280_begin_0, end = var_3280_end_0, end_mask = var_3280_end_mask_0, x = var_3259_cast_fp16)[name = tensor("op_3280_cast_fp16")]; tensor var_3281_begin_0 = const()[name = tensor("op_3281_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3281_end_0 = const()[name = tensor("op_3281_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3281_end_mask_0 = const()[name = tensor("op_3281_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3281_cast_fp16 = slice_by_index(begin = var_3281_begin_0, end = var_3281_end_0, end_mask = var_3281_end_mask_0, x = var_3259_cast_fp16)[name = tensor("op_3281_cast_fp16")]; tensor var_3282_begin_0 = const()[name = tensor("op_3282_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3282_end_0 = const()[name = tensor("op_3282_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3282_end_mask_0 = const()[name = tensor("op_3282_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3282_cast_fp16 = slice_by_index(begin = var_3282_begin_0, end = var_3282_end_0, end_mask = var_3282_end_mask_0, x = var_3263_cast_fp16)[name = tensor("op_3282_cast_fp16")]; tensor var_3283_begin_0 = const()[name = tensor("op_3283_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3283_end_0 = const()[name = tensor("op_3283_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3283_end_mask_0 = const()[name = tensor("op_3283_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3283_cast_fp16 = slice_by_index(begin = var_3283_begin_0, end = var_3283_end_0, end_mask = var_3283_end_mask_0, x = var_3263_cast_fp16)[name = tensor("op_3283_cast_fp16")]; tensor var_3284_begin_0 = const()[name = tensor("op_3284_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3284_end_0 = const()[name = tensor("op_3284_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3284_end_mask_0 = const()[name = tensor("op_3284_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3284_cast_fp16 = slice_by_index(begin = var_3284_begin_0, end = var_3284_end_0, end_mask = var_3284_end_mask_0, x = var_3267_cast_fp16)[name = tensor("op_3284_cast_fp16")]; tensor var_3285_begin_0 = const()[name = tensor("op_3285_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3285_end_0 = const()[name = tensor("op_3285_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3285_end_mask_0 = const()[name = tensor("op_3285_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3285_cast_fp16 = slice_by_index(begin = var_3285_begin_0, end = var_3285_end_0, end_mask = var_3285_end_mask_0, x = var_3267_cast_fp16)[name = tensor("op_3285_cast_fp16")]; tensor k_19_perm_0 = const()[name = tensor("k_19_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_3290_begin_0 = const()[name = tensor("op_3290_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3290_end_0 = const()[name = tensor("op_3290_end_0"), val = tensor([2, 1024, 1, 80])]; tensor var_3290_end_mask_0 = const()[name = tensor("op_3290_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_27 = transpose(perm = k_19_perm_0, x = k_17_cast_fp16)[name = tensor("transpose_27")]; tensor var_3290_cast_fp16 = slice_by_index(begin = var_3290_begin_0, end = var_3290_end_0, end_mask = var_3290_end_mask_0, x = transpose_27)[name = tensor("op_3290_cast_fp16")]; tensor var_3294_begin_0 = const()[name = tensor("op_3294_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_3294_end_0 = const()[name = tensor("op_3294_end_0"), val = tensor([2, 1024, 1, 160])]; tensor var_3294_end_mask_0 = const()[name = tensor("op_3294_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3294_cast_fp16 = slice_by_index(begin = var_3294_begin_0, end = var_3294_end_0, end_mask = var_3294_end_mask_0, x = transpose_27)[name = tensor("op_3294_cast_fp16")]; tensor var_3298_begin_0 = const()[name = tensor("op_3298_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_3298_end_0 = const()[name = tensor("op_3298_end_0"), val = tensor([2, 1024, 1, 240])]; tensor var_3298_end_mask_0 = const()[name = tensor("op_3298_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3298_cast_fp16 = slice_by_index(begin = var_3298_begin_0, end = var_3298_end_0, end_mask = var_3298_end_mask_0, x = transpose_27)[name = tensor("op_3298_cast_fp16")]; tensor var_3302_begin_0 = const()[name = tensor("op_3302_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_3302_end_0 = const()[name = tensor("op_3302_end_0"), val = tensor([2, 1024, 1, 320])]; tensor var_3302_end_mask_0 = const()[name = tensor("op_3302_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3302_cast_fp16 = slice_by_index(begin = var_3302_begin_0, end = var_3302_end_0, end_mask = var_3302_end_mask_0, x = transpose_27)[name = tensor("op_3302_cast_fp16")]; tensor var_3306_begin_0 = const()[name = tensor("op_3306_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_3306_end_0 = const()[name = tensor("op_3306_end_0"), val = tensor([2, 1024, 1, 400])]; tensor var_3306_end_mask_0 = const()[name = tensor("op_3306_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3306_cast_fp16 = slice_by_index(begin = var_3306_begin_0, end = var_3306_end_0, end_mask = var_3306_end_mask_0, x = transpose_27)[name = tensor("op_3306_cast_fp16")]; tensor var_3310_begin_0 = const()[name = tensor("op_3310_begin_0"), val = tensor([0, 0, 0, 400])]; tensor var_3310_end_0 = const()[name = tensor("op_3310_end_0"), val = tensor([2, 1024, 1, 480])]; tensor var_3310_end_mask_0 = const()[name = tensor("op_3310_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3310_cast_fp16 = slice_by_index(begin = var_3310_begin_0, end = var_3310_end_0, end_mask = var_3310_end_mask_0, x = transpose_27)[name = tensor("op_3310_cast_fp16")]; tensor var_3314_begin_0 = const()[name = tensor("op_3314_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_3314_end_0 = const()[name = tensor("op_3314_end_0"), val = tensor([2, 1024, 1, 560])]; tensor var_3314_end_mask_0 = const()[name = tensor("op_3314_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3314_cast_fp16 = slice_by_index(begin = var_3314_begin_0, end = var_3314_end_0, end_mask = var_3314_end_mask_0, x = transpose_27)[name = tensor("op_3314_cast_fp16")]; tensor var_3318_begin_0 = const()[name = tensor("op_3318_begin_0"), val = tensor([0, 0, 0, 560])]; tensor var_3318_end_0 = const()[name = tensor("op_3318_end_0"), val = tensor([2, 1024, 1, 640])]; tensor var_3318_end_mask_0 = const()[name = tensor("op_3318_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3318_cast_fp16 = slice_by_index(begin = var_3318_begin_0, end = var_3318_end_0, end_mask = var_3318_end_mask_0, x = transpose_27)[name = tensor("op_3318_cast_fp16")]; tensor var_3320_begin_0 = const()[name = tensor("op_3320_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3320_end_0 = const()[name = tensor("op_3320_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3320_end_mask_0 = const()[name = tensor("op_3320_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3320_cast_fp16 = slice_by_index(begin = var_3320_begin_0, end = var_3320_end_0, end_mask = var_3320_end_mask_0, x = v_9_cast_fp16)[name = tensor("op_3320_cast_fp16")]; tensor var_3324_begin_0 = const()[name = tensor("op_3324_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_3324_end_0 = const()[name = tensor("op_3324_end_0"), val = tensor([2, 160, 1, 1024])]; tensor var_3324_end_mask_0 = const()[name = tensor("op_3324_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3324_cast_fp16 = slice_by_index(begin = var_3324_begin_0, end = var_3324_end_0, end_mask = var_3324_end_mask_0, x = v_9_cast_fp16)[name = tensor("op_3324_cast_fp16")]; tensor var_3328_begin_0 = const()[name = tensor("op_3328_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_3328_end_0 = const()[name = tensor("op_3328_end_0"), val = tensor([2, 240, 1, 1024])]; tensor var_3328_end_mask_0 = const()[name = tensor("op_3328_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3328_cast_fp16 = slice_by_index(begin = var_3328_begin_0, end = var_3328_end_0, end_mask = var_3328_end_mask_0, x = v_9_cast_fp16)[name = tensor("op_3328_cast_fp16")]; tensor var_3332_begin_0 = const()[name = tensor("op_3332_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_3332_end_0 = const()[name = tensor("op_3332_end_0"), val = tensor([2, 320, 1, 1024])]; tensor var_3332_end_mask_0 = const()[name = tensor("op_3332_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3332_cast_fp16 = slice_by_index(begin = var_3332_begin_0, end = var_3332_end_0, end_mask = var_3332_end_mask_0, x = v_9_cast_fp16)[name = tensor("op_3332_cast_fp16")]; tensor var_3336_begin_0 = const()[name = tensor("op_3336_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_3336_end_0 = const()[name = tensor("op_3336_end_0"), val = tensor([2, 400, 1, 1024])]; tensor var_3336_end_mask_0 = const()[name = tensor("op_3336_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3336_cast_fp16 = slice_by_index(begin = var_3336_begin_0, end = var_3336_end_0, end_mask = var_3336_end_mask_0, x = v_9_cast_fp16)[name = tensor("op_3336_cast_fp16")]; tensor var_3340_begin_0 = const()[name = tensor("op_3340_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_3340_end_0 = const()[name = tensor("op_3340_end_0"), val = tensor([2, 480, 1, 1024])]; tensor var_3340_end_mask_0 = const()[name = tensor("op_3340_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3340_cast_fp16 = slice_by_index(begin = var_3340_begin_0, end = var_3340_end_0, end_mask = var_3340_end_mask_0, x = v_9_cast_fp16)[name = tensor("op_3340_cast_fp16")]; tensor var_3344_begin_0 = const()[name = tensor("op_3344_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_3344_end_0 = const()[name = tensor("op_3344_end_0"), val = tensor([2, 560, 1, 1024])]; tensor var_3344_end_mask_0 = const()[name = tensor("op_3344_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3344_cast_fp16 = slice_by_index(begin = var_3344_begin_0, end = var_3344_end_0, end_mask = var_3344_end_mask_0, x = v_9_cast_fp16)[name = tensor("op_3344_cast_fp16")]; tensor var_3348_begin_0 = const()[name = tensor("op_3348_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_3348_end_0 = const()[name = tensor("op_3348_end_0"), val = tensor([2, 640, 1, 1024])]; tensor var_3348_end_mask_0 = const()[name = tensor("op_3348_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3348_cast_fp16 = slice_by_index(begin = var_3348_begin_0, end = var_3348_end_0, end_mask = var_3348_end_mask_0, x = v_9_cast_fp16)[name = tensor("op_3348_cast_fp16")]; tensor var_3352_equation_0 = const()[name = tensor("op_3352_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3352_cast_fp16 = einsum(equation = var_3352_equation_0, values = (var_3290_cast_fp16, var_3270_cast_fp16))[name = tensor("op_3352_cast_fp16")]; tensor var_3353_to_fp16 = const()[name = tensor("op_3353_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_513_cast_fp16 = mul(x = var_3352_cast_fp16, y = var_3353_to_fp16)[name = tensor("aw_chunk_513_cast_fp16")]; tensor var_3356_equation_0 = const()[name = tensor("op_3356_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3356_cast_fp16 = einsum(equation = var_3356_equation_0, values = (var_3290_cast_fp16, var_3271_cast_fp16))[name = tensor("op_3356_cast_fp16")]; tensor var_3357_to_fp16 = const()[name = tensor("op_3357_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_515_cast_fp16 = mul(x = var_3356_cast_fp16, y = var_3357_to_fp16)[name = tensor("aw_chunk_515_cast_fp16")]; tensor var_3360_equation_0 = const()[name = tensor("op_3360_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3360_cast_fp16 = einsum(equation = var_3360_equation_0, values = (var_3294_cast_fp16, var_3272_cast_fp16))[name = tensor("op_3360_cast_fp16")]; tensor var_3361_to_fp16 = const()[name = tensor("op_3361_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_517_cast_fp16 = mul(x = var_3360_cast_fp16, y = var_3361_to_fp16)[name = tensor("aw_chunk_517_cast_fp16")]; tensor var_3364_equation_0 = const()[name = tensor("op_3364_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3364_cast_fp16 = einsum(equation = var_3364_equation_0, values = (var_3294_cast_fp16, var_3273_cast_fp16))[name = tensor("op_3364_cast_fp16")]; tensor var_3365_to_fp16 = const()[name = tensor("op_3365_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_519_cast_fp16 = mul(x = var_3364_cast_fp16, y = var_3365_to_fp16)[name = tensor("aw_chunk_519_cast_fp16")]; tensor var_3368_equation_0 = const()[name = tensor("op_3368_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3368_cast_fp16 = einsum(equation = var_3368_equation_0, values = (var_3298_cast_fp16, var_3274_cast_fp16))[name = tensor("op_3368_cast_fp16")]; tensor var_3369_to_fp16 = const()[name = tensor("op_3369_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_521_cast_fp16 = mul(x = var_3368_cast_fp16, y = var_3369_to_fp16)[name = tensor("aw_chunk_521_cast_fp16")]; tensor var_3372_equation_0 = const()[name = tensor("op_3372_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3372_cast_fp16 = einsum(equation = var_3372_equation_0, values = (var_3298_cast_fp16, var_3275_cast_fp16))[name = tensor("op_3372_cast_fp16")]; tensor var_3373_to_fp16 = const()[name = tensor("op_3373_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_523_cast_fp16 = mul(x = var_3372_cast_fp16, y = var_3373_to_fp16)[name = tensor("aw_chunk_523_cast_fp16")]; tensor var_3376_equation_0 = const()[name = tensor("op_3376_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3376_cast_fp16 = einsum(equation = var_3376_equation_0, values = (var_3302_cast_fp16, var_3276_cast_fp16))[name = tensor("op_3376_cast_fp16")]; tensor var_3377_to_fp16 = const()[name = tensor("op_3377_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_525_cast_fp16 = mul(x = var_3376_cast_fp16, y = var_3377_to_fp16)[name = tensor("aw_chunk_525_cast_fp16")]; tensor var_3380_equation_0 = const()[name = tensor("op_3380_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3380_cast_fp16 = einsum(equation = var_3380_equation_0, values = (var_3302_cast_fp16, var_3277_cast_fp16))[name = tensor("op_3380_cast_fp16")]; tensor var_3381_to_fp16 = const()[name = tensor("op_3381_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_527_cast_fp16 = mul(x = var_3380_cast_fp16, y = var_3381_to_fp16)[name = tensor("aw_chunk_527_cast_fp16")]; tensor var_3384_equation_0 = const()[name = tensor("op_3384_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3384_cast_fp16 = einsum(equation = var_3384_equation_0, values = (var_3306_cast_fp16, var_3278_cast_fp16))[name = tensor("op_3384_cast_fp16")]; tensor var_3385_to_fp16 = const()[name = tensor("op_3385_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_529_cast_fp16 = mul(x = var_3384_cast_fp16, y = var_3385_to_fp16)[name = tensor("aw_chunk_529_cast_fp16")]; tensor var_3388_equation_0 = const()[name = tensor("op_3388_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3388_cast_fp16 = einsum(equation = var_3388_equation_0, values = (var_3306_cast_fp16, var_3279_cast_fp16))[name = tensor("op_3388_cast_fp16")]; tensor var_3389_to_fp16 = const()[name = tensor("op_3389_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_531_cast_fp16 = mul(x = var_3388_cast_fp16, y = var_3389_to_fp16)[name = tensor("aw_chunk_531_cast_fp16")]; tensor var_3392_equation_0 = const()[name = tensor("op_3392_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3392_cast_fp16 = einsum(equation = var_3392_equation_0, values = (var_3310_cast_fp16, var_3280_cast_fp16))[name = tensor("op_3392_cast_fp16")]; tensor var_3393_to_fp16 = const()[name = tensor("op_3393_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_533_cast_fp16 = mul(x = var_3392_cast_fp16, y = var_3393_to_fp16)[name = tensor("aw_chunk_533_cast_fp16")]; tensor var_3396_equation_0 = const()[name = tensor("op_3396_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3396_cast_fp16 = einsum(equation = var_3396_equation_0, values = (var_3310_cast_fp16, var_3281_cast_fp16))[name = tensor("op_3396_cast_fp16")]; tensor var_3397_to_fp16 = const()[name = tensor("op_3397_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_535_cast_fp16 = mul(x = var_3396_cast_fp16, y = var_3397_to_fp16)[name = tensor("aw_chunk_535_cast_fp16")]; tensor var_3400_equation_0 = const()[name = tensor("op_3400_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3400_cast_fp16 = einsum(equation = var_3400_equation_0, values = (var_3314_cast_fp16, var_3282_cast_fp16))[name = tensor("op_3400_cast_fp16")]; tensor var_3401_to_fp16 = const()[name = tensor("op_3401_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_537_cast_fp16 = mul(x = var_3400_cast_fp16, y = var_3401_to_fp16)[name = tensor("aw_chunk_537_cast_fp16")]; tensor var_3404_equation_0 = const()[name = tensor("op_3404_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3404_cast_fp16 = einsum(equation = var_3404_equation_0, values = (var_3314_cast_fp16, var_3283_cast_fp16))[name = tensor("op_3404_cast_fp16")]; tensor var_3405_to_fp16 = const()[name = tensor("op_3405_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_539_cast_fp16 = mul(x = var_3404_cast_fp16, y = var_3405_to_fp16)[name = tensor("aw_chunk_539_cast_fp16")]; tensor var_3408_equation_0 = const()[name = tensor("op_3408_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3408_cast_fp16 = einsum(equation = var_3408_equation_0, values = (var_3318_cast_fp16, var_3284_cast_fp16))[name = tensor("op_3408_cast_fp16")]; tensor var_3409_to_fp16 = const()[name = tensor("op_3409_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_541_cast_fp16 = mul(x = var_3408_cast_fp16, y = var_3409_to_fp16)[name = tensor("aw_chunk_541_cast_fp16")]; tensor var_3412_equation_0 = const()[name = tensor("op_3412_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3412_cast_fp16 = einsum(equation = var_3412_equation_0, values = (var_3318_cast_fp16, var_3285_cast_fp16))[name = tensor("op_3412_cast_fp16")]; tensor var_3413_to_fp16 = const()[name = tensor("op_3413_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_543_cast_fp16 = mul(x = var_3412_cast_fp16, y = var_3413_to_fp16)[name = tensor("aw_chunk_543_cast_fp16")]; tensor var_3415_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_513_cast_fp16)[name = tensor("op_3415_cast_fp16")]; tensor var_3416_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_515_cast_fp16)[name = tensor("op_3416_cast_fp16")]; tensor var_3417_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_517_cast_fp16)[name = tensor("op_3417_cast_fp16")]; tensor var_3418_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_519_cast_fp16)[name = tensor("op_3418_cast_fp16")]; tensor var_3419_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_521_cast_fp16)[name = tensor("op_3419_cast_fp16")]; tensor var_3420_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_523_cast_fp16)[name = tensor("op_3420_cast_fp16")]; tensor var_3421_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_525_cast_fp16)[name = tensor("op_3421_cast_fp16")]; tensor var_3422_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_527_cast_fp16)[name = tensor("op_3422_cast_fp16")]; tensor var_3423_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_529_cast_fp16)[name = tensor("op_3423_cast_fp16")]; tensor var_3424_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_531_cast_fp16)[name = tensor("op_3424_cast_fp16")]; tensor var_3425_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_533_cast_fp16)[name = tensor("op_3425_cast_fp16")]; tensor var_3426_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_535_cast_fp16)[name = tensor("op_3426_cast_fp16")]; tensor var_3427_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_537_cast_fp16)[name = tensor("op_3427_cast_fp16")]; tensor var_3428_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_539_cast_fp16)[name = tensor("op_3428_cast_fp16")]; tensor var_3429_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_541_cast_fp16)[name = tensor("op_3429_cast_fp16")]; tensor var_3430_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_543_cast_fp16)[name = tensor("op_3430_cast_fp16")]; tensor var_3432_equation_0 = const()[name = tensor("op_3432_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3432_cast_fp16 = einsum(equation = var_3432_equation_0, values = (var_3320_cast_fp16, var_3415_cast_fp16))[name = tensor("op_3432_cast_fp16")]; tensor var_3434_equation_0 = const()[name = tensor("op_3434_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3434_cast_fp16 = einsum(equation = var_3434_equation_0, values = (var_3320_cast_fp16, var_3416_cast_fp16))[name = tensor("op_3434_cast_fp16")]; tensor var_3436_equation_0 = const()[name = tensor("op_3436_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3436_cast_fp16 = einsum(equation = var_3436_equation_0, values = (var_3324_cast_fp16, var_3417_cast_fp16))[name = tensor("op_3436_cast_fp16")]; tensor var_3438_equation_0 = const()[name = tensor("op_3438_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3438_cast_fp16 = einsum(equation = var_3438_equation_0, values = (var_3324_cast_fp16, var_3418_cast_fp16))[name = tensor("op_3438_cast_fp16")]; tensor var_3440_equation_0 = const()[name = tensor("op_3440_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3440_cast_fp16 = einsum(equation = var_3440_equation_0, values = (var_3328_cast_fp16, var_3419_cast_fp16))[name = tensor("op_3440_cast_fp16")]; tensor var_3442_equation_0 = const()[name = tensor("op_3442_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3442_cast_fp16 = einsum(equation = var_3442_equation_0, values = (var_3328_cast_fp16, var_3420_cast_fp16))[name = tensor("op_3442_cast_fp16")]; tensor var_3444_equation_0 = const()[name = tensor("op_3444_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3444_cast_fp16 = einsum(equation = var_3444_equation_0, values = (var_3332_cast_fp16, var_3421_cast_fp16))[name = tensor("op_3444_cast_fp16")]; tensor var_3446_equation_0 = const()[name = tensor("op_3446_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3446_cast_fp16 = einsum(equation = var_3446_equation_0, values = (var_3332_cast_fp16, var_3422_cast_fp16))[name = tensor("op_3446_cast_fp16")]; tensor var_3448_equation_0 = const()[name = tensor("op_3448_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3448_cast_fp16 = einsum(equation = var_3448_equation_0, values = (var_3336_cast_fp16, var_3423_cast_fp16))[name = tensor("op_3448_cast_fp16")]; tensor var_3450_equation_0 = const()[name = tensor("op_3450_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3450_cast_fp16 = einsum(equation = var_3450_equation_0, values = (var_3336_cast_fp16, var_3424_cast_fp16))[name = tensor("op_3450_cast_fp16")]; tensor var_3452_equation_0 = const()[name = tensor("op_3452_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3452_cast_fp16 = einsum(equation = var_3452_equation_0, values = (var_3340_cast_fp16, var_3425_cast_fp16))[name = tensor("op_3452_cast_fp16")]; tensor var_3454_equation_0 = const()[name = tensor("op_3454_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3454_cast_fp16 = einsum(equation = var_3454_equation_0, values = (var_3340_cast_fp16, var_3426_cast_fp16))[name = tensor("op_3454_cast_fp16")]; tensor var_3456_equation_0 = const()[name = tensor("op_3456_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3456_cast_fp16 = einsum(equation = var_3456_equation_0, values = (var_3344_cast_fp16, var_3427_cast_fp16))[name = tensor("op_3456_cast_fp16")]; tensor var_3458_equation_0 = const()[name = tensor("op_3458_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3458_cast_fp16 = einsum(equation = var_3458_equation_0, values = (var_3344_cast_fp16, var_3428_cast_fp16))[name = tensor("op_3458_cast_fp16")]; tensor var_3460_equation_0 = const()[name = tensor("op_3460_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3460_cast_fp16 = einsum(equation = var_3460_equation_0, values = (var_3348_cast_fp16, var_3429_cast_fp16))[name = tensor("op_3460_cast_fp16")]; tensor var_3462_equation_0 = const()[name = tensor("op_3462_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3462_cast_fp16 = einsum(equation = var_3462_equation_0, values = (var_3348_cast_fp16, var_3430_cast_fp16))[name = tensor("op_3462_cast_fp16")]; tensor var_3464_interleave_0 = const()[name = tensor("op_3464_interleave_0"), val = tensor(false)]; tensor var_3464_cast_fp16 = concat(axis = var_3089, interleave = var_3464_interleave_0, values = (var_3432_cast_fp16, var_3434_cast_fp16))[name = tensor("op_3464_cast_fp16")]; tensor var_3466_interleave_0 = const()[name = tensor("op_3466_interleave_0"), val = tensor(false)]; tensor var_3466_cast_fp16 = concat(axis = var_3089, interleave = var_3466_interleave_0, values = (var_3436_cast_fp16, var_3438_cast_fp16))[name = tensor("op_3466_cast_fp16")]; tensor var_3468_interleave_0 = const()[name = tensor("op_3468_interleave_0"), val = tensor(false)]; tensor var_3468_cast_fp16 = concat(axis = var_3089, interleave = var_3468_interleave_0, values = (var_3440_cast_fp16, var_3442_cast_fp16))[name = tensor("op_3468_cast_fp16")]; tensor var_3470_interleave_0 = const()[name = tensor("op_3470_interleave_0"), val = tensor(false)]; tensor var_3470_cast_fp16 = concat(axis = var_3089, interleave = var_3470_interleave_0, values = (var_3444_cast_fp16, var_3446_cast_fp16))[name = tensor("op_3470_cast_fp16")]; tensor var_3472_interleave_0 = const()[name = tensor("op_3472_interleave_0"), val = tensor(false)]; tensor var_3472_cast_fp16 = concat(axis = var_3089, interleave = var_3472_interleave_0, values = (var_3448_cast_fp16, var_3450_cast_fp16))[name = tensor("op_3472_cast_fp16")]; tensor var_3474_interleave_0 = const()[name = tensor("op_3474_interleave_0"), val = tensor(false)]; tensor var_3474_cast_fp16 = concat(axis = var_3089, interleave = var_3474_interleave_0, values = (var_3452_cast_fp16, var_3454_cast_fp16))[name = tensor("op_3474_cast_fp16")]; tensor var_3476_interleave_0 = const()[name = tensor("op_3476_interleave_0"), val = tensor(false)]; tensor var_3476_cast_fp16 = concat(axis = var_3089, interleave = var_3476_interleave_0, values = (var_3456_cast_fp16, var_3458_cast_fp16))[name = tensor("op_3476_cast_fp16")]; tensor var_3478_interleave_0 = const()[name = tensor("op_3478_interleave_0"), val = tensor(false)]; tensor var_3478_cast_fp16 = concat(axis = var_3089, interleave = var_3478_interleave_0, values = (var_3460_cast_fp16, var_3462_cast_fp16))[name = tensor("op_3478_cast_fp16")]; tensor input_79_interleave_0 = const()[name = tensor("input_79_interleave_0"), val = tensor(false)]; tensor input_79_cast_fp16 = concat(axis = var_3111, interleave = input_79_interleave_0, values = (var_3464_cast_fp16, var_3466_cast_fp16, var_3468_cast_fp16, var_3470_cast_fp16, var_3472_cast_fp16, var_3474_cast_fp16, var_3476_cast_fp16, var_3478_cast_fp16))[name = tensor("input_79_cast_fp16")]; tensor var_3484 = const()[name = tensor("op_3484"), val = tensor([1, 1])]; tensor var_3486 = const()[name = tensor("op_3486"), val = tensor([1, 1])]; tensor var_3488_pad_type_0 = const()[name = tensor("op_3488_pad_type_0"), val = tensor("custom")]; tensor var_3488_pad_0 = const()[name = tensor("op_3488_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(41584128)))]; tensor down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42403392)))]; tensor var_3488_cast_fp16 = conv(bias = down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_3486, groups = var_3111, pad = var_3488_pad_0, pad_type = var_3488_pad_type_0, strides = var_3484, weight = down_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_79_cast_fp16)[name = tensor("op_3488_cast_fp16")]; tensor inputs_15_cast_fp16 = add(x = var_3488_cast_fp16, y = inputs_13_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; tensor hidden_states_47_axes_0 = const()[name = tensor("hidden_states_47_axes_0"), val = tensor([1])]; tensor hidden_states_47_gamma_0_to_fp16 = const()[name = tensor("hidden_states_47_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42404736)))]; tensor hidden_states_47_beta_0_to_fp16 = const()[name = tensor("hidden_states_47_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42406080)))]; tensor var_3498_to_fp16 = const()[name = tensor("op_3498_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_47_cast_fp16 = layer_norm(axes = hidden_states_47_axes_0, beta = hidden_states_47_beta_0_to_fp16, epsilon = var_3498_to_fp16, gamma = hidden_states_47_gamma_0_to_fp16, x = inputs_15_cast_fp16)[name = tensor("hidden_states_47_cast_fp16")]; tensor var_3513 = const()[name = tensor("op_3513"), val = tensor([1, 1])]; tensor var_3515 = const()[name = tensor("op_3515"), val = tensor([1, 1])]; tensor q_11_pad_type_0 = const()[name = tensor("q_11_pad_type_0"), val = tensor("custom")]; tensor q_11_pad_0 = const()[name = tensor("q_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42407424)))]; tensor q_11_cast_fp16 = conv(dilations = var_3515, groups = var_3111, pad = q_11_pad_0, pad_type = q_11_pad_type_0, strides = var_3513, weight = down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_47_cast_fp16)[name = tensor("q_11_cast_fp16")]; tensor var_3519 = const()[name = tensor("op_3519"), val = tensor([1, 1])]; tensor var_3521 = const()[name = tensor("op_3521"), val = tensor([1, 1])]; tensor k_21_pad_type_0 = const()[name = tensor("k_21_pad_type_0"), val = tensor("custom")]; tensor k_21_pad_0 = const()[name = tensor("k_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43226688)))]; tensor k_21_cast_fp16 = conv(dilations = var_3521, groups = var_3111, pad = k_21_pad_0, pad_type = k_21_pad_type_0, strides = var_3519, weight = down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_21_cast_fp16")]; tensor var_3525 = const()[name = tensor("op_3525"), val = tensor([1, 1])]; tensor var_3527 = const()[name = tensor("op_3527"), val = tensor([1, 1])]; tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44209792)))]; tensor v_11_cast_fp16 = conv(dilations = var_3527, groups = var_3111, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_3525, weight = down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_11_cast_fp16")]; tensor var_3531_begin_0 = const()[name = tensor("op_3531_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3531_end_0 = const()[name = tensor("op_3531_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3531_end_mask_0 = const()[name = tensor("op_3531_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3531_cast_fp16 = slice_by_index(begin = var_3531_begin_0, end = var_3531_end_0, end_mask = var_3531_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_3531_cast_fp16")]; tensor var_3535_begin_0 = const()[name = tensor("op_3535_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_3535_end_0 = const()[name = tensor("op_3535_end_0"), val = tensor([2, 160, 1, 1024])]; tensor var_3535_end_mask_0 = const()[name = tensor("op_3535_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3535_cast_fp16 = slice_by_index(begin = var_3535_begin_0, end = var_3535_end_0, end_mask = var_3535_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_3535_cast_fp16")]; tensor var_3539_begin_0 = const()[name = tensor("op_3539_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_3539_end_0 = const()[name = tensor("op_3539_end_0"), val = tensor([2, 240, 1, 1024])]; tensor var_3539_end_mask_0 = const()[name = tensor("op_3539_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3539_cast_fp16 = slice_by_index(begin = var_3539_begin_0, end = var_3539_end_0, end_mask = var_3539_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_3539_cast_fp16")]; tensor var_3543_begin_0 = const()[name = tensor("op_3543_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_3543_end_0 = const()[name = tensor("op_3543_end_0"), val = tensor([2, 320, 1, 1024])]; tensor var_3543_end_mask_0 = const()[name = tensor("op_3543_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3543_cast_fp16 = slice_by_index(begin = var_3543_begin_0, end = var_3543_end_0, end_mask = var_3543_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_3543_cast_fp16")]; tensor var_3547_begin_0 = const()[name = tensor("op_3547_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_3547_end_0 = const()[name = tensor("op_3547_end_0"), val = tensor([2, 400, 1, 1024])]; tensor var_3547_end_mask_0 = const()[name = tensor("op_3547_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3547_cast_fp16 = slice_by_index(begin = var_3547_begin_0, end = var_3547_end_0, end_mask = var_3547_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_3547_cast_fp16")]; tensor var_3551_begin_0 = const()[name = tensor("op_3551_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_3551_end_0 = const()[name = tensor("op_3551_end_0"), val = tensor([2, 480, 1, 1024])]; tensor var_3551_end_mask_0 = const()[name = tensor("op_3551_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3551_cast_fp16 = slice_by_index(begin = var_3551_begin_0, end = var_3551_end_0, end_mask = var_3551_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_3551_cast_fp16")]; tensor var_3555_begin_0 = const()[name = tensor("op_3555_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_3555_end_0 = const()[name = tensor("op_3555_end_0"), val = tensor([2, 560, 1, 1024])]; tensor var_3555_end_mask_0 = const()[name = tensor("op_3555_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3555_cast_fp16 = slice_by_index(begin = var_3555_begin_0, end = var_3555_end_0, end_mask = var_3555_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_3555_cast_fp16")]; tensor var_3559_begin_0 = const()[name = tensor("op_3559_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_3559_end_0 = const()[name = tensor("op_3559_end_0"), val = tensor([2, 640, 1, 1024])]; tensor var_3559_end_mask_0 = const()[name = tensor("op_3559_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3559_cast_fp16 = slice_by_index(begin = var_3559_begin_0, end = var_3559_end_0, end_mask = var_3559_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_3559_cast_fp16")]; tensor var_3562_begin_0 = const()[name = tensor("op_3562_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3562_end_0 = const()[name = tensor("op_3562_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3562_end_mask_0 = const()[name = tensor("op_3562_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3562_cast_fp16 = slice_by_index(begin = var_3562_begin_0, end = var_3562_end_0, end_mask = var_3562_end_mask_0, x = var_3531_cast_fp16)[name = tensor("op_3562_cast_fp16")]; tensor var_3563_begin_0 = const()[name = tensor("op_3563_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3563_end_0 = const()[name = tensor("op_3563_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3563_end_mask_0 = const()[name = tensor("op_3563_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3563_cast_fp16 = slice_by_index(begin = var_3563_begin_0, end = var_3563_end_0, end_mask = var_3563_end_mask_0, x = var_3531_cast_fp16)[name = tensor("op_3563_cast_fp16")]; tensor var_3564_begin_0 = const()[name = tensor("op_3564_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3564_end_0 = const()[name = tensor("op_3564_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3564_end_mask_0 = const()[name = tensor("op_3564_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3564_cast_fp16 = slice_by_index(begin = var_3564_begin_0, end = var_3564_end_0, end_mask = var_3564_end_mask_0, x = var_3535_cast_fp16)[name = tensor("op_3564_cast_fp16")]; tensor var_3565_begin_0 = const()[name = tensor("op_3565_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3565_end_0 = const()[name = tensor("op_3565_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3565_end_mask_0 = const()[name = tensor("op_3565_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3565_cast_fp16 = slice_by_index(begin = var_3565_begin_0, end = var_3565_end_0, end_mask = var_3565_end_mask_0, x = var_3535_cast_fp16)[name = tensor("op_3565_cast_fp16")]; tensor var_3566_begin_0 = const()[name = tensor("op_3566_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3566_end_0 = const()[name = tensor("op_3566_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3566_end_mask_0 = const()[name = tensor("op_3566_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3566_cast_fp16 = slice_by_index(begin = var_3566_begin_0, end = var_3566_end_0, end_mask = var_3566_end_mask_0, x = var_3539_cast_fp16)[name = tensor("op_3566_cast_fp16")]; tensor var_3567_begin_0 = const()[name = tensor("op_3567_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3567_end_0 = const()[name = tensor("op_3567_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3567_end_mask_0 = const()[name = tensor("op_3567_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3567_cast_fp16 = slice_by_index(begin = var_3567_begin_0, end = var_3567_end_0, end_mask = var_3567_end_mask_0, x = var_3539_cast_fp16)[name = tensor("op_3567_cast_fp16")]; tensor var_3568_begin_0 = const()[name = tensor("op_3568_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3568_end_0 = const()[name = tensor("op_3568_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3568_end_mask_0 = const()[name = tensor("op_3568_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3568_cast_fp16 = slice_by_index(begin = var_3568_begin_0, end = var_3568_end_0, end_mask = var_3568_end_mask_0, x = var_3543_cast_fp16)[name = tensor("op_3568_cast_fp16")]; tensor var_3569_begin_0 = const()[name = tensor("op_3569_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3569_end_0 = const()[name = tensor("op_3569_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3569_end_mask_0 = const()[name = tensor("op_3569_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3569_cast_fp16 = slice_by_index(begin = var_3569_begin_0, end = var_3569_end_0, end_mask = var_3569_end_mask_0, x = var_3543_cast_fp16)[name = tensor("op_3569_cast_fp16")]; tensor var_3570_begin_0 = const()[name = tensor("op_3570_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3570_end_0 = const()[name = tensor("op_3570_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3570_end_mask_0 = const()[name = tensor("op_3570_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3570_cast_fp16 = slice_by_index(begin = var_3570_begin_0, end = var_3570_end_0, end_mask = var_3570_end_mask_0, x = var_3547_cast_fp16)[name = tensor("op_3570_cast_fp16")]; tensor var_3571_begin_0 = const()[name = tensor("op_3571_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3571_end_0 = const()[name = tensor("op_3571_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3571_end_mask_0 = const()[name = tensor("op_3571_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3571_cast_fp16 = slice_by_index(begin = var_3571_begin_0, end = var_3571_end_0, end_mask = var_3571_end_mask_0, x = var_3547_cast_fp16)[name = tensor("op_3571_cast_fp16")]; tensor var_3572_begin_0 = const()[name = tensor("op_3572_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3572_end_0 = const()[name = tensor("op_3572_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3572_end_mask_0 = const()[name = tensor("op_3572_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3572_cast_fp16 = slice_by_index(begin = var_3572_begin_0, end = var_3572_end_0, end_mask = var_3572_end_mask_0, x = var_3551_cast_fp16)[name = tensor("op_3572_cast_fp16")]; tensor var_3573_begin_0 = const()[name = tensor("op_3573_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3573_end_0 = const()[name = tensor("op_3573_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3573_end_mask_0 = const()[name = tensor("op_3573_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3573_cast_fp16 = slice_by_index(begin = var_3573_begin_0, end = var_3573_end_0, end_mask = var_3573_end_mask_0, x = var_3551_cast_fp16)[name = tensor("op_3573_cast_fp16")]; tensor var_3574_begin_0 = const()[name = tensor("op_3574_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3574_end_0 = const()[name = tensor("op_3574_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3574_end_mask_0 = const()[name = tensor("op_3574_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3574_cast_fp16 = slice_by_index(begin = var_3574_begin_0, end = var_3574_end_0, end_mask = var_3574_end_mask_0, x = var_3555_cast_fp16)[name = tensor("op_3574_cast_fp16")]; tensor var_3575_begin_0 = const()[name = tensor("op_3575_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3575_end_0 = const()[name = tensor("op_3575_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3575_end_mask_0 = const()[name = tensor("op_3575_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3575_cast_fp16 = slice_by_index(begin = var_3575_begin_0, end = var_3575_end_0, end_mask = var_3575_end_mask_0, x = var_3555_cast_fp16)[name = tensor("op_3575_cast_fp16")]; tensor var_3576_begin_0 = const()[name = tensor("op_3576_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3576_end_0 = const()[name = tensor("op_3576_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3576_end_mask_0 = const()[name = tensor("op_3576_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3576_cast_fp16 = slice_by_index(begin = var_3576_begin_0, end = var_3576_end_0, end_mask = var_3576_end_mask_0, x = var_3559_cast_fp16)[name = tensor("op_3576_cast_fp16")]; tensor var_3577_begin_0 = const()[name = tensor("op_3577_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3577_end_0 = const()[name = tensor("op_3577_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3577_end_mask_0 = const()[name = tensor("op_3577_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3577_cast_fp16 = slice_by_index(begin = var_3577_begin_0, end = var_3577_end_0, end_mask = var_3577_end_mask_0, x = var_3559_cast_fp16)[name = tensor("op_3577_cast_fp16")]; tensor k_23_perm_0 = const()[name = tensor("k_23_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_3582_begin_0 = const()[name = tensor("op_3582_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3582_end_0 = const()[name = tensor("op_3582_end_0"), val = tensor([2, 77, 1, 80])]; tensor var_3582_end_mask_0 = const()[name = tensor("op_3582_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_26 = transpose(perm = k_23_perm_0, x = k_21_cast_fp16)[name = tensor("transpose_26")]; tensor var_3582_cast_fp16 = slice_by_index(begin = var_3582_begin_0, end = var_3582_end_0, end_mask = var_3582_end_mask_0, x = transpose_26)[name = tensor("op_3582_cast_fp16")]; tensor var_3586_begin_0 = const()[name = tensor("op_3586_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_3586_end_0 = const()[name = tensor("op_3586_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_3586_end_mask_0 = const()[name = tensor("op_3586_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3586_cast_fp16 = slice_by_index(begin = var_3586_begin_0, end = var_3586_end_0, end_mask = var_3586_end_mask_0, x = transpose_26)[name = tensor("op_3586_cast_fp16")]; tensor var_3590_begin_0 = const()[name = tensor("op_3590_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_3590_end_0 = const()[name = tensor("op_3590_end_0"), val = tensor([2, 77, 1, 240])]; tensor var_3590_end_mask_0 = const()[name = tensor("op_3590_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3590_cast_fp16 = slice_by_index(begin = var_3590_begin_0, end = var_3590_end_0, end_mask = var_3590_end_mask_0, x = transpose_26)[name = tensor("op_3590_cast_fp16")]; tensor var_3594_begin_0 = const()[name = tensor("op_3594_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_3594_end_0 = const()[name = tensor("op_3594_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_3594_end_mask_0 = const()[name = tensor("op_3594_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3594_cast_fp16 = slice_by_index(begin = var_3594_begin_0, end = var_3594_end_0, end_mask = var_3594_end_mask_0, x = transpose_26)[name = tensor("op_3594_cast_fp16")]; tensor var_3598_begin_0 = const()[name = tensor("op_3598_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_3598_end_0 = const()[name = tensor("op_3598_end_0"), val = tensor([2, 77, 1, 400])]; tensor var_3598_end_mask_0 = const()[name = tensor("op_3598_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3598_cast_fp16 = slice_by_index(begin = var_3598_begin_0, end = var_3598_end_0, end_mask = var_3598_end_mask_0, x = transpose_26)[name = tensor("op_3598_cast_fp16")]; tensor var_3602_begin_0 = const()[name = tensor("op_3602_begin_0"), val = tensor([0, 0, 0, 400])]; tensor var_3602_end_0 = const()[name = tensor("op_3602_end_0"), val = tensor([2, 77, 1, 480])]; tensor var_3602_end_mask_0 = const()[name = tensor("op_3602_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3602_cast_fp16 = slice_by_index(begin = var_3602_begin_0, end = var_3602_end_0, end_mask = var_3602_end_mask_0, x = transpose_26)[name = tensor("op_3602_cast_fp16")]; tensor var_3606_begin_0 = const()[name = tensor("op_3606_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_3606_end_0 = const()[name = tensor("op_3606_end_0"), val = tensor([2, 77, 1, 560])]; tensor var_3606_end_mask_0 = const()[name = tensor("op_3606_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3606_cast_fp16 = slice_by_index(begin = var_3606_begin_0, end = var_3606_end_0, end_mask = var_3606_end_mask_0, x = transpose_26)[name = tensor("op_3606_cast_fp16")]; tensor var_3610_begin_0 = const()[name = tensor("op_3610_begin_0"), val = tensor([0, 0, 0, 560])]; tensor var_3610_end_0 = const()[name = tensor("op_3610_end_0"), val = tensor([2, 77, 1, 640])]; tensor var_3610_end_mask_0 = const()[name = tensor("op_3610_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3610_cast_fp16 = slice_by_index(begin = var_3610_begin_0, end = var_3610_end_0, end_mask = var_3610_end_mask_0, x = transpose_26)[name = tensor("op_3610_cast_fp16")]; tensor var_3612_begin_0 = const()[name = tensor("op_3612_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3612_end_0 = const()[name = tensor("op_3612_end_0"), val = tensor([2, 80, 1, 77])]; tensor var_3612_end_mask_0 = const()[name = tensor("op_3612_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3612_cast_fp16 = slice_by_index(begin = var_3612_begin_0, end = var_3612_end_0, end_mask = var_3612_end_mask_0, x = v_11_cast_fp16)[name = tensor("op_3612_cast_fp16")]; tensor var_3616_begin_0 = const()[name = tensor("op_3616_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_3616_end_0 = const()[name = tensor("op_3616_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_3616_end_mask_0 = const()[name = tensor("op_3616_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3616_cast_fp16 = slice_by_index(begin = var_3616_begin_0, end = var_3616_end_0, end_mask = var_3616_end_mask_0, x = v_11_cast_fp16)[name = tensor("op_3616_cast_fp16")]; tensor var_3620_begin_0 = const()[name = tensor("op_3620_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_3620_end_0 = const()[name = tensor("op_3620_end_0"), val = tensor([2, 240, 1, 77])]; tensor var_3620_end_mask_0 = const()[name = tensor("op_3620_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3620_cast_fp16 = slice_by_index(begin = var_3620_begin_0, end = var_3620_end_0, end_mask = var_3620_end_mask_0, x = v_11_cast_fp16)[name = tensor("op_3620_cast_fp16")]; tensor var_3624_begin_0 = const()[name = tensor("op_3624_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_3624_end_0 = const()[name = tensor("op_3624_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_3624_end_mask_0 = const()[name = tensor("op_3624_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3624_cast_fp16 = slice_by_index(begin = var_3624_begin_0, end = var_3624_end_0, end_mask = var_3624_end_mask_0, x = v_11_cast_fp16)[name = tensor("op_3624_cast_fp16")]; tensor var_3628_begin_0 = const()[name = tensor("op_3628_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_3628_end_0 = const()[name = tensor("op_3628_end_0"), val = tensor([2, 400, 1, 77])]; tensor var_3628_end_mask_0 = const()[name = tensor("op_3628_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3628_cast_fp16 = slice_by_index(begin = var_3628_begin_0, end = var_3628_end_0, end_mask = var_3628_end_mask_0, x = v_11_cast_fp16)[name = tensor("op_3628_cast_fp16")]; tensor var_3632_begin_0 = const()[name = tensor("op_3632_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_3632_end_0 = const()[name = tensor("op_3632_end_0"), val = tensor([2, 480, 1, 77])]; tensor var_3632_end_mask_0 = const()[name = tensor("op_3632_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3632_cast_fp16 = slice_by_index(begin = var_3632_begin_0, end = var_3632_end_0, end_mask = var_3632_end_mask_0, x = v_11_cast_fp16)[name = tensor("op_3632_cast_fp16")]; tensor var_3636_begin_0 = const()[name = tensor("op_3636_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_3636_end_0 = const()[name = tensor("op_3636_end_0"), val = tensor([2, 560, 1, 77])]; tensor var_3636_end_mask_0 = const()[name = tensor("op_3636_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3636_cast_fp16 = slice_by_index(begin = var_3636_begin_0, end = var_3636_end_0, end_mask = var_3636_end_mask_0, x = v_11_cast_fp16)[name = tensor("op_3636_cast_fp16")]; tensor var_3640_begin_0 = const()[name = tensor("op_3640_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_3640_end_0 = const()[name = tensor("op_3640_end_0"), val = tensor([2, 640, 1, 77])]; tensor var_3640_end_mask_0 = const()[name = tensor("op_3640_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3640_cast_fp16 = slice_by_index(begin = var_3640_begin_0, end = var_3640_end_0, end_mask = var_3640_end_mask_0, x = v_11_cast_fp16)[name = tensor("op_3640_cast_fp16")]; tensor var_3644_equation_0 = const()[name = tensor("op_3644_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3644_cast_fp16 = einsum(equation = var_3644_equation_0, values = (var_3582_cast_fp16, var_3562_cast_fp16))[name = tensor("op_3644_cast_fp16")]; tensor var_3645_to_fp16 = const()[name = tensor("op_3645_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_545_cast_fp16 = mul(x = var_3644_cast_fp16, y = var_3645_to_fp16)[name = tensor("aw_chunk_545_cast_fp16")]; tensor var_3648_equation_0 = const()[name = tensor("op_3648_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3648_cast_fp16 = einsum(equation = var_3648_equation_0, values = (var_3582_cast_fp16, var_3563_cast_fp16))[name = tensor("op_3648_cast_fp16")]; tensor var_3649_to_fp16 = const()[name = tensor("op_3649_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_547_cast_fp16 = mul(x = var_3648_cast_fp16, y = var_3649_to_fp16)[name = tensor("aw_chunk_547_cast_fp16")]; tensor var_3652_equation_0 = const()[name = tensor("op_3652_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3652_cast_fp16 = einsum(equation = var_3652_equation_0, values = (var_3586_cast_fp16, var_3564_cast_fp16))[name = tensor("op_3652_cast_fp16")]; tensor var_3653_to_fp16 = const()[name = tensor("op_3653_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_549_cast_fp16 = mul(x = var_3652_cast_fp16, y = var_3653_to_fp16)[name = tensor("aw_chunk_549_cast_fp16")]; tensor var_3656_equation_0 = const()[name = tensor("op_3656_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3656_cast_fp16 = einsum(equation = var_3656_equation_0, values = (var_3586_cast_fp16, var_3565_cast_fp16))[name = tensor("op_3656_cast_fp16")]; tensor var_3657_to_fp16 = const()[name = tensor("op_3657_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_551_cast_fp16 = mul(x = var_3656_cast_fp16, y = var_3657_to_fp16)[name = tensor("aw_chunk_551_cast_fp16")]; tensor var_3660_equation_0 = const()[name = tensor("op_3660_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3660_cast_fp16 = einsum(equation = var_3660_equation_0, values = (var_3590_cast_fp16, var_3566_cast_fp16))[name = tensor("op_3660_cast_fp16")]; tensor var_3661_to_fp16 = const()[name = tensor("op_3661_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_553_cast_fp16 = mul(x = var_3660_cast_fp16, y = var_3661_to_fp16)[name = tensor("aw_chunk_553_cast_fp16")]; tensor var_3664_equation_0 = const()[name = tensor("op_3664_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3664_cast_fp16 = einsum(equation = var_3664_equation_0, values = (var_3590_cast_fp16, var_3567_cast_fp16))[name = tensor("op_3664_cast_fp16")]; tensor var_3665_to_fp16 = const()[name = tensor("op_3665_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_555_cast_fp16 = mul(x = var_3664_cast_fp16, y = var_3665_to_fp16)[name = tensor("aw_chunk_555_cast_fp16")]; tensor var_3668_equation_0 = const()[name = tensor("op_3668_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3668_cast_fp16 = einsum(equation = var_3668_equation_0, values = (var_3594_cast_fp16, var_3568_cast_fp16))[name = tensor("op_3668_cast_fp16")]; tensor var_3669_to_fp16 = const()[name = tensor("op_3669_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_557_cast_fp16 = mul(x = var_3668_cast_fp16, y = var_3669_to_fp16)[name = tensor("aw_chunk_557_cast_fp16")]; tensor var_3672_equation_0 = const()[name = tensor("op_3672_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3672_cast_fp16 = einsum(equation = var_3672_equation_0, values = (var_3594_cast_fp16, var_3569_cast_fp16))[name = tensor("op_3672_cast_fp16")]; tensor var_3673_to_fp16 = const()[name = tensor("op_3673_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_559_cast_fp16 = mul(x = var_3672_cast_fp16, y = var_3673_to_fp16)[name = tensor("aw_chunk_559_cast_fp16")]; tensor var_3676_equation_0 = const()[name = tensor("op_3676_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3676_cast_fp16 = einsum(equation = var_3676_equation_0, values = (var_3598_cast_fp16, var_3570_cast_fp16))[name = tensor("op_3676_cast_fp16")]; tensor var_3677_to_fp16 = const()[name = tensor("op_3677_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_561_cast_fp16 = mul(x = var_3676_cast_fp16, y = var_3677_to_fp16)[name = tensor("aw_chunk_561_cast_fp16")]; tensor var_3680_equation_0 = const()[name = tensor("op_3680_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3680_cast_fp16 = einsum(equation = var_3680_equation_0, values = (var_3598_cast_fp16, var_3571_cast_fp16))[name = tensor("op_3680_cast_fp16")]; tensor var_3681_to_fp16 = const()[name = tensor("op_3681_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_563_cast_fp16 = mul(x = var_3680_cast_fp16, y = var_3681_to_fp16)[name = tensor("aw_chunk_563_cast_fp16")]; tensor var_3684_equation_0 = const()[name = tensor("op_3684_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3684_cast_fp16 = einsum(equation = var_3684_equation_0, values = (var_3602_cast_fp16, var_3572_cast_fp16))[name = tensor("op_3684_cast_fp16")]; tensor var_3685_to_fp16 = const()[name = tensor("op_3685_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_565_cast_fp16 = mul(x = var_3684_cast_fp16, y = var_3685_to_fp16)[name = tensor("aw_chunk_565_cast_fp16")]; tensor var_3688_equation_0 = const()[name = tensor("op_3688_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3688_cast_fp16 = einsum(equation = var_3688_equation_0, values = (var_3602_cast_fp16, var_3573_cast_fp16))[name = tensor("op_3688_cast_fp16")]; tensor var_3689_to_fp16 = const()[name = tensor("op_3689_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_567_cast_fp16 = mul(x = var_3688_cast_fp16, y = var_3689_to_fp16)[name = tensor("aw_chunk_567_cast_fp16")]; tensor var_3692_equation_0 = const()[name = tensor("op_3692_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3692_cast_fp16 = einsum(equation = var_3692_equation_0, values = (var_3606_cast_fp16, var_3574_cast_fp16))[name = tensor("op_3692_cast_fp16")]; tensor var_3693_to_fp16 = const()[name = tensor("op_3693_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_569_cast_fp16 = mul(x = var_3692_cast_fp16, y = var_3693_to_fp16)[name = tensor("aw_chunk_569_cast_fp16")]; tensor var_3696_equation_0 = const()[name = tensor("op_3696_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3696_cast_fp16 = einsum(equation = var_3696_equation_0, values = (var_3606_cast_fp16, var_3575_cast_fp16))[name = tensor("op_3696_cast_fp16")]; tensor var_3697_to_fp16 = const()[name = tensor("op_3697_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_571_cast_fp16 = mul(x = var_3696_cast_fp16, y = var_3697_to_fp16)[name = tensor("aw_chunk_571_cast_fp16")]; tensor var_3700_equation_0 = const()[name = tensor("op_3700_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3700_cast_fp16 = einsum(equation = var_3700_equation_0, values = (var_3610_cast_fp16, var_3576_cast_fp16))[name = tensor("op_3700_cast_fp16")]; tensor var_3701_to_fp16 = const()[name = tensor("op_3701_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_573_cast_fp16 = mul(x = var_3700_cast_fp16, y = var_3701_to_fp16)[name = tensor("aw_chunk_573_cast_fp16")]; tensor var_3704_equation_0 = const()[name = tensor("op_3704_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_3704_cast_fp16 = einsum(equation = var_3704_equation_0, values = (var_3610_cast_fp16, var_3577_cast_fp16))[name = tensor("op_3704_cast_fp16")]; tensor var_3705_to_fp16 = const()[name = tensor("op_3705_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_575_cast_fp16 = mul(x = var_3704_cast_fp16, y = var_3705_to_fp16)[name = tensor("aw_chunk_575_cast_fp16")]; tensor var_3707_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_545_cast_fp16)[name = tensor("op_3707_cast_fp16")]; tensor var_3708_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_547_cast_fp16)[name = tensor("op_3708_cast_fp16")]; tensor var_3709_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_549_cast_fp16)[name = tensor("op_3709_cast_fp16")]; tensor var_3710_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_551_cast_fp16)[name = tensor("op_3710_cast_fp16")]; tensor var_3711_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_553_cast_fp16)[name = tensor("op_3711_cast_fp16")]; tensor var_3712_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_555_cast_fp16)[name = tensor("op_3712_cast_fp16")]; tensor var_3713_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_557_cast_fp16)[name = tensor("op_3713_cast_fp16")]; tensor var_3714_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_559_cast_fp16)[name = tensor("op_3714_cast_fp16")]; tensor var_3715_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_561_cast_fp16)[name = tensor("op_3715_cast_fp16")]; tensor var_3716_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_563_cast_fp16)[name = tensor("op_3716_cast_fp16")]; tensor var_3717_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_565_cast_fp16)[name = tensor("op_3717_cast_fp16")]; tensor var_3718_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_567_cast_fp16)[name = tensor("op_3718_cast_fp16")]; tensor var_3719_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_569_cast_fp16)[name = tensor("op_3719_cast_fp16")]; tensor var_3720_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_571_cast_fp16)[name = tensor("op_3720_cast_fp16")]; tensor var_3721_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_573_cast_fp16)[name = tensor("op_3721_cast_fp16")]; tensor var_3722_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_575_cast_fp16)[name = tensor("op_3722_cast_fp16")]; tensor var_3724_equation_0 = const()[name = tensor("op_3724_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3724_cast_fp16 = einsum(equation = var_3724_equation_0, values = (var_3612_cast_fp16, var_3707_cast_fp16))[name = tensor("op_3724_cast_fp16")]; tensor var_3726_equation_0 = const()[name = tensor("op_3726_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3726_cast_fp16 = einsum(equation = var_3726_equation_0, values = (var_3612_cast_fp16, var_3708_cast_fp16))[name = tensor("op_3726_cast_fp16")]; tensor var_3728_equation_0 = const()[name = tensor("op_3728_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3728_cast_fp16 = einsum(equation = var_3728_equation_0, values = (var_3616_cast_fp16, var_3709_cast_fp16))[name = tensor("op_3728_cast_fp16")]; tensor var_3730_equation_0 = const()[name = tensor("op_3730_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3730_cast_fp16 = einsum(equation = var_3730_equation_0, values = (var_3616_cast_fp16, var_3710_cast_fp16))[name = tensor("op_3730_cast_fp16")]; tensor var_3732_equation_0 = const()[name = tensor("op_3732_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3732_cast_fp16 = einsum(equation = var_3732_equation_0, values = (var_3620_cast_fp16, var_3711_cast_fp16))[name = tensor("op_3732_cast_fp16")]; tensor var_3734_equation_0 = const()[name = tensor("op_3734_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3734_cast_fp16 = einsum(equation = var_3734_equation_0, values = (var_3620_cast_fp16, var_3712_cast_fp16))[name = tensor("op_3734_cast_fp16")]; tensor var_3736_equation_0 = const()[name = tensor("op_3736_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3736_cast_fp16 = einsum(equation = var_3736_equation_0, values = (var_3624_cast_fp16, var_3713_cast_fp16))[name = tensor("op_3736_cast_fp16")]; tensor var_3738_equation_0 = const()[name = tensor("op_3738_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3738_cast_fp16 = einsum(equation = var_3738_equation_0, values = (var_3624_cast_fp16, var_3714_cast_fp16))[name = tensor("op_3738_cast_fp16")]; tensor var_3740_equation_0 = const()[name = tensor("op_3740_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3740_cast_fp16 = einsum(equation = var_3740_equation_0, values = (var_3628_cast_fp16, var_3715_cast_fp16))[name = tensor("op_3740_cast_fp16")]; tensor var_3742_equation_0 = const()[name = tensor("op_3742_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3742_cast_fp16 = einsum(equation = var_3742_equation_0, values = (var_3628_cast_fp16, var_3716_cast_fp16))[name = tensor("op_3742_cast_fp16")]; tensor var_3744_equation_0 = const()[name = tensor("op_3744_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3744_cast_fp16 = einsum(equation = var_3744_equation_0, values = (var_3632_cast_fp16, var_3717_cast_fp16))[name = tensor("op_3744_cast_fp16")]; tensor var_3746_equation_0 = const()[name = tensor("op_3746_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3746_cast_fp16 = einsum(equation = var_3746_equation_0, values = (var_3632_cast_fp16, var_3718_cast_fp16))[name = tensor("op_3746_cast_fp16")]; tensor var_3748_equation_0 = const()[name = tensor("op_3748_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3748_cast_fp16 = einsum(equation = var_3748_equation_0, values = (var_3636_cast_fp16, var_3719_cast_fp16))[name = tensor("op_3748_cast_fp16")]; tensor var_3750_equation_0 = const()[name = tensor("op_3750_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3750_cast_fp16 = einsum(equation = var_3750_equation_0, values = (var_3636_cast_fp16, var_3720_cast_fp16))[name = tensor("op_3750_cast_fp16")]; tensor var_3752_equation_0 = const()[name = tensor("op_3752_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3752_cast_fp16 = einsum(equation = var_3752_equation_0, values = (var_3640_cast_fp16, var_3721_cast_fp16))[name = tensor("op_3752_cast_fp16")]; tensor var_3754_equation_0 = const()[name = tensor("op_3754_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3754_cast_fp16 = einsum(equation = var_3754_equation_0, values = (var_3640_cast_fp16, var_3722_cast_fp16))[name = tensor("op_3754_cast_fp16")]; tensor var_3756_interleave_0 = const()[name = tensor("op_3756_interleave_0"), val = tensor(false)]; tensor var_3756_cast_fp16 = concat(axis = var_3089, interleave = var_3756_interleave_0, values = (var_3724_cast_fp16, var_3726_cast_fp16))[name = tensor("op_3756_cast_fp16")]; tensor var_3758_interleave_0 = const()[name = tensor("op_3758_interleave_0"), val = tensor(false)]; tensor var_3758_cast_fp16 = concat(axis = var_3089, interleave = var_3758_interleave_0, values = (var_3728_cast_fp16, var_3730_cast_fp16))[name = tensor("op_3758_cast_fp16")]; tensor var_3760_interleave_0 = const()[name = tensor("op_3760_interleave_0"), val = tensor(false)]; tensor var_3760_cast_fp16 = concat(axis = var_3089, interleave = var_3760_interleave_0, values = (var_3732_cast_fp16, var_3734_cast_fp16))[name = tensor("op_3760_cast_fp16")]; tensor var_3762_interleave_0 = const()[name = tensor("op_3762_interleave_0"), val = tensor(false)]; tensor var_3762_cast_fp16 = concat(axis = var_3089, interleave = var_3762_interleave_0, values = (var_3736_cast_fp16, var_3738_cast_fp16))[name = tensor("op_3762_cast_fp16")]; tensor var_3764_interleave_0 = const()[name = tensor("op_3764_interleave_0"), val = tensor(false)]; tensor var_3764_cast_fp16 = concat(axis = var_3089, interleave = var_3764_interleave_0, values = (var_3740_cast_fp16, var_3742_cast_fp16))[name = tensor("op_3764_cast_fp16")]; tensor var_3766_interleave_0 = const()[name = tensor("op_3766_interleave_0"), val = tensor(false)]; tensor var_3766_cast_fp16 = concat(axis = var_3089, interleave = var_3766_interleave_0, values = (var_3744_cast_fp16, var_3746_cast_fp16))[name = tensor("op_3766_cast_fp16")]; tensor var_3768_interleave_0 = const()[name = tensor("op_3768_interleave_0"), val = tensor(false)]; tensor var_3768_cast_fp16 = concat(axis = var_3089, interleave = var_3768_interleave_0, values = (var_3748_cast_fp16, var_3750_cast_fp16))[name = tensor("op_3768_cast_fp16")]; tensor var_3770_interleave_0 = const()[name = tensor("op_3770_interleave_0"), val = tensor(false)]; tensor var_3770_cast_fp16 = concat(axis = var_3089, interleave = var_3770_interleave_0, values = (var_3752_cast_fp16, var_3754_cast_fp16))[name = tensor("op_3770_cast_fp16")]; tensor input_81_interleave_0 = const()[name = tensor("input_81_interleave_0"), val = tensor(false)]; tensor input_81_cast_fp16 = concat(axis = var_3111, interleave = input_81_interleave_0, values = (var_3756_cast_fp16, var_3758_cast_fp16, var_3760_cast_fp16, var_3762_cast_fp16, var_3764_cast_fp16, var_3766_cast_fp16, var_3768_cast_fp16, var_3770_cast_fp16))[name = tensor("input_81_cast_fp16")]; tensor var_3776 = const()[name = tensor("op_3776"), val = tensor([1, 1])]; tensor var_3778 = const()[name = tensor("op_3778"), val = tensor([1, 1])]; tensor var_3780_pad_type_0 = const()[name = tensor("op_3780_pad_type_0"), val = tensor("custom")]; tensor var_3780_pad_0 = const()[name = tensor("op_3780_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45192896)))]; tensor down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46012160)))]; tensor var_3780_cast_fp16 = conv(bias = down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_3778, groups = var_3111, pad = var_3780_pad_0, pad_type = var_3780_pad_type_0, strides = var_3776, weight = down_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_81_cast_fp16)[name = tensor("op_3780_cast_fp16")]; tensor inputs_17_cast_fp16 = add(x = var_3780_cast_fp16, y = inputs_15_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; tensor input_83_axes_0 = const()[name = tensor("input_83_axes_0"), val = tensor([1])]; tensor input_83_gamma_0_to_fp16 = const()[name = tensor("input_83_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46013504)))]; tensor input_83_beta_0_to_fp16 = const()[name = tensor("input_83_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46014848)))]; tensor var_3790_to_fp16 = const()[name = tensor("op_3790_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_83_cast_fp16 = layer_norm(axes = input_83_axes_0, beta = input_83_beta_0_to_fp16, epsilon = var_3790_to_fp16, gamma = input_83_gamma_0_to_fp16, x = inputs_17_cast_fp16)[name = tensor("input_83_cast_fp16")]; tensor var_3806 = const()[name = tensor("op_3806"), val = tensor([1, 1])]; tensor var_3808 = const()[name = tensor("op_3808"), val = tensor([1, 1])]; tensor var_3810_pad_type_0 = const()[name = tensor("op_3810_pad_type_0"), val = tensor("custom")]; tensor var_3810_pad_0 = const()[name = tensor("op_3810_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46016192)))]; tensor down_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52569856)))]; tensor var_3810_cast_fp16 = conv(bias = down_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_3808, groups = var_3111, pad = var_3810_pad_0, pad_type = var_3810_pad_type_0, strides = var_3806, weight = down_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_83_cast_fp16)[name = tensor("op_3810_cast_fp16")]; tensor var_3811_split_sizes_0 = const()[name = tensor("op_3811_split_sizes_0"), val = tensor([2560, 2560])]; tensor var_3811_axis_0 = const()[name = tensor("op_3811_axis_0"), val = tensor(1)]; tensor var_3811_cast_fp16_0, tensor var_3811_cast_fp16_1 = split(axis = var_3811_axis_0, split_sizes = var_3811_split_sizes_0, x = var_3810_cast_fp16)[name = tensor("op_3811_cast_fp16")]; tensor var_3813_mode_0 = const()[name = tensor("op_3813_mode_0"), val = tensor("EXACT")]; tensor var_3813_cast_fp16 = gelu(mode = var_3813_mode_0, x = var_3811_cast_fp16_1)[name = tensor("op_3813_cast_fp16")]; tensor input_85_cast_fp16 = mul(x = var_3811_cast_fp16_0, y = var_3813_cast_fp16)[name = tensor("input_85_cast_fp16")]; tensor var_3817 = const()[name = tensor("op_3817"), val = tensor([1, 1])]; tensor var_3819 = const()[name = tensor("op_3819"), val = tensor([1, 1])]; tensor var_3821_pad_type_0 = const()[name = tensor("op_3821_pad_type_0"), val = tensor("custom")]; tensor var_3821_pad_0 = const()[name = tensor("op_3821_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52580160)))]; tensor down_blocks_1_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55857024)))]; tensor var_3821_cast_fp16 = conv(bias = down_blocks_1_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_3819, groups = var_3111, pad = var_3821_pad_0, pad_type = var_3821_pad_type_0, strides = var_3817, weight = down_blocks_1_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_85_cast_fp16)[name = tensor("op_3821_cast_fp16")]; tensor hidden_states_51_cast_fp16 = add(x = var_3821_cast_fp16, y = inputs_17_cast_fp16)[name = tensor("hidden_states_51_cast_fp16")]; tensor var_3823 = const()[name = tensor("op_3823"), val = tensor([2, 640, 32, 32])]; tensor input_87_cast_fp16 = reshape(shape = var_3823, x = hidden_states_51_cast_fp16)[name = tensor("input_87_cast_fp16")]; tensor var_3827 = const()[name = tensor("op_3827"), val = tensor([1, 1])]; tensor var_3829 = const()[name = tensor("op_3829"), val = tensor([1, 1])]; tensor hidden_states_53_pad_type_0 = const()[name = tensor("hidden_states_53_pad_type_0"), val = tensor("custom")]; tensor hidden_states_53_pad_0 = const()[name = tensor("hidden_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_0_proj_out_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55858368)))]; tensor down_blocks_1_attentions_0_proj_out_bias_to_fp16 = const()[name = tensor("down_blocks_1_attentions_0_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56677632)))]; tensor hidden_states_53_cast_fp16 = conv(bias = down_blocks_1_attentions_0_proj_out_bias_to_fp16, dilations = var_3829, groups = var_3111, pad = hidden_states_53_pad_0, pad_type = hidden_states_53_pad_type_0, strides = var_3827, weight = down_blocks_1_attentions_0_proj_out_weight_to_fp16, x = input_87_cast_fp16)[name = tensor("hidden_states_53_cast_fp16")]; tensor input_89_cast_fp16 = add(x = hidden_states_53_cast_fp16, y = hidden_states_41_cast_fp16)[name = tensor("input_89_cast_fp16")]; tensor reshape_36_shape_0 = const()[name = tensor("reshape_36_shape_0"), val = tensor([2, 32, 20, 32, 32])]; tensor reshape_36_cast_fp16 = reshape(shape = reshape_36_shape_0, x = input_89_cast_fp16)[name = tensor("reshape_36_cast_fp16")]; tensor reduce_mean_27_axes_0 = const()[name = tensor("reduce_mean_27_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_27_keep_dims_0 = const()[name = tensor("reduce_mean_27_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_27_cast_fp16 = reduce_mean(axes = reduce_mean_27_axes_0, keep_dims = reduce_mean_27_keep_dims_0, x = reshape_36_cast_fp16)[name = tensor("reduce_mean_27_cast_fp16")]; tensor sub_18_cast_fp16 = sub(x = reshape_36_cast_fp16, y = reduce_mean_27_cast_fp16)[name = tensor("sub_18_cast_fp16")]; tensor square_9_cast_fp16 = square(x = sub_18_cast_fp16)[name = tensor("square_9_cast_fp16")]; tensor reduce_mean_29_axes_0 = const()[name = tensor("reduce_mean_29_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_29_keep_dims_0 = const()[name = tensor("reduce_mean_29_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_29_cast_fp16 = reduce_mean(axes = reduce_mean_29_axes_0, keep_dims = reduce_mean_29_keep_dims_0, x = square_9_cast_fp16)[name = tensor("reduce_mean_29_cast_fp16")]; tensor add_18_y_0_to_fp16 = const()[name = tensor("add_18_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_18_cast_fp16 = add(x = reduce_mean_29_cast_fp16, y = add_18_y_0_to_fp16)[name = tensor("add_18_cast_fp16")]; tensor sqrt_9_cast_fp16 = sqrt(x = add_18_cast_fp16)[name = tensor("sqrt_9_cast_fp16")]; tensor real_div_9_cast_fp16 = real_div(x = sub_18_cast_fp16, y = sqrt_9_cast_fp16)[name = tensor("real_div_9_cast_fp16")]; tensor reshape_37_shape_0 = const()[name = tensor("reshape_37_shape_0"), val = tensor([2, 640, 32, 32])]; tensor reshape_37_cast_fp16 = reshape(shape = reshape_37_shape_0, x = real_div_9_cast_fp16)[name = tensor("reshape_37_cast_fp16")]; tensor add_19_gamma_0_to_fp16 = const()[name = tensor("add_19_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56678976)))]; tensor add_19_beta_0_to_fp16 = const()[name = tensor("add_19_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56680320)))]; tensor add_19_epsilon_0_to_fp16 = const()[name = tensor("add_19_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_19_cast_fp16 = batch_norm(beta = add_19_beta_0_to_fp16, epsilon = add_19_epsilon_0_to_fp16, gamma = add_19_gamma_0_to_fp16, mean = add_15_mean_0_to_fp16, variance = add_15_variance_0_to_fp16, x = reshape_37_cast_fp16)[name = tensor("add_19_cast_fp16")]; tensor input_93_cast_fp16 = silu(x = add_19_cast_fp16)[name = tensor("input_93_cast_fp16")]; tensor var_3844 = const()[name = tensor("op_3844"), val = tensor([1, 1])]; tensor var_3846 = const()[name = tensor("op_3846"), val = tensor([1, 1])]; tensor hidden_states_55_pad_type_0 = const()[name = tensor("hidden_states_55_pad_type_0"), val = tensor("custom")]; tensor hidden_states_55_pad_0 = const()[name = tensor("hidden_states_55_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_1_resnets_1_conv1_weight_to_fp16 = const()[name = tensor("down_blocks_1_resnets_1_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56681664)))]; tensor down_blocks_1_resnets_1_conv1_bias_to_fp16 = const()[name = tensor("down_blocks_1_resnets_1_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64054528)))]; tensor hidden_states_55_cast_fp16 = conv(bias = down_blocks_1_resnets_1_conv1_bias_to_fp16, dilations = var_3846, groups = var_3111, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = var_3844, weight = down_blocks_1_resnets_1_conv1_weight_to_fp16, x = input_93_cast_fp16)[name = tensor("hidden_states_55_cast_fp16")]; tensor var_3852 = const()[name = tensor("op_3852"), val = tensor([1, 1])]; tensor var_3854 = const()[name = tensor("op_3854"), val = tensor([1, 1])]; tensor temb_7_pad_type_0 = const()[name = tensor("temb_7_pad_type_0"), val = tensor("custom")]; tensor temb_7_pad_0 = const()[name = tensor("temb_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_resnets_1_time_emb_proj_weight_to_fp16 = const()[name = tensor("down_blocks_1_resnets_1_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64055872)))]; tensor down_blocks_1_resnets_1_time_emb_proj_bias_to_fp16 = const()[name = tensor("down_blocks_1_resnets_1_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65694336)))]; tensor temb_7_cast_fp16 = conv(bias = down_blocks_1_resnets_1_time_emb_proj_bias_to_fp16, dilations = var_3854, groups = var_3111, pad = temb_7_pad_0, pad_type = temb_7_pad_type_0, strides = var_3852, weight = down_blocks_1_resnets_1_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_7_cast_fp16")]; tensor input_97_cast_fp16 = add(x = hidden_states_55_cast_fp16, y = temb_7_cast_fp16)[name = tensor("input_97_cast_fp16")]; tensor reshape_40_shape_0 = const()[name = tensor("reshape_40_shape_0"), val = tensor([2, 32, 20, 32, 32])]; tensor reshape_40_cast_fp16 = reshape(shape = reshape_40_shape_0, x = input_97_cast_fp16)[name = tensor("reshape_40_cast_fp16")]; tensor reduce_mean_30_axes_0 = const()[name = tensor("reduce_mean_30_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_30_keep_dims_0 = const()[name = tensor("reduce_mean_30_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_30_cast_fp16 = reduce_mean(axes = reduce_mean_30_axes_0, keep_dims = reduce_mean_30_keep_dims_0, x = reshape_40_cast_fp16)[name = tensor("reduce_mean_30_cast_fp16")]; tensor sub_20_cast_fp16 = sub(x = reshape_40_cast_fp16, y = reduce_mean_30_cast_fp16)[name = tensor("sub_20_cast_fp16")]; tensor square_10_cast_fp16 = square(x = sub_20_cast_fp16)[name = tensor("square_10_cast_fp16")]; tensor reduce_mean_32_axes_0 = const()[name = tensor("reduce_mean_32_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_32_keep_dims_0 = const()[name = tensor("reduce_mean_32_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_32_cast_fp16 = reduce_mean(axes = reduce_mean_32_axes_0, keep_dims = reduce_mean_32_keep_dims_0, x = square_10_cast_fp16)[name = tensor("reduce_mean_32_cast_fp16")]; tensor add_20_y_0_to_fp16 = const()[name = tensor("add_20_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_20_cast_fp16 = add(x = reduce_mean_32_cast_fp16, y = add_20_y_0_to_fp16)[name = tensor("add_20_cast_fp16")]; tensor sqrt_10_cast_fp16 = sqrt(x = add_20_cast_fp16)[name = tensor("sqrt_10_cast_fp16")]; tensor real_div_10_cast_fp16 = real_div(x = sub_20_cast_fp16, y = sqrt_10_cast_fp16)[name = tensor("real_div_10_cast_fp16")]; tensor reshape_41_shape_0 = const()[name = tensor("reshape_41_shape_0"), val = tensor([2, 640, 32, 32])]; tensor reshape_41_cast_fp16 = reshape(shape = reshape_41_shape_0, x = real_div_10_cast_fp16)[name = tensor("reshape_41_cast_fp16")]; tensor add_21_gamma_0_to_fp16 = const()[name = tensor("add_21_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65695680)))]; tensor add_21_beta_0_to_fp16 = const()[name = tensor("add_21_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65697024)))]; tensor add_21_epsilon_0_to_fp16 = const()[name = tensor("add_21_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_21_cast_fp16 = batch_norm(beta = add_21_beta_0_to_fp16, epsilon = add_21_epsilon_0_to_fp16, gamma = add_21_gamma_0_to_fp16, mean = add_15_mean_0_to_fp16, variance = add_15_variance_0_to_fp16, x = reshape_41_cast_fp16)[name = tensor("add_21_cast_fp16")]; tensor input_101_cast_fp16 = silu(x = add_21_cast_fp16)[name = tensor("input_101_cast_fp16")]; tensor var_3864 = const()[name = tensor("op_3864"), val = tensor([1, 1])]; tensor var_3866 = const()[name = tensor("op_3866"), val = tensor([1, 1])]; tensor hidden_states_57_pad_type_0 = const()[name = tensor("hidden_states_57_pad_type_0"), val = tensor("custom")]; tensor hidden_states_57_pad_0 = const()[name = tensor("hidden_states_57_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_1_resnets_1_conv2_weight_to_fp16 = const()[name = tensor("down_blocks_1_resnets_1_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65698368)))]; tensor down_blocks_1_resnets_1_conv2_bias_to_fp16 = const()[name = tensor("down_blocks_1_resnets_1_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73071232)))]; tensor hidden_states_57_cast_fp16 = conv(bias = down_blocks_1_resnets_1_conv2_bias_to_fp16, dilations = var_3866, groups = var_3111, pad = hidden_states_57_pad_0, pad_type = hidden_states_57_pad_type_0, strides = var_3864, weight = down_blocks_1_resnets_1_conv2_weight_to_fp16, x = input_101_cast_fp16)[name = tensor("hidden_states_57_cast_fp16")]; tensor hidden_states_59_cast_fp16 = add(x = input_89_cast_fp16, y = hidden_states_57_cast_fp16)[name = tensor("hidden_states_59_cast_fp16")]; tensor reshape_44_shape_0 = const()[name = tensor("reshape_44_shape_0"), val = tensor([2, 32, 20, 32, 32])]; tensor reshape_44_cast_fp16 = reshape(shape = reshape_44_shape_0, x = hidden_states_59_cast_fp16)[name = tensor("reshape_44_cast_fp16")]; tensor reduce_mean_33_axes_0 = const()[name = tensor("reduce_mean_33_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_33_keep_dims_0 = const()[name = tensor("reduce_mean_33_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_33_cast_fp16 = reduce_mean(axes = reduce_mean_33_axes_0, keep_dims = reduce_mean_33_keep_dims_0, x = reshape_44_cast_fp16)[name = tensor("reduce_mean_33_cast_fp16")]; tensor sub_22_cast_fp16 = sub(x = reshape_44_cast_fp16, y = reduce_mean_33_cast_fp16)[name = tensor("sub_22_cast_fp16")]; tensor square_11_cast_fp16 = square(x = sub_22_cast_fp16)[name = tensor("square_11_cast_fp16")]; tensor reduce_mean_35_axes_0 = const()[name = tensor("reduce_mean_35_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_35_keep_dims_0 = const()[name = tensor("reduce_mean_35_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_35_cast_fp16 = reduce_mean(axes = reduce_mean_35_axes_0, keep_dims = reduce_mean_35_keep_dims_0, x = square_11_cast_fp16)[name = tensor("reduce_mean_35_cast_fp16")]; tensor add_22_y_0_to_fp16 = const()[name = tensor("add_22_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_22_cast_fp16 = add(x = reduce_mean_35_cast_fp16, y = add_22_y_0_to_fp16)[name = tensor("add_22_cast_fp16")]; tensor sqrt_11_cast_fp16 = sqrt(x = add_22_cast_fp16)[name = tensor("sqrt_11_cast_fp16")]; tensor real_div_11_cast_fp16 = real_div(x = sub_22_cast_fp16, y = sqrt_11_cast_fp16)[name = tensor("real_div_11_cast_fp16")]; tensor reshape_45_shape_0 = const()[name = tensor("reshape_45_shape_0"), val = tensor([2, 640, 32, 32])]; tensor reshape_45_cast_fp16 = reshape(shape = reshape_45_shape_0, x = real_div_11_cast_fp16)[name = tensor("reshape_45_cast_fp16")]; tensor add_23_gamma_0_to_fp16 = const()[name = tensor("add_23_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73072576)))]; tensor add_23_beta_0_to_fp16 = const()[name = tensor("add_23_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73073920)))]; tensor add_23_epsilon_0_to_fp16 = const()[name = tensor("add_23_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_23_cast_fp16 = batch_norm(beta = add_23_beta_0_to_fp16, epsilon = add_23_epsilon_0_to_fp16, gamma = add_23_gamma_0_to_fp16, mean = add_15_mean_0_to_fp16, variance = add_15_variance_0_to_fp16, x = reshape_45_cast_fp16)[name = tensor("add_23_cast_fp16")]; tensor var_3886 = const()[name = tensor("op_3886"), val = tensor([1, 1])]; tensor var_3888 = const()[name = tensor("op_3888"), val = tensor([1, 1])]; tensor hidden_states_61_pad_type_0 = const()[name = tensor("hidden_states_61_pad_type_0"), val = tensor("custom")]; tensor hidden_states_61_pad_0 = const()[name = tensor("hidden_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_1_proj_in_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73075264)))]; tensor down_blocks_1_attentions_1_proj_in_bias_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73894528)))]; tensor hidden_states_61_cast_fp16 = conv(bias = down_blocks_1_attentions_1_proj_in_bias_to_fp16, dilations = var_3888, groups = var_3111, pad = hidden_states_61_pad_0, pad_type = hidden_states_61_pad_type_0, strides = var_3886, weight = down_blocks_1_attentions_1_proj_in_weight_to_fp16, x = add_23_cast_fp16)[name = tensor("hidden_states_61_cast_fp16")]; tensor var_3893 = const()[name = tensor("op_3893"), val = tensor([2, 640, 1, 1024])]; tensor inputs_19_cast_fp16 = reshape(shape = var_3893, x = hidden_states_61_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; tensor hidden_states_63_axes_0 = const()[name = tensor("hidden_states_63_axes_0"), val = tensor([1])]; tensor hidden_states_63_gamma_0_to_fp16 = const()[name = tensor("hidden_states_63_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73895872)))]; tensor hidden_states_63_beta_0_to_fp16 = const()[name = tensor("hidden_states_63_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73897216)))]; tensor var_3909_to_fp16 = const()[name = tensor("op_3909_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_63_cast_fp16 = layer_norm(axes = hidden_states_63_axes_0, beta = hidden_states_63_beta_0_to_fp16, epsilon = var_3909_to_fp16, gamma = hidden_states_63_gamma_0_to_fp16, x = inputs_19_cast_fp16)[name = tensor("hidden_states_63_cast_fp16")]; tensor var_3924 = const()[name = tensor("op_3924"), val = tensor([1, 1])]; tensor var_3926 = const()[name = tensor("op_3926"), val = tensor([1, 1])]; tensor q_13_pad_type_0 = const()[name = tensor("q_13_pad_type_0"), val = tensor("custom")]; tensor q_13_pad_0 = const()[name = tensor("q_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73898560)))]; tensor q_13_cast_fp16 = conv(dilations = var_3926, groups = var_3111, pad = q_13_pad_0, pad_type = q_13_pad_type_0, strides = var_3924, weight = down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_63_cast_fp16)[name = tensor("q_13_cast_fp16")]; tensor var_3930 = const()[name = tensor("op_3930"), val = tensor([1, 1])]; tensor var_3932 = const()[name = tensor("op_3932"), val = tensor([1, 1])]; tensor k_25_pad_type_0 = const()[name = tensor("k_25_pad_type_0"), val = tensor("custom")]; tensor k_25_pad_0 = const()[name = tensor("k_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(74717824)))]; tensor k_25_cast_fp16 = conv(dilations = var_3932, groups = var_3111, pad = k_25_pad_0, pad_type = k_25_pad_type_0, strides = var_3930, weight = down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_63_cast_fp16)[name = tensor("k_25_cast_fp16")]; tensor var_3936 = const()[name = tensor("op_3936"), val = tensor([1, 1])]; tensor var_3938 = const()[name = tensor("op_3938"), val = tensor([1, 1])]; tensor v_13_pad_type_0 = const()[name = tensor("v_13_pad_type_0"), val = tensor("custom")]; tensor v_13_pad_0 = const()[name = tensor("v_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(75537088)))]; tensor v_13_cast_fp16 = conv(dilations = var_3938, groups = var_3111, pad = v_13_pad_0, pad_type = v_13_pad_type_0, strides = var_3936, weight = down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_63_cast_fp16)[name = tensor("v_13_cast_fp16")]; tensor var_3942_begin_0 = const()[name = tensor("op_3942_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3942_end_0 = const()[name = tensor("op_3942_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3942_end_mask_0 = const()[name = tensor("op_3942_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3942_cast_fp16 = slice_by_index(begin = var_3942_begin_0, end = var_3942_end_0, end_mask = var_3942_end_mask_0, x = q_13_cast_fp16)[name = tensor("op_3942_cast_fp16")]; tensor var_3946_begin_0 = const()[name = tensor("op_3946_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_3946_end_0 = const()[name = tensor("op_3946_end_0"), val = tensor([2, 160, 1, 1024])]; tensor var_3946_end_mask_0 = const()[name = tensor("op_3946_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3946_cast_fp16 = slice_by_index(begin = var_3946_begin_0, end = var_3946_end_0, end_mask = var_3946_end_mask_0, x = q_13_cast_fp16)[name = tensor("op_3946_cast_fp16")]; tensor var_3950_begin_0 = const()[name = tensor("op_3950_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_3950_end_0 = const()[name = tensor("op_3950_end_0"), val = tensor([2, 240, 1, 1024])]; tensor var_3950_end_mask_0 = const()[name = tensor("op_3950_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3950_cast_fp16 = slice_by_index(begin = var_3950_begin_0, end = var_3950_end_0, end_mask = var_3950_end_mask_0, x = q_13_cast_fp16)[name = tensor("op_3950_cast_fp16")]; tensor var_3954_begin_0 = const()[name = tensor("op_3954_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_3954_end_0 = const()[name = tensor("op_3954_end_0"), val = tensor([2, 320, 1, 1024])]; tensor var_3954_end_mask_0 = const()[name = tensor("op_3954_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3954_cast_fp16 = slice_by_index(begin = var_3954_begin_0, end = var_3954_end_0, end_mask = var_3954_end_mask_0, x = q_13_cast_fp16)[name = tensor("op_3954_cast_fp16")]; tensor var_3958_begin_0 = const()[name = tensor("op_3958_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_3958_end_0 = const()[name = tensor("op_3958_end_0"), val = tensor([2, 400, 1, 1024])]; tensor var_3958_end_mask_0 = const()[name = tensor("op_3958_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3958_cast_fp16 = slice_by_index(begin = var_3958_begin_0, end = var_3958_end_0, end_mask = var_3958_end_mask_0, x = q_13_cast_fp16)[name = tensor("op_3958_cast_fp16")]; tensor var_3962_begin_0 = const()[name = tensor("op_3962_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_3962_end_0 = const()[name = tensor("op_3962_end_0"), val = tensor([2, 480, 1, 1024])]; tensor var_3962_end_mask_0 = const()[name = tensor("op_3962_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3962_cast_fp16 = slice_by_index(begin = var_3962_begin_0, end = var_3962_end_0, end_mask = var_3962_end_mask_0, x = q_13_cast_fp16)[name = tensor("op_3962_cast_fp16")]; tensor var_3966_begin_0 = const()[name = tensor("op_3966_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_3966_end_0 = const()[name = tensor("op_3966_end_0"), val = tensor([2, 560, 1, 1024])]; tensor var_3966_end_mask_0 = const()[name = tensor("op_3966_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3966_cast_fp16 = slice_by_index(begin = var_3966_begin_0, end = var_3966_end_0, end_mask = var_3966_end_mask_0, x = q_13_cast_fp16)[name = tensor("op_3966_cast_fp16")]; tensor var_3970_begin_0 = const()[name = tensor("op_3970_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_3970_end_0 = const()[name = tensor("op_3970_end_0"), val = tensor([2, 640, 1, 1024])]; tensor var_3970_end_mask_0 = const()[name = tensor("op_3970_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3970_cast_fp16 = slice_by_index(begin = var_3970_begin_0, end = var_3970_end_0, end_mask = var_3970_end_mask_0, x = q_13_cast_fp16)[name = tensor("op_3970_cast_fp16")]; tensor var_3973_begin_0 = const()[name = tensor("op_3973_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3973_end_0 = const()[name = tensor("op_3973_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3973_end_mask_0 = const()[name = tensor("op_3973_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3973_cast_fp16 = slice_by_index(begin = var_3973_begin_0, end = var_3973_end_0, end_mask = var_3973_end_mask_0, x = var_3942_cast_fp16)[name = tensor("op_3973_cast_fp16")]; tensor var_3974_begin_0 = const()[name = tensor("op_3974_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3974_end_0 = const()[name = tensor("op_3974_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3974_end_mask_0 = const()[name = tensor("op_3974_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3974_cast_fp16 = slice_by_index(begin = var_3974_begin_0, end = var_3974_end_0, end_mask = var_3974_end_mask_0, x = var_3942_cast_fp16)[name = tensor("op_3974_cast_fp16")]; tensor var_3975_begin_0 = const()[name = tensor("op_3975_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3975_end_0 = const()[name = tensor("op_3975_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3975_end_mask_0 = const()[name = tensor("op_3975_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3975_cast_fp16 = slice_by_index(begin = var_3975_begin_0, end = var_3975_end_0, end_mask = var_3975_end_mask_0, x = var_3946_cast_fp16)[name = tensor("op_3975_cast_fp16")]; tensor var_3976_begin_0 = const()[name = tensor("op_3976_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3976_end_0 = const()[name = tensor("op_3976_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3976_end_mask_0 = const()[name = tensor("op_3976_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3976_cast_fp16 = slice_by_index(begin = var_3976_begin_0, end = var_3976_end_0, end_mask = var_3976_end_mask_0, x = var_3946_cast_fp16)[name = tensor("op_3976_cast_fp16")]; tensor var_3977_begin_0 = const()[name = tensor("op_3977_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3977_end_0 = const()[name = tensor("op_3977_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3977_end_mask_0 = const()[name = tensor("op_3977_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3977_cast_fp16 = slice_by_index(begin = var_3977_begin_0, end = var_3977_end_0, end_mask = var_3977_end_mask_0, x = var_3950_cast_fp16)[name = tensor("op_3977_cast_fp16")]; tensor var_3978_begin_0 = const()[name = tensor("op_3978_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3978_end_0 = const()[name = tensor("op_3978_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3978_end_mask_0 = const()[name = tensor("op_3978_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3978_cast_fp16 = slice_by_index(begin = var_3978_begin_0, end = var_3978_end_0, end_mask = var_3978_end_mask_0, x = var_3950_cast_fp16)[name = tensor("op_3978_cast_fp16")]; tensor var_3979_begin_0 = const()[name = tensor("op_3979_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3979_end_0 = const()[name = tensor("op_3979_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3979_end_mask_0 = const()[name = tensor("op_3979_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3979_cast_fp16 = slice_by_index(begin = var_3979_begin_0, end = var_3979_end_0, end_mask = var_3979_end_mask_0, x = var_3954_cast_fp16)[name = tensor("op_3979_cast_fp16")]; tensor var_3980_begin_0 = const()[name = tensor("op_3980_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3980_end_0 = const()[name = tensor("op_3980_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3980_end_mask_0 = const()[name = tensor("op_3980_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3980_cast_fp16 = slice_by_index(begin = var_3980_begin_0, end = var_3980_end_0, end_mask = var_3980_end_mask_0, x = var_3954_cast_fp16)[name = tensor("op_3980_cast_fp16")]; tensor var_3981_begin_0 = const()[name = tensor("op_3981_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3981_end_0 = const()[name = tensor("op_3981_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3981_end_mask_0 = const()[name = tensor("op_3981_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3981_cast_fp16 = slice_by_index(begin = var_3981_begin_0, end = var_3981_end_0, end_mask = var_3981_end_mask_0, x = var_3958_cast_fp16)[name = tensor("op_3981_cast_fp16")]; tensor var_3982_begin_0 = const()[name = tensor("op_3982_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3982_end_0 = const()[name = tensor("op_3982_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3982_end_mask_0 = const()[name = tensor("op_3982_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3982_cast_fp16 = slice_by_index(begin = var_3982_begin_0, end = var_3982_end_0, end_mask = var_3982_end_mask_0, x = var_3958_cast_fp16)[name = tensor("op_3982_cast_fp16")]; tensor var_3983_begin_0 = const()[name = tensor("op_3983_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3983_end_0 = const()[name = tensor("op_3983_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3983_end_mask_0 = const()[name = tensor("op_3983_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3983_cast_fp16 = slice_by_index(begin = var_3983_begin_0, end = var_3983_end_0, end_mask = var_3983_end_mask_0, x = var_3962_cast_fp16)[name = tensor("op_3983_cast_fp16")]; tensor var_3984_begin_0 = const()[name = tensor("op_3984_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3984_end_0 = const()[name = tensor("op_3984_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3984_end_mask_0 = const()[name = tensor("op_3984_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3984_cast_fp16 = slice_by_index(begin = var_3984_begin_0, end = var_3984_end_0, end_mask = var_3984_end_mask_0, x = var_3962_cast_fp16)[name = tensor("op_3984_cast_fp16")]; tensor var_3985_begin_0 = const()[name = tensor("op_3985_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3985_end_0 = const()[name = tensor("op_3985_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3985_end_mask_0 = const()[name = tensor("op_3985_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3985_cast_fp16 = slice_by_index(begin = var_3985_begin_0, end = var_3985_end_0, end_mask = var_3985_end_mask_0, x = var_3966_cast_fp16)[name = tensor("op_3985_cast_fp16")]; tensor var_3986_begin_0 = const()[name = tensor("op_3986_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3986_end_0 = const()[name = tensor("op_3986_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3986_end_mask_0 = const()[name = tensor("op_3986_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3986_cast_fp16 = slice_by_index(begin = var_3986_begin_0, end = var_3986_end_0, end_mask = var_3986_end_mask_0, x = var_3966_cast_fp16)[name = tensor("op_3986_cast_fp16")]; tensor var_3987_begin_0 = const()[name = tensor("op_3987_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3987_end_0 = const()[name = tensor("op_3987_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_3987_end_mask_0 = const()[name = tensor("op_3987_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3987_cast_fp16 = slice_by_index(begin = var_3987_begin_0, end = var_3987_end_0, end_mask = var_3987_end_mask_0, x = var_3970_cast_fp16)[name = tensor("op_3987_cast_fp16")]; tensor var_3988_begin_0 = const()[name = tensor("op_3988_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_3988_end_0 = const()[name = tensor("op_3988_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_3988_end_mask_0 = const()[name = tensor("op_3988_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3988_cast_fp16 = slice_by_index(begin = var_3988_begin_0, end = var_3988_end_0, end_mask = var_3988_end_mask_0, x = var_3970_cast_fp16)[name = tensor("op_3988_cast_fp16")]; tensor k_27_perm_0 = const()[name = tensor("k_27_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_3993_begin_0 = const()[name = tensor("op_3993_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3993_end_0 = const()[name = tensor("op_3993_end_0"), val = tensor([2, 1024, 1, 80])]; tensor var_3993_end_mask_0 = const()[name = tensor("op_3993_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_25 = transpose(perm = k_27_perm_0, x = k_25_cast_fp16)[name = tensor("transpose_25")]; tensor var_3993_cast_fp16 = slice_by_index(begin = var_3993_begin_0, end = var_3993_end_0, end_mask = var_3993_end_mask_0, x = transpose_25)[name = tensor("op_3993_cast_fp16")]; tensor var_3997_begin_0 = const()[name = tensor("op_3997_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_3997_end_0 = const()[name = tensor("op_3997_end_0"), val = tensor([2, 1024, 1, 160])]; tensor var_3997_end_mask_0 = const()[name = tensor("op_3997_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3997_cast_fp16 = slice_by_index(begin = var_3997_begin_0, end = var_3997_end_0, end_mask = var_3997_end_mask_0, x = transpose_25)[name = tensor("op_3997_cast_fp16")]; tensor var_4001_begin_0 = const()[name = tensor("op_4001_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_4001_end_0 = const()[name = tensor("op_4001_end_0"), val = tensor([2, 1024, 1, 240])]; tensor var_4001_end_mask_0 = const()[name = tensor("op_4001_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4001_cast_fp16 = slice_by_index(begin = var_4001_begin_0, end = var_4001_end_0, end_mask = var_4001_end_mask_0, x = transpose_25)[name = tensor("op_4001_cast_fp16")]; tensor var_4005_begin_0 = const()[name = tensor("op_4005_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_4005_end_0 = const()[name = tensor("op_4005_end_0"), val = tensor([2, 1024, 1, 320])]; tensor var_4005_end_mask_0 = const()[name = tensor("op_4005_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4005_cast_fp16 = slice_by_index(begin = var_4005_begin_0, end = var_4005_end_0, end_mask = var_4005_end_mask_0, x = transpose_25)[name = tensor("op_4005_cast_fp16")]; tensor var_4009_begin_0 = const()[name = tensor("op_4009_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_4009_end_0 = const()[name = tensor("op_4009_end_0"), val = tensor([2, 1024, 1, 400])]; tensor var_4009_end_mask_0 = const()[name = tensor("op_4009_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4009_cast_fp16 = slice_by_index(begin = var_4009_begin_0, end = var_4009_end_0, end_mask = var_4009_end_mask_0, x = transpose_25)[name = tensor("op_4009_cast_fp16")]; tensor var_4013_begin_0 = const()[name = tensor("op_4013_begin_0"), val = tensor([0, 0, 0, 400])]; tensor var_4013_end_0 = const()[name = tensor("op_4013_end_0"), val = tensor([2, 1024, 1, 480])]; tensor var_4013_end_mask_0 = const()[name = tensor("op_4013_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4013_cast_fp16 = slice_by_index(begin = var_4013_begin_0, end = var_4013_end_0, end_mask = var_4013_end_mask_0, x = transpose_25)[name = tensor("op_4013_cast_fp16")]; tensor var_4017_begin_0 = const()[name = tensor("op_4017_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_4017_end_0 = const()[name = tensor("op_4017_end_0"), val = tensor([2, 1024, 1, 560])]; tensor var_4017_end_mask_0 = const()[name = tensor("op_4017_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4017_cast_fp16 = slice_by_index(begin = var_4017_begin_0, end = var_4017_end_0, end_mask = var_4017_end_mask_0, x = transpose_25)[name = tensor("op_4017_cast_fp16")]; tensor var_4021_begin_0 = const()[name = tensor("op_4021_begin_0"), val = tensor([0, 0, 0, 560])]; tensor var_4021_end_0 = const()[name = tensor("op_4021_end_0"), val = tensor([2, 1024, 1, 640])]; tensor var_4021_end_mask_0 = const()[name = tensor("op_4021_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4021_cast_fp16 = slice_by_index(begin = var_4021_begin_0, end = var_4021_end_0, end_mask = var_4021_end_mask_0, x = transpose_25)[name = tensor("op_4021_cast_fp16")]; tensor var_4023_begin_0 = const()[name = tensor("op_4023_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4023_end_0 = const()[name = tensor("op_4023_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_4023_end_mask_0 = const()[name = tensor("op_4023_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4023_cast_fp16 = slice_by_index(begin = var_4023_begin_0, end = var_4023_end_0, end_mask = var_4023_end_mask_0, x = v_13_cast_fp16)[name = tensor("op_4023_cast_fp16")]; tensor var_4027_begin_0 = const()[name = tensor("op_4027_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_4027_end_0 = const()[name = tensor("op_4027_end_0"), val = tensor([2, 160, 1, 1024])]; tensor var_4027_end_mask_0 = const()[name = tensor("op_4027_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4027_cast_fp16 = slice_by_index(begin = var_4027_begin_0, end = var_4027_end_0, end_mask = var_4027_end_mask_0, x = v_13_cast_fp16)[name = tensor("op_4027_cast_fp16")]; tensor var_4031_begin_0 = const()[name = tensor("op_4031_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_4031_end_0 = const()[name = tensor("op_4031_end_0"), val = tensor([2, 240, 1, 1024])]; tensor var_4031_end_mask_0 = const()[name = tensor("op_4031_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4031_cast_fp16 = slice_by_index(begin = var_4031_begin_0, end = var_4031_end_0, end_mask = var_4031_end_mask_0, x = v_13_cast_fp16)[name = tensor("op_4031_cast_fp16")]; tensor var_4035_begin_0 = const()[name = tensor("op_4035_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_4035_end_0 = const()[name = tensor("op_4035_end_0"), val = tensor([2, 320, 1, 1024])]; tensor var_4035_end_mask_0 = const()[name = tensor("op_4035_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4035_cast_fp16 = slice_by_index(begin = var_4035_begin_0, end = var_4035_end_0, end_mask = var_4035_end_mask_0, x = v_13_cast_fp16)[name = tensor("op_4035_cast_fp16")]; tensor var_4039_begin_0 = const()[name = tensor("op_4039_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_4039_end_0 = const()[name = tensor("op_4039_end_0"), val = tensor([2, 400, 1, 1024])]; tensor var_4039_end_mask_0 = const()[name = tensor("op_4039_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4039_cast_fp16 = slice_by_index(begin = var_4039_begin_0, end = var_4039_end_0, end_mask = var_4039_end_mask_0, x = v_13_cast_fp16)[name = tensor("op_4039_cast_fp16")]; tensor var_4043_begin_0 = const()[name = tensor("op_4043_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_4043_end_0 = const()[name = tensor("op_4043_end_0"), val = tensor([2, 480, 1, 1024])]; tensor var_4043_end_mask_0 = const()[name = tensor("op_4043_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4043_cast_fp16 = slice_by_index(begin = var_4043_begin_0, end = var_4043_end_0, end_mask = var_4043_end_mask_0, x = v_13_cast_fp16)[name = tensor("op_4043_cast_fp16")]; tensor var_4047_begin_0 = const()[name = tensor("op_4047_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_4047_end_0 = const()[name = tensor("op_4047_end_0"), val = tensor([2, 560, 1, 1024])]; tensor var_4047_end_mask_0 = const()[name = tensor("op_4047_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4047_cast_fp16 = slice_by_index(begin = var_4047_begin_0, end = var_4047_end_0, end_mask = var_4047_end_mask_0, x = v_13_cast_fp16)[name = tensor("op_4047_cast_fp16")]; tensor var_4051_begin_0 = const()[name = tensor("op_4051_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_4051_end_0 = const()[name = tensor("op_4051_end_0"), val = tensor([2, 640, 1, 1024])]; tensor var_4051_end_mask_0 = const()[name = tensor("op_4051_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4051_cast_fp16 = slice_by_index(begin = var_4051_begin_0, end = var_4051_end_0, end_mask = var_4051_end_mask_0, x = v_13_cast_fp16)[name = tensor("op_4051_cast_fp16")]; tensor var_4055_equation_0 = const()[name = tensor("op_4055_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4055_cast_fp16 = einsum(equation = var_4055_equation_0, values = (var_3993_cast_fp16, var_3973_cast_fp16))[name = tensor("op_4055_cast_fp16")]; tensor var_4056_to_fp16 = const()[name = tensor("op_4056_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_577_cast_fp16 = mul(x = var_4055_cast_fp16, y = var_4056_to_fp16)[name = tensor("aw_chunk_577_cast_fp16")]; tensor var_4059_equation_0 = const()[name = tensor("op_4059_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4059_cast_fp16 = einsum(equation = var_4059_equation_0, values = (var_3993_cast_fp16, var_3974_cast_fp16))[name = tensor("op_4059_cast_fp16")]; tensor var_4060_to_fp16 = const()[name = tensor("op_4060_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_579_cast_fp16 = mul(x = var_4059_cast_fp16, y = var_4060_to_fp16)[name = tensor("aw_chunk_579_cast_fp16")]; tensor var_4063_equation_0 = const()[name = tensor("op_4063_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4063_cast_fp16 = einsum(equation = var_4063_equation_0, values = (var_3997_cast_fp16, var_3975_cast_fp16))[name = tensor("op_4063_cast_fp16")]; tensor var_4064_to_fp16 = const()[name = tensor("op_4064_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_581_cast_fp16 = mul(x = var_4063_cast_fp16, y = var_4064_to_fp16)[name = tensor("aw_chunk_581_cast_fp16")]; tensor var_4067_equation_0 = const()[name = tensor("op_4067_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4067_cast_fp16 = einsum(equation = var_4067_equation_0, values = (var_3997_cast_fp16, var_3976_cast_fp16))[name = tensor("op_4067_cast_fp16")]; tensor var_4068_to_fp16 = const()[name = tensor("op_4068_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_583_cast_fp16 = mul(x = var_4067_cast_fp16, y = var_4068_to_fp16)[name = tensor("aw_chunk_583_cast_fp16")]; tensor var_4071_equation_0 = const()[name = tensor("op_4071_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4071_cast_fp16 = einsum(equation = var_4071_equation_0, values = (var_4001_cast_fp16, var_3977_cast_fp16))[name = tensor("op_4071_cast_fp16")]; tensor var_4072_to_fp16 = const()[name = tensor("op_4072_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_585_cast_fp16 = mul(x = var_4071_cast_fp16, y = var_4072_to_fp16)[name = tensor("aw_chunk_585_cast_fp16")]; tensor var_4075_equation_0 = const()[name = tensor("op_4075_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4075_cast_fp16 = einsum(equation = var_4075_equation_0, values = (var_4001_cast_fp16, var_3978_cast_fp16))[name = tensor("op_4075_cast_fp16")]; tensor var_4076_to_fp16 = const()[name = tensor("op_4076_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_587_cast_fp16 = mul(x = var_4075_cast_fp16, y = var_4076_to_fp16)[name = tensor("aw_chunk_587_cast_fp16")]; tensor var_4079_equation_0 = const()[name = tensor("op_4079_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4079_cast_fp16 = einsum(equation = var_4079_equation_0, values = (var_4005_cast_fp16, var_3979_cast_fp16))[name = tensor("op_4079_cast_fp16")]; tensor var_4080_to_fp16 = const()[name = tensor("op_4080_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_589_cast_fp16 = mul(x = var_4079_cast_fp16, y = var_4080_to_fp16)[name = tensor("aw_chunk_589_cast_fp16")]; tensor var_4083_equation_0 = const()[name = tensor("op_4083_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4083_cast_fp16 = einsum(equation = var_4083_equation_0, values = (var_4005_cast_fp16, var_3980_cast_fp16))[name = tensor("op_4083_cast_fp16")]; tensor var_4084_to_fp16 = const()[name = tensor("op_4084_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_591_cast_fp16 = mul(x = var_4083_cast_fp16, y = var_4084_to_fp16)[name = tensor("aw_chunk_591_cast_fp16")]; tensor var_4087_equation_0 = const()[name = tensor("op_4087_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4087_cast_fp16 = einsum(equation = var_4087_equation_0, values = (var_4009_cast_fp16, var_3981_cast_fp16))[name = tensor("op_4087_cast_fp16")]; tensor var_4088_to_fp16 = const()[name = tensor("op_4088_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_593_cast_fp16 = mul(x = var_4087_cast_fp16, y = var_4088_to_fp16)[name = tensor("aw_chunk_593_cast_fp16")]; tensor var_4091_equation_0 = const()[name = tensor("op_4091_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4091_cast_fp16 = einsum(equation = var_4091_equation_0, values = (var_4009_cast_fp16, var_3982_cast_fp16))[name = tensor("op_4091_cast_fp16")]; tensor var_4092_to_fp16 = const()[name = tensor("op_4092_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_595_cast_fp16 = mul(x = var_4091_cast_fp16, y = var_4092_to_fp16)[name = tensor("aw_chunk_595_cast_fp16")]; tensor var_4095_equation_0 = const()[name = tensor("op_4095_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4095_cast_fp16 = einsum(equation = var_4095_equation_0, values = (var_4013_cast_fp16, var_3983_cast_fp16))[name = tensor("op_4095_cast_fp16")]; tensor var_4096_to_fp16 = const()[name = tensor("op_4096_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_597_cast_fp16 = mul(x = var_4095_cast_fp16, y = var_4096_to_fp16)[name = tensor("aw_chunk_597_cast_fp16")]; tensor var_4099_equation_0 = const()[name = tensor("op_4099_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4099_cast_fp16 = einsum(equation = var_4099_equation_0, values = (var_4013_cast_fp16, var_3984_cast_fp16))[name = tensor("op_4099_cast_fp16")]; tensor var_4100_to_fp16 = const()[name = tensor("op_4100_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_599_cast_fp16 = mul(x = var_4099_cast_fp16, y = var_4100_to_fp16)[name = tensor("aw_chunk_599_cast_fp16")]; tensor var_4103_equation_0 = const()[name = tensor("op_4103_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4103_cast_fp16 = einsum(equation = var_4103_equation_0, values = (var_4017_cast_fp16, var_3985_cast_fp16))[name = tensor("op_4103_cast_fp16")]; tensor var_4104_to_fp16 = const()[name = tensor("op_4104_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_601_cast_fp16 = mul(x = var_4103_cast_fp16, y = var_4104_to_fp16)[name = tensor("aw_chunk_601_cast_fp16")]; tensor var_4107_equation_0 = const()[name = tensor("op_4107_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4107_cast_fp16 = einsum(equation = var_4107_equation_0, values = (var_4017_cast_fp16, var_3986_cast_fp16))[name = tensor("op_4107_cast_fp16")]; tensor var_4108_to_fp16 = const()[name = tensor("op_4108_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_603_cast_fp16 = mul(x = var_4107_cast_fp16, y = var_4108_to_fp16)[name = tensor("aw_chunk_603_cast_fp16")]; tensor var_4111_equation_0 = const()[name = tensor("op_4111_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4111_cast_fp16 = einsum(equation = var_4111_equation_0, values = (var_4021_cast_fp16, var_3987_cast_fp16))[name = tensor("op_4111_cast_fp16")]; tensor var_4112_to_fp16 = const()[name = tensor("op_4112_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_605_cast_fp16 = mul(x = var_4111_cast_fp16, y = var_4112_to_fp16)[name = tensor("aw_chunk_605_cast_fp16")]; tensor var_4115_equation_0 = const()[name = tensor("op_4115_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4115_cast_fp16 = einsum(equation = var_4115_equation_0, values = (var_4021_cast_fp16, var_3988_cast_fp16))[name = tensor("op_4115_cast_fp16")]; tensor var_4116_to_fp16 = const()[name = tensor("op_4116_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_607_cast_fp16 = mul(x = var_4115_cast_fp16, y = var_4116_to_fp16)[name = tensor("aw_chunk_607_cast_fp16")]; tensor var_4118_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_577_cast_fp16)[name = tensor("op_4118_cast_fp16")]; tensor var_4119_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_579_cast_fp16)[name = tensor("op_4119_cast_fp16")]; tensor var_4120_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_581_cast_fp16)[name = tensor("op_4120_cast_fp16")]; tensor var_4121_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_583_cast_fp16)[name = tensor("op_4121_cast_fp16")]; tensor var_4122_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_585_cast_fp16)[name = tensor("op_4122_cast_fp16")]; tensor var_4123_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_587_cast_fp16)[name = tensor("op_4123_cast_fp16")]; tensor var_4124_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_589_cast_fp16)[name = tensor("op_4124_cast_fp16")]; tensor var_4125_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_591_cast_fp16)[name = tensor("op_4125_cast_fp16")]; tensor var_4126_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_593_cast_fp16)[name = tensor("op_4126_cast_fp16")]; tensor var_4127_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_595_cast_fp16)[name = tensor("op_4127_cast_fp16")]; tensor var_4128_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_597_cast_fp16)[name = tensor("op_4128_cast_fp16")]; tensor var_4129_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_599_cast_fp16)[name = tensor("op_4129_cast_fp16")]; tensor var_4130_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_601_cast_fp16)[name = tensor("op_4130_cast_fp16")]; tensor var_4131_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_603_cast_fp16)[name = tensor("op_4131_cast_fp16")]; tensor var_4132_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_605_cast_fp16)[name = tensor("op_4132_cast_fp16")]; tensor var_4133_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_607_cast_fp16)[name = tensor("op_4133_cast_fp16")]; tensor var_4135_equation_0 = const()[name = tensor("op_4135_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4135_cast_fp16 = einsum(equation = var_4135_equation_0, values = (var_4023_cast_fp16, var_4118_cast_fp16))[name = tensor("op_4135_cast_fp16")]; tensor var_4137_equation_0 = const()[name = tensor("op_4137_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4137_cast_fp16 = einsum(equation = var_4137_equation_0, values = (var_4023_cast_fp16, var_4119_cast_fp16))[name = tensor("op_4137_cast_fp16")]; tensor var_4139_equation_0 = const()[name = tensor("op_4139_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4139_cast_fp16 = einsum(equation = var_4139_equation_0, values = (var_4027_cast_fp16, var_4120_cast_fp16))[name = tensor("op_4139_cast_fp16")]; tensor var_4141_equation_0 = const()[name = tensor("op_4141_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4141_cast_fp16 = einsum(equation = var_4141_equation_0, values = (var_4027_cast_fp16, var_4121_cast_fp16))[name = tensor("op_4141_cast_fp16")]; tensor var_4143_equation_0 = const()[name = tensor("op_4143_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4143_cast_fp16 = einsum(equation = var_4143_equation_0, values = (var_4031_cast_fp16, var_4122_cast_fp16))[name = tensor("op_4143_cast_fp16")]; tensor var_4145_equation_0 = const()[name = tensor("op_4145_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4145_cast_fp16 = einsum(equation = var_4145_equation_0, values = (var_4031_cast_fp16, var_4123_cast_fp16))[name = tensor("op_4145_cast_fp16")]; tensor var_4147_equation_0 = const()[name = tensor("op_4147_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4147_cast_fp16 = einsum(equation = var_4147_equation_0, values = (var_4035_cast_fp16, var_4124_cast_fp16))[name = tensor("op_4147_cast_fp16")]; tensor var_4149_equation_0 = const()[name = tensor("op_4149_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4149_cast_fp16 = einsum(equation = var_4149_equation_0, values = (var_4035_cast_fp16, var_4125_cast_fp16))[name = tensor("op_4149_cast_fp16")]; tensor var_4151_equation_0 = const()[name = tensor("op_4151_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4151_cast_fp16 = einsum(equation = var_4151_equation_0, values = (var_4039_cast_fp16, var_4126_cast_fp16))[name = tensor("op_4151_cast_fp16")]; tensor var_4153_equation_0 = const()[name = tensor("op_4153_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4153_cast_fp16 = einsum(equation = var_4153_equation_0, values = (var_4039_cast_fp16, var_4127_cast_fp16))[name = tensor("op_4153_cast_fp16")]; tensor var_4155_equation_0 = const()[name = tensor("op_4155_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4155_cast_fp16 = einsum(equation = var_4155_equation_0, values = (var_4043_cast_fp16, var_4128_cast_fp16))[name = tensor("op_4155_cast_fp16")]; tensor var_4157_equation_0 = const()[name = tensor("op_4157_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4157_cast_fp16 = einsum(equation = var_4157_equation_0, values = (var_4043_cast_fp16, var_4129_cast_fp16))[name = tensor("op_4157_cast_fp16")]; tensor var_4159_equation_0 = const()[name = tensor("op_4159_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4159_cast_fp16 = einsum(equation = var_4159_equation_0, values = (var_4047_cast_fp16, var_4130_cast_fp16))[name = tensor("op_4159_cast_fp16")]; tensor var_4161_equation_0 = const()[name = tensor("op_4161_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4161_cast_fp16 = einsum(equation = var_4161_equation_0, values = (var_4047_cast_fp16, var_4131_cast_fp16))[name = tensor("op_4161_cast_fp16")]; tensor var_4163_equation_0 = const()[name = tensor("op_4163_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4163_cast_fp16 = einsum(equation = var_4163_equation_0, values = (var_4051_cast_fp16, var_4132_cast_fp16))[name = tensor("op_4163_cast_fp16")]; tensor var_4165_equation_0 = const()[name = tensor("op_4165_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4165_cast_fp16 = einsum(equation = var_4165_equation_0, values = (var_4051_cast_fp16, var_4133_cast_fp16))[name = tensor("op_4165_cast_fp16")]; tensor var_4167_interleave_0 = const()[name = tensor("op_4167_interleave_0"), val = tensor(false)]; tensor var_4167_cast_fp16 = concat(axis = var_3089, interleave = var_4167_interleave_0, values = (var_4135_cast_fp16, var_4137_cast_fp16))[name = tensor("op_4167_cast_fp16")]; tensor var_4169_interleave_0 = const()[name = tensor("op_4169_interleave_0"), val = tensor(false)]; tensor var_4169_cast_fp16 = concat(axis = var_3089, interleave = var_4169_interleave_0, values = (var_4139_cast_fp16, var_4141_cast_fp16))[name = tensor("op_4169_cast_fp16")]; tensor var_4171_interleave_0 = const()[name = tensor("op_4171_interleave_0"), val = tensor(false)]; tensor var_4171_cast_fp16 = concat(axis = var_3089, interleave = var_4171_interleave_0, values = (var_4143_cast_fp16, var_4145_cast_fp16))[name = tensor("op_4171_cast_fp16")]; tensor var_4173_interleave_0 = const()[name = tensor("op_4173_interleave_0"), val = tensor(false)]; tensor var_4173_cast_fp16 = concat(axis = var_3089, interleave = var_4173_interleave_0, values = (var_4147_cast_fp16, var_4149_cast_fp16))[name = tensor("op_4173_cast_fp16")]; tensor var_4175_interleave_0 = const()[name = tensor("op_4175_interleave_0"), val = tensor(false)]; tensor var_4175_cast_fp16 = concat(axis = var_3089, interleave = var_4175_interleave_0, values = (var_4151_cast_fp16, var_4153_cast_fp16))[name = tensor("op_4175_cast_fp16")]; tensor var_4177_interleave_0 = const()[name = tensor("op_4177_interleave_0"), val = tensor(false)]; tensor var_4177_cast_fp16 = concat(axis = var_3089, interleave = var_4177_interleave_0, values = (var_4155_cast_fp16, var_4157_cast_fp16))[name = tensor("op_4177_cast_fp16")]; tensor var_4179_interleave_0 = const()[name = tensor("op_4179_interleave_0"), val = tensor(false)]; tensor var_4179_cast_fp16 = concat(axis = var_3089, interleave = var_4179_interleave_0, values = (var_4159_cast_fp16, var_4161_cast_fp16))[name = tensor("op_4179_cast_fp16")]; tensor var_4181_interleave_0 = const()[name = tensor("op_4181_interleave_0"), val = tensor(false)]; tensor var_4181_cast_fp16 = concat(axis = var_3089, interleave = var_4181_interleave_0, values = (var_4163_cast_fp16, var_4165_cast_fp16))[name = tensor("op_4181_cast_fp16")]; tensor input_105_interleave_0 = const()[name = tensor("input_105_interleave_0"), val = tensor(false)]; tensor input_105_cast_fp16 = concat(axis = var_3111, interleave = input_105_interleave_0, values = (var_4167_cast_fp16, var_4169_cast_fp16, var_4171_cast_fp16, var_4173_cast_fp16, var_4175_cast_fp16, var_4177_cast_fp16, var_4179_cast_fp16, var_4181_cast_fp16))[name = tensor("input_105_cast_fp16")]; tensor var_4187 = const()[name = tensor("op_4187"), val = tensor([1, 1])]; tensor var_4189 = const()[name = tensor("op_4189"), val = tensor([1, 1])]; tensor var_4191_pad_type_0 = const()[name = tensor("op_4191_pad_type_0"), val = tensor("custom")]; tensor var_4191_pad_0 = const()[name = tensor("op_4191_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76356352)))]; tensor down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77175616)))]; tensor var_4191_cast_fp16 = conv(bias = down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_4189, groups = var_3111, pad = var_4191_pad_0, pad_type = var_4191_pad_type_0, strides = var_4187, weight = down_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_105_cast_fp16)[name = tensor("op_4191_cast_fp16")]; tensor inputs_21_cast_fp16 = add(x = var_4191_cast_fp16, y = inputs_19_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; tensor hidden_states_65_axes_0 = const()[name = tensor("hidden_states_65_axes_0"), val = tensor([1])]; tensor hidden_states_65_gamma_0_to_fp16 = const()[name = tensor("hidden_states_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77176960)))]; tensor hidden_states_65_beta_0_to_fp16 = const()[name = tensor("hidden_states_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77178304)))]; tensor var_4201_to_fp16 = const()[name = tensor("op_4201_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_65_cast_fp16 = layer_norm(axes = hidden_states_65_axes_0, beta = hidden_states_65_beta_0_to_fp16, epsilon = var_4201_to_fp16, gamma = hidden_states_65_gamma_0_to_fp16, x = inputs_21_cast_fp16)[name = tensor("hidden_states_65_cast_fp16")]; tensor var_4216 = const()[name = tensor("op_4216"), val = tensor([1, 1])]; tensor var_4218 = const()[name = tensor("op_4218"), val = tensor([1, 1])]; tensor q_15_pad_type_0 = const()[name = tensor("q_15_pad_type_0"), val = tensor("custom")]; tensor q_15_pad_0 = const()[name = tensor("q_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77179648)))]; tensor q_15_cast_fp16 = conv(dilations = var_4218, groups = var_3111, pad = q_15_pad_0, pad_type = q_15_pad_type_0, strides = var_4216, weight = down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_65_cast_fp16)[name = tensor("q_15_cast_fp16")]; tensor var_4222 = const()[name = tensor("op_4222"), val = tensor([1, 1])]; tensor var_4224 = const()[name = tensor("op_4224"), val = tensor([1, 1])]; tensor k_29_pad_type_0 = const()[name = tensor("k_29_pad_type_0"), val = tensor("custom")]; tensor k_29_pad_0 = const()[name = tensor("k_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77998912)))]; tensor k_29_cast_fp16 = conv(dilations = var_4224, groups = var_3111, pad = k_29_pad_0, pad_type = k_29_pad_type_0, strides = var_4222, weight = down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_29_cast_fp16")]; tensor var_4228 = const()[name = tensor("op_4228"), val = tensor([1, 1])]; tensor var_4230 = const()[name = tensor("op_4230"), val = tensor([1, 1])]; tensor v_15_pad_type_0 = const()[name = tensor("v_15_pad_type_0"), val = tensor("custom")]; tensor v_15_pad_0 = const()[name = tensor("v_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78982016)))]; tensor v_15_cast_fp16 = conv(dilations = var_4230, groups = var_3111, pad = v_15_pad_0, pad_type = v_15_pad_type_0, strides = var_4228, weight = down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_15_cast_fp16")]; tensor var_4234_begin_0 = const()[name = tensor("op_4234_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4234_end_0 = const()[name = tensor("op_4234_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_4234_end_mask_0 = const()[name = tensor("op_4234_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4234_cast_fp16 = slice_by_index(begin = var_4234_begin_0, end = var_4234_end_0, end_mask = var_4234_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_4234_cast_fp16")]; tensor var_4238_begin_0 = const()[name = tensor("op_4238_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_4238_end_0 = const()[name = tensor("op_4238_end_0"), val = tensor([2, 160, 1, 1024])]; tensor var_4238_end_mask_0 = const()[name = tensor("op_4238_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4238_cast_fp16 = slice_by_index(begin = var_4238_begin_0, end = var_4238_end_0, end_mask = var_4238_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_4238_cast_fp16")]; tensor var_4242_begin_0 = const()[name = tensor("op_4242_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_4242_end_0 = const()[name = tensor("op_4242_end_0"), val = tensor([2, 240, 1, 1024])]; tensor var_4242_end_mask_0 = const()[name = tensor("op_4242_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4242_cast_fp16 = slice_by_index(begin = var_4242_begin_0, end = var_4242_end_0, end_mask = var_4242_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_4242_cast_fp16")]; tensor var_4246_begin_0 = const()[name = tensor("op_4246_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_4246_end_0 = const()[name = tensor("op_4246_end_0"), val = tensor([2, 320, 1, 1024])]; tensor var_4246_end_mask_0 = const()[name = tensor("op_4246_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4246_cast_fp16 = slice_by_index(begin = var_4246_begin_0, end = var_4246_end_0, end_mask = var_4246_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_4246_cast_fp16")]; tensor var_4250_begin_0 = const()[name = tensor("op_4250_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_4250_end_0 = const()[name = tensor("op_4250_end_0"), val = tensor([2, 400, 1, 1024])]; tensor var_4250_end_mask_0 = const()[name = tensor("op_4250_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4250_cast_fp16 = slice_by_index(begin = var_4250_begin_0, end = var_4250_end_0, end_mask = var_4250_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_4250_cast_fp16")]; tensor var_4254_begin_0 = const()[name = tensor("op_4254_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_4254_end_0 = const()[name = tensor("op_4254_end_0"), val = tensor([2, 480, 1, 1024])]; tensor var_4254_end_mask_0 = const()[name = tensor("op_4254_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4254_cast_fp16 = slice_by_index(begin = var_4254_begin_0, end = var_4254_end_0, end_mask = var_4254_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_4254_cast_fp16")]; tensor var_4258_begin_0 = const()[name = tensor("op_4258_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_4258_end_0 = const()[name = tensor("op_4258_end_0"), val = tensor([2, 560, 1, 1024])]; tensor var_4258_end_mask_0 = const()[name = tensor("op_4258_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4258_cast_fp16 = slice_by_index(begin = var_4258_begin_0, end = var_4258_end_0, end_mask = var_4258_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_4258_cast_fp16")]; tensor var_4262_begin_0 = const()[name = tensor("op_4262_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_4262_end_0 = const()[name = tensor("op_4262_end_0"), val = tensor([2, 640, 1, 1024])]; tensor var_4262_end_mask_0 = const()[name = tensor("op_4262_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4262_cast_fp16 = slice_by_index(begin = var_4262_begin_0, end = var_4262_end_0, end_mask = var_4262_end_mask_0, x = q_15_cast_fp16)[name = tensor("op_4262_cast_fp16")]; tensor var_4265_begin_0 = const()[name = tensor("op_4265_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4265_end_0 = const()[name = tensor("op_4265_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_4265_end_mask_0 = const()[name = tensor("op_4265_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4265_cast_fp16 = slice_by_index(begin = var_4265_begin_0, end = var_4265_end_0, end_mask = var_4265_end_mask_0, x = var_4234_cast_fp16)[name = tensor("op_4265_cast_fp16")]; tensor var_4266_begin_0 = const()[name = tensor("op_4266_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4266_end_0 = const()[name = tensor("op_4266_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_4266_end_mask_0 = const()[name = tensor("op_4266_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4266_cast_fp16 = slice_by_index(begin = var_4266_begin_0, end = var_4266_end_0, end_mask = var_4266_end_mask_0, x = var_4234_cast_fp16)[name = tensor("op_4266_cast_fp16")]; tensor var_4267_begin_0 = const()[name = tensor("op_4267_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4267_end_0 = const()[name = tensor("op_4267_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_4267_end_mask_0 = const()[name = tensor("op_4267_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4267_cast_fp16 = slice_by_index(begin = var_4267_begin_0, end = var_4267_end_0, end_mask = var_4267_end_mask_0, x = var_4238_cast_fp16)[name = tensor("op_4267_cast_fp16")]; tensor var_4268_begin_0 = const()[name = tensor("op_4268_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4268_end_0 = const()[name = tensor("op_4268_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_4268_end_mask_0 = const()[name = tensor("op_4268_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4268_cast_fp16 = slice_by_index(begin = var_4268_begin_0, end = var_4268_end_0, end_mask = var_4268_end_mask_0, x = var_4238_cast_fp16)[name = tensor("op_4268_cast_fp16")]; tensor var_4269_begin_0 = const()[name = tensor("op_4269_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4269_end_0 = const()[name = tensor("op_4269_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_4269_end_mask_0 = const()[name = tensor("op_4269_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4269_cast_fp16 = slice_by_index(begin = var_4269_begin_0, end = var_4269_end_0, end_mask = var_4269_end_mask_0, x = var_4242_cast_fp16)[name = tensor("op_4269_cast_fp16")]; tensor var_4270_begin_0 = const()[name = tensor("op_4270_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4270_end_0 = const()[name = tensor("op_4270_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_4270_end_mask_0 = const()[name = tensor("op_4270_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4270_cast_fp16 = slice_by_index(begin = var_4270_begin_0, end = var_4270_end_0, end_mask = var_4270_end_mask_0, x = var_4242_cast_fp16)[name = tensor("op_4270_cast_fp16")]; tensor var_4271_begin_0 = const()[name = tensor("op_4271_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4271_end_0 = const()[name = tensor("op_4271_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_4271_end_mask_0 = const()[name = tensor("op_4271_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4271_cast_fp16 = slice_by_index(begin = var_4271_begin_0, end = var_4271_end_0, end_mask = var_4271_end_mask_0, x = var_4246_cast_fp16)[name = tensor("op_4271_cast_fp16")]; tensor var_4272_begin_0 = const()[name = tensor("op_4272_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4272_end_0 = const()[name = tensor("op_4272_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_4272_end_mask_0 = const()[name = tensor("op_4272_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4272_cast_fp16 = slice_by_index(begin = var_4272_begin_0, end = var_4272_end_0, end_mask = var_4272_end_mask_0, x = var_4246_cast_fp16)[name = tensor("op_4272_cast_fp16")]; tensor var_4273_begin_0 = const()[name = tensor("op_4273_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4273_end_0 = const()[name = tensor("op_4273_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_4273_end_mask_0 = const()[name = tensor("op_4273_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4273_cast_fp16 = slice_by_index(begin = var_4273_begin_0, end = var_4273_end_0, end_mask = var_4273_end_mask_0, x = var_4250_cast_fp16)[name = tensor("op_4273_cast_fp16")]; tensor var_4274_begin_0 = const()[name = tensor("op_4274_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4274_end_0 = const()[name = tensor("op_4274_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_4274_end_mask_0 = const()[name = tensor("op_4274_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4274_cast_fp16 = slice_by_index(begin = var_4274_begin_0, end = var_4274_end_0, end_mask = var_4274_end_mask_0, x = var_4250_cast_fp16)[name = tensor("op_4274_cast_fp16")]; tensor var_4275_begin_0 = const()[name = tensor("op_4275_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4275_end_0 = const()[name = tensor("op_4275_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_4275_end_mask_0 = const()[name = tensor("op_4275_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4275_cast_fp16 = slice_by_index(begin = var_4275_begin_0, end = var_4275_end_0, end_mask = var_4275_end_mask_0, x = var_4254_cast_fp16)[name = tensor("op_4275_cast_fp16")]; tensor var_4276_begin_0 = const()[name = tensor("op_4276_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4276_end_0 = const()[name = tensor("op_4276_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_4276_end_mask_0 = const()[name = tensor("op_4276_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4276_cast_fp16 = slice_by_index(begin = var_4276_begin_0, end = var_4276_end_0, end_mask = var_4276_end_mask_0, x = var_4254_cast_fp16)[name = tensor("op_4276_cast_fp16")]; tensor var_4277_begin_0 = const()[name = tensor("op_4277_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4277_end_0 = const()[name = tensor("op_4277_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_4277_end_mask_0 = const()[name = tensor("op_4277_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4277_cast_fp16 = slice_by_index(begin = var_4277_begin_0, end = var_4277_end_0, end_mask = var_4277_end_mask_0, x = var_4258_cast_fp16)[name = tensor("op_4277_cast_fp16")]; tensor var_4278_begin_0 = const()[name = tensor("op_4278_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4278_end_0 = const()[name = tensor("op_4278_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_4278_end_mask_0 = const()[name = tensor("op_4278_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4278_cast_fp16 = slice_by_index(begin = var_4278_begin_0, end = var_4278_end_0, end_mask = var_4278_end_mask_0, x = var_4258_cast_fp16)[name = tensor("op_4278_cast_fp16")]; tensor var_4279_begin_0 = const()[name = tensor("op_4279_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4279_end_0 = const()[name = tensor("op_4279_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_4279_end_mask_0 = const()[name = tensor("op_4279_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4279_cast_fp16 = slice_by_index(begin = var_4279_begin_0, end = var_4279_end_0, end_mask = var_4279_end_mask_0, x = var_4262_cast_fp16)[name = tensor("op_4279_cast_fp16")]; tensor var_4280_begin_0 = const()[name = tensor("op_4280_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4280_end_0 = const()[name = tensor("op_4280_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_4280_end_mask_0 = const()[name = tensor("op_4280_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4280_cast_fp16 = slice_by_index(begin = var_4280_begin_0, end = var_4280_end_0, end_mask = var_4280_end_mask_0, x = var_4262_cast_fp16)[name = tensor("op_4280_cast_fp16")]; tensor k_31_perm_0 = const()[name = tensor("k_31_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_4285_begin_0 = const()[name = tensor("op_4285_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4285_end_0 = const()[name = tensor("op_4285_end_0"), val = tensor([2, 77, 1, 80])]; tensor var_4285_end_mask_0 = const()[name = tensor("op_4285_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_24 = transpose(perm = k_31_perm_0, x = k_29_cast_fp16)[name = tensor("transpose_24")]; tensor var_4285_cast_fp16 = slice_by_index(begin = var_4285_begin_0, end = var_4285_end_0, end_mask = var_4285_end_mask_0, x = transpose_24)[name = tensor("op_4285_cast_fp16")]; tensor var_4289_begin_0 = const()[name = tensor("op_4289_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_4289_end_0 = const()[name = tensor("op_4289_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_4289_end_mask_0 = const()[name = tensor("op_4289_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4289_cast_fp16 = slice_by_index(begin = var_4289_begin_0, end = var_4289_end_0, end_mask = var_4289_end_mask_0, x = transpose_24)[name = tensor("op_4289_cast_fp16")]; tensor var_4293_begin_0 = const()[name = tensor("op_4293_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_4293_end_0 = const()[name = tensor("op_4293_end_0"), val = tensor([2, 77, 1, 240])]; tensor var_4293_end_mask_0 = const()[name = tensor("op_4293_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4293_cast_fp16 = slice_by_index(begin = var_4293_begin_0, end = var_4293_end_0, end_mask = var_4293_end_mask_0, x = transpose_24)[name = tensor("op_4293_cast_fp16")]; tensor var_4297_begin_0 = const()[name = tensor("op_4297_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_4297_end_0 = const()[name = tensor("op_4297_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_4297_end_mask_0 = const()[name = tensor("op_4297_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4297_cast_fp16 = slice_by_index(begin = var_4297_begin_0, end = var_4297_end_0, end_mask = var_4297_end_mask_0, x = transpose_24)[name = tensor("op_4297_cast_fp16")]; tensor var_4301_begin_0 = const()[name = tensor("op_4301_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_4301_end_0 = const()[name = tensor("op_4301_end_0"), val = tensor([2, 77, 1, 400])]; tensor var_4301_end_mask_0 = const()[name = tensor("op_4301_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4301_cast_fp16 = slice_by_index(begin = var_4301_begin_0, end = var_4301_end_0, end_mask = var_4301_end_mask_0, x = transpose_24)[name = tensor("op_4301_cast_fp16")]; tensor var_4305_begin_0 = const()[name = tensor("op_4305_begin_0"), val = tensor([0, 0, 0, 400])]; tensor var_4305_end_0 = const()[name = tensor("op_4305_end_0"), val = tensor([2, 77, 1, 480])]; tensor var_4305_end_mask_0 = const()[name = tensor("op_4305_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4305_cast_fp16 = slice_by_index(begin = var_4305_begin_0, end = var_4305_end_0, end_mask = var_4305_end_mask_0, x = transpose_24)[name = tensor("op_4305_cast_fp16")]; tensor var_4309_begin_0 = const()[name = tensor("op_4309_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_4309_end_0 = const()[name = tensor("op_4309_end_0"), val = tensor([2, 77, 1, 560])]; tensor var_4309_end_mask_0 = const()[name = tensor("op_4309_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4309_cast_fp16 = slice_by_index(begin = var_4309_begin_0, end = var_4309_end_0, end_mask = var_4309_end_mask_0, x = transpose_24)[name = tensor("op_4309_cast_fp16")]; tensor var_4313_begin_0 = const()[name = tensor("op_4313_begin_0"), val = tensor([0, 0, 0, 560])]; tensor var_4313_end_0 = const()[name = tensor("op_4313_end_0"), val = tensor([2, 77, 1, 640])]; tensor var_4313_end_mask_0 = const()[name = tensor("op_4313_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4313_cast_fp16 = slice_by_index(begin = var_4313_begin_0, end = var_4313_end_0, end_mask = var_4313_end_mask_0, x = transpose_24)[name = tensor("op_4313_cast_fp16")]; tensor var_4315_begin_0 = const()[name = tensor("op_4315_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4315_end_0 = const()[name = tensor("op_4315_end_0"), val = tensor([2, 80, 1, 77])]; tensor var_4315_end_mask_0 = const()[name = tensor("op_4315_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4315_cast_fp16 = slice_by_index(begin = var_4315_begin_0, end = var_4315_end_0, end_mask = var_4315_end_mask_0, x = v_15_cast_fp16)[name = tensor("op_4315_cast_fp16")]; tensor var_4319_begin_0 = const()[name = tensor("op_4319_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_4319_end_0 = const()[name = tensor("op_4319_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_4319_end_mask_0 = const()[name = tensor("op_4319_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4319_cast_fp16 = slice_by_index(begin = var_4319_begin_0, end = var_4319_end_0, end_mask = var_4319_end_mask_0, x = v_15_cast_fp16)[name = tensor("op_4319_cast_fp16")]; tensor var_4323_begin_0 = const()[name = tensor("op_4323_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_4323_end_0 = const()[name = tensor("op_4323_end_0"), val = tensor([2, 240, 1, 77])]; tensor var_4323_end_mask_0 = const()[name = tensor("op_4323_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4323_cast_fp16 = slice_by_index(begin = var_4323_begin_0, end = var_4323_end_0, end_mask = var_4323_end_mask_0, x = v_15_cast_fp16)[name = tensor("op_4323_cast_fp16")]; tensor var_4327_begin_0 = const()[name = tensor("op_4327_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_4327_end_0 = const()[name = tensor("op_4327_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_4327_end_mask_0 = const()[name = tensor("op_4327_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4327_cast_fp16 = slice_by_index(begin = var_4327_begin_0, end = var_4327_end_0, end_mask = var_4327_end_mask_0, x = v_15_cast_fp16)[name = tensor("op_4327_cast_fp16")]; tensor var_4331_begin_0 = const()[name = tensor("op_4331_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_4331_end_0 = const()[name = tensor("op_4331_end_0"), val = tensor([2, 400, 1, 77])]; tensor var_4331_end_mask_0 = const()[name = tensor("op_4331_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4331_cast_fp16 = slice_by_index(begin = var_4331_begin_0, end = var_4331_end_0, end_mask = var_4331_end_mask_0, x = v_15_cast_fp16)[name = tensor("op_4331_cast_fp16")]; tensor var_4335_begin_0 = const()[name = tensor("op_4335_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_4335_end_0 = const()[name = tensor("op_4335_end_0"), val = tensor([2, 480, 1, 77])]; tensor var_4335_end_mask_0 = const()[name = tensor("op_4335_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4335_cast_fp16 = slice_by_index(begin = var_4335_begin_0, end = var_4335_end_0, end_mask = var_4335_end_mask_0, x = v_15_cast_fp16)[name = tensor("op_4335_cast_fp16")]; tensor var_4339_begin_0 = const()[name = tensor("op_4339_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_4339_end_0 = const()[name = tensor("op_4339_end_0"), val = tensor([2, 560, 1, 77])]; tensor var_4339_end_mask_0 = const()[name = tensor("op_4339_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4339_cast_fp16 = slice_by_index(begin = var_4339_begin_0, end = var_4339_end_0, end_mask = var_4339_end_mask_0, x = v_15_cast_fp16)[name = tensor("op_4339_cast_fp16")]; tensor var_4343_begin_0 = const()[name = tensor("op_4343_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_4343_end_0 = const()[name = tensor("op_4343_end_0"), val = tensor([2, 640, 1, 77])]; tensor var_4343_end_mask_0 = const()[name = tensor("op_4343_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4343_cast_fp16 = slice_by_index(begin = var_4343_begin_0, end = var_4343_end_0, end_mask = var_4343_end_mask_0, x = v_15_cast_fp16)[name = tensor("op_4343_cast_fp16")]; tensor var_4347_equation_0 = const()[name = tensor("op_4347_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4347_cast_fp16 = einsum(equation = var_4347_equation_0, values = (var_4285_cast_fp16, var_4265_cast_fp16))[name = tensor("op_4347_cast_fp16")]; tensor var_4348_to_fp16 = const()[name = tensor("op_4348_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_609_cast_fp16 = mul(x = var_4347_cast_fp16, y = var_4348_to_fp16)[name = tensor("aw_chunk_609_cast_fp16")]; tensor var_4351_equation_0 = const()[name = tensor("op_4351_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4351_cast_fp16 = einsum(equation = var_4351_equation_0, values = (var_4285_cast_fp16, var_4266_cast_fp16))[name = tensor("op_4351_cast_fp16")]; tensor var_4352_to_fp16 = const()[name = tensor("op_4352_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_611_cast_fp16 = mul(x = var_4351_cast_fp16, y = var_4352_to_fp16)[name = tensor("aw_chunk_611_cast_fp16")]; tensor var_4355_equation_0 = const()[name = tensor("op_4355_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4355_cast_fp16 = einsum(equation = var_4355_equation_0, values = (var_4289_cast_fp16, var_4267_cast_fp16))[name = tensor("op_4355_cast_fp16")]; tensor var_4356_to_fp16 = const()[name = tensor("op_4356_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_613_cast_fp16 = mul(x = var_4355_cast_fp16, y = var_4356_to_fp16)[name = tensor("aw_chunk_613_cast_fp16")]; tensor var_4359_equation_0 = const()[name = tensor("op_4359_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4359_cast_fp16 = einsum(equation = var_4359_equation_0, values = (var_4289_cast_fp16, var_4268_cast_fp16))[name = tensor("op_4359_cast_fp16")]; tensor var_4360_to_fp16 = const()[name = tensor("op_4360_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_615_cast_fp16 = mul(x = var_4359_cast_fp16, y = var_4360_to_fp16)[name = tensor("aw_chunk_615_cast_fp16")]; tensor var_4363_equation_0 = const()[name = tensor("op_4363_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4363_cast_fp16 = einsum(equation = var_4363_equation_0, values = (var_4293_cast_fp16, var_4269_cast_fp16))[name = tensor("op_4363_cast_fp16")]; tensor var_4364_to_fp16 = const()[name = tensor("op_4364_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_617_cast_fp16 = mul(x = var_4363_cast_fp16, y = var_4364_to_fp16)[name = tensor("aw_chunk_617_cast_fp16")]; tensor var_4367_equation_0 = const()[name = tensor("op_4367_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4367_cast_fp16 = einsum(equation = var_4367_equation_0, values = (var_4293_cast_fp16, var_4270_cast_fp16))[name = tensor("op_4367_cast_fp16")]; tensor var_4368_to_fp16 = const()[name = tensor("op_4368_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_619_cast_fp16 = mul(x = var_4367_cast_fp16, y = var_4368_to_fp16)[name = tensor("aw_chunk_619_cast_fp16")]; tensor var_4371_equation_0 = const()[name = tensor("op_4371_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4371_cast_fp16 = einsum(equation = var_4371_equation_0, values = (var_4297_cast_fp16, var_4271_cast_fp16))[name = tensor("op_4371_cast_fp16")]; tensor var_4372_to_fp16 = const()[name = tensor("op_4372_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_621_cast_fp16 = mul(x = var_4371_cast_fp16, y = var_4372_to_fp16)[name = tensor("aw_chunk_621_cast_fp16")]; tensor var_4375_equation_0 = const()[name = tensor("op_4375_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4375_cast_fp16 = einsum(equation = var_4375_equation_0, values = (var_4297_cast_fp16, var_4272_cast_fp16))[name = tensor("op_4375_cast_fp16")]; tensor var_4376_to_fp16 = const()[name = tensor("op_4376_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_623_cast_fp16 = mul(x = var_4375_cast_fp16, y = var_4376_to_fp16)[name = tensor("aw_chunk_623_cast_fp16")]; tensor var_4379_equation_0 = const()[name = tensor("op_4379_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4379_cast_fp16 = einsum(equation = var_4379_equation_0, values = (var_4301_cast_fp16, var_4273_cast_fp16))[name = tensor("op_4379_cast_fp16")]; tensor var_4380_to_fp16 = const()[name = tensor("op_4380_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_625_cast_fp16 = mul(x = var_4379_cast_fp16, y = var_4380_to_fp16)[name = tensor("aw_chunk_625_cast_fp16")]; tensor var_4383_equation_0 = const()[name = tensor("op_4383_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4383_cast_fp16 = einsum(equation = var_4383_equation_0, values = (var_4301_cast_fp16, var_4274_cast_fp16))[name = tensor("op_4383_cast_fp16")]; tensor var_4384_to_fp16 = const()[name = tensor("op_4384_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_627_cast_fp16 = mul(x = var_4383_cast_fp16, y = var_4384_to_fp16)[name = tensor("aw_chunk_627_cast_fp16")]; tensor var_4387_equation_0 = const()[name = tensor("op_4387_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4387_cast_fp16 = einsum(equation = var_4387_equation_0, values = (var_4305_cast_fp16, var_4275_cast_fp16))[name = tensor("op_4387_cast_fp16")]; tensor var_4388_to_fp16 = const()[name = tensor("op_4388_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_629_cast_fp16 = mul(x = var_4387_cast_fp16, y = var_4388_to_fp16)[name = tensor("aw_chunk_629_cast_fp16")]; tensor var_4391_equation_0 = const()[name = tensor("op_4391_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4391_cast_fp16 = einsum(equation = var_4391_equation_0, values = (var_4305_cast_fp16, var_4276_cast_fp16))[name = tensor("op_4391_cast_fp16")]; tensor var_4392_to_fp16 = const()[name = tensor("op_4392_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_631_cast_fp16 = mul(x = var_4391_cast_fp16, y = var_4392_to_fp16)[name = tensor("aw_chunk_631_cast_fp16")]; tensor var_4395_equation_0 = const()[name = tensor("op_4395_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4395_cast_fp16 = einsum(equation = var_4395_equation_0, values = (var_4309_cast_fp16, var_4277_cast_fp16))[name = tensor("op_4395_cast_fp16")]; tensor var_4396_to_fp16 = const()[name = tensor("op_4396_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_633_cast_fp16 = mul(x = var_4395_cast_fp16, y = var_4396_to_fp16)[name = tensor("aw_chunk_633_cast_fp16")]; tensor var_4399_equation_0 = const()[name = tensor("op_4399_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4399_cast_fp16 = einsum(equation = var_4399_equation_0, values = (var_4309_cast_fp16, var_4278_cast_fp16))[name = tensor("op_4399_cast_fp16")]; tensor var_4400_to_fp16 = const()[name = tensor("op_4400_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_635_cast_fp16 = mul(x = var_4399_cast_fp16, y = var_4400_to_fp16)[name = tensor("aw_chunk_635_cast_fp16")]; tensor var_4403_equation_0 = const()[name = tensor("op_4403_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4403_cast_fp16 = einsum(equation = var_4403_equation_0, values = (var_4313_cast_fp16, var_4279_cast_fp16))[name = tensor("op_4403_cast_fp16")]; tensor var_4404_to_fp16 = const()[name = tensor("op_4404_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_637_cast_fp16 = mul(x = var_4403_cast_fp16, y = var_4404_to_fp16)[name = tensor("aw_chunk_637_cast_fp16")]; tensor var_4407_equation_0 = const()[name = tensor("op_4407_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4407_cast_fp16 = einsum(equation = var_4407_equation_0, values = (var_4313_cast_fp16, var_4280_cast_fp16))[name = tensor("op_4407_cast_fp16")]; tensor var_4408_to_fp16 = const()[name = tensor("op_4408_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_639_cast_fp16 = mul(x = var_4407_cast_fp16, y = var_4408_to_fp16)[name = tensor("aw_chunk_639_cast_fp16")]; tensor var_4410_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_609_cast_fp16)[name = tensor("op_4410_cast_fp16")]; tensor var_4411_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_611_cast_fp16)[name = tensor("op_4411_cast_fp16")]; tensor var_4412_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_613_cast_fp16)[name = tensor("op_4412_cast_fp16")]; tensor var_4413_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_615_cast_fp16)[name = tensor("op_4413_cast_fp16")]; tensor var_4414_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_617_cast_fp16)[name = tensor("op_4414_cast_fp16")]; tensor var_4415_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_619_cast_fp16)[name = tensor("op_4415_cast_fp16")]; tensor var_4416_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_621_cast_fp16)[name = tensor("op_4416_cast_fp16")]; tensor var_4417_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_623_cast_fp16)[name = tensor("op_4417_cast_fp16")]; tensor var_4418_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_625_cast_fp16)[name = tensor("op_4418_cast_fp16")]; tensor var_4419_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_627_cast_fp16)[name = tensor("op_4419_cast_fp16")]; tensor var_4420_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_629_cast_fp16)[name = tensor("op_4420_cast_fp16")]; tensor var_4421_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_631_cast_fp16)[name = tensor("op_4421_cast_fp16")]; tensor var_4422_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_633_cast_fp16)[name = tensor("op_4422_cast_fp16")]; tensor var_4423_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_635_cast_fp16)[name = tensor("op_4423_cast_fp16")]; tensor var_4424_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_637_cast_fp16)[name = tensor("op_4424_cast_fp16")]; tensor var_4425_cast_fp16 = softmax(axis = var_3111, x = aw_chunk_639_cast_fp16)[name = tensor("op_4425_cast_fp16")]; tensor var_4427_equation_0 = const()[name = tensor("op_4427_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4427_cast_fp16 = einsum(equation = var_4427_equation_0, values = (var_4315_cast_fp16, var_4410_cast_fp16))[name = tensor("op_4427_cast_fp16")]; tensor var_4429_equation_0 = const()[name = tensor("op_4429_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4429_cast_fp16 = einsum(equation = var_4429_equation_0, values = (var_4315_cast_fp16, var_4411_cast_fp16))[name = tensor("op_4429_cast_fp16")]; tensor var_4431_equation_0 = const()[name = tensor("op_4431_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4431_cast_fp16 = einsum(equation = var_4431_equation_0, values = (var_4319_cast_fp16, var_4412_cast_fp16))[name = tensor("op_4431_cast_fp16")]; tensor var_4433_equation_0 = const()[name = tensor("op_4433_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4433_cast_fp16 = einsum(equation = var_4433_equation_0, values = (var_4319_cast_fp16, var_4413_cast_fp16))[name = tensor("op_4433_cast_fp16")]; tensor var_4435_equation_0 = const()[name = tensor("op_4435_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4435_cast_fp16 = einsum(equation = var_4435_equation_0, values = (var_4323_cast_fp16, var_4414_cast_fp16))[name = tensor("op_4435_cast_fp16")]; tensor var_4437_equation_0 = const()[name = tensor("op_4437_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4437_cast_fp16 = einsum(equation = var_4437_equation_0, values = (var_4323_cast_fp16, var_4415_cast_fp16))[name = tensor("op_4437_cast_fp16")]; tensor var_4439_equation_0 = const()[name = tensor("op_4439_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4439_cast_fp16 = einsum(equation = var_4439_equation_0, values = (var_4327_cast_fp16, var_4416_cast_fp16))[name = tensor("op_4439_cast_fp16")]; tensor var_4441_equation_0 = const()[name = tensor("op_4441_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4441_cast_fp16 = einsum(equation = var_4441_equation_0, values = (var_4327_cast_fp16, var_4417_cast_fp16))[name = tensor("op_4441_cast_fp16")]; tensor var_4443_equation_0 = const()[name = tensor("op_4443_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4443_cast_fp16 = einsum(equation = var_4443_equation_0, values = (var_4331_cast_fp16, var_4418_cast_fp16))[name = tensor("op_4443_cast_fp16")]; tensor var_4445_equation_0 = const()[name = tensor("op_4445_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4445_cast_fp16 = einsum(equation = var_4445_equation_0, values = (var_4331_cast_fp16, var_4419_cast_fp16))[name = tensor("op_4445_cast_fp16")]; tensor var_4447_equation_0 = const()[name = tensor("op_4447_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4447_cast_fp16 = einsum(equation = var_4447_equation_0, values = (var_4335_cast_fp16, var_4420_cast_fp16))[name = tensor("op_4447_cast_fp16")]; tensor var_4449_equation_0 = const()[name = tensor("op_4449_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4449_cast_fp16 = einsum(equation = var_4449_equation_0, values = (var_4335_cast_fp16, var_4421_cast_fp16))[name = tensor("op_4449_cast_fp16")]; tensor var_4451_equation_0 = const()[name = tensor("op_4451_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4451_cast_fp16 = einsum(equation = var_4451_equation_0, values = (var_4339_cast_fp16, var_4422_cast_fp16))[name = tensor("op_4451_cast_fp16")]; tensor var_4453_equation_0 = const()[name = tensor("op_4453_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4453_cast_fp16 = einsum(equation = var_4453_equation_0, values = (var_4339_cast_fp16, var_4423_cast_fp16))[name = tensor("op_4453_cast_fp16")]; tensor var_4455_equation_0 = const()[name = tensor("op_4455_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4455_cast_fp16 = einsum(equation = var_4455_equation_0, values = (var_4343_cast_fp16, var_4424_cast_fp16))[name = tensor("op_4455_cast_fp16")]; tensor var_4457_equation_0 = const()[name = tensor("op_4457_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4457_cast_fp16 = einsum(equation = var_4457_equation_0, values = (var_4343_cast_fp16, var_4425_cast_fp16))[name = tensor("op_4457_cast_fp16")]; tensor var_4459_interleave_0 = const()[name = tensor("op_4459_interleave_0"), val = tensor(false)]; tensor var_4459_cast_fp16 = concat(axis = var_3089, interleave = var_4459_interleave_0, values = (var_4427_cast_fp16, var_4429_cast_fp16))[name = tensor("op_4459_cast_fp16")]; tensor var_4461_interleave_0 = const()[name = tensor("op_4461_interleave_0"), val = tensor(false)]; tensor var_4461_cast_fp16 = concat(axis = var_3089, interleave = var_4461_interleave_0, values = (var_4431_cast_fp16, var_4433_cast_fp16))[name = tensor("op_4461_cast_fp16")]; tensor var_4463_interleave_0 = const()[name = tensor("op_4463_interleave_0"), val = tensor(false)]; tensor var_4463_cast_fp16 = concat(axis = var_3089, interleave = var_4463_interleave_0, values = (var_4435_cast_fp16, var_4437_cast_fp16))[name = tensor("op_4463_cast_fp16")]; tensor var_4465_interleave_0 = const()[name = tensor("op_4465_interleave_0"), val = tensor(false)]; tensor var_4465_cast_fp16 = concat(axis = var_3089, interleave = var_4465_interleave_0, values = (var_4439_cast_fp16, var_4441_cast_fp16))[name = tensor("op_4465_cast_fp16")]; tensor var_4467_interleave_0 = const()[name = tensor("op_4467_interleave_0"), val = tensor(false)]; tensor var_4467_cast_fp16 = concat(axis = var_3089, interleave = var_4467_interleave_0, values = (var_4443_cast_fp16, var_4445_cast_fp16))[name = tensor("op_4467_cast_fp16")]; tensor var_4469_interleave_0 = const()[name = tensor("op_4469_interleave_0"), val = tensor(false)]; tensor var_4469_cast_fp16 = concat(axis = var_3089, interleave = var_4469_interleave_0, values = (var_4447_cast_fp16, var_4449_cast_fp16))[name = tensor("op_4469_cast_fp16")]; tensor var_4471_interleave_0 = const()[name = tensor("op_4471_interleave_0"), val = tensor(false)]; tensor var_4471_cast_fp16 = concat(axis = var_3089, interleave = var_4471_interleave_0, values = (var_4451_cast_fp16, var_4453_cast_fp16))[name = tensor("op_4471_cast_fp16")]; tensor var_4473_interleave_0 = const()[name = tensor("op_4473_interleave_0"), val = tensor(false)]; tensor var_4473_cast_fp16 = concat(axis = var_3089, interleave = var_4473_interleave_0, values = (var_4455_cast_fp16, var_4457_cast_fp16))[name = tensor("op_4473_cast_fp16")]; tensor input_107_interleave_0 = const()[name = tensor("input_107_interleave_0"), val = tensor(false)]; tensor input_107_cast_fp16 = concat(axis = var_3111, interleave = input_107_interleave_0, values = (var_4459_cast_fp16, var_4461_cast_fp16, var_4463_cast_fp16, var_4465_cast_fp16, var_4467_cast_fp16, var_4469_cast_fp16, var_4471_cast_fp16, var_4473_cast_fp16))[name = tensor("input_107_cast_fp16")]; tensor var_4479 = const()[name = tensor("op_4479"), val = tensor([1, 1])]; tensor var_4481 = const()[name = tensor("op_4481"), val = tensor([1, 1])]; tensor var_4483_pad_type_0 = const()[name = tensor("op_4483_pad_type_0"), val = tensor("custom")]; tensor var_4483_pad_0 = const()[name = tensor("op_4483_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79965120)))]; tensor down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80784384)))]; tensor var_4483_cast_fp16 = conv(bias = down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_4481, groups = var_3111, pad = var_4483_pad_0, pad_type = var_4483_pad_type_0, strides = var_4479, weight = down_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_107_cast_fp16)[name = tensor("op_4483_cast_fp16")]; tensor inputs_23_cast_fp16 = add(x = var_4483_cast_fp16, y = inputs_21_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; tensor input_109_axes_0 = const()[name = tensor("input_109_axes_0"), val = tensor([1])]; tensor input_109_gamma_0_to_fp16 = const()[name = tensor("input_109_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80785728)))]; tensor input_109_beta_0_to_fp16 = const()[name = tensor("input_109_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80787072)))]; tensor var_4493_to_fp16 = const()[name = tensor("op_4493_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_109_cast_fp16 = layer_norm(axes = input_109_axes_0, beta = input_109_beta_0_to_fp16, epsilon = var_4493_to_fp16, gamma = input_109_gamma_0_to_fp16, x = inputs_23_cast_fp16)[name = tensor("input_109_cast_fp16")]; tensor var_4509 = const()[name = tensor("op_4509"), val = tensor([1, 1])]; tensor var_4511 = const()[name = tensor("op_4511"), val = tensor([1, 1])]; tensor var_4513_pad_type_0 = const()[name = tensor("op_4513_pad_type_0"), val = tensor("custom")]; tensor var_4513_pad_0 = const()[name = tensor("op_4513_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80788416)))]; tensor down_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87342080)))]; tensor var_4513_cast_fp16 = conv(bias = down_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_4511, groups = var_3111, pad = var_4513_pad_0, pad_type = var_4513_pad_type_0, strides = var_4509, weight = down_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_109_cast_fp16)[name = tensor("op_4513_cast_fp16")]; tensor var_4514_split_sizes_0 = const()[name = tensor("op_4514_split_sizes_0"), val = tensor([2560, 2560])]; tensor var_4514_axis_0 = const()[name = tensor("op_4514_axis_0"), val = tensor(1)]; tensor var_4514_cast_fp16_0, tensor var_4514_cast_fp16_1 = split(axis = var_4514_axis_0, split_sizes = var_4514_split_sizes_0, x = var_4513_cast_fp16)[name = tensor("op_4514_cast_fp16")]; tensor var_4516_mode_0 = const()[name = tensor("op_4516_mode_0"), val = tensor("EXACT")]; tensor var_4516_cast_fp16 = gelu(mode = var_4516_mode_0, x = var_4514_cast_fp16_1)[name = tensor("op_4516_cast_fp16")]; tensor input_111_cast_fp16 = mul(x = var_4514_cast_fp16_0, y = var_4516_cast_fp16)[name = tensor("input_111_cast_fp16")]; tensor var_4520 = const()[name = tensor("op_4520"), val = tensor([1, 1])]; tensor var_4522 = const()[name = tensor("op_4522"), val = tensor([1, 1])]; tensor var_4524_pad_type_0 = const()[name = tensor("op_4524_pad_type_0"), val = tensor("custom")]; tensor var_4524_pad_0 = const()[name = tensor("op_4524_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87352384)))]; tensor down_blocks_1_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90629248)))]; tensor var_4524_cast_fp16 = conv(bias = down_blocks_1_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_4522, groups = var_3111, pad = var_4524_pad_0, pad_type = var_4524_pad_type_0, strides = var_4520, weight = down_blocks_1_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_111_cast_fp16)[name = tensor("op_4524_cast_fp16")]; tensor hidden_states_69_cast_fp16 = add(x = var_4524_cast_fp16, y = inputs_23_cast_fp16)[name = tensor("hidden_states_69_cast_fp16")]; tensor var_4526 = const()[name = tensor("op_4526"), val = tensor([2, 640, 32, 32])]; tensor input_113_cast_fp16 = reshape(shape = var_4526, x = hidden_states_69_cast_fp16)[name = tensor("input_113_cast_fp16")]; tensor var_4530 = const()[name = tensor("op_4530"), val = tensor([1, 1])]; tensor var_4532 = const()[name = tensor("op_4532"), val = tensor([1, 1])]; tensor hidden_states_71_pad_type_0 = const()[name = tensor("hidden_states_71_pad_type_0"), val = tensor("custom")]; tensor hidden_states_71_pad_0 = const()[name = tensor("hidden_states_71_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_1_attentions_1_proj_out_weight_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90630592)))]; tensor down_blocks_1_attentions_1_proj_out_bias_to_fp16 = const()[name = tensor("down_blocks_1_attentions_1_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91449856)))]; tensor hidden_states_71_cast_fp16 = conv(bias = down_blocks_1_attentions_1_proj_out_bias_to_fp16, dilations = var_4532, groups = var_3111, pad = hidden_states_71_pad_0, pad_type = hidden_states_71_pad_type_0, strides = var_4530, weight = down_blocks_1_attentions_1_proj_out_weight_to_fp16, x = input_113_cast_fp16)[name = tensor("hidden_states_71_cast_fp16")]; tensor input_115_cast_fp16 = add(x = hidden_states_71_cast_fp16, y = hidden_states_59_cast_fp16)[name = tensor("input_115_cast_fp16")]; tensor var_4539 = const()[name = tensor("op_4539"), val = tensor([2, 2])]; tensor var_4541 = const()[name = tensor("op_4541"), val = tensor([1, 1])]; tensor input_117_pad_type_0 = const()[name = tensor("input_117_pad_type_0"), val = tensor("custom")]; tensor input_117_pad_0 = const()[name = tensor("input_117_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_1_downsamplers_0_conv_weight_to_fp16 = const()[name = tensor("down_blocks_1_downsamplers_0_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91451200)))]; tensor down_blocks_1_downsamplers_0_conv_bias_to_fp16 = const()[name = tensor("down_blocks_1_downsamplers_0_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98824064)))]; tensor input_117_cast_fp16 = conv(bias = down_blocks_1_downsamplers_0_conv_bias_to_fp16, dilations = var_4541, groups = var_3111, pad = input_117_pad_0, pad_type = input_117_pad_type_0, strides = var_4539, weight = down_blocks_1_downsamplers_0_conv_weight_to_fp16, x = input_115_cast_fp16)[name = tensor("input_117_cast_fp16")]; tensor var_4569 = const()[name = tensor("op_4569"), val = tensor(1)]; tensor reshape_48_shape_0 = const()[name = tensor("reshape_48_shape_0"), val = tensor([2, 32, 20, 16, 16])]; tensor reshape_48_cast_fp16 = reshape(shape = reshape_48_shape_0, x = input_117_cast_fp16)[name = tensor("reshape_48_cast_fp16")]; tensor reduce_mean_36_axes_0 = const()[name = tensor("reduce_mean_36_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_36_keep_dims_0 = const()[name = tensor("reduce_mean_36_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_36_cast_fp16 = reduce_mean(axes = reduce_mean_36_axes_0, keep_dims = reduce_mean_36_keep_dims_0, x = reshape_48_cast_fp16)[name = tensor("reduce_mean_36_cast_fp16")]; tensor sub_24_cast_fp16 = sub(x = reshape_48_cast_fp16, y = reduce_mean_36_cast_fp16)[name = tensor("sub_24_cast_fp16")]; tensor square_12_cast_fp16 = square(x = sub_24_cast_fp16)[name = tensor("square_12_cast_fp16")]; tensor reduce_mean_38_axes_0 = const()[name = tensor("reduce_mean_38_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_38_keep_dims_0 = const()[name = tensor("reduce_mean_38_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_38_cast_fp16 = reduce_mean(axes = reduce_mean_38_axes_0, keep_dims = reduce_mean_38_keep_dims_0, x = square_12_cast_fp16)[name = tensor("reduce_mean_38_cast_fp16")]; tensor add_24_y_0_to_fp16 = const()[name = tensor("add_24_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_24_cast_fp16 = add(x = reduce_mean_38_cast_fp16, y = add_24_y_0_to_fp16)[name = tensor("add_24_cast_fp16")]; tensor sqrt_12_cast_fp16 = sqrt(x = add_24_cast_fp16)[name = tensor("sqrt_12_cast_fp16")]; tensor real_div_12_cast_fp16 = real_div(x = sub_24_cast_fp16, y = sqrt_12_cast_fp16)[name = tensor("real_div_12_cast_fp16")]; tensor reshape_49_shape_0 = const()[name = tensor("reshape_49_shape_0"), val = tensor([2, 640, 16, 16])]; tensor reshape_49_cast_fp16 = reshape(shape = reshape_49_shape_0, x = real_div_12_cast_fp16)[name = tensor("reshape_49_cast_fp16")]; tensor add_25_gamma_0_to_fp16 = const()[name = tensor("add_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98825408)))]; tensor add_25_beta_0_to_fp16 = const()[name = tensor("add_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98826752)))]; tensor add_25_epsilon_0_to_fp16 = const()[name = tensor("add_25_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_25_cast_fp16 = batch_norm(beta = add_25_beta_0_to_fp16, epsilon = add_25_epsilon_0_to_fp16, gamma = add_25_gamma_0_to_fp16, mean = add_15_mean_0_to_fp16, variance = add_15_variance_0_to_fp16, x = reshape_49_cast_fp16)[name = tensor("add_25_cast_fp16")]; tensor input_121_cast_fp16 = silu(x = add_25_cast_fp16)[name = tensor("input_121_cast_fp16")]; tensor var_4592 = const()[name = tensor("op_4592"), val = tensor([1, 1])]; tensor var_4594 = const()[name = tensor("op_4594"), val = tensor([1, 1])]; tensor hidden_states_73_pad_type_0 = const()[name = tensor("hidden_states_73_pad_type_0"), val = tensor("custom")]; tensor hidden_states_73_pad_0 = const()[name = tensor("hidden_states_73_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_2_resnets_0_conv1_weight_to_fp16 = const()[name = tensor("down_blocks_2_resnets_0_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98828096)))]; tensor down_blocks_2_resnets_0_conv1_bias_to_fp16 = const()[name = tensor("down_blocks_2_resnets_0_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113573760)))]; tensor hidden_states_73_cast_fp16 = conv(bias = down_blocks_2_resnets_0_conv1_bias_to_fp16, dilations = var_4594, groups = var_4569, pad = hidden_states_73_pad_0, pad_type = hidden_states_73_pad_type_0, strides = var_4592, weight = down_blocks_2_resnets_0_conv1_weight_to_fp16, x = input_121_cast_fp16)[name = tensor("hidden_states_73_cast_fp16")]; tensor var_4600 = const()[name = tensor("op_4600"), val = tensor([1, 1])]; tensor var_4602 = const()[name = tensor("op_4602"), val = tensor([1, 1])]; tensor temb_9_pad_type_0 = const()[name = tensor("temb_9_pad_type_0"), val = tensor("custom")]; tensor temb_9_pad_0 = const()[name = tensor("temb_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_resnets_0_time_emb_proj_weight_to_fp16 = const()[name = tensor("down_blocks_2_resnets_0_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113576384)))]; tensor down_blocks_2_resnets_0_time_emb_proj_bias_to_fp16 = const()[name = tensor("down_blocks_2_resnets_0_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116853248)))]; tensor temb_9_cast_fp16 = conv(bias = down_blocks_2_resnets_0_time_emb_proj_bias_to_fp16, dilations = var_4602, groups = var_4569, pad = temb_9_pad_0, pad_type = temb_9_pad_type_0, strides = var_4600, weight = down_blocks_2_resnets_0_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_9_cast_fp16")]; tensor input_125_cast_fp16 = add(x = hidden_states_73_cast_fp16, y = temb_9_cast_fp16)[name = tensor("input_125_cast_fp16")]; tensor reshape_52_shape_0 = const()[name = tensor("reshape_52_shape_0"), val = tensor([2, 32, 40, 16, 16])]; tensor reshape_52_cast_fp16 = reshape(shape = reshape_52_shape_0, x = input_125_cast_fp16)[name = tensor("reshape_52_cast_fp16")]; tensor reduce_mean_39_axes_0 = const()[name = tensor("reduce_mean_39_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_39_keep_dims_0 = const()[name = tensor("reduce_mean_39_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_39_cast_fp16 = reduce_mean(axes = reduce_mean_39_axes_0, keep_dims = reduce_mean_39_keep_dims_0, x = reshape_52_cast_fp16)[name = tensor("reduce_mean_39_cast_fp16")]; tensor sub_26_cast_fp16 = sub(x = reshape_52_cast_fp16, y = reduce_mean_39_cast_fp16)[name = tensor("sub_26_cast_fp16")]; tensor square_13_cast_fp16 = square(x = sub_26_cast_fp16)[name = tensor("square_13_cast_fp16")]; tensor reduce_mean_41_axes_0 = const()[name = tensor("reduce_mean_41_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_41_keep_dims_0 = const()[name = tensor("reduce_mean_41_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_41_cast_fp16 = reduce_mean(axes = reduce_mean_41_axes_0, keep_dims = reduce_mean_41_keep_dims_0, x = square_13_cast_fp16)[name = tensor("reduce_mean_41_cast_fp16")]; tensor add_26_y_0_to_fp16 = const()[name = tensor("add_26_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_26_cast_fp16 = add(x = reduce_mean_41_cast_fp16, y = add_26_y_0_to_fp16)[name = tensor("add_26_cast_fp16")]; tensor sqrt_13_cast_fp16 = sqrt(x = add_26_cast_fp16)[name = tensor("sqrt_13_cast_fp16")]; tensor real_div_13_cast_fp16 = real_div(x = sub_26_cast_fp16, y = sqrt_13_cast_fp16)[name = tensor("real_div_13_cast_fp16")]; tensor reshape_53_shape_0 = const()[name = tensor("reshape_53_shape_0"), val = tensor([2, 1280, 16, 16])]; tensor reshape_53_cast_fp16 = reshape(shape = reshape_53_shape_0, x = real_div_13_cast_fp16)[name = tensor("reshape_53_cast_fp16")]; tensor add_27_mean_0_to_fp16 = const()[name = tensor("add_27_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116855872)))]; tensor add_27_variance_0_to_fp16 = const()[name = tensor("add_27_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116858496)))]; tensor add_27_gamma_0_to_fp16 = const()[name = tensor("add_27_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116861120)))]; tensor add_27_beta_0_to_fp16 = const()[name = tensor("add_27_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116863744)))]; tensor add_27_epsilon_0_to_fp16 = const()[name = tensor("add_27_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_27_cast_fp16 = batch_norm(beta = add_27_beta_0_to_fp16, epsilon = add_27_epsilon_0_to_fp16, gamma = add_27_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_53_cast_fp16)[name = tensor("add_27_cast_fp16")]; tensor input_129_cast_fp16 = silu(x = add_27_cast_fp16)[name = tensor("input_129_cast_fp16")]; tensor var_4612 = const()[name = tensor("op_4612"), val = tensor([1, 1])]; tensor var_4614 = const()[name = tensor("op_4614"), val = tensor([1, 1])]; tensor hidden_states_75_pad_type_0 = const()[name = tensor("hidden_states_75_pad_type_0"), val = tensor("custom")]; tensor hidden_states_75_pad_0 = const()[name = tensor("hidden_states_75_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_2_resnets_0_conv2_weight_to_fp16 = const()[name = tensor("down_blocks_2_resnets_0_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116866368)))]; tensor down_blocks_2_resnets_0_conv2_bias_to_fp16 = const()[name = tensor("down_blocks_2_resnets_0_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146357632)))]; tensor hidden_states_75_cast_fp16 = conv(bias = down_blocks_2_resnets_0_conv2_bias_to_fp16, dilations = var_4614, groups = var_4569, pad = hidden_states_75_pad_0, pad_type = hidden_states_75_pad_type_0, strides = var_4612, weight = down_blocks_2_resnets_0_conv2_weight_to_fp16, x = input_129_cast_fp16)[name = tensor("hidden_states_75_cast_fp16")]; tensor var_4619 = const()[name = tensor("op_4619"), val = tensor([1, 1])]; tensor var_4621 = const()[name = tensor("op_4621"), val = tensor([1, 1])]; tensor x_3_pad_type_0 = const()[name = tensor("x_3_pad_type_0"), val = tensor("custom")]; tensor x_3_pad_0 = const()[name = tensor("x_3_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_resnets_0_conv_shortcut_weight_to_fp16 = const()[name = tensor("down_blocks_2_resnets_0_conv_shortcut_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146360256)))]; tensor down_blocks_2_resnets_0_conv_shortcut_bias_to_fp16 = const()[name = tensor("down_blocks_2_resnets_0_conv_shortcut_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147998720)))]; tensor x_3_cast_fp16 = conv(bias = down_blocks_2_resnets_0_conv_shortcut_bias_to_fp16, dilations = var_4621, groups = var_4569, pad = x_3_pad_0, pad_type = x_3_pad_type_0, strides = var_4619, weight = down_blocks_2_resnets_0_conv_shortcut_weight_to_fp16, x = input_117_cast_fp16)[name = tensor("x_3_cast_fp16")]; tensor hidden_states_77_cast_fp16 = add(x = x_3_cast_fp16, y = hidden_states_75_cast_fp16)[name = tensor("hidden_states_77_cast_fp16")]; tensor reshape_56_shape_0 = const()[name = tensor("reshape_56_shape_0"), val = tensor([2, 32, 40, 16, 16])]; tensor reshape_56_cast_fp16 = reshape(shape = reshape_56_shape_0, x = hidden_states_77_cast_fp16)[name = tensor("reshape_56_cast_fp16")]; tensor reduce_mean_42_axes_0 = const()[name = tensor("reduce_mean_42_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_42_keep_dims_0 = const()[name = tensor("reduce_mean_42_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_42_cast_fp16 = reduce_mean(axes = reduce_mean_42_axes_0, keep_dims = reduce_mean_42_keep_dims_0, x = reshape_56_cast_fp16)[name = tensor("reduce_mean_42_cast_fp16")]; tensor sub_28_cast_fp16 = sub(x = reshape_56_cast_fp16, y = reduce_mean_42_cast_fp16)[name = tensor("sub_28_cast_fp16")]; tensor square_14_cast_fp16 = square(x = sub_28_cast_fp16)[name = tensor("square_14_cast_fp16")]; tensor reduce_mean_44_axes_0 = const()[name = tensor("reduce_mean_44_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_44_keep_dims_0 = const()[name = tensor("reduce_mean_44_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_44_cast_fp16 = reduce_mean(axes = reduce_mean_44_axes_0, keep_dims = reduce_mean_44_keep_dims_0, x = square_14_cast_fp16)[name = tensor("reduce_mean_44_cast_fp16")]; tensor add_28_y_0_to_fp16 = const()[name = tensor("add_28_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_28_cast_fp16 = add(x = reduce_mean_44_cast_fp16, y = add_28_y_0_to_fp16)[name = tensor("add_28_cast_fp16")]; tensor sqrt_14_cast_fp16 = sqrt(x = add_28_cast_fp16)[name = tensor("sqrt_14_cast_fp16")]; tensor real_div_14_cast_fp16 = real_div(x = sub_28_cast_fp16, y = sqrt_14_cast_fp16)[name = tensor("real_div_14_cast_fp16")]; tensor reshape_57_shape_0 = const()[name = tensor("reshape_57_shape_0"), val = tensor([2, 1280, 16, 16])]; tensor reshape_57_cast_fp16 = reshape(shape = reshape_57_shape_0, x = real_div_14_cast_fp16)[name = tensor("reshape_57_cast_fp16")]; tensor add_29_gamma_0_to_fp16 = const()[name = tensor("add_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148001344)))]; tensor add_29_beta_0_to_fp16 = const()[name = tensor("add_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148003968)))]; tensor add_29_epsilon_0_to_fp16 = const()[name = tensor("add_29_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_29_cast_fp16 = batch_norm(beta = add_29_beta_0_to_fp16, epsilon = add_29_epsilon_0_to_fp16, gamma = add_29_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_57_cast_fp16)[name = tensor("add_29_cast_fp16")]; tensor var_4641 = const()[name = tensor("op_4641"), val = tensor([1, 1])]; tensor var_4643 = const()[name = tensor("op_4643"), val = tensor([1, 1])]; tensor hidden_states_79_pad_type_0 = const()[name = tensor("hidden_states_79_pad_type_0"), val = tensor("custom")]; tensor hidden_states_79_pad_0 = const()[name = tensor("hidden_states_79_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_0_proj_in_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148006592)))]; tensor down_blocks_2_attentions_0_proj_in_bias_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151283456)))]; tensor hidden_states_79_cast_fp16 = conv(bias = down_blocks_2_attentions_0_proj_in_bias_to_fp16, dilations = var_4643, groups = var_4569, pad = hidden_states_79_pad_0, pad_type = hidden_states_79_pad_type_0, strides = var_4641, weight = down_blocks_2_attentions_0_proj_in_weight_to_fp16, x = add_29_cast_fp16)[name = tensor("hidden_states_79_cast_fp16")]; tensor var_4648 = const()[name = tensor("op_4648"), val = tensor([2, 1280, 1, 256])]; tensor inputs_25_cast_fp16 = reshape(shape = var_4648, x = hidden_states_79_cast_fp16)[name = tensor("inputs_25_cast_fp16")]; tensor hidden_states_81_axes_0 = const()[name = tensor("hidden_states_81_axes_0"), val = tensor([1])]; tensor hidden_states_81_gamma_0_to_fp16 = const()[name = tensor("hidden_states_81_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151286080)))]; tensor hidden_states_81_beta_0_to_fp16 = const()[name = tensor("hidden_states_81_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151288704)))]; tensor var_4664_to_fp16 = const()[name = tensor("op_4664_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_81_cast_fp16 = layer_norm(axes = hidden_states_81_axes_0, beta = hidden_states_81_beta_0_to_fp16, epsilon = var_4664_to_fp16, gamma = hidden_states_81_gamma_0_to_fp16, x = inputs_25_cast_fp16)[name = tensor("hidden_states_81_cast_fp16")]; tensor var_4679 = const()[name = tensor("op_4679"), val = tensor([1, 1])]; tensor var_4681 = const()[name = tensor("op_4681"), val = tensor([1, 1])]; tensor q_17_pad_type_0 = const()[name = tensor("q_17_pad_type_0"), val = tensor("custom")]; tensor q_17_pad_0 = const()[name = tensor("q_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151291328)))]; tensor q_17_cast_fp16 = conv(dilations = var_4681, groups = var_4569, pad = q_17_pad_0, pad_type = q_17_pad_type_0, strides = var_4679, weight = down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_81_cast_fp16)[name = tensor("q_17_cast_fp16")]; tensor var_4685 = const()[name = tensor("op_4685"), val = tensor([1, 1])]; tensor var_4687 = const()[name = tensor("op_4687"), val = tensor([1, 1])]; tensor k_33_pad_type_0 = const()[name = tensor("k_33_pad_type_0"), val = tensor("custom")]; tensor k_33_pad_0 = const()[name = tensor("k_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(154568192)))]; tensor k_33_cast_fp16 = conv(dilations = var_4687, groups = var_4569, pad = k_33_pad_0, pad_type = k_33_pad_type_0, strides = var_4685, weight = down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_81_cast_fp16)[name = tensor("k_33_cast_fp16")]; tensor var_4691 = const()[name = tensor("op_4691"), val = tensor([1, 1])]; tensor var_4693 = const()[name = tensor("op_4693"), val = tensor([1, 1])]; tensor v_17_pad_type_0 = const()[name = tensor("v_17_pad_type_0"), val = tensor("custom")]; tensor v_17_pad_0 = const()[name = tensor("v_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157845056)))]; tensor v_17_cast_fp16 = conv(dilations = var_4693, groups = var_4569, pad = v_17_pad_0, pad_type = v_17_pad_type_0, strides = var_4691, weight = down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_81_cast_fp16)[name = tensor("v_17_cast_fp16")]; tensor var_4697_begin_0 = const()[name = tensor("op_4697_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4697_end_0 = const()[name = tensor("op_4697_end_0"), val = tensor([2, 160, 1, 256])]; tensor var_4697_end_mask_0 = const()[name = tensor("op_4697_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4697_cast_fp16 = slice_by_index(begin = var_4697_begin_0, end = var_4697_end_0, end_mask = var_4697_end_mask_0, x = q_17_cast_fp16)[name = tensor("op_4697_cast_fp16")]; tensor var_4701_begin_0 = const()[name = tensor("op_4701_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_4701_end_0 = const()[name = tensor("op_4701_end_0"), val = tensor([2, 320, 1, 256])]; tensor var_4701_end_mask_0 = const()[name = tensor("op_4701_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4701_cast_fp16 = slice_by_index(begin = var_4701_begin_0, end = var_4701_end_0, end_mask = var_4701_end_mask_0, x = q_17_cast_fp16)[name = tensor("op_4701_cast_fp16")]; tensor var_4705_begin_0 = const()[name = tensor("op_4705_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_4705_end_0 = const()[name = tensor("op_4705_end_0"), val = tensor([2, 480, 1, 256])]; tensor var_4705_end_mask_0 = const()[name = tensor("op_4705_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4705_cast_fp16 = slice_by_index(begin = var_4705_begin_0, end = var_4705_end_0, end_mask = var_4705_end_mask_0, x = q_17_cast_fp16)[name = tensor("op_4705_cast_fp16")]; tensor var_4709_begin_0 = const()[name = tensor("op_4709_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_4709_end_0 = const()[name = tensor("op_4709_end_0"), val = tensor([2, 640, 1, 256])]; tensor var_4709_end_mask_0 = const()[name = tensor("op_4709_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4709_cast_fp16 = slice_by_index(begin = var_4709_begin_0, end = var_4709_end_0, end_mask = var_4709_end_mask_0, x = q_17_cast_fp16)[name = tensor("op_4709_cast_fp16")]; tensor var_4713_begin_0 = const()[name = tensor("op_4713_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_4713_end_0 = const()[name = tensor("op_4713_end_0"), val = tensor([2, 800, 1, 256])]; tensor var_4713_end_mask_0 = const()[name = tensor("op_4713_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4713_cast_fp16 = slice_by_index(begin = var_4713_begin_0, end = var_4713_end_0, end_mask = var_4713_end_mask_0, x = q_17_cast_fp16)[name = tensor("op_4713_cast_fp16")]; tensor var_4717_begin_0 = const()[name = tensor("op_4717_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_4717_end_0 = const()[name = tensor("op_4717_end_0"), val = tensor([2, 960, 1, 256])]; tensor var_4717_end_mask_0 = const()[name = tensor("op_4717_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4717_cast_fp16 = slice_by_index(begin = var_4717_begin_0, end = var_4717_end_0, end_mask = var_4717_end_mask_0, x = q_17_cast_fp16)[name = tensor("op_4717_cast_fp16")]; tensor var_4721_begin_0 = const()[name = tensor("op_4721_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_4721_end_0 = const()[name = tensor("op_4721_end_0"), val = tensor([2, 1120, 1, 256])]; tensor var_4721_end_mask_0 = const()[name = tensor("op_4721_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4721_cast_fp16 = slice_by_index(begin = var_4721_begin_0, end = var_4721_end_0, end_mask = var_4721_end_mask_0, x = q_17_cast_fp16)[name = tensor("op_4721_cast_fp16")]; tensor var_4725_begin_0 = const()[name = tensor("op_4725_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_4725_end_0 = const()[name = tensor("op_4725_end_0"), val = tensor([2, 1280, 1, 256])]; tensor var_4725_end_mask_0 = const()[name = tensor("op_4725_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4725_cast_fp16 = slice_by_index(begin = var_4725_begin_0, end = var_4725_end_0, end_mask = var_4725_end_mask_0, x = q_17_cast_fp16)[name = tensor("op_4725_cast_fp16")]; tensor k_35_perm_0 = const()[name = tensor("k_35_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_4732_begin_0 = const()[name = tensor("op_4732_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4732_end_0 = const()[name = tensor("op_4732_end_0"), val = tensor([2, 256, 1, 160])]; tensor var_4732_end_mask_0 = const()[name = tensor("op_4732_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_23 = transpose(perm = k_35_perm_0, x = k_33_cast_fp16)[name = tensor("transpose_23")]; tensor var_4732_cast_fp16 = slice_by_index(begin = var_4732_begin_0, end = var_4732_end_0, end_mask = var_4732_end_mask_0, x = transpose_23)[name = tensor("op_4732_cast_fp16")]; tensor var_4736_begin_0 = const()[name = tensor("op_4736_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_4736_end_0 = const()[name = tensor("op_4736_end_0"), val = tensor([2, 256, 1, 320])]; tensor var_4736_end_mask_0 = const()[name = tensor("op_4736_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4736_cast_fp16 = slice_by_index(begin = var_4736_begin_0, end = var_4736_end_0, end_mask = var_4736_end_mask_0, x = transpose_23)[name = tensor("op_4736_cast_fp16")]; tensor var_4740_begin_0 = const()[name = tensor("op_4740_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_4740_end_0 = const()[name = tensor("op_4740_end_0"), val = tensor([2, 256, 1, 480])]; tensor var_4740_end_mask_0 = const()[name = tensor("op_4740_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4740_cast_fp16 = slice_by_index(begin = var_4740_begin_0, end = var_4740_end_0, end_mask = var_4740_end_mask_0, x = transpose_23)[name = tensor("op_4740_cast_fp16")]; tensor var_4744_begin_0 = const()[name = tensor("op_4744_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_4744_end_0 = const()[name = tensor("op_4744_end_0"), val = tensor([2, 256, 1, 640])]; tensor var_4744_end_mask_0 = const()[name = tensor("op_4744_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4744_cast_fp16 = slice_by_index(begin = var_4744_begin_0, end = var_4744_end_0, end_mask = var_4744_end_mask_0, x = transpose_23)[name = tensor("op_4744_cast_fp16")]; tensor var_4748_begin_0 = const()[name = tensor("op_4748_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_4748_end_0 = const()[name = tensor("op_4748_end_0"), val = tensor([2, 256, 1, 800])]; tensor var_4748_end_mask_0 = const()[name = tensor("op_4748_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4748_cast_fp16 = slice_by_index(begin = var_4748_begin_0, end = var_4748_end_0, end_mask = var_4748_end_mask_0, x = transpose_23)[name = tensor("op_4748_cast_fp16")]; tensor var_4752_begin_0 = const()[name = tensor("op_4752_begin_0"), val = tensor([0, 0, 0, 800])]; tensor var_4752_end_0 = const()[name = tensor("op_4752_end_0"), val = tensor([2, 256, 1, 960])]; tensor var_4752_end_mask_0 = const()[name = tensor("op_4752_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4752_cast_fp16 = slice_by_index(begin = var_4752_begin_0, end = var_4752_end_0, end_mask = var_4752_end_mask_0, x = transpose_23)[name = tensor("op_4752_cast_fp16")]; tensor var_4756_begin_0 = const()[name = tensor("op_4756_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_4756_end_0 = const()[name = tensor("op_4756_end_0"), val = tensor([2, 256, 1, 1120])]; tensor var_4756_end_mask_0 = const()[name = tensor("op_4756_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4756_cast_fp16 = slice_by_index(begin = var_4756_begin_0, end = var_4756_end_0, end_mask = var_4756_end_mask_0, x = transpose_23)[name = tensor("op_4756_cast_fp16")]; tensor var_4760_begin_0 = const()[name = tensor("op_4760_begin_0"), val = tensor([0, 0, 0, 1120])]; tensor var_4760_end_0 = const()[name = tensor("op_4760_end_0"), val = tensor([2, 256, 1, 1280])]; tensor var_4760_end_mask_0 = const()[name = tensor("op_4760_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4760_cast_fp16 = slice_by_index(begin = var_4760_begin_0, end = var_4760_end_0, end_mask = var_4760_end_mask_0, x = transpose_23)[name = tensor("op_4760_cast_fp16")]; tensor var_4762_begin_0 = const()[name = tensor("op_4762_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4762_end_0 = const()[name = tensor("op_4762_end_0"), val = tensor([2, 160, 1, 256])]; tensor var_4762_end_mask_0 = const()[name = tensor("op_4762_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4762_cast_fp16 = slice_by_index(begin = var_4762_begin_0, end = var_4762_end_0, end_mask = var_4762_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_4762_cast_fp16")]; tensor var_4766_begin_0 = const()[name = tensor("op_4766_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_4766_end_0 = const()[name = tensor("op_4766_end_0"), val = tensor([2, 320, 1, 256])]; tensor var_4766_end_mask_0 = const()[name = tensor("op_4766_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4766_cast_fp16 = slice_by_index(begin = var_4766_begin_0, end = var_4766_end_0, end_mask = var_4766_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_4766_cast_fp16")]; tensor var_4770_begin_0 = const()[name = tensor("op_4770_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_4770_end_0 = const()[name = tensor("op_4770_end_0"), val = tensor([2, 480, 1, 256])]; tensor var_4770_end_mask_0 = const()[name = tensor("op_4770_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4770_cast_fp16 = slice_by_index(begin = var_4770_begin_0, end = var_4770_end_0, end_mask = var_4770_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_4770_cast_fp16")]; tensor var_4774_begin_0 = const()[name = tensor("op_4774_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_4774_end_0 = const()[name = tensor("op_4774_end_0"), val = tensor([2, 640, 1, 256])]; tensor var_4774_end_mask_0 = const()[name = tensor("op_4774_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4774_cast_fp16 = slice_by_index(begin = var_4774_begin_0, end = var_4774_end_0, end_mask = var_4774_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_4774_cast_fp16")]; tensor var_4778_begin_0 = const()[name = tensor("op_4778_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_4778_end_0 = const()[name = tensor("op_4778_end_0"), val = tensor([2, 800, 1, 256])]; tensor var_4778_end_mask_0 = const()[name = tensor("op_4778_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4778_cast_fp16 = slice_by_index(begin = var_4778_begin_0, end = var_4778_end_0, end_mask = var_4778_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_4778_cast_fp16")]; tensor var_4782_begin_0 = const()[name = tensor("op_4782_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_4782_end_0 = const()[name = tensor("op_4782_end_0"), val = tensor([2, 960, 1, 256])]; tensor var_4782_end_mask_0 = const()[name = tensor("op_4782_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4782_cast_fp16 = slice_by_index(begin = var_4782_begin_0, end = var_4782_end_0, end_mask = var_4782_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_4782_cast_fp16")]; tensor var_4786_begin_0 = const()[name = tensor("op_4786_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_4786_end_0 = const()[name = tensor("op_4786_end_0"), val = tensor([2, 1120, 1, 256])]; tensor var_4786_end_mask_0 = const()[name = tensor("op_4786_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4786_cast_fp16 = slice_by_index(begin = var_4786_begin_0, end = var_4786_end_0, end_mask = var_4786_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_4786_cast_fp16")]; tensor var_4790_begin_0 = const()[name = tensor("op_4790_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_4790_end_0 = const()[name = tensor("op_4790_end_0"), val = tensor([2, 1280, 1, 256])]; tensor var_4790_end_mask_0 = const()[name = tensor("op_4790_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4790_cast_fp16 = slice_by_index(begin = var_4790_begin_0, end = var_4790_end_0, end_mask = var_4790_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_4790_cast_fp16")]; tensor var_4794_equation_0 = const()[name = tensor("op_4794_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4794_cast_fp16 = einsum(equation = var_4794_equation_0, values = (var_4732_cast_fp16, var_4697_cast_fp16))[name = tensor("op_4794_cast_fp16")]; tensor var_4795_to_fp16 = const()[name = tensor("op_4795_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_1_cast_fp16 = mul(x = var_4794_cast_fp16, y = var_4795_to_fp16)[name = tensor("aw_1_cast_fp16")]; tensor var_4798_equation_0 = const()[name = tensor("op_4798_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4798_cast_fp16 = einsum(equation = var_4798_equation_0, values = (var_4736_cast_fp16, var_4701_cast_fp16))[name = tensor("op_4798_cast_fp16")]; tensor var_4799_to_fp16 = const()[name = tensor("op_4799_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_3_cast_fp16 = mul(x = var_4798_cast_fp16, y = var_4799_to_fp16)[name = tensor("aw_3_cast_fp16")]; tensor var_4802_equation_0 = const()[name = tensor("op_4802_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4802_cast_fp16 = einsum(equation = var_4802_equation_0, values = (var_4740_cast_fp16, var_4705_cast_fp16))[name = tensor("op_4802_cast_fp16")]; tensor var_4803_to_fp16 = const()[name = tensor("op_4803_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_5_cast_fp16 = mul(x = var_4802_cast_fp16, y = var_4803_to_fp16)[name = tensor("aw_5_cast_fp16")]; tensor var_4806_equation_0 = const()[name = tensor("op_4806_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4806_cast_fp16 = einsum(equation = var_4806_equation_0, values = (var_4744_cast_fp16, var_4709_cast_fp16))[name = tensor("op_4806_cast_fp16")]; tensor var_4807_to_fp16 = const()[name = tensor("op_4807_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_7_cast_fp16 = mul(x = var_4806_cast_fp16, y = var_4807_to_fp16)[name = tensor("aw_7_cast_fp16")]; tensor var_4810_equation_0 = const()[name = tensor("op_4810_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4810_cast_fp16 = einsum(equation = var_4810_equation_0, values = (var_4748_cast_fp16, var_4713_cast_fp16))[name = tensor("op_4810_cast_fp16")]; tensor var_4811_to_fp16 = const()[name = tensor("op_4811_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_9_cast_fp16 = mul(x = var_4810_cast_fp16, y = var_4811_to_fp16)[name = tensor("aw_9_cast_fp16")]; tensor var_4814_equation_0 = const()[name = tensor("op_4814_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4814_cast_fp16 = einsum(equation = var_4814_equation_0, values = (var_4752_cast_fp16, var_4717_cast_fp16))[name = tensor("op_4814_cast_fp16")]; tensor var_4815_to_fp16 = const()[name = tensor("op_4815_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_11_cast_fp16 = mul(x = var_4814_cast_fp16, y = var_4815_to_fp16)[name = tensor("aw_11_cast_fp16")]; tensor var_4818_equation_0 = const()[name = tensor("op_4818_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4818_cast_fp16 = einsum(equation = var_4818_equation_0, values = (var_4756_cast_fp16, var_4721_cast_fp16))[name = tensor("op_4818_cast_fp16")]; tensor var_4819_to_fp16 = const()[name = tensor("op_4819_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_13_cast_fp16 = mul(x = var_4818_cast_fp16, y = var_4819_to_fp16)[name = tensor("aw_13_cast_fp16")]; tensor var_4822_equation_0 = const()[name = tensor("op_4822_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4822_cast_fp16 = einsum(equation = var_4822_equation_0, values = (var_4760_cast_fp16, var_4725_cast_fp16))[name = tensor("op_4822_cast_fp16")]; tensor var_4823_to_fp16 = const()[name = tensor("op_4823_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_15_cast_fp16 = mul(x = var_4822_cast_fp16, y = var_4823_to_fp16)[name = tensor("aw_15_cast_fp16")]; tensor var_4825_cast_fp16 = softmax(axis = var_4569, x = aw_1_cast_fp16)[name = tensor("op_4825_cast_fp16")]; tensor var_4826_cast_fp16 = softmax(axis = var_4569, x = aw_3_cast_fp16)[name = tensor("op_4826_cast_fp16")]; tensor var_4827_cast_fp16 = softmax(axis = var_4569, x = aw_5_cast_fp16)[name = tensor("op_4827_cast_fp16")]; tensor var_4828_cast_fp16 = softmax(axis = var_4569, x = aw_7_cast_fp16)[name = tensor("op_4828_cast_fp16")]; tensor var_4829_cast_fp16 = softmax(axis = var_4569, x = aw_9_cast_fp16)[name = tensor("op_4829_cast_fp16")]; tensor var_4830_cast_fp16 = softmax(axis = var_4569, x = aw_11_cast_fp16)[name = tensor("op_4830_cast_fp16")]; tensor var_4831_cast_fp16 = softmax(axis = var_4569, x = aw_13_cast_fp16)[name = tensor("op_4831_cast_fp16")]; tensor var_4832_cast_fp16 = softmax(axis = var_4569, x = aw_15_cast_fp16)[name = tensor("op_4832_cast_fp16")]; tensor var_4834_equation_0 = const()[name = tensor("op_4834_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4834_cast_fp16 = einsum(equation = var_4834_equation_0, values = (var_4762_cast_fp16, var_4825_cast_fp16))[name = tensor("op_4834_cast_fp16")]; tensor var_4836_equation_0 = const()[name = tensor("op_4836_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4836_cast_fp16 = einsum(equation = var_4836_equation_0, values = (var_4766_cast_fp16, var_4826_cast_fp16))[name = tensor("op_4836_cast_fp16")]; tensor var_4838_equation_0 = const()[name = tensor("op_4838_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4838_cast_fp16 = einsum(equation = var_4838_equation_0, values = (var_4770_cast_fp16, var_4827_cast_fp16))[name = tensor("op_4838_cast_fp16")]; tensor var_4840_equation_0 = const()[name = tensor("op_4840_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4840_cast_fp16 = einsum(equation = var_4840_equation_0, values = (var_4774_cast_fp16, var_4828_cast_fp16))[name = tensor("op_4840_cast_fp16")]; tensor var_4842_equation_0 = const()[name = tensor("op_4842_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4842_cast_fp16 = einsum(equation = var_4842_equation_0, values = (var_4778_cast_fp16, var_4829_cast_fp16))[name = tensor("op_4842_cast_fp16")]; tensor var_4844_equation_0 = const()[name = tensor("op_4844_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4844_cast_fp16 = einsum(equation = var_4844_equation_0, values = (var_4782_cast_fp16, var_4830_cast_fp16))[name = tensor("op_4844_cast_fp16")]; tensor var_4846_equation_0 = const()[name = tensor("op_4846_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4846_cast_fp16 = einsum(equation = var_4846_equation_0, values = (var_4786_cast_fp16, var_4831_cast_fp16))[name = tensor("op_4846_cast_fp16")]; tensor var_4848_equation_0 = const()[name = tensor("op_4848_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4848_cast_fp16 = einsum(equation = var_4848_equation_0, values = (var_4790_cast_fp16, var_4832_cast_fp16))[name = tensor("op_4848_cast_fp16")]; tensor input_133_interleave_0 = const()[name = tensor("input_133_interleave_0"), val = tensor(false)]; tensor input_133_cast_fp16 = concat(axis = var_4569, interleave = input_133_interleave_0, values = (var_4834_cast_fp16, var_4836_cast_fp16, var_4838_cast_fp16, var_4840_cast_fp16, var_4842_cast_fp16, var_4844_cast_fp16, var_4846_cast_fp16, var_4848_cast_fp16))[name = tensor("input_133_cast_fp16")]; tensor var_4854 = const()[name = tensor("op_4854"), val = tensor([1, 1])]; tensor var_4856 = const()[name = tensor("op_4856"), val = tensor([1, 1])]; tensor var_4858_pad_type_0 = const()[name = tensor("op_4858_pad_type_0"), val = tensor("custom")]; tensor var_4858_pad_0 = const()[name = tensor("op_4858_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161121920)))]; tensor down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164398784)))]; tensor var_4858_cast_fp16 = conv(bias = down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_4856, groups = var_4569, pad = var_4858_pad_0, pad_type = var_4858_pad_type_0, strides = var_4854, weight = down_blocks_2_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_133_cast_fp16)[name = tensor("op_4858_cast_fp16")]; tensor inputs_27_cast_fp16 = add(x = var_4858_cast_fp16, y = inputs_25_cast_fp16)[name = tensor("inputs_27_cast_fp16")]; tensor hidden_states_83_axes_0 = const()[name = tensor("hidden_states_83_axes_0"), val = tensor([1])]; tensor hidden_states_83_gamma_0_to_fp16 = const()[name = tensor("hidden_states_83_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164401408)))]; tensor hidden_states_83_beta_0_to_fp16 = const()[name = tensor("hidden_states_83_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164404032)))]; tensor var_4868_to_fp16 = const()[name = tensor("op_4868_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_83_cast_fp16 = layer_norm(axes = hidden_states_83_axes_0, beta = hidden_states_83_beta_0_to_fp16, epsilon = var_4868_to_fp16, gamma = hidden_states_83_gamma_0_to_fp16, x = inputs_27_cast_fp16)[name = tensor("hidden_states_83_cast_fp16")]; tensor var_4883 = const()[name = tensor("op_4883"), val = tensor([1, 1])]; tensor var_4885 = const()[name = tensor("op_4885"), val = tensor([1, 1])]; tensor q_19_pad_type_0 = const()[name = tensor("q_19_pad_type_0"), val = tensor("custom")]; tensor q_19_pad_0 = const()[name = tensor("q_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164406656)))]; tensor q_19_cast_fp16 = conv(dilations = var_4885, groups = var_4569, pad = q_19_pad_0, pad_type = q_19_pad_type_0, strides = var_4883, weight = down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_83_cast_fp16)[name = tensor("q_19_cast_fp16")]; tensor var_4889 = const()[name = tensor("op_4889"), val = tensor([1, 1])]; tensor var_4891 = const()[name = tensor("op_4891"), val = tensor([1, 1])]; tensor k_37_pad_type_0 = const()[name = tensor("k_37_pad_type_0"), val = tensor("custom")]; tensor k_37_pad_0 = const()[name = tensor("k_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(167683520)))]; tensor k_37_cast_fp16 = conv(dilations = var_4891, groups = var_4569, pad = k_37_pad_0, pad_type = k_37_pad_type_0, strides = var_4889, weight = down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_37_cast_fp16")]; tensor var_4895 = const()[name = tensor("op_4895"), val = tensor([1, 1])]; tensor var_4897 = const()[name = tensor("op_4897"), val = tensor([1, 1])]; tensor v_19_pad_type_0 = const()[name = tensor("v_19_pad_type_0"), val = tensor("custom")]; tensor v_19_pad_0 = const()[name = tensor("v_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169649664)))]; tensor v_19_cast_fp16 = conv(dilations = var_4897, groups = var_4569, pad = v_19_pad_0, pad_type = v_19_pad_type_0, strides = var_4895, weight = down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_19_cast_fp16")]; tensor var_4901_begin_0 = const()[name = tensor("op_4901_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4901_end_0 = const()[name = tensor("op_4901_end_0"), val = tensor([2, 160, 1, 256])]; tensor var_4901_end_mask_0 = const()[name = tensor("op_4901_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4901_cast_fp16 = slice_by_index(begin = var_4901_begin_0, end = var_4901_end_0, end_mask = var_4901_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_4901_cast_fp16")]; tensor var_4905_begin_0 = const()[name = tensor("op_4905_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_4905_end_0 = const()[name = tensor("op_4905_end_0"), val = tensor([2, 320, 1, 256])]; tensor var_4905_end_mask_0 = const()[name = tensor("op_4905_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4905_cast_fp16 = slice_by_index(begin = var_4905_begin_0, end = var_4905_end_0, end_mask = var_4905_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_4905_cast_fp16")]; tensor var_4909_begin_0 = const()[name = tensor("op_4909_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_4909_end_0 = const()[name = tensor("op_4909_end_0"), val = tensor([2, 480, 1, 256])]; tensor var_4909_end_mask_0 = const()[name = tensor("op_4909_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4909_cast_fp16 = slice_by_index(begin = var_4909_begin_0, end = var_4909_end_0, end_mask = var_4909_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_4909_cast_fp16")]; tensor var_4913_begin_0 = const()[name = tensor("op_4913_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_4913_end_0 = const()[name = tensor("op_4913_end_0"), val = tensor([2, 640, 1, 256])]; tensor var_4913_end_mask_0 = const()[name = tensor("op_4913_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4913_cast_fp16 = slice_by_index(begin = var_4913_begin_0, end = var_4913_end_0, end_mask = var_4913_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_4913_cast_fp16")]; tensor var_4917_begin_0 = const()[name = tensor("op_4917_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_4917_end_0 = const()[name = tensor("op_4917_end_0"), val = tensor([2, 800, 1, 256])]; tensor var_4917_end_mask_0 = const()[name = tensor("op_4917_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4917_cast_fp16 = slice_by_index(begin = var_4917_begin_0, end = var_4917_end_0, end_mask = var_4917_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_4917_cast_fp16")]; tensor var_4921_begin_0 = const()[name = tensor("op_4921_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_4921_end_0 = const()[name = tensor("op_4921_end_0"), val = tensor([2, 960, 1, 256])]; tensor var_4921_end_mask_0 = const()[name = tensor("op_4921_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4921_cast_fp16 = slice_by_index(begin = var_4921_begin_0, end = var_4921_end_0, end_mask = var_4921_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_4921_cast_fp16")]; tensor var_4925_begin_0 = const()[name = tensor("op_4925_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_4925_end_0 = const()[name = tensor("op_4925_end_0"), val = tensor([2, 1120, 1, 256])]; tensor var_4925_end_mask_0 = const()[name = tensor("op_4925_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4925_cast_fp16 = slice_by_index(begin = var_4925_begin_0, end = var_4925_end_0, end_mask = var_4925_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_4925_cast_fp16")]; tensor var_4929_begin_0 = const()[name = tensor("op_4929_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_4929_end_0 = const()[name = tensor("op_4929_end_0"), val = tensor([2, 1280, 1, 256])]; tensor var_4929_end_mask_0 = const()[name = tensor("op_4929_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4929_cast_fp16 = slice_by_index(begin = var_4929_begin_0, end = var_4929_end_0, end_mask = var_4929_end_mask_0, x = q_19_cast_fp16)[name = tensor("op_4929_cast_fp16")]; tensor k_39_perm_0 = const()[name = tensor("k_39_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_4936_begin_0 = const()[name = tensor("op_4936_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4936_end_0 = const()[name = tensor("op_4936_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_4936_end_mask_0 = const()[name = tensor("op_4936_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_22 = transpose(perm = k_39_perm_0, x = k_37_cast_fp16)[name = tensor("transpose_22")]; tensor var_4936_cast_fp16 = slice_by_index(begin = var_4936_begin_0, end = var_4936_end_0, end_mask = var_4936_end_mask_0, x = transpose_22)[name = tensor("op_4936_cast_fp16")]; tensor var_4940_begin_0 = const()[name = tensor("op_4940_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_4940_end_0 = const()[name = tensor("op_4940_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_4940_end_mask_0 = const()[name = tensor("op_4940_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4940_cast_fp16 = slice_by_index(begin = var_4940_begin_0, end = var_4940_end_0, end_mask = var_4940_end_mask_0, x = transpose_22)[name = tensor("op_4940_cast_fp16")]; tensor var_4944_begin_0 = const()[name = tensor("op_4944_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_4944_end_0 = const()[name = tensor("op_4944_end_0"), val = tensor([2, 77, 1, 480])]; tensor var_4944_end_mask_0 = const()[name = tensor("op_4944_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4944_cast_fp16 = slice_by_index(begin = var_4944_begin_0, end = var_4944_end_0, end_mask = var_4944_end_mask_0, x = transpose_22)[name = tensor("op_4944_cast_fp16")]; tensor var_4948_begin_0 = const()[name = tensor("op_4948_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_4948_end_0 = const()[name = tensor("op_4948_end_0"), val = tensor([2, 77, 1, 640])]; tensor var_4948_end_mask_0 = const()[name = tensor("op_4948_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4948_cast_fp16 = slice_by_index(begin = var_4948_begin_0, end = var_4948_end_0, end_mask = var_4948_end_mask_0, x = transpose_22)[name = tensor("op_4948_cast_fp16")]; tensor var_4952_begin_0 = const()[name = tensor("op_4952_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_4952_end_0 = const()[name = tensor("op_4952_end_0"), val = tensor([2, 77, 1, 800])]; tensor var_4952_end_mask_0 = const()[name = tensor("op_4952_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4952_cast_fp16 = slice_by_index(begin = var_4952_begin_0, end = var_4952_end_0, end_mask = var_4952_end_mask_0, x = transpose_22)[name = tensor("op_4952_cast_fp16")]; tensor var_4956_begin_0 = const()[name = tensor("op_4956_begin_0"), val = tensor([0, 0, 0, 800])]; tensor var_4956_end_0 = const()[name = tensor("op_4956_end_0"), val = tensor([2, 77, 1, 960])]; tensor var_4956_end_mask_0 = const()[name = tensor("op_4956_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4956_cast_fp16 = slice_by_index(begin = var_4956_begin_0, end = var_4956_end_0, end_mask = var_4956_end_mask_0, x = transpose_22)[name = tensor("op_4956_cast_fp16")]; tensor var_4960_begin_0 = const()[name = tensor("op_4960_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_4960_end_0 = const()[name = tensor("op_4960_end_0"), val = tensor([2, 77, 1, 1120])]; tensor var_4960_end_mask_0 = const()[name = tensor("op_4960_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4960_cast_fp16 = slice_by_index(begin = var_4960_begin_0, end = var_4960_end_0, end_mask = var_4960_end_mask_0, x = transpose_22)[name = tensor("op_4960_cast_fp16")]; tensor var_4964_begin_0 = const()[name = tensor("op_4964_begin_0"), val = tensor([0, 0, 0, 1120])]; tensor var_4964_end_0 = const()[name = tensor("op_4964_end_0"), val = tensor([2, 77, 1, 1280])]; tensor var_4964_end_mask_0 = const()[name = tensor("op_4964_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4964_cast_fp16 = slice_by_index(begin = var_4964_begin_0, end = var_4964_end_0, end_mask = var_4964_end_mask_0, x = transpose_22)[name = tensor("op_4964_cast_fp16")]; tensor var_4966_begin_0 = const()[name = tensor("op_4966_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4966_end_0 = const()[name = tensor("op_4966_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_4966_end_mask_0 = const()[name = tensor("op_4966_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4966_cast_fp16 = slice_by_index(begin = var_4966_begin_0, end = var_4966_end_0, end_mask = var_4966_end_mask_0, x = v_19_cast_fp16)[name = tensor("op_4966_cast_fp16")]; tensor var_4970_begin_0 = const()[name = tensor("op_4970_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_4970_end_0 = const()[name = tensor("op_4970_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_4970_end_mask_0 = const()[name = tensor("op_4970_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4970_cast_fp16 = slice_by_index(begin = var_4970_begin_0, end = var_4970_end_0, end_mask = var_4970_end_mask_0, x = v_19_cast_fp16)[name = tensor("op_4970_cast_fp16")]; tensor var_4974_begin_0 = const()[name = tensor("op_4974_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_4974_end_0 = const()[name = tensor("op_4974_end_0"), val = tensor([2, 480, 1, 77])]; tensor var_4974_end_mask_0 = const()[name = tensor("op_4974_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4974_cast_fp16 = slice_by_index(begin = var_4974_begin_0, end = var_4974_end_0, end_mask = var_4974_end_mask_0, x = v_19_cast_fp16)[name = tensor("op_4974_cast_fp16")]; tensor var_4978_begin_0 = const()[name = tensor("op_4978_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_4978_end_0 = const()[name = tensor("op_4978_end_0"), val = tensor([2, 640, 1, 77])]; tensor var_4978_end_mask_0 = const()[name = tensor("op_4978_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4978_cast_fp16 = slice_by_index(begin = var_4978_begin_0, end = var_4978_end_0, end_mask = var_4978_end_mask_0, x = v_19_cast_fp16)[name = tensor("op_4978_cast_fp16")]; tensor var_4982_begin_0 = const()[name = tensor("op_4982_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_4982_end_0 = const()[name = tensor("op_4982_end_0"), val = tensor([2, 800, 1, 77])]; tensor var_4982_end_mask_0 = const()[name = tensor("op_4982_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4982_cast_fp16 = slice_by_index(begin = var_4982_begin_0, end = var_4982_end_0, end_mask = var_4982_end_mask_0, x = v_19_cast_fp16)[name = tensor("op_4982_cast_fp16")]; tensor var_4986_begin_0 = const()[name = tensor("op_4986_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_4986_end_0 = const()[name = tensor("op_4986_end_0"), val = tensor([2, 960, 1, 77])]; tensor var_4986_end_mask_0 = const()[name = tensor("op_4986_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4986_cast_fp16 = slice_by_index(begin = var_4986_begin_0, end = var_4986_end_0, end_mask = var_4986_end_mask_0, x = v_19_cast_fp16)[name = tensor("op_4986_cast_fp16")]; tensor var_4990_begin_0 = const()[name = tensor("op_4990_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_4990_end_0 = const()[name = tensor("op_4990_end_0"), val = tensor([2, 1120, 1, 77])]; tensor var_4990_end_mask_0 = const()[name = tensor("op_4990_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4990_cast_fp16 = slice_by_index(begin = var_4990_begin_0, end = var_4990_end_0, end_mask = var_4990_end_mask_0, x = v_19_cast_fp16)[name = tensor("op_4990_cast_fp16")]; tensor var_4994_begin_0 = const()[name = tensor("op_4994_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_4994_end_0 = const()[name = tensor("op_4994_end_0"), val = tensor([2, 1280, 1, 77])]; tensor var_4994_end_mask_0 = const()[name = tensor("op_4994_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4994_cast_fp16 = slice_by_index(begin = var_4994_begin_0, end = var_4994_end_0, end_mask = var_4994_end_mask_0, x = v_19_cast_fp16)[name = tensor("op_4994_cast_fp16")]; tensor var_4998_equation_0 = const()[name = tensor("op_4998_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_4998_cast_fp16 = einsum(equation = var_4998_equation_0, values = (var_4936_cast_fp16, var_4901_cast_fp16))[name = tensor("op_4998_cast_fp16")]; tensor var_4999_to_fp16 = const()[name = tensor("op_4999_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_17_cast_fp16 = mul(x = var_4998_cast_fp16, y = var_4999_to_fp16)[name = tensor("aw_17_cast_fp16")]; tensor var_5002_equation_0 = const()[name = tensor("op_5002_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5002_cast_fp16 = einsum(equation = var_5002_equation_0, values = (var_4940_cast_fp16, var_4905_cast_fp16))[name = tensor("op_5002_cast_fp16")]; tensor var_5003_to_fp16 = const()[name = tensor("op_5003_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_19_cast_fp16 = mul(x = var_5002_cast_fp16, y = var_5003_to_fp16)[name = tensor("aw_19_cast_fp16")]; tensor var_5006_equation_0 = const()[name = tensor("op_5006_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5006_cast_fp16 = einsum(equation = var_5006_equation_0, values = (var_4944_cast_fp16, var_4909_cast_fp16))[name = tensor("op_5006_cast_fp16")]; tensor var_5007_to_fp16 = const()[name = tensor("op_5007_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_21_cast_fp16 = mul(x = var_5006_cast_fp16, y = var_5007_to_fp16)[name = tensor("aw_21_cast_fp16")]; tensor var_5010_equation_0 = const()[name = tensor("op_5010_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5010_cast_fp16 = einsum(equation = var_5010_equation_0, values = (var_4948_cast_fp16, var_4913_cast_fp16))[name = tensor("op_5010_cast_fp16")]; tensor var_5011_to_fp16 = const()[name = tensor("op_5011_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_23_cast_fp16 = mul(x = var_5010_cast_fp16, y = var_5011_to_fp16)[name = tensor("aw_23_cast_fp16")]; tensor var_5014_equation_0 = const()[name = tensor("op_5014_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5014_cast_fp16 = einsum(equation = var_5014_equation_0, values = (var_4952_cast_fp16, var_4917_cast_fp16))[name = tensor("op_5014_cast_fp16")]; tensor var_5015_to_fp16 = const()[name = tensor("op_5015_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_25_cast_fp16 = mul(x = var_5014_cast_fp16, y = var_5015_to_fp16)[name = tensor("aw_25_cast_fp16")]; tensor var_5018_equation_0 = const()[name = tensor("op_5018_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5018_cast_fp16 = einsum(equation = var_5018_equation_0, values = (var_4956_cast_fp16, var_4921_cast_fp16))[name = tensor("op_5018_cast_fp16")]; tensor var_5019_to_fp16 = const()[name = tensor("op_5019_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_27_cast_fp16 = mul(x = var_5018_cast_fp16, y = var_5019_to_fp16)[name = tensor("aw_27_cast_fp16")]; tensor var_5022_equation_0 = const()[name = tensor("op_5022_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5022_cast_fp16 = einsum(equation = var_5022_equation_0, values = (var_4960_cast_fp16, var_4925_cast_fp16))[name = tensor("op_5022_cast_fp16")]; tensor var_5023_to_fp16 = const()[name = tensor("op_5023_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_29_cast_fp16 = mul(x = var_5022_cast_fp16, y = var_5023_to_fp16)[name = tensor("aw_29_cast_fp16")]; tensor var_5026_equation_0 = const()[name = tensor("op_5026_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5026_cast_fp16 = einsum(equation = var_5026_equation_0, values = (var_4964_cast_fp16, var_4929_cast_fp16))[name = tensor("op_5026_cast_fp16")]; tensor var_5027_to_fp16 = const()[name = tensor("op_5027_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_31_cast_fp16 = mul(x = var_5026_cast_fp16, y = var_5027_to_fp16)[name = tensor("aw_31_cast_fp16")]; tensor var_5029_cast_fp16 = softmax(axis = var_4569, x = aw_17_cast_fp16)[name = tensor("op_5029_cast_fp16")]; tensor var_5030_cast_fp16 = softmax(axis = var_4569, x = aw_19_cast_fp16)[name = tensor("op_5030_cast_fp16")]; tensor var_5031_cast_fp16 = softmax(axis = var_4569, x = aw_21_cast_fp16)[name = tensor("op_5031_cast_fp16")]; tensor var_5032_cast_fp16 = softmax(axis = var_4569, x = aw_23_cast_fp16)[name = tensor("op_5032_cast_fp16")]; tensor var_5033_cast_fp16 = softmax(axis = var_4569, x = aw_25_cast_fp16)[name = tensor("op_5033_cast_fp16")]; tensor var_5034_cast_fp16 = softmax(axis = var_4569, x = aw_27_cast_fp16)[name = tensor("op_5034_cast_fp16")]; tensor var_5035_cast_fp16 = softmax(axis = var_4569, x = aw_29_cast_fp16)[name = tensor("op_5035_cast_fp16")]; tensor var_5036_cast_fp16 = softmax(axis = var_4569, x = aw_31_cast_fp16)[name = tensor("op_5036_cast_fp16")]; tensor var_5038_equation_0 = const()[name = tensor("op_5038_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5038_cast_fp16 = einsum(equation = var_5038_equation_0, values = (var_4966_cast_fp16, var_5029_cast_fp16))[name = tensor("op_5038_cast_fp16")]; tensor var_5040_equation_0 = const()[name = tensor("op_5040_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5040_cast_fp16 = einsum(equation = var_5040_equation_0, values = (var_4970_cast_fp16, var_5030_cast_fp16))[name = tensor("op_5040_cast_fp16")]; tensor var_5042_equation_0 = const()[name = tensor("op_5042_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5042_cast_fp16 = einsum(equation = var_5042_equation_0, values = (var_4974_cast_fp16, var_5031_cast_fp16))[name = tensor("op_5042_cast_fp16")]; tensor var_5044_equation_0 = const()[name = tensor("op_5044_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5044_cast_fp16 = einsum(equation = var_5044_equation_0, values = (var_4978_cast_fp16, var_5032_cast_fp16))[name = tensor("op_5044_cast_fp16")]; tensor var_5046_equation_0 = const()[name = tensor("op_5046_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5046_cast_fp16 = einsum(equation = var_5046_equation_0, values = (var_4982_cast_fp16, var_5033_cast_fp16))[name = tensor("op_5046_cast_fp16")]; tensor var_5048_equation_0 = const()[name = tensor("op_5048_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5048_cast_fp16 = einsum(equation = var_5048_equation_0, values = (var_4986_cast_fp16, var_5034_cast_fp16))[name = tensor("op_5048_cast_fp16")]; tensor var_5050_equation_0 = const()[name = tensor("op_5050_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5050_cast_fp16 = einsum(equation = var_5050_equation_0, values = (var_4990_cast_fp16, var_5035_cast_fp16))[name = tensor("op_5050_cast_fp16")]; tensor var_5052_equation_0 = const()[name = tensor("op_5052_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5052_cast_fp16 = einsum(equation = var_5052_equation_0, values = (var_4994_cast_fp16, var_5036_cast_fp16))[name = tensor("op_5052_cast_fp16")]; tensor input_135_interleave_0 = const()[name = tensor("input_135_interleave_0"), val = tensor(false)]; tensor input_135_cast_fp16 = concat(axis = var_4569, interleave = input_135_interleave_0, values = (var_5038_cast_fp16, var_5040_cast_fp16, var_5042_cast_fp16, var_5044_cast_fp16, var_5046_cast_fp16, var_5048_cast_fp16, var_5050_cast_fp16, var_5052_cast_fp16))[name = tensor("input_135_cast_fp16")]; tensor var_5058 = const()[name = tensor("op_5058"), val = tensor([1, 1])]; tensor var_5060 = const()[name = tensor("op_5060"), val = tensor([1, 1])]; tensor var_5062_pad_type_0 = const()[name = tensor("op_5062_pad_type_0"), val = tensor("custom")]; tensor var_5062_pad_0 = const()[name = tensor("op_5062_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171615808)))]; tensor down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174892672)))]; tensor var_5062_cast_fp16 = conv(bias = down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_5060, groups = var_4569, pad = var_5062_pad_0, pad_type = var_5062_pad_type_0, strides = var_5058, weight = down_blocks_2_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_135_cast_fp16)[name = tensor("op_5062_cast_fp16")]; tensor inputs_29_cast_fp16 = add(x = var_5062_cast_fp16, y = inputs_27_cast_fp16)[name = tensor("inputs_29_cast_fp16")]; tensor input_137_axes_0 = const()[name = tensor("input_137_axes_0"), val = tensor([1])]; tensor input_137_gamma_0_to_fp16 = const()[name = tensor("input_137_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174895296)))]; tensor input_137_beta_0_to_fp16 = const()[name = tensor("input_137_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174897920)))]; tensor var_5072_to_fp16 = const()[name = tensor("op_5072_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_137_cast_fp16 = layer_norm(axes = input_137_axes_0, beta = input_137_beta_0_to_fp16, epsilon = var_5072_to_fp16, gamma = input_137_gamma_0_to_fp16, x = inputs_29_cast_fp16)[name = tensor("input_137_cast_fp16")]; tensor var_5088 = const()[name = tensor("op_5088"), val = tensor([1, 1])]; tensor var_5090 = const()[name = tensor("op_5090"), val = tensor([1, 1])]; tensor var_5092_pad_type_0 = const()[name = tensor("op_5092_pad_type_0"), val = tensor("custom")]; tensor var_5092_pad_0 = const()[name = tensor("op_5092_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174900544)))]; tensor down_blocks_2_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201115008)))]; tensor var_5092_cast_fp16 = conv(bias = down_blocks_2_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_5090, groups = var_4569, pad = var_5092_pad_0, pad_type = var_5092_pad_type_0, strides = var_5088, weight = down_blocks_2_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_137_cast_fp16)[name = tensor("op_5092_cast_fp16")]; tensor var_5093_split_sizes_0 = const()[name = tensor("op_5093_split_sizes_0"), val = tensor([5120, 5120])]; tensor var_5093_axis_0 = const()[name = tensor("op_5093_axis_0"), val = tensor(1)]; tensor var_5093_cast_fp16_0, tensor var_5093_cast_fp16_1 = split(axis = var_5093_axis_0, split_sizes = var_5093_split_sizes_0, x = var_5092_cast_fp16)[name = tensor("op_5093_cast_fp16")]; tensor var_5095_mode_0 = const()[name = tensor("op_5095_mode_0"), val = tensor("EXACT")]; tensor var_5095_cast_fp16 = gelu(mode = var_5095_mode_0, x = var_5093_cast_fp16_1)[name = tensor("op_5095_cast_fp16")]; tensor input_139_cast_fp16 = mul(x = var_5093_cast_fp16_0, y = var_5095_cast_fp16)[name = tensor("input_139_cast_fp16")]; tensor var_5099 = const()[name = tensor("op_5099"), val = tensor([1, 1])]; tensor var_5101 = const()[name = tensor("op_5101"), val = tensor([1, 1])]; tensor var_5103_pad_type_0 = const()[name = tensor("op_5103_pad_type_0"), val = tensor("custom")]; tensor var_5103_pad_0 = const()[name = tensor("op_5103_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201135552)))]; tensor down_blocks_2_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214242816)))]; tensor var_5103_cast_fp16 = conv(bias = down_blocks_2_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_5101, groups = var_4569, pad = var_5103_pad_0, pad_type = var_5103_pad_type_0, strides = var_5099, weight = down_blocks_2_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_139_cast_fp16)[name = tensor("op_5103_cast_fp16")]; tensor hidden_states_87_cast_fp16 = add(x = var_5103_cast_fp16, y = inputs_29_cast_fp16)[name = tensor("hidden_states_87_cast_fp16")]; tensor var_5105 = const()[name = tensor("op_5105"), val = tensor([2, 1280, 16, 16])]; tensor input_141_cast_fp16 = reshape(shape = var_5105, x = hidden_states_87_cast_fp16)[name = tensor("input_141_cast_fp16")]; tensor var_5109 = const()[name = tensor("op_5109"), val = tensor([1, 1])]; tensor var_5111 = const()[name = tensor("op_5111"), val = tensor([1, 1])]; tensor hidden_states_89_pad_type_0 = const()[name = tensor("hidden_states_89_pad_type_0"), val = tensor("custom")]; tensor hidden_states_89_pad_0 = const()[name = tensor("hidden_states_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_0_proj_out_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214245440)))]; tensor down_blocks_2_attentions_0_proj_out_bias_to_fp16 = const()[name = tensor("down_blocks_2_attentions_0_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217522304)))]; tensor hidden_states_89_cast_fp16 = conv(bias = down_blocks_2_attentions_0_proj_out_bias_to_fp16, dilations = var_5111, groups = var_4569, pad = hidden_states_89_pad_0, pad_type = hidden_states_89_pad_type_0, strides = var_5109, weight = down_blocks_2_attentions_0_proj_out_weight_to_fp16, x = input_141_cast_fp16)[name = tensor("hidden_states_89_cast_fp16")]; tensor input_143_cast_fp16 = add(x = hidden_states_89_cast_fp16, y = hidden_states_77_cast_fp16)[name = tensor("input_143_cast_fp16")]; tensor reshape_60_shape_0 = const()[name = tensor("reshape_60_shape_0"), val = tensor([2, 32, 40, 16, 16])]; tensor reshape_60_cast_fp16 = reshape(shape = reshape_60_shape_0, x = input_143_cast_fp16)[name = tensor("reshape_60_cast_fp16")]; tensor reduce_mean_45_axes_0 = const()[name = tensor("reduce_mean_45_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_45_keep_dims_0 = const()[name = tensor("reduce_mean_45_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_45_cast_fp16 = reduce_mean(axes = reduce_mean_45_axes_0, keep_dims = reduce_mean_45_keep_dims_0, x = reshape_60_cast_fp16)[name = tensor("reduce_mean_45_cast_fp16")]; tensor sub_30_cast_fp16 = sub(x = reshape_60_cast_fp16, y = reduce_mean_45_cast_fp16)[name = tensor("sub_30_cast_fp16")]; tensor square_15_cast_fp16 = square(x = sub_30_cast_fp16)[name = tensor("square_15_cast_fp16")]; tensor reduce_mean_47_axes_0 = const()[name = tensor("reduce_mean_47_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_47_keep_dims_0 = const()[name = tensor("reduce_mean_47_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_47_cast_fp16 = reduce_mean(axes = reduce_mean_47_axes_0, keep_dims = reduce_mean_47_keep_dims_0, x = square_15_cast_fp16)[name = tensor("reduce_mean_47_cast_fp16")]; tensor add_30_y_0_to_fp16 = const()[name = tensor("add_30_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_30_cast_fp16 = add(x = reduce_mean_47_cast_fp16, y = add_30_y_0_to_fp16)[name = tensor("add_30_cast_fp16")]; tensor sqrt_15_cast_fp16 = sqrt(x = add_30_cast_fp16)[name = tensor("sqrt_15_cast_fp16")]; tensor real_div_15_cast_fp16 = real_div(x = sub_30_cast_fp16, y = sqrt_15_cast_fp16)[name = tensor("real_div_15_cast_fp16")]; tensor reshape_61_shape_0 = const()[name = tensor("reshape_61_shape_0"), val = tensor([2, 1280, 16, 16])]; tensor reshape_61_cast_fp16 = reshape(shape = reshape_61_shape_0, x = real_div_15_cast_fp16)[name = tensor("reshape_61_cast_fp16")]; tensor add_31_gamma_0_to_fp16 = const()[name = tensor("add_31_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217524928)))]; tensor add_31_beta_0_to_fp16 = const()[name = tensor("add_31_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217527552)))]; tensor add_31_epsilon_0_to_fp16 = const()[name = tensor("add_31_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_31_cast_fp16 = batch_norm(beta = add_31_beta_0_to_fp16, epsilon = add_31_epsilon_0_to_fp16, gamma = add_31_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_61_cast_fp16)[name = tensor("add_31_cast_fp16")]; tensor input_147_cast_fp16 = silu(x = add_31_cast_fp16)[name = tensor("input_147_cast_fp16")]; tensor var_5126 = const()[name = tensor("op_5126"), val = tensor([1, 1])]; tensor var_5128 = const()[name = tensor("op_5128"), val = tensor([1, 1])]; tensor hidden_states_91_pad_type_0 = const()[name = tensor("hidden_states_91_pad_type_0"), val = tensor("custom")]; tensor hidden_states_91_pad_0 = const()[name = tensor("hidden_states_91_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_2_resnets_1_conv1_weight_to_fp16 = const()[name = tensor("down_blocks_2_resnets_1_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217530176)))]; tensor down_blocks_2_resnets_1_conv1_bias_to_fp16 = const()[name = tensor("down_blocks_2_resnets_1_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247021440)))]; tensor hidden_states_91_cast_fp16 = conv(bias = down_blocks_2_resnets_1_conv1_bias_to_fp16, dilations = var_5128, groups = var_4569, pad = hidden_states_91_pad_0, pad_type = hidden_states_91_pad_type_0, strides = var_5126, weight = down_blocks_2_resnets_1_conv1_weight_to_fp16, x = input_147_cast_fp16)[name = tensor("hidden_states_91_cast_fp16")]; tensor var_5134 = const()[name = tensor("op_5134"), val = tensor([1, 1])]; tensor var_5136 = const()[name = tensor("op_5136"), val = tensor([1, 1])]; tensor temb_11_pad_type_0 = const()[name = tensor("temb_11_pad_type_0"), val = tensor("custom")]; tensor temb_11_pad_0 = const()[name = tensor("temb_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_resnets_1_time_emb_proj_weight_to_fp16 = const()[name = tensor("down_blocks_2_resnets_1_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247024064)))]; tensor down_blocks_2_resnets_1_time_emb_proj_bias_to_fp16 = const()[name = tensor("down_blocks_2_resnets_1_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250300928)))]; tensor temb_11_cast_fp16 = conv(bias = down_blocks_2_resnets_1_time_emb_proj_bias_to_fp16, dilations = var_5136, groups = var_4569, pad = temb_11_pad_0, pad_type = temb_11_pad_type_0, strides = var_5134, weight = down_blocks_2_resnets_1_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_11_cast_fp16")]; tensor input_151_cast_fp16 = add(x = hidden_states_91_cast_fp16, y = temb_11_cast_fp16)[name = tensor("input_151_cast_fp16")]; tensor reshape_64_shape_0 = const()[name = tensor("reshape_64_shape_0"), val = tensor([2, 32, 40, 16, 16])]; tensor reshape_64_cast_fp16 = reshape(shape = reshape_64_shape_0, x = input_151_cast_fp16)[name = tensor("reshape_64_cast_fp16")]; tensor reduce_mean_48_axes_0 = const()[name = tensor("reduce_mean_48_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_48_keep_dims_0 = const()[name = tensor("reduce_mean_48_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_48_cast_fp16 = reduce_mean(axes = reduce_mean_48_axes_0, keep_dims = reduce_mean_48_keep_dims_0, x = reshape_64_cast_fp16)[name = tensor("reduce_mean_48_cast_fp16")]; tensor sub_32_cast_fp16 = sub(x = reshape_64_cast_fp16, y = reduce_mean_48_cast_fp16)[name = tensor("sub_32_cast_fp16")]; tensor square_16_cast_fp16 = square(x = sub_32_cast_fp16)[name = tensor("square_16_cast_fp16")]; tensor reduce_mean_50_axes_0 = const()[name = tensor("reduce_mean_50_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_50_keep_dims_0 = const()[name = tensor("reduce_mean_50_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_50_cast_fp16 = reduce_mean(axes = reduce_mean_50_axes_0, keep_dims = reduce_mean_50_keep_dims_0, x = square_16_cast_fp16)[name = tensor("reduce_mean_50_cast_fp16")]; tensor add_32_y_0_to_fp16 = const()[name = tensor("add_32_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_32_cast_fp16 = add(x = reduce_mean_50_cast_fp16, y = add_32_y_0_to_fp16)[name = tensor("add_32_cast_fp16")]; tensor sqrt_16_cast_fp16 = sqrt(x = add_32_cast_fp16)[name = tensor("sqrt_16_cast_fp16")]; tensor real_div_16_cast_fp16 = real_div(x = sub_32_cast_fp16, y = sqrt_16_cast_fp16)[name = tensor("real_div_16_cast_fp16")]; tensor reshape_65_shape_0 = const()[name = tensor("reshape_65_shape_0"), val = tensor([2, 1280, 16, 16])]; tensor reshape_65_cast_fp16 = reshape(shape = reshape_65_shape_0, x = real_div_16_cast_fp16)[name = tensor("reshape_65_cast_fp16")]; tensor add_33_gamma_0_to_fp16 = const()[name = tensor("add_33_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250303552)))]; tensor add_33_beta_0_to_fp16 = const()[name = tensor("add_33_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250306176)))]; tensor add_33_epsilon_0_to_fp16 = const()[name = tensor("add_33_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_33_cast_fp16 = batch_norm(beta = add_33_beta_0_to_fp16, epsilon = add_33_epsilon_0_to_fp16, gamma = add_33_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_65_cast_fp16)[name = tensor("add_33_cast_fp16")]; tensor input_155_cast_fp16 = silu(x = add_33_cast_fp16)[name = tensor("input_155_cast_fp16")]; tensor var_5146 = const()[name = tensor("op_5146"), val = tensor([1, 1])]; tensor var_5148 = const()[name = tensor("op_5148"), val = tensor([1, 1])]; tensor hidden_states_93_pad_type_0 = const()[name = tensor("hidden_states_93_pad_type_0"), val = tensor("custom")]; tensor hidden_states_93_pad_0 = const()[name = tensor("hidden_states_93_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_2_resnets_1_conv2_weight_to_fp16 = const()[name = tensor("down_blocks_2_resnets_1_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250308800)))]; tensor down_blocks_2_resnets_1_conv2_bias_to_fp16 = const()[name = tensor("down_blocks_2_resnets_1_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279800064)))]; tensor hidden_states_93_cast_fp16 = conv(bias = down_blocks_2_resnets_1_conv2_bias_to_fp16, dilations = var_5148, groups = var_4569, pad = hidden_states_93_pad_0, pad_type = hidden_states_93_pad_type_0, strides = var_5146, weight = down_blocks_2_resnets_1_conv2_weight_to_fp16, x = input_155_cast_fp16)[name = tensor("hidden_states_93_cast_fp16")]; tensor hidden_states_95_cast_fp16 = add(x = input_143_cast_fp16, y = hidden_states_93_cast_fp16)[name = tensor("hidden_states_95_cast_fp16")]; tensor reshape_68_shape_0 = const()[name = tensor("reshape_68_shape_0"), val = tensor([2, 32, 40, 16, 16])]; tensor reshape_68_cast_fp16 = reshape(shape = reshape_68_shape_0, x = hidden_states_95_cast_fp16)[name = tensor("reshape_68_cast_fp16")]; tensor reduce_mean_51_axes_0 = const()[name = tensor("reduce_mean_51_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_51_keep_dims_0 = const()[name = tensor("reduce_mean_51_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_51_cast_fp16 = reduce_mean(axes = reduce_mean_51_axes_0, keep_dims = reduce_mean_51_keep_dims_0, x = reshape_68_cast_fp16)[name = tensor("reduce_mean_51_cast_fp16")]; tensor sub_34_cast_fp16 = sub(x = reshape_68_cast_fp16, y = reduce_mean_51_cast_fp16)[name = tensor("sub_34_cast_fp16")]; tensor square_17_cast_fp16 = square(x = sub_34_cast_fp16)[name = tensor("square_17_cast_fp16")]; tensor reduce_mean_53_axes_0 = const()[name = tensor("reduce_mean_53_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_53_keep_dims_0 = const()[name = tensor("reduce_mean_53_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_53_cast_fp16 = reduce_mean(axes = reduce_mean_53_axes_0, keep_dims = reduce_mean_53_keep_dims_0, x = square_17_cast_fp16)[name = tensor("reduce_mean_53_cast_fp16")]; tensor add_34_y_0_to_fp16 = const()[name = tensor("add_34_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_34_cast_fp16 = add(x = reduce_mean_53_cast_fp16, y = add_34_y_0_to_fp16)[name = tensor("add_34_cast_fp16")]; tensor sqrt_17_cast_fp16 = sqrt(x = add_34_cast_fp16)[name = tensor("sqrt_17_cast_fp16")]; tensor real_div_17_cast_fp16 = real_div(x = sub_34_cast_fp16, y = sqrt_17_cast_fp16)[name = tensor("real_div_17_cast_fp16")]; tensor reshape_69_shape_0 = const()[name = tensor("reshape_69_shape_0"), val = tensor([2, 1280, 16, 16])]; tensor reshape_69_cast_fp16 = reshape(shape = reshape_69_shape_0, x = real_div_17_cast_fp16)[name = tensor("reshape_69_cast_fp16")]; tensor add_35_gamma_0_to_fp16 = const()[name = tensor("add_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279802688)))]; tensor add_35_beta_0_to_fp16 = const()[name = tensor("add_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279805312)))]; tensor add_35_epsilon_0_to_fp16 = const()[name = tensor("add_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_35_cast_fp16 = batch_norm(beta = add_35_beta_0_to_fp16, epsilon = add_35_epsilon_0_to_fp16, gamma = add_35_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_69_cast_fp16)[name = tensor("add_35_cast_fp16")]; tensor var_5168 = const()[name = tensor("op_5168"), val = tensor([1, 1])]; tensor var_5170 = const()[name = tensor("op_5170"), val = tensor([1, 1])]; tensor hidden_states_97_pad_type_0 = const()[name = tensor("hidden_states_97_pad_type_0"), val = tensor("custom")]; tensor hidden_states_97_pad_0 = const()[name = tensor("hidden_states_97_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_1_proj_in_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279807936)))]; tensor down_blocks_2_attentions_1_proj_in_bias_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283084800)))]; tensor hidden_states_97_cast_fp16 = conv(bias = down_blocks_2_attentions_1_proj_in_bias_to_fp16, dilations = var_5170, groups = var_4569, pad = hidden_states_97_pad_0, pad_type = hidden_states_97_pad_type_0, strides = var_5168, weight = down_blocks_2_attentions_1_proj_in_weight_to_fp16, x = add_35_cast_fp16)[name = tensor("hidden_states_97_cast_fp16")]; tensor var_5175 = const()[name = tensor("op_5175"), val = tensor([2, 1280, 1, 256])]; tensor inputs_31_cast_fp16 = reshape(shape = var_5175, x = hidden_states_97_cast_fp16)[name = tensor("inputs_31_cast_fp16")]; tensor hidden_states_99_axes_0 = const()[name = tensor("hidden_states_99_axes_0"), val = tensor([1])]; tensor hidden_states_99_gamma_0_to_fp16 = const()[name = tensor("hidden_states_99_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283087424)))]; tensor hidden_states_99_beta_0_to_fp16 = const()[name = tensor("hidden_states_99_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283090048)))]; tensor var_5191_to_fp16 = const()[name = tensor("op_5191_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_99_cast_fp16 = layer_norm(axes = hidden_states_99_axes_0, beta = hidden_states_99_beta_0_to_fp16, epsilon = var_5191_to_fp16, gamma = hidden_states_99_gamma_0_to_fp16, x = inputs_31_cast_fp16)[name = tensor("hidden_states_99_cast_fp16")]; tensor var_5206 = const()[name = tensor("op_5206"), val = tensor([1, 1])]; tensor var_5208 = const()[name = tensor("op_5208"), val = tensor([1, 1])]; tensor q_21_pad_type_0 = const()[name = tensor("q_21_pad_type_0"), val = tensor("custom")]; tensor q_21_pad_0 = const()[name = tensor("q_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283092672)))]; tensor q_21_cast_fp16 = conv(dilations = var_5208, groups = var_4569, pad = q_21_pad_0, pad_type = q_21_pad_type_0, strides = var_5206, weight = down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_99_cast_fp16)[name = tensor("q_21_cast_fp16")]; tensor var_5212 = const()[name = tensor("op_5212"), val = tensor([1, 1])]; tensor var_5214 = const()[name = tensor("op_5214"), val = tensor([1, 1])]; tensor k_41_pad_type_0 = const()[name = tensor("k_41_pad_type_0"), val = tensor("custom")]; tensor k_41_pad_0 = const()[name = tensor("k_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286369536)))]; tensor k_41_cast_fp16 = conv(dilations = var_5214, groups = var_4569, pad = k_41_pad_0, pad_type = k_41_pad_type_0, strides = var_5212, weight = down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_99_cast_fp16)[name = tensor("k_41_cast_fp16")]; tensor var_5218 = const()[name = tensor("op_5218"), val = tensor([1, 1])]; tensor var_5220 = const()[name = tensor("op_5220"), val = tensor([1, 1])]; tensor v_21_pad_type_0 = const()[name = tensor("v_21_pad_type_0"), val = tensor("custom")]; tensor v_21_pad_0 = const()[name = tensor("v_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289646400)))]; tensor v_21_cast_fp16 = conv(dilations = var_5220, groups = var_4569, pad = v_21_pad_0, pad_type = v_21_pad_type_0, strides = var_5218, weight = down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_99_cast_fp16)[name = tensor("v_21_cast_fp16")]; tensor var_5224_begin_0 = const()[name = tensor("op_5224_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5224_end_0 = const()[name = tensor("op_5224_end_0"), val = tensor([2, 160, 1, 256])]; tensor var_5224_end_mask_0 = const()[name = tensor("op_5224_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5224_cast_fp16 = slice_by_index(begin = var_5224_begin_0, end = var_5224_end_0, end_mask = var_5224_end_mask_0, x = q_21_cast_fp16)[name = tensor("op_5224_cast_fp16")]; tensor var_5228_begin_0 = const()[name = tensor("op_5228_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_5228_end_0 = const()[name = tensor("op_5228_end_0"), val = tensor([2, 320, 1, 256])]; tensor var_5228_end_mask_0 = const()[name = tensor("op_5228_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5228_cast_fp16 = slice_by_index(begin = var_5228_begin_0, end = var_5228_end_0, end_mask = var_5228_end_mask_0, x = q_21_cast_fp16)[name = tensor("op_5228_cast_fp16")]; tensor var_5232_begin_0 = const()[name = tensor("op_5232_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_5232_end_0 = const()[name = tensor("op_5232_end_0"), val = tensor([2, 480, 1, 256])]; tensor var_5232_end_mask_0 = const()[name = tensor("op_5232_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5232_cast_fp16 = slice_by_index(begin = var_5232_begin_0, end = var_5232_end_0, end_mask = var_5232_end_mask_0, x = q_21_cast_fp16)[name = tensor("op_5232_cast_fp16")]; tensor var_5236_begin_0 = const()[name = tensor("op_5236_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_5236_end_0 = const()[name = tensor("op_5236_end_0"), val = tensor([2, 640, 1, 256])]; tensor var_5236_end_mask_0 = const()[name = tensor("op_5236_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5236_cast_fp16 = slice_by_index(begin = var_5236_begin_0, end = var_5236_end_0, end_mask = var_5236_end_mask_0, x = q_21_cast_fp16)[name = tensor("op_5236_cast_fp16")]; tensor var_5240_begin_0 = const()[name = tensor("op_5240_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_5240_end_0 = const()[name = tensor("op_5240_end_0"), val = tensor([2, 800, 1, 256])]; tensor var_5240_end_mask_0 = const()[name = tensor("op_5240_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5240_cast_fp16 = slice_by_index(begin = var_5240_begin_0, end = var_5240_end_0, end_mask = var_5240_end_mask_0, x = q_21_cast_fp16)[name = tensor("op_5240_cast_fp16")]; tensor var_5244_begin_0 = const()[name = tensor("op_5244_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_5244_end_0 = const()[name = tensor("op_5244_end_0"), val = tensor([2, 960, 1, 256])]; tensor var_5244_end_mask_0 = const()[name = tensor("op_5244_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5244_cast_fp16 = slice_by_index(begin = var_5244_begin_0, end = var_5244_end_0, end_mask = var_5244_end_mask_0, x = q_21_cast_fp16)[name = tensor("op_5244_cast_fp16")]; tensor var_5248_begin_0 = const()[name = tensor("op_5248_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_5248_end_0 = const()[name = tensor("op_5248_end_0"), val = tensor([2, 1120, 1, 256])]; tensor var_5248_end_mask_0 = const()[name = tensor("op_5248_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5248_cast_fp16 = slice_by_index(begin = var_5248_begin_0, end = var_5248_end_0, end_mask = var_5248_end_mask_0, x = q_21_cast_fp16)[name = tensor("op_5248_cast_fp16")]; tensor var_5252_begin_0 = const()[name = tensor("op_5252_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_5252_end_0 = const()[name = tensor("op_5252_end_0"), val = tensor([2, 1280, 1, 256])]; tensor var_5252_end_mask_0 = const()[name = tensor("op_5252_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5252_cast_fp16 = slice_by_index(begin = var_5252_begin_0, end = var_5252_end_0, end_mask = var_5252_end_mask_0, x = q_21_cast_fp16)[name = tensor("op_5252_cast_fp16")]; tensor k_43_perm_0 = const()[name = tensor("k_43_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_5259_begin_0 = const()[name = tensor("op_5259_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5259_end_0 = const()[name = tensor("op_5259_end_0"), val = tensor([2, 256, 1, 160])]; tensor var_5259_end_mask_0 = const()[name = tensor("op_5259_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_21 = transpose(perm = k_43_perm_0, x = k_41_cast_fp16)[name = tensor("transpose_21")]; tensor var_5259_cast_fp16 = slice_by_index(begin = var_5259_begin_0, end = var_5259_end_0, end_mask = var_5259_end_mask_0, x = transpose_21)[name = tensor("op_5259_cast_fp16")]; tensor var_5263_begin_0 = const()[name = tensor("op_5263_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_5263_end_0 = const()[name = tensor("op_5263_end_0"), val = tensor([2, 256, 1, 320])]; tensor var_5263_end_mask_0 = const()[name = tensor("op_5263_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5263_cast_fp16 = slice_by_index(begin = var_5263_begin_0, end = var_5263_end_0, end_mask = var_5263_end_mask_0, x = transpose_21)[name = tensor("op_5263_cast_fp16")]; tensor var_5267_begin_0 = const()[name = tensor("op_5267_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_5267_end_0 = const()[name = tensor("op_5267_end_0"), val = tensor([2, 256, 1, 480])]; tensor var_5267_end_mask_0 = const()[name = tensor("op_5267_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5267_cast_fp16 = slice_by_index(begin = var_5267_begin_0, end = var_5267_end_0, end_mask = var_5267_end_mask_0, x = transpose_21)[name = tensor("op_5267_cast_fp16")]; tensor var_5271_begin_0 = const()[name = tensor("op_5271_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_5271_end_0 = const()[name = tensor("op_5271_end_0"), val = tensor([2, 256, 1, 640])]; tensor var_5271_end_mask_0 = const()[name = tensor("op_5271_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5271_cast_fp16 = slice_by_index(begin = var_5271_begin_0, end = var_5271_end_0, end_mask = var_5271_end_mask_0, x = transpose_21)[name = tensor("op_5271_cast_fp16")]; tensor var_5275_begin_0 = const()[name = tensor("op_5275_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_5275_end_0 = const()[name = tensor("op_5275_end_0"), val = tensor([2, 256, 1, 800])]; tensor var_5275_end_mask_0 = const()[name = tensor("op_5275_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5275_cast_fp16 = slice_by_index(begin = var_5275_begin_0, end = var_5275_end_0, end_mask = var_5275_end_mask_0, x = transpose_21)[name = tensor("op_5275_cast_fp16")]; tensor var_5279_begin_0 = const()[name = tensor("op_5279_begin_0"), val = tensor([0, 0, 0, 800])]; tensor var_5279_end_0 = const()[name = tensor("op_5279_end_0"), val = tensor([2, 256, 1, 960])]; tensor var_5279_end_mask_0 = const()[name = tensor("op_5279_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5279_cast_fp16 = slice_by_index(begin = var_5279_begin_0, end = var_5279_end_0, end_mask = var_5279_end_mask_0, x = transpose_21)[name = tensor("op_5279_cast_fp16")]; tensor var_5283_begin_0 = const()[name = tensor("op_5283_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_5283_end_0 = const()[name = tensor("op_5283_end_0"), val = tensor([2, 256, 1, 1120])]; tensor var_5283_end_mask_0 = const()[name = tensor("op_5283_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5283_cast_fp16 = slice_by_index(begin = var_5283_begin_0, end = var_5283_end_0, end_mask = var_5283_end_mask_0, x = transpose_21)[name = tensor("op_5283_cast_fp16")]; tensor var_5287_begin_0 = const()[name = tensor("op_5287_begin_0"), val = tensor([0, 0, 0, 1120])]; tensor var_5287_end_0 = const()[name = tensor("op_5287_end_0"), val = tensor([2, 256, 1, 1280])]; tensor var_5287_end_mask_0 = const()[name = tensor("op_5287_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5287_cast_fp16 = slice_by_index(begin = var_5287_begin_0, end = var_5287_end_0, end_mask = var_5287_end_mask_0, x = transpose_21)[name = tensor("op_5287_cast_fp16")]; tensor var_5289_begin_0 = const()[name = tensor("op_5289_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5289_end_0 = const()[name = tensor("op_5289_end_0"), val = tensor([2, 160, 1, 256])]; tensor var_5289_end_mask_0 = const()[name = tensor("op_5289_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5289_cast_fp16 = slice_by_index(begin = var_5289_begin_0, end = var_5289_end_0, end_mask = var_5289_end_mask_0, x = v_21_cast_fp16)[name = tensor("op_5289_cast_fp16")]; tensor var_5293_begin_0 = const()[name = tensor("op_5293_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_5293_end_0 = const()[name = tensor("op_5293_end_0"), val = tensor([2, 320, 1, 256])]; tensor var_5293_end_mask_0 = const()[name = tensor("op_5293_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5293_cast_fp16 = slice_by_index(begin = var_5293_begin_0, end = var_5293_end_0, end_mask = var_5293_end_mask_0, x = v_21_cast_fp16)[name = tensor("op_5293_cast_fp16")]; tensor var_5297_begin_0 = const()[name = tensor("op_5297_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_5297_end_0 = const()[name = tensor("op_5297_end_0"), val = tensor([2, 480, 1, 256])]; tensor var_5297_end_mask_0 = const()[name = tensor("op_5297_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5297_cast_fp16 = slice_by_index(begin = var_5297_begin_0, end = var_5297_end_0, end_mask = var_5297_end_mask_0, x = v_21_cast_fp16)[name = tensor("op_5297_cast_fp16")]; tensor var_5301_begin_0 = const()[name = tensor("op_5301_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_5301_end_0 = const()[name = tensor("op_5301_end_0"), val = tensor([2, 640, 1, 256])]; tensor var_5301_end_mask_0 = const()[name = tensor("op_5301_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5301_cast_fp16 = slice_by_index(begin = var_5301_begin_0, end = var_5301_end_0, end_mask = var_5301_end_mask_0, x = v_21_cast_fp16)[name = tensor("op_5301_cast_fp16")]; tensor var_5305_begin_0 = const()[name = tensor("op_5305_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_5305_end_0 = const()[name = tensor("op_5305_end_0"), val = tensor([2, 800, 1, 256])]; tensor var_5305_end_mask_0 = const()[name = tensor("op_5305_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5305_cast_fp16 = slice_by_index(begin = var_5305_begin_0, end = var_5305_end_0, end_mask = var_5305_end_mask_0, x = v_21_cast_fp16)[name = tensor("op_5305_cast_fp16")]; tensor var_5309_begin_0 = const()[name = tensor("op_5309_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_5309_end_0 = const()[name = tensor("op_5309_end_0"), val = tensor([2, 960, 1, 256])]; tensor var_5309_end_mask_0 = const()[name = tensor("op_5309_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5309_cast_fp16 = slice_by_index(begin = var_5309_begin_0, end = var_5309_end_0, end_mask = var_5309_end_mask_0, x = v_21_cast_fp16)[name = tensor("op_5309_cast_fp16")]; tensor var_5313_begin_0 = const()[name = tensor("op_5313_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_5313_end_0 = const()[name = tensor("op_5313_end_0"), val = tensor([2, 1120, 1, 256])]; tensor var_5313_end_mask_0 = const()[name = tensor("op_5313_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5313_cast_fp16 = slice_by_index(begin = var_5313_begin_0, end = var_5313_end_0, end_mask = var_5313_end_mask_0, x = v_21_cast_fp16)[name = tensor("op_5313_cast_fp16")]; tensor var_5317_begin_0 = const()[name = tensor("op_5317_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_5317_end_0 = const()[name = tensor("op_5317_end_0"), val = tensor([2, 1280, 1, 256])]; tensor var_5317_end_mask_0 = const()[name = tensor("op_5317_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5317_cast_fp16 = slice_by_index(begin = var_5317_begin_0, end = var_5317_end_0, end_mask = var_5317_end_mask_0, x = v_21_cast_fp16)[name = tensor("op_5317_cast_fp16")]; tensor var_5321_equation_0 = const()[name = tensor("op_5321_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5321_cast_fp16 = einsum(equation = var_5321_equation_0, values = (var_5259_cast_fp16, var_5224_cast_fp16))[name = tensor("op_5321_cast_fp16")]; tensor var_5322_to_fp16 = const()[name = tensor("op_5322_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_33_cast_fp16 = mul(x = var_5321_cast_fp16, y = var_5322_to_fp16)[name = tensor("aw_33_cast_fp16")]; tensor var_5325_equation_0 = const()[name = tensor("op_5325_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5325_cast_fp16 = einsum(equation = var_5325_equation_0, values = (var_5263_cast_fp16, var_5228_cast_fp16))[name = tensor("op_5325_cast_fp16")]; tensor var_5326_to_fp16 = const()[name = tensor("op_5326_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_35_cast_fp16 = mul(x = var_5325_cast_fp16, y = var_5326_to_fp16)[name = tensor("aw_35_cast_fp16")]; tensor var_5329_equation_0 = const()[name = tensor("op_5329_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5329_cast_fp16 = einsum(equation = var_5329_equation_0, values = (var_5267_cast_fp16, var_5232_cast_fp16))[name = tensor("op_5329_cast_fp16")]; tensor var_5330_to_fp16 = const()[name = tensor("op_5330_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_37_cast_fp16 = mul(x = var_5329_cast_fp16, y = var_5330_to_fp16)[name = tensor("aw_37_cast_fp16")]; tensor var_5333_equation_0 = const()[name = tensor("op_5333_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5333_cast_fp16 = einsum(equation = var_5333_equation_0, values = (var_5271_cast_fp16, var_5236_cast_fp16))[name = tensor("op_5333_cast_fp16")]; tensor var_5334_to_fp16 = const()[name = tensor("op_5334_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_39_cast_fp16 = mul(x = var_5333_cast_fp16, y = var_5334_to_fp16)[name = tensor("aw_39_cast_fp16")]; tensor var_5337_equation_0 = const()[name = tensor("op_5337_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5337_cast_fp16 = einsum(equation = var_5337_equation_0, values = (var_5275_cast_fp16, var_5240_cast_fp16))[name = tensor("op_5337_cast_fp16")]; tensor var_5338_to_fp16 = const()[name = tensor("op_5338_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_41_cast_fp16 = mul(x = var_5337_cast_fp16, y = var_5338_to_fp16)[name = tensor("aw_41_cast_fp16")]; tensor var_5341_equation_0 = const()[name = tensor("op_5341_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5341_cast_fp16 = einsum(equation = var_5341_equation_0, values = (var_5279_cast_fp16, var_5244_cast_fp16))[name = tensor("op_5341_cast_fp16")]; tensor var_5342_to_fp16 = const()[name = tensor("op_5342_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_43_cast_fp16 = mul(x = var_5341_cast_fp16, y = var_5342_to_fp16)[name = tensor("aw_43_cast_fp16")]; tensor var_5345_equation_0 = const()[name = tensor("op_5345_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5345_cast_fp16 = einsum(equation = var_5345_equation_0, values = (var_5283_cast_fp16, var_5248_cast_fp16))[name = tensor("op_5345_cast_fp16")]; tensor var_5346_to_fp16 = const()[name = tensor("op_5346_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_45_cast_fp16 = mul(x = var_5345_cast_fp16, y = var_5346_to_fp16)[name = tensor("aw_45_cast_fp16")]; tensor var_5349_equation_0 = const()[name = tensor("op_5349_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5349_cast_fp16 = einsum(equation = var_5349_equation_0, values = (var_5287_cast_fp16, var_5252_cast_fp16))[name = tensor("op_5349_cast_fp16")]; tensor var_5350_to_fp16 = const()[name = tensor("op_5350_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_47_cast_fp16 = mul(x = var_5349_cast_fp16, y = var_5350_to_fp16)[name = tensor("aw_47_cast_fp16")]; tensor var_5352_cast_fp16 = softmax(axis = var_4569, x = aw_33_cast_fp16)[name = tensor("op_5352_cast_fp16")]; tensor var_5353_cast_fp16 = softmax(axis = var_4569, x = aw_35_cast_fp16)[name = tensor("op_5353_cast_fp16")]; tensor var_5354_cast_fp16 = softmax(axis = var_4569, x = aw_37_cast_fp16)[name = tensor("op_5354_cast_fp16")]; tensor var_5355_cast_fp16 = softmax(axis = var_4569, x = aw_39_cast_fp16)[name = tensor("op_5355_cast_fp16")]; tensor var_5356_cast_fp16 = softmax(axis = var_4569, x = aw_41_cast_fp16)[name = tensor("op_5356_cast_fp16")]; tensor var_5357_cast_fp16 = softmax(axis = var_4569, x = aw_43_cast_fp16)[name = tensor("op_5357_cast_fp16")]; tensor var_5358_cast_fp16 = softmax(axis = var_4569, x = aw_45_cast_fp16)[name = tensor("op_5358_cast_fp16")]; tensor var_5359_cast_fp16 = softmax(axis = var_4569, x = aw_47_cast_fp16)[name = tensor("op_5359_cast_fp16")]; tensor var_5361_equation_0 = const()[name = tensor("op_5361_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5361_cast_fp16 = einsum(equation = var_5361_equation_0, values = (var_5289_cast_fp16, var_5352_cast_fp16))[name = tensor("op_5361_cast_fp16")]; tensor var_5363_equation_0 = const()[name = tensor("op_5363_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5363_cast_fp16 = einsum(equation = var_5363_equation_0, values = (var_5293_cast_fp16, var_5353_cast_fp16))[name = tensor("op_5363_cast_fp16")]; tensor var_5365_equation_0 = const()[name = tensor("op_5365_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5365_cast_fp16 = einsum(equation = var_5365_equation_0, values = (var_5297_cast_fp16, var_5354_cast_fp16))[name = tensor("op_5365_cast_fp16")]; tensor var_5367_equation_0 = const()[name = tensor("op_5367_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5367_cast_fp16 = einsum(equation = var_5367_equation_0, values = (var_5301_cast_fp16, var_5355_cast_fp16))[name = tensor("op_5367_cast_fp16")]; tensor var_5369_equation_0 = const()[name = tensor("op_5369_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5369_cast_fp16 = einsum(equation = var_5369_equation_0, values = (var_5305_cast_fp16, var_5356_cast_fp16))[name = tensor("op_5369_cast_fp16")]; tensor var_5371_equation_0 = const()[name = tensor("op_5371_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5371_cast_fp16 = einsum(equation = var_5371_equation_0, values = (var_5309_cast_fp16, var_5357_cast_fp16))[name = tensor("op_5371_cast_fp16")]; tensor var_5373_equation_0 = const()[name = tensor("op_5373_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5373_cast_fp16 = einsum(equation = var_5373_equation_0, values = (var_5313_cast_fp16, var_5358_cast_fp16))[name = tensor("op_5373_cast_fp16")]; tensor var_5375_equation_0 = const()[name = tensor("op_5375_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5375_cast_fp16 = einsum(equation = var_5375_equation_0, values = (var_5317_cast_fp16, var_5359_cast_fp16))[name = tensor("op_5375_cast_fp16")]; tensor input_159_interleave_0 = const()[name = tensor("input_159_interleave_0"), val = tensor(false)]; tensor input_159_cast_fp16 = concat(axis = var_4569, interleave = input_159_interleave_0, values = (var_5361_cast_fp16, var_5363_cast_fp16, var_5365_cast_fp16, var_5367_cast_fp16, var_5369_cast_fp16, var_5371_cast_fp16, var_5373_cast_fp16, var_5375_cast_fp16))[name = tensor("input_159_cast_fp16")]; tensor var_5381 = const()[name = tensor("op_5381"), val = tensor([1, 1])]; tensor var_5383 = const()[name = tensor("op_5383"), val = tensor([1, 1])]; tensor var_5385_pad_type_0 = const()[name = tensor("op_5385_pad_type_0"), val = tensor("custom")]; tensor var_5385_pad_0 = const()[name = tensor("op_5385_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(292923264)))]; tensor down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(296200128)))]; tensor var_5385_cast_fp16 = conv(bias = down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_5383, groups = var_4569, pad = var_5385_pad_0, pad_type = var_5385_pad_type_0, strides = var_5381, weight = down_blocks_2_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_159_cast_fp16)[name = tensor("op_5385_cast_fp16")]; tensor inputs_33_cast_fp16 = add(x = var_5385_cast_fp16, y = inputs_31_cast_fp16)[name = tensor("inputs_33_cast_fp16")]; tensor hidden_states_101_axes_0 = const()[name = tensor("hidden_states_101_axes_0"), val = tensor([1])]; tensor hidden_states_101_gamma_0_to_fp16 = const()[name = tensor("hidden_states_101_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(296202752)))]; tensor hidden_states_101_beta_0_to_fp16 = const()[name = tensor("hidden_states_101_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(296205376)))]; tensor var_5395_to_fp16 = const()[name = tensor("op_5395_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_101_cast_fp16 = layer_norm(axes = hidden_states_101_axes_0, beta = hidden_states_101_beta_0_to_fp16, epsilon = var_5395_to_fp16, gamma = hidden_states_101_gamma_0_to_fp16, x = inputs_33_cast_fp16)[name = tensor("hidden_states_101_cast_fp16")]; tensor var_5410 = const()[name = tensor("op_5410"), val = tensor([1, 1])]; tensor var_5412 = const()[name = tensor("op_5412"), val = tensor([1, 1])]; tensor q_23_pad_type_0 = const()[name = tensor("q_23_pad_type_0"), val = tensor("custom")]; tensor q_23_pad_0 = const()[name = tensor("q_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(296208000)))]; tensor q_23_cast_fp16 = conv(dilations = var_5412, groups = var_4569, pad = q_23_pad_0, pad_type = q_23_pad_type_0, strides = var_5410, weight = down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_101_cast_fp16)[name = tensor("q_23_cast_fp16")]; tensor var_5416 = const()[name = tensor("op_5416"), val = tensor([1, 1])]; tensor var_5418 = const()[name = tensor("op_5418"), val = tensor([1, 1])]; tensor k_45_pad_type_0 = const()[name = tensor("k_45_pad_type_0"), val = tensor("custom")]; tensor k_45_pad_0 = const()[name = tensor("k_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299484864)))]; tensor k_45_cast_fp16 = conv(dilations = var_5418, groups = var_4569, pad = k_45_pad_0, pad_type = k_45_pad_type_0, strides = var_5416, weight = down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_45_cast_fp16")]; tensor var_5422 = const()[name = tensor("op_5422"), val = tensor([1, 1])]; tensor var_5424 = const()[name = tensor("op_5424"), val = tensor([1, 1])]; tensor v_23_pad_type_0 = const()[name = tensor("v_23_pad_type_0"), val = tensor("custom")]; tensor v_23_pad_0 = const()[name = tensor("v_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(301451008)))]; tensor v_23_cast_fp16 = conv(dilations = var_5424, groups = var_4569, pad = v_23_pad_0, pad_type = v_23_pad_type_0, strides = var_5422, weight = down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_23_cast_fp16")]; tensor var_5428_begin_0 = const()[name = tensor("op_5428_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5428_end_0 = const()[name = tensor("op_5428_end_0"), val = tensor([2, 160, 1, 256])]; tensor var_5428_end_mask_0 = const()[name = tensor("op_5428_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5428_cast_fp16 = slice_by_index(begin = var_5428_begin_0, end = var_5428_end_0, end_mask = var_5428_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_5428_cast_fp16")]; tensor var_5432_begin_0 = const()[name = tensor("op_5432_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_5432_end_0 = const()[name = tensor("op_5432_end_0"), val = tensor([2, 320, 1, 256])]; tensor var_5432_end_mask_0 = const()[name = tensor("op_5432_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5432_cast_fp16 = slice_by_index(begin = var_5432_begin_0, end = var_5432_end_0, end_mask = var_5432_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_5432_cast_fp16")]; tensor var_5436_begin_0 = const()[name = tensor("op_5436_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_5436_end_0 = const()[name = tensor("op_5436_end_0"), val = tensor([2, 480, 1, 256])]; tensor var_5436_end_mask_0 = const()[name = tensor("op_5436_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5436_cast_fp16 = slice_by_index(begin = var_5436_begin_0, end = var_5436_end_0, end_mask = var_5436_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_5436_cast_fp16")]; tensor var_5440_begin_0 = const()[name = tensor("op_5440_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_5440_end_0 = const()[name = tensor("op_5440_end_0"), val = tensor([2, 640, 1, 256])]; tensor var_5440_end_mask_0 = const()[name = tensor("op_5440_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5440_cast_fp16 = slice_by_index(begin = var_5440_begin_0, end = var_5440_end_0, end_mask = var_5440_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_5440_cast_fp16")]; tensor var_5444_begin_0 = const()[name = tensor("op_5444_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_5444_end_0 = const()[name = tensor("op_5444_end_0"), val = tensor([2, 800, 1, 256])]; tensor var_5444_end_mask_0 = const()[name = tensor("op_5444_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5444_cast_fp16 = slice_by_index(begin = var_5444_begin_0, end = var_5444_end_0, end_mask = var_5444_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_5444_cast_fp16")]; tensor var_5448_begin_0 = const()[name = tensor("op_5448_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_5448_end_0 = const()[name = tensor("op_5448_end_0"), val = tensor([2, 960, 1, 256])]; tensor var_5448_end_mask_0 = const()[name = tensor("op_5448_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5448_cast_fp16 = slice_by_index(begin = var_5448_begin_0, end = var_5448_end_0, end_mask = var_5448_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_5448_cast_fp16")]; tensor var_5452_begin_0 = const()[name = tensor("op_5452_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_5452_end_0 = const()[name = tensor("op_5452_end_0"), val = tensor([2, 1120, 1, 256])]; tensor var_5452_end_mask_0 = const()[name = tensor("op_5452_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5452_cast_fp16 = slice_by_index(begin = var_5452_begin_0, end = var_5452_end_0, end_mask = var_5452_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_5452_cast_fp16")]; tensor var_5456_begin_0 = const()[name = tensor("op_5456_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_5456_end_0 = const()[name = tensor("op_5456_end_0"), val = tensor([2, 1280, 1, 256])]; tensor var_5456_end_mask_0 = const()[name = tensor("op_5456_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5456_cast_fp16 = slice_by_index(begin = var_5456_begin_0, end = var_5456_end_0, end_mask = var_5456_end_mask_0, x = q_23_cast_fp16)[name = tensor("op_5456_cast_fp16")]; tensor k_47_perm_0 = const()[name = tensor("k_47_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_5463_begin_0 = const()[name = tensor("op_5463_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5463_end_0 = const()[name = tensor("op_5463_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_5463_end_mask_0 = const()[name = tensor("op_5463_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_20 = transpose(perm = k_47_perm_0, x = k_45_cast_fp16)[name = tensor("transpose_20")]; tensor var_5463_cast_fp16 = slice_by_index(begin = var_5463_begin_0, end = var_5463_end_0, end_mask = var_5463_end_mask_0, x = transpose_20)[name = tensor("op_5463_cast_fp16")]; tensor var_5467_begin_0 = const()[name = tensor("op_5467_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_5467_end_0 = const()[name = tensor("op_5467_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_5467_end_mask_0 = const()[name = tensor("op_5467_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5467_cast_fp16 = slice_by_index(begin = var_5467_begin_0, end = var_5467_end_0, end_mask = var_5467_end_mask_0, x = transpose_20)[name = tensor("op_5467_cast_fp16")]; tensor var_5471_begin_0 = const()[name = tensor("op_5471_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_5471_end_0 = const()[name = tensor("op_5471_end_0"), val = tensor([2, 77, 1, 480])]; tensor var_5471_end_mask_0 = const()[name = tensor("op_5471_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5471_cast_fp16 = slice_by_index(begin = var_5471_begin_0, end = var_5471_end_0, end_mask = var_5471_end_mask_0, x = transpose_20)[name = tensor("op_5471_cast_fp16")]; tensor var_5475_begin_0 = const()[name = tensor("op_5475_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_5475_end_0 = const()[name = tensor("op_5475_end_0"), val = tensor([2, 77, 1, 640])]; tensor var_5475_end_mask_0 = const()[name = tensor("op_5475_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5475_cast_fp16 = slice_by_index(begin = var_5475_begin_0, end = var_5475_end_0, end_mask = var_5475_end_mask_0, x = transpose_20)[name = tensor("op_5475_cast_fp16")]; tensor var_5479_begin_0 = const()[name = tensor("op_5479_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_5479_end_0 = const()[name = tensor("op_5479_end_0"), val = tensor([2, 77, 1, 800])]; tensor var_5479_end_mask_0 = const()[name = tensor("op_5479_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5479_cast_fp16 = slice_by_index(begin = var_5479_begin_0, end = var_5479_end_0, end_mask = var_5479_end_mask_0, x = transpose_20)[name = tensor("op_5479_cast_fp16")]; tensor var_5483_begin_0 = const()[name = tensor("op_5483_begin_0"), val = tensor([0, 0, 0, 800])]; tensor var_5483_end_0 = const()[name = tensor("op_5483_end_0"), val = tensor([2, 77, 1, 960])]; tensor var_5483_end_mask_0 = const()[name = tensor("op_5483_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5483_cast_fp16 = slice_by_index(begin = var_5483_begin_0, end = var_5483_end_0, end_mask = var_5483_end_mask_0, x = transpose_20)[name = tensor("op_5483_cast_fp16")]; tensor var_5487_begin_0 = const()[name = tensor("op_5487_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_5487_end_0 = const()[name = tensor("op_5487_end_0"), val = tensor([2, 77, 1, 1120])]; tensor var_5487_end_mask_0 = const()[name = tensor("op_5487_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5487_cast_fp16 = slice_by_index(begin = var_5487_begin_0, end = var_5487_end_0, end_mask = var_5487_end_mask_0, x = transpose_20)[name = tensor("op_5487_cast_fp16")]; tensor var_5491_begin_0 = const()[name = tensor("op_5491_begin_0"), val = tensor([0, 0, 0, 1120])]; tensor var_5491_end_0 = const()[name = tensor("op_5491_end_0"), val = tensor([2, 77, 1, 1280])]; tensor var_5491_end_mask_0 = const()[name = tensor("op_5491_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5491_cast_fp16 = slice_by_index(begin = var_5491_begin_0, end = var_5491_end_0, end_mask = var_5491_end_mask_0, x = transpose_20)[name = tensor("op_5491_cast_fp16")]; tensor var_5493_begin_0 = const()[name = tensor("op_5493_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5493_end_0 = const()[name = tensor("op_5493_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_5493_end_mask_0 = const()[name = tensor("op_5493_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5493_cast_fp16 = slice_by_index(begin = var_5493_begin_0, end = var_5493_end_0, end_mask = var_5493_end_mask_0, x = v_23_cast_fp16)[name = tensor("op_5493_cast_fp16")]; tensor var_5497_begin_0 = const()[name = tensor("op_5497_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_5497_end_0 = const()[name = tensor("op_5497_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_5497_end_mask_0 = const()[name = tensor("op_5497_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5497_cast_fp16 = slice_by_index(begin = var_5497_begin_0, end = var_5497_end_0, end_mask = var_5497_end_mask_0, x = v_23_cast_fp16)[name = tensor("op_5497_cast_fp16")]; tensor var_5501_begin_0 = const()[name = tensor("op_5501_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_5501_end_0 = const()[name = tensor("op_5501_end_0"), val = tensor([2, 480, 1, 77])]; tensor var_5501_end_mask_0 = const()[name = tensor("op_5501_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5501_cast_fp16 = slice_by_index(begin = var_5501_begin_0, end = var_5501_end_0, end_mask = var_5501_end_mask_0, x = v_23_cast_fp16)[name = tensor("op_5501_cast_fp16")]; tensor var_5505_begin_0 = const()[name = tensor("op_5505_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_5505_end_0 = const()[name = tensor("op_5505_end_0"), val = tensor([2, 640, 1, 77])]; tensor var_5505_end_mask_0 = const()[name = tensor("op_5505_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5505_cast_fp16 = slice_by_index(begin = var_5505_begin_0, end = var_5505_end_0, end_mask = var_5505_end_mask_0, x = v_23_cast_fp16)[name = tensor("op_5505_cast_fp16")]; tensor var_5509_begin_0 = const()[name = tensor("op_5509_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_5509_end_0 = const()[name = tensor("op_5509_end_0"), val = tensor([2, 800, 1, 77])]; tensor var_5509_end_mask_0 = const()[name = tensor("op_5509_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5509_cast_fp16 = slice_by_index(begin = var_5509_begin_0, end = var_5509_end_0, end_mask = var_5509_end_mask_0, x = v_23_cast_fp16)[name = tensor("op_5509_cast_fp16")]; tensor var_5513_begin_0 = const()[name = tensor("op_5513_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_5513_end_0 = const()[name = tensor("op_5513_end_0"), val = tensor([2, 960, 1, 77])]; tensor var_5513_end_mask_0 = const()[name = tensor("op_5513_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5513_cast_fp16 = slice_by_index(begin = var_5513_begin_0, end = var_5513_end_0, end_mask = var_5513_end_mask_0, x = v_23_cast_fp16)[name = tensor("op_5513_cast_fp16")]; tensor var_5517_begin_0 = const()[name = tensor("op_5517_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_5517_end_0 = const()[name = tensor("op_5517_end_0"), val = tensor([2, 1120, 1, 77])]; tensor var_5517_end_mask_0 = const()[name = tensor("op_5517_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5517_cast_fp16 = slice_by_index(begin = var_5517_begin_0, end = var_5517_end_0, end_mask = var_5517_end_mask_0, x = v_23_cast_fp16)[name = tensor("op_5517_cast_fp16")]; tensor var_5521_begin_0 = const()[name = tensor("op_5521_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_5521_end_0 = const()[name = tensor("op_5521_end_0"), val = tensor([2, 1280, 1, 77])]; tensor var_5521_end_mask_0 = const()[name = tensor("op_5521_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5521_cast_fp16 = slice_by_index(begin = var_5521_begin_0, end = var_5521_end_0, end_mask = var_5521_end_mask_0, x = v_23_cast_fp16)[name = tensor("op_5521_cast_fp16")]; tensor var_5525_equation_0 = const()[name = tensor("op_5525_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5525_cast_fp16 = einsum(equation = var_5525_equation_0, values = (var_5463_cast_fp16, var_5428_cast_fp16))[name = tensor("op_5525_cast_fp16")]; tensor var_5526_to_fp16 = const()[name = tensor("op_5526_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_49_cast_fp16 = mul(x = var_5525_cast_fp16, y = var_5526_to_fp16)[name = tensor("aw_49_cast_fp16")]; tensor var_5529_equation_0 = const()[name = tensor("op_5529_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5529_cast_fp16 = einsum(equation = var_5529_equation_0, values = (var_5467_cast_fp16, var_5432_cast_fp16))[name = tensor("op_5529_cast_fp16")]; tensor var_5530_to_fp16 = const()[name = tensor("op_5530_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_51_cast_fp16 = mul(x = var_5529_cast_fp16, y = var_5530_to_fp16)[name = tensor("aw_51_cast_fp16")]; tensor var_5533_equation_0 = const()[name = tensor("op_5533_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5533_cast_fp16 = einsum(equation = var_5533_equation_0, values = (var_5471_cast_fp16, var_5436_cast_fp16))[name = tensor("op_5533_cast_fp16")]; tensor var_5534_to_fp16 = const()[name = tensor("op_5534_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_53_cast_fp16 = mul(x = var_5533_cast_fp16, y = var_5534_to_fp16)[name = tensor("aw_53_cast_fp16")]; tensor var_5537_equation_0 = const()[name = tensor("op_5537_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5537_cast_fp16 = einsum(equation = var_5537_equation_0, values = (var_5475_cast_fp16, var_5440_cast_fp16))[name = tensor("op_5537_cast_fp16")]; tensor var_5538_to_fp16 = const()[name = tensor("op_5538_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_55_cast_fp16 = mul(x = var_5537_cast_fp16, y = var_5538_to_fp16)[name = tensor("aw_55_cast_fp16")]; tensor var_5541_equation_0 = const()[name = tensor("op_5541_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5541_cast_fp16 = einsum(equation = var_5541_equation_0, values = (var_5479_cast_fp16, var_5444_cast_fp16))[name = tensor("op_5541_cast_fp16")]; tensor var_5542_to_fp16 = const()[name = tensor("op_5542_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_57_cast_fp16 = mul(x = var_5541_cast_fp16, y = var_5542_to_fp16)[name = tensor("aw_57_cast_fp16")]; tensor var_5545_equation_0 = const()[name = tensor("op_5545_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5545_cast_fp16 = einsum(equation = var_5545_equation_0, values = (var_5483_cast_fp16, var_5448_cast_fp16))[name = tensor("op_5545_cast_fp16")]; tensor var_5546_to_fp16 = const()[name = tensor("op_5546_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_59_cast_fp16 = mul(x = var_5545_cast_fp16, y = var_5546_to_fp16)[name = tensor("aw_59_cast_fp16")]; tensor var_5549_equation_0 = const()[name = tensor("op_5549_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5549_cast_fp16 = einsum(equation = var_5549_equation_0, values = (var_5487_cast_fp16, var_5452_cast_fp16))[name = tensor("op_5549_cast_fp16")]; tensor var_5550_to_fp16 = const()[name = tensor("op_5550_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_61_cast_fp16 = mul(x = var_5549_cast_fp16, y = var_5550_to_fp16)[name = tensor("aw_61_cast_fp16")]; tensor var_5553_equation_0 = const()[name = tensor("op_5553_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5553_cast_fp16 = einsum(equation = var_5553_equation_0, values = (var_5491_cast_fp16, var_5456_cast_fp16))[name = tensor("op_5553_cast_fp16")]; tensor var_5554_to_fp16 = const()[name = tensor("op_5554_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_63_cast_fp16 = mul(x = var_5553_cast_fp16, y = var_5554_to_fp16)[name = tensor("aw_63_cast_fp16")]; tensor var_5556_cast_fp16 = softmax(axis = var_4569, x = aw_49_cast_fp16)[name = tensor("op_5556_cast_fp16")]; tensor var_5557_cast_fp16 = softmax(axis = var_4569, x = aw_51_cast_fp16)[name = tensor("op_5557_cast_fp16")]; tensor var_5558_cast_fp16 = softmax(axis = var_4569, x = aw_53_cast_fp16)[name = tensor("op_5558_cast_fp16")]; tensor var_5559_cast_fp16 = softmax(axis = var_4569, x = aw_55_cast_fp16)[name = tensor("op_5559_cast_fp16")]; tensor var_5560_cast_fp16 = softmax(axis = var_4569, x = aw_57_cast_fp16)[name = tensor("op_5560_cast_fp16")]; tensor var_5561_cast_fp16 = softmax(axis = var_4569, x = aw_59_cast_fp16)[name = tensor("op_5561_cast_fp16")]; tensor var_5562_cast_fp16 = softmax(axis = var_4569, x = aw_61_cast_fp16)[name = tensor("op_5562_cast_fp16")]; tensor var_5563_cast_fp16 = softmax(axis = var_4569, x = aw_63_cast_fp16)[name = tensor("op_5563_cast_fp16")]; tensor var_5565_equation_0 = const()[name = tensor("op_5565_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5565_cast_fp16 = einsum(equation = var_5565_equation_0, values = (var_5493_cast_fp16, var_5556_cast_fp16))[name = tensor("op_5565_cast_fp16")]; tensor var_5567_equation_0 = const()[name = tensor("op_5567_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5567_cast_fp16 = einsum(equation = var_5567_equation_0, values = (var_5497_cast_fp16, var_5557_cast_fp16))[name = tensor("op_5567_cast_fp16")]; tensor var_5569_equation_0 = const()[name = tensor("op_5569_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5569_cast_fp16 = einsum(equation = var_5569_equation_0, values = (var_5501_cast_fp16, var_5558_cast_fp16))[name = tensor("op_5569_cast_fp16")]; tensor var_5571_equation_0 = const()[name = tensor("op_5571_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5571_cast_fp16 = einsum(equation = var_5571_equation_0, values = (var_5505_cast_fp16, var_5559_cast_fp16))[name = tensor("op_5571_cast_fp16")]; tensor var_5573_equation_0 = const()[name = tensor("op_5573_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5573_cast_fp16 = einsum(equation = var_5573_equation_0, values = (var_5509_cast_fp16, var_5560_cast_fp16))[name = tensor("op_5573_cast_fp16")]; tensor var_5575_equation_0 = const()[name = tensor("op_5575_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5575_cast_fp16 = einsum(equation = var_5575_equation_0, values = (var_5513_cast_fp16, var_5561_cast_fp16))[name = tensor("op_5575_cast_fp16")]; tensor var_5577_equation_0 = const()[name = tensor("op_5577_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5577_cast_fp16 = einsum(equation = var_5577_equation_0, values = (var_5517_cast_fp16, var_5562_cast_fp16))[name = tensor("op_5577_cast_fp16")]; tensor var_5579_equation_0 = const()[name = tensor("op_5579_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_5579_cast_fp16 = einsum(equation = var_5579_equation_0, values = (var_5521_cast_fp16, var_5563_cast_fp16))[name = tensor("op_5579_cast_fp16")]; tensor input_161_interleave_0 = const()[name = tensor("input_161_interleave_0"), val = tensor(false)]; tensor input_161_cast_fp16 = concat(axis = var_4569, interleave = input_161_interleave_0, values = (var_5565_cast_fp16, var_5567_cast_fp16, var_5569_cast_fp16, var_5571_cast_fp16, var_5573_cast_fp16, var_5575_cast_fp16, var_5577_cast_fp16, var_5579_cast_fp16))[name = tensor("input_161_cast_fp16")]; tensor var_5585 = const()[name = tensor("op_5585"), val = tensor([1, 1])]; tensor var_5587 = const()[name = tensor("op_5587"), val = tensor([1, 1])]; tensor var_5589_pad_type_0 = const()[name = tensor("op_5589_pad_type_0"), val = tensor("custom")]; tensor var_5589_pad_0 = const()[name = tensor("op_5589_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(303417152)))]; tensor down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(306694016)))]; tensor var_5589_cast_fp16 = conv(bias = down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_5587, groups = var_4569, pad = var_5589_pad_0, pad_type = var_5589_pad_type_0, strides = var_5585, weight = down_blocks_2_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_161_cast_fp16)[name = tensor("op_5589_cast_fp16")]; tensor inputs_35_cast_fp16 = add(x = var_5589_cast_fp16, y = inputs_33_cast_fp16)[name = tensor("inputs_35_cast_fp16")]; tensor input_163_axes_0 = const()[name = tensor("input_163_axes_0"), val = tensor([1])]; tensor input_163_gamma_0_to_fp16 = const()[name = tensor("input_163_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(306696640)))]; tensor input_163_beta_0_to_fp16 = const()[name = tensor("input_163_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(306699264)))]; tensor var_5599_to_fp16 = const()[name = tensor("op_5599_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_163_cast_fp16 = layer_norm(axes = input_163_axes_0, beta = input_163_beta_0_to_fp16, epsilon = var_5599_to_fp16, gamma = input_163_gamma_0_to_fp16, x = inputs_35_cast_fp16)[name = tensor("input_163_cast_fp16")]; tensor var_5615 = const()[name = tensor("op_5615"), val = tensor([1, 1])]; tensor var_5617 = const()[name = tensor("op_5617"), val = tensor([1, 1])]; tensor var_5619_pad_type_0 = const()[name = tensor("op_5619_pad_type_0"), val = tensor("custom")]; tensor var_5619_pad_0 = const()[name = tensor("op_5619_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(306701888)))]; tensor down_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332916352)))]; tensor var_5619_cast_fp16 = conv(bias = down_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_5617, groups = var_4569, pad = var_5619_pad_0, pad_type = var_5619_pad_type_0, strides = var_5615, weight = down_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_163_cast_fp16)[name = tensor("op_5619_cast_fp16")]; tensor var_5620_split_sizes_0 = const()[name = tensor("op_5620_split_sizes_0"), val = tensor([5120, 5120])]; tensor var_5620_axis_0 = const()[name = tensor("op_5620_axis_0"), val = tensor(1)]; tensor var_5620_cast_fp16_0, tensor var_5620_cast_fp16_1 = split(axis = var_5620_axis_0, split_sizes = var_5620_split_sizes_0, x = var_5619_cast_fp16)[name = tensor("op_5620_cast_fp16")]; tensor var_5622_mode_0 = const()[name = tensor("op_5622_mode_0"), val = tensor("EXACT")]; tensor var_5622_cast_fp16 = gelu(mode = var_5622_mode_0, x = var_5620_cast_fp16_1)[name = tensor("op_5622_cast_fp16")]; tensor input_165_cast_fp16 = mul(x = var_5620_cast_fp16_0, y = var_5622_cast_fp16)[name = tensor("input_165_cast_fp16")]; tensor var_5626 = const()[name = tensor("op_5626"), val = tensor([1, 1])]; tensor var_5628 = const()[name = tensor("op_5628"), val = tensor([1, 1])]; tensor var_5630_pad_type_0 = const()[name = tensor("op_5630_pad_type_0"), val = tensor("custom")]; tensor var_5630_pad_0 = const()[name = tensor("op_5630_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332936896)))]; tensor down_blocks_2_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346044160)))]; tensor var_5630_cast_fp16 = conv(bias = down_blocks_2_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_5628, groups = var_4569, pad = var_5630_pad_0, pad_type = var_5630_pad_type_0, strides = var_5626, weight = down_blocks_2_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_165_cast_fp16)[name = tensor("op_5630_cast_fp16")]; tensor hidden_states_105_cast_fp16 = add(x = var_5630_cast_fp16, y = inputs_35_cast_fp16)[name = tensor("hidden_states_105_cast_fp16")]; tensor var_5632 = const()[name = tensor("op_5632"), val = tensor([2, 1280, 16, 16])]; tensor input_167_cast_fp16 = reshape(shape = var_5632, x = hidden_states_105_cast_fp16)[name = tensor("input_167_cast_fp16")]; tensor var_5636 = const()[name = tensor("op_5636"), val = tensor([1, 1])]; tensor var_5638 = const()[name = tensor("op_5638"), val = tensor([1, 1])]; tensor hidden_states_107_pad_type_0 = const()[name = tensor("hidden_states_107_pad_type_0"), val = tensor("custom")]; tensor hidden_states_107_pad_0 = const()[name = tensor("hidden_states_107_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_2_attentions_1_proj_out_weight_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346046784)))]; tensor down_blocks_2_attentions_1_proj_out_bias_to_fp16 = const()[name = tensor("down_blocks_2_attentions_1_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349323648)))]; tensor hidden_states_107_cast_fp16 = conv(bias = down_blocks_2_attentions_1_proj_out_bias_to_fp16, dilations = var_5638, groups = var_4569, pad = hidden_states_107_pad_0, pad_type = hidden_states_107_pad_type_0, strides = var_5636, weight = down_blocks_2_attentions_1_proj_out_weight_to_fp16, x = input_167_cast_fp16)[name = tensor("hidden_states_107_cast_fp16")]; tensor input_169_cast_fp16 = add(x = hidden_states_107_cast_fp16, y = hidden_states_95_cast_fp16)[name = tensor("input_169_cast_fp16")]; tensor var_5645 = const()[name = tensor("op_5645"), val = tensor([2, 2])]; tensor var_5647 = const()[name = tensor("op_5647"), val = tensor([1, 1])]; tensor input_171_pad_type_0 = const()[name = tensor("input_171_pad_type_0"), val = tensor("custom")]; tensor input_171_pad_0 = const()[name = tensor("input_171_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_2_downsamplers_0_conv_weight_to_fp16 = const()[name = tensor("down_blocks_2_downsamplers_0_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349326272)))]; tensor down_blocks_2_downsamplers_0_conv_bias_to_fp16 = const()[name = tensor("down_blocks_2_downsamplers_0_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(378817536)))]; tensor input_171_cast_fp16 = conv(bias = down_blocks_2_downsamplers_0_conv_bias_to_fp16, dilations = var_5647, groups = var_4569, pad = input_171_pad_0, pad_type = input_171_pad_type_0, strides = var_5645, weight = down_blocks_2_downsamplers_0_conv_weight_to_fp16, x = input_169_cast_fp16)[name = tensor("input_171_cast_fp16")]; tensor var_5659 = const()[name = tensor("op_5659"), val = tensor(1)]; tensor reshape_72_shape_0 = const()[name = tensor("reshape_72_shape_0"), val = tensor([2, 32, 40, 8, 8])]; tensor reshape_72_cast_fp16 = reshape(shape = reshape_72_shape_0, x = input_171_cast_fp16)[name = tensor("reshape_72_cast_fp16")]; tensor reduce_mean_54_axes_0 = const()[name = tensor("reduce_mean_54_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_54_keep_dims_0 = const()[name = tensor("reduce_mean_54_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_54_cast_fp16 = reduce_mean(axes = reduce_mean_54_axes_0, keep_dims = reduce_mean_54_keep_dims_0, x = reshape_72_cast_fp16)[name = tensor("reduce_mean_54_cast_fp16")]; tensor sub_36_cast_fp16 = sub(x = reshape_72_cast_fp16, y = reduce_mean_54_cast_fp16)[name = tensor("sub_36_cast_fp16")]; tensor square_18_cast_fp16 = square(x = sub_36_cast_fp16)[name = tensor("square_18_cast_fp16")]; tensor reduce_mean_56_axes_0 = const()[name = tensor("reduce_mean_56_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_56_keep_dims_0 = const()[name = tensor("reduce_mean_56_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_56_cast_fp16 = reduce_mean(axes = reduce_mean_56_axes_0, keep_dims = reduce_mean_56_keep_dims_0, x = square_18_cast_fp16)[name = tensor("reduce_mean_56_cast_fp16")]; tensor add_36_y_0_to_fp16 = const()[name = tensor("add_36_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_36_cast_fp16 = add(x = reduce_mean_56_cast_fp16, y = add_36_y_0_to_fp16)[name = tensor("add_36_cast_fp16")]; tensor sqrt_18_cast_fp16 = sqrt(x = add_36_cast_fp16)[name = tensor("sqrt_18_cast_fp16")]; tensor real_div_18_cast_fp16 = real_div(x = sub_36_cast_fp16, y = sqrt_18_cast_fp16)[name = tensor("real_div_18_cast_fp16")]; tensor reshape_73_shape_0 = const()[name = tensor("reshape_73_shape_0"), val = tensor([2, 1280, 8, 8])]; tensor reshape_73_cast_fp16 = reshape(shape = reshape_73_shape_0, x = real_div_18_cast_fp16)[name = tensor("reshape_73_cast_fp16")]; tensor add_37_gamma_0_to_fp16 = const()[name = tensor("add_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(378820160)))]; tensor add_37_beta_0_to_fp16 = const()[name = tensor("add_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(378822784)))]; tensor add_37_epsilon_0_to_fp16 = const()[name = tensor("add_37_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_37_cast_fp16 = batch_norm(beta = add_37_beta_0_to_fp16, epsilon = add_37_epsilon_0_to_fp16, gamma = add_37_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_73_cast_fp16)[name = tensor("add_37_cast_fp16")]; tensor input_175_cast_fp16 = silu(x = add_37_cast_fp16)[name = tensor("input_175_cast_fp16")]; tensor var_5675 = const()[name = tensor("op_5675"), val = tensor([1, 1])]; tensor var_5677 = const()[name = tensor("op_5677"), val = tensor([1, 1])]; tensor hidden_states_109_pad_type_0 = const()[name = tensor("hidden_states_109_pad_type_0"), val = tensor("custom")]; tensor hidden_states_109_pad_0 = const()[name = tensor("hidden_states_109_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_3_resnets_0_conv1_weight_to_fp16 = const()[name = tensor("down_blocks_3_resnets_0_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(378825408)))]; tensor down_blocks_3_resnets_0_conv1_bias_to_fp16 = const()[name = tensor("down_blocks_3_resnets_0_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408316672)))]; tensor hidden_states_109_cast_fp16 = conv(bias = down_blocks_3_resnets_0_conv1_bias_to_fp16, dilations = var_5677, groups = var_5659, pad = hidden_states_109_pad_0, pad_type = hidden_states_109_pad_type_0, strides = var_5675, weight = down_blocks_3_resnets_0_conv1_weight_to_fp16, x = input_175_cast_fp16)[name = tensor("hidden_states_109_cast_fp16")]; tensor var_5683 = const()[name = tensor("op_5683"), val = tensor([1, 1])]; tensor var_5685 = const()[name = tensor("op_5685"), val = tensor([1, 1])]; tensor temb_13_pad_type_0 = const()[name = tensor("temb_13_pad_type_0"), val = tensor("custom")]; tensor temb_13_pad_0 = const()[name = tensor("temb_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_3_resnets_0_time_emb_proj_weight_to_fp16 = const()[name = tensor("down_blocks_3_resnets_0_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408319296)))]; tensor down_blocks_3_resnets_0_time_emb_proj_bias_to_fp16 = const()[name = tensor("down_blocks_3_resnets_0_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411596160)))]; tensor temb_13_cast_fp16 = conv(bias = down_blocks_3_resnets_0_time_emb_proj_bias_to_fp16, dilations = var_5685, groups = var_5659, pad = temb_13_pad_0, pad_type = temb_13_pad_type_0, strides = var_5683, weight = down_blocks_3_resnets_0_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_13_cast_fp16")]; tensor input_179_cast_fp16 = add(x = hidden_states_109_cast_fp16, y = temb_13_cast_fp16)[name = tensor("input_179_cast_fp16")]; tensor reshape_76_shape_0 = const()[name = tensor("reshape_76_shape_0"), val = tensor([2, 32, 40, 8, 8])]; tensor reshape_76_cast_fp16 = reshape(shape = reshape_76_shape_0, x = input_179_cast_fp16)[name = tensor("reshape_76_cast_fp16")]; tensor reduce_mean_57_axes_0 = const()[name = tensor("reduce_mean_57_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_57_keep_dims_0 = const()[name = tensor("reduce_mean_57_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_57_cast_fp16 = reduce_mean(axes = reduce_mean_57_axes_0, keep_dims = reduce_mean_57_keep_dims_0, x = reshape_76_cast_fp16)[name = tensor("reduce_mean_57_cast_fp16")]; tensor sub_38_cast_fp16 = sub(x = reshape_76_cast_fp16, y = reduce_mean_57_cast_fp16)[name = tensor("sub_38_cast_fp16")]; tensor square_19_cast_fp16 = square(x = sub_38_cast_fp16)[name = tensor("square_19_cast_fp16")]; tensor reduce_mean_59_axes_0 = const()[name = tensor("reduce_mean_59_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_59_keep_dims_0 = const()[name = tensor("reduce_mean_59_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_59_cast_fp16 = reduce_mean(axes = reduce_mean_59_axes_0, keep_dims = reduce_mean_59_keep_dims_0, x = square_19_cast_fp16)[name = tensor("reduce_mean_59_cast_fp16")]; tensor add_38_y_0_to_fp16 = const()[name = tensor("add_38_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_38_cast_fp16 = add(x = reduce_mean_59_cast_fp16, y = add_38_y_0_to_fp16)[name = tensor("add_38_cast_fp16")]; tensor sqrt_19_cast_fp16 = sqrt(x = add_38_cast_fp16)[name = tensor("sqrt_19_cast_fp16")]; tensor real_div_19_cast_fp16 = real_div(x = sub_38_cast_fp16, y = sqrt_19_cast_fp16)[name = tensor("real_div_19_cast_fp16")]; tensor reshape_77_shape_0 = const()[name = tensor("reshape_77_shape_0"), val = tensor([2, 1280, 8, 8])]; tensor reshape_77_cast_fp16 = reshape(shape = reshape_77_shape_0, x = real_div_19_cast_fp16)[name = tensor("reshape_77_cast_fp16")]; tensor add_39_gamma_0_to_fp16 = const()[name = tensor("add_39_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411598784)))]; tensor add_39_beta_0_to_fp16 = const()[name = tensor("add_39_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411601408)))]; tensor add_39_epsilon_0_to_fp16 = const()[name = tensor("add_39_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_39_cast_fp16 = batch_norm(beta = add_39_beta_0_to_fp16, epsilon = add_39_epsilon_0_to_fp16, gamma = add_39_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_77_cast_fp16)[name = tensor("add_39_cast_fp16")]; tensor input_183_cast_fp16 = silu(x = add_39_cast_fp16)[name = tensor("input_183_cast_fp16")]; tensor var_5695 = const()[name = tensor("op_5695"), val = tensor([1, 1])]; tensor var_5697 = const()[name = tensor("op_5697"), val = tensor([1, 1])]; tensor hidden_states_111_pad_type_0 = const()[name = tensor("hidden_states_111_pad_type_0"), val = tensor("custom")]; tensor hidden_states_111_pad_0 = const()[name = tensor("hidden_states_111_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_3_resnets_0_conv2_weight_to_fp16 = const()[name = tensor("down_blocks_3_resnets_0_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411604032)))]; tensor down_blocks_3_resnets_0_conv2_bias_to_fp16 = const()[name = tensor("down_blocks_3_resnets_0_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(441095296)))]; tensor hidden_states_111_cast_fp16 = conv(bias = down_blocks_3_resnets_0_conv2_bias_to_fp16, dilations = var_5697, groups = var_5659, pad = hidden_states_111_pad_0, pad_type = hidden_states_111_pad_type_0, strides = var_5695, weight = down_blocks_3_resnets_0_conv2_weight_to_fp16, x = input_183_cast_fp16)[name = tensor("hidden_states_111_cast_fp16")]; tensor input_185_cast_fp16 = add(x = input_171_cast_fp16, y = hidden_states_111_cast_fp16)[name = tensor("input_185_cast_fp16")]; tensor reshape_80_shape_0 = const()[name = tensor("reshape_80_shape_0"), val = tensor([2, 32, 40, 8, 8])]; tensor reshape_80_cast_fp16 = reshape(shape = reshape_80_shape_0, x = input_185_cast_fp16)[name = tensor("reshape_80_cast_fp16")]; tensor reduce_mean_60_axes_0 = const()[name = tensor("reduce_mean_60_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_60_keep_dims_0 = const()[name = tensor("reduce_mean_60_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_60_cast_fp16 = reduce_mean(axes = reduce_mean_60_axes_0, keep_dims = reduce_mean_60_keep_dims_0, x = reshape_80_cast_fp16)[name = tensor("reduce_mean_60_cast_fp16")]; tensor sub_40_cast_fp16 = sub(x = reshape_80_cast_fp16, y = reduce_mean_60_cast_fp16)[name = tensor("sub_40_cast_fp16")]; tensor square_20_cast_fp16 = square(x = sub_40_cast_fp16)[name = tensor("square_20_cast_fp16")]; tensor reduce_mean_62_axes_0 = const()[name = tensor("reduce_mean_62_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_62_keep_dims_0 = const()[name = tensor("reduce_mean_62_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_62_cast_fp16 = reduce_mean(axes = reduce_mean_62_axes_0, keep_dims = reduce_mean_62_keep_dims_0, x = square_20_cast_fp16)[name = tensor("reduce_mean_62_cast_fp16")]; tensor add_40_y_0_to_fp16 = const()[name = tensor("add_40_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_40_cast_fp16 = add(x = reduce_mean_62_cast_fp16, y = add_40_y_0_to_fp16)[name = tensor("add_40_cast_fp16")]; tensor sqrt_20_cast_fp16 = sqrt(x = add_40_cast_fp16)[name = tensor("sqrt_20_cast_fp16")]; tensor real_div_20_cast_fp16 = real_div(x = sub_40_cast_fp16, y = sqrt_20_cast_fp16)[name = tensor("real_div_20_cast_fp16")]; tensor reshape_81_shape_0 = const()[name = tensor("reshape_81_shape_0"), val = tensor([2, 1280, 8, 8])]; tensor reshape_81_cast_fp16 = reshape(shape = reshape_81_shape_0, x = real_div_20_cast_fp16)[name = tensor("reshape_81_cast_fp16")]; tensor add_41_gamma_0_to_fp16 = const()[name = tensor("add_41_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(441097920)))]; tensor add_41_beta_0_to_fp16 = const()[name = tensor("add_41_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(441100544)))]; tensor add_41_epsilon_0_to_fp16 = const()[name = tensor("add_41_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_41_cast_fp16 = batch_norm(beta = add_41_beta_0_to_fp16, epsilon = add_41_epsilon_0_to_fp16, gamma = add_41_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_81_cast_fp16)[name = tensor("add_41_cast_fp16")]; tensor input_189_cast_fp16 = silu(x = add_41_cast_fp16)[name = tensor("input_189_cast_fp16")]; tensor var_5712 = const()[name = tensor("op_5712"), val = tensor([1, 1])]; tensor var_5714 = const()[name = tensor("op_5714"), val = tensor([1, 1])]; tensor hidden_states_113_pad_type_0 = const()[name = tensor("hidden_states_113_pad_type_0"), val = tensor("custom")]; tensor hidden_states_113_pad_0 = const()[name = tensor("hidden_states_113_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_3_resnets_1_conv1_weight_to_fp16 = const()[name = tensor("down_blocks_3_resnets_1_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(441103168)))]; tensor down_blocks_3_resnets_1_conv1_bias_to_fp16 = const()[name = tensor("down_blocks_3_resnets_1_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(470594432)))]; tensor hidden_states_113_cast_fp16 = conv(bias = down_blocks_3_resnets_1_conv1_bias_to_fp16, dilations = var_5714, groups = var_5659, pad = hidden_states_113_pad_0, pad_type = hidden_states_113_pad_type_0, strides = var_5712, weight = down_blocks_3_resnets_1_conv1_weight_to_fp16, x = input_189_cast_fp16)[name = tensor("hidden_states_113_cast_fp16")]; tensor var_5720 = const()[name = tensor("op_5720"), val = tensor([1, 1])]; tensor var_5722 = const()[name = tensor("op_5722"), val = tensor([1, 1])]; tensor temb_15_pad_type_0 = const()[name = tensor("temb_15_pad_type_0"), val = tensor("custom")]; tensor temb_15_pad_0 = const()[name = tensor("temb_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor down_blocks_3_resnets_1_time_emb_proj_weight_to_fp16 = const()[name = tensor("down_blocks_3_resnets_1_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(470597056)))]; tensor down_blocks_3_resnets_1_time_emb_proj_bias_to_fp16 = const()[name = tensor("down_blocks_3_resnets_1_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(473873920)))]; tensor temb_15_cast_fp16 = conv(bias = down_blocks_3_resnets_1_time_emb_proj_bias_to_fp16, dilations = var_5722, groups = var_5659, pad = temb_15_pad_0, pad_type = temb_15_pad_type_0, strides = var_5720, weight = down_blocks_3_resnets_1_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_15_cast_fp16")]; tensor input_193_cast_fp16 = add(x = hidden_states_113_cast_fp16, y = temb_15_cast_fp16)[name = tensor("input_193_cast_fp16")]; tensor reshape_84_shape_0 = const()[name = tensor("reshape_84_shape_0"), val = tensor([2, 32, 40, 8, 8])]; tensor reshape_84_cast_fp16 = reshape(shape = reshape_84_shape_0, x = input_193_cast_fp16)[name = tensor("reshape_84_cast_fp16")]; tensor reduce_mean_63_axes_0 = const()[name = tensor("reduce_mean_63_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_63_keep_dims_0 = const()[name = tensor("reduce_mean_63_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_63_cast_fp16 = reduce_mean(axes = reduce_mean_63_axes_0, keep_dims = reduce_mean_63_keep_dims_0, x = reshape_84_cast_fp16)[name = tensor("reduce_mean_63_cast_fp16")]; tensor sub_42_cast_fp16 = sub(x = reshape_84_cast_fp16, y = reduce_mean_63_cast_fp16)[name = tensor("sub_42_cast_fp16")]; tensor square_21_cast_fp16 = square(x = sub_42_cast_fp16)[name = tensor("square_21_cast_fp16")]; tensor reduce_mean_65_axes_0 = const()[name = tensor("reduce_mean_65_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_65_keep_dims_0 = const()[name = tensor("reduce_mean_65_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_65_cast_fp16 = reduce_mean(axes = reduce_mean_65_axes_0, keep_dims = reduce_mean_65_keep_dims_0, x = square_21_cast_fp16)[name = tensor("reduce_mean_65_cast_fp16")]; tensor add_42_y_0_to_fp16 = const()[name = tensor("add_42_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_42_cast_fp16 = add(x = reduce_mean_65_cast_fp16, y = add_42_y_0_to_fp16)[name = tensor("add_42_cast_fp16")]; tensor sqrt_21_cast_fp16 = sqrt(x = add_42_cast_fp16)[name = tensor("sqrt_21_cast_fp16")]; tensor real_div_21_cast_fp16 = real_div(x = sub_42_cast_fp16, y = sqrt_21_cast_fp16)[name = tensor("real_div_21_cast_fp16")]; tensor reshape_85_shape_0 = const()[name = tensor("reshape_85_shape_0"), val = tensor([2, 1280, 8, 8])]; tensor reshape_85_cast_fp16 = reshape(shape = reshape_85_shape_0, x = real_div_21_cast_fp16)[name = tensor("reshape_85_cast_fp16")]; tensor add_43_gamma_0_to_fp16 = const()[name = tensor("add_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(473876544)))]; tensor add_43_beta_0_to_fp16 = const()[name = tensor("add_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(473879168)))]; tensor add_43_epsilon_0_to_fp16 = const()[name = tensor("add_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_43_cast_fp16 = batch_norm(beta = add_43_beta_0_to_fp16, epsilon = add_43_epsilon_0_to_fp16, gamma = add_43_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_85_cast_fp16)[name = tensor("add_43_cast_fp16")]; tensor input_197_cast_fp16 = silu(x = add_43_cast_fp16)[name = tensor("input_197_cast_fp16")]; tensor var_5732 = const()[name = tensor("op_5732"), val = tensor([1, 1])]; tensor var_5734 = const()[name = tensor("op_5734"), val = tensor([1, 1])]; tensor hidden_states_115_pad_type_0 = const()[name = tensor("hidden_states_115_pad_type_0"), val = tensor("custom")]; tensor hidden_states_115_pad_0 = const()[name = tensor("hidden_states_115_pad_0"), val = tensor([1, 1, 1, 1])]; tensor down_blocks_3_resnets_1_conv2_weight_to_fp16 = const()[name = tensor("down_blocks_3_resnets_1_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(473881792)))]; tensor down_blocks_3_resnets_1_conv2_bias_to_fp16 = const()[name = tensor("down_blocks_3_resnets_1_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(503373056)))]; tensor hidden_states_115_cast_fp16 = conv(bias = down_blocks_3_resnets_1_conv2_bias_to_fp16, dilations = var_5734, groups = var_5659, pad = hidden_states_115_pad_0, pad_type = hidden_states_115_pad_type_0, strides = var_5732, weight = down_blocks_3_resnets_1_conv2_weight_to_fp16, x = input_197_cast_fp16)[name = tensor("hidden_states_115_cast_fp16")]; tensor input_199_cast_fp16 = add(x = input_185_cast_fp16, y = hidden_states_115_cast_fp16)[name = tensor("input_199_cast_fp16")]; tensor var_5762 = const()[name = tensor("op_5762"), val = tensor(1)]; tensor reshape_88_shape_0 = const()[name = tensor("reshape_88_shape_0"), val = tensor([2, 32, 40, 8, 8])]; tensor reshape_88_cast_fp16 = reshape(shape = reshape_88_shape_0, x = input_199_cast_fp16)[name = tensor("reshape_88_cast_fp16")]; tensor reduce_mean_66_axes_0 = const()[name = tensor("reduce_mean_66_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_66_keep_dims_0 = const()[name = tensor("reduce_mean_66_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_66_cast_fp16 = reduce_mean(axes = reduce_mean_66_axes_0, keep_dims = reduce_mean_66_keep_dims_0, x = reshape_88_cast_fp16)[name = tensor("reduce_mean_66_cast_fp16")]; tensor sub_44_cast_fp16 = sub(x = reshape_88_cast_fp16, y = reduce_mean_66_cast_fp16)[name = tensor("sub_44_cast_fp16")]; tensor square_22_cast_fp16 = square(x = sub_44_cast_fp16)[name = tensor("square_22_cast_fp16")]; tensor reduce_mean_68_axes_0 = const()[name = tensor("reduce_mean_68_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_68_keep_dims_0 = const()[name = tensor("reduce_mean_68_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_68_cast_fp16 = reduce_mean(axes = reduce_mean_68_axes_0, keep_dims = reduce_mean_68_keep_dims_0, x = square_22_cast_fp16)[name = tensor("reduce_mean_68_cast_fp16")]; tensor add_44_y_0_to_fp16 = const()[name = tensor("add_44_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_44_cast_fp16 = add(x = reduce_mean_68_cast_fp16, y = add_44_y_0_to_fp16)[name = tensor("add_44_cast_fp16")]; tensor sqrt_22_cast_fp16 = sqrt(x = add_44_cast_fp16)[name = tensor("sqrt_22_cast_fp16")]; tensor real_div_22_cast_fp16 = real_div(x = sub_44_cast_fp16, y = sqrt_22_cast_fp16)[name = tensor("real_div_22_cast_fp16")]; tensor reshape_89_shape_0 = const()[name = tensor("reshape_89_shape_0"), val = tensor([2, 1280, 8, 8])]; tensor reshape_89_cast_fp16 = reshape(shape = reshape_89_shape_0, x = real_div_22_cast_fp16)[name = tensor("reshape_89_cast_fp16")]; tensor add_45_gamma_0_to_fp16 = const()[name = tensor("add_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(503375680)))]; tensor add_45_beta_0_to_fp16 = const()[name = tensor("add_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(503378304)))]; tensor add_45_epsilon_0_to_fp16 = const()[name = tensor("add_45_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_45_cast_fp16 = batch_norm(beta = add_45_beta_0_to_fp16, epsilon = add_45_epsilon_0_to_fp16, gamma = add_45_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_89_cast_fp16)[name = tensor("add_45_cast_fp16")]; tensor input_203_cast_fp16 = silu(x = add_45_cast_fp16)[name = tensor("input_203_cast_fp16")]; tensor var_5780 = const()[name = tensor("op_5780"), val = tensor([1, 1])]; tensor var_5782 = const()[name = tensor("op_5782"), val = tensor([1, 1])]; tensor hidden_states_117_pad_type_0 = const()[name = tensor("hidden_states_117_pad_type_0"), val = tensor("custom")]; tensor hidden_states_117_pad_0 = const()[name = tensor("hidden_states_117_pad_0"), val = tensor([1, 1, 1, 1])]; tensor mid_block_resnets_0_conv1_weight_to_fp16 = const()[name = tensor("mid_block_resnets_0_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(503380928)))]; tensor mid_block_resnets_0_conv1_bias_to_fp16 = const()[name = tensor("mid_block_resnets_0_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532872192)))]; tensor hidden_states_117_cast_fp16 = conv(bias = mid_block_resnets_0_conv1_bias_to_fp16, dilations = var_5782, groups = var_5762, pad = hidden_states_117_pad_0, pad_type = hidden_states_117_pad_type_0, strides = var_5780, weight = mid_block_resnets_0_conv1_weight_to_fp16, x = input_203_cast_fp16)[name = tensor("hidden_states_117_cast_fp16")]; tensor var_5788 = const()[name = tensor("op_5788"), val = tensor([1, 1])]; tensor var_5790 = const()[name = tensor("op_5790"), val = tensor([1, 1])]; tensor temb_17_pad_type_0 = const()[name = tensor("temb_17_pad_type_0"), val = tensor("custom")]; tensor temb_17_pad_0 = const()[name = tensor("temb_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mid_block_resnets_0_time_emb_proj_weight_to_fp16 = const()[name = tensor("mid_block_resnets_0_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532874816)))]; tensor mid_block_resnets_0_time_emb_proj_bias_to_fp16 = const()[name = tensor("mid_block_resnets_0_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536151680)))]; tensor temb_17_cast_fp16 = conv(bias = mid_block_resnets_0_time_emb_proj_bias_to_fp16, dilations = var_5790, groups = var_5762, pad = temb_17_pad_0, pad_type = temb_17_pad_type_0, strides = var_5788, weight = mid_block_resnets_0_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_17_cast_fp16")]; tensor input_207_cast_fp16 = add(x = hidden_states_117_cast_fp16, y = temb_17_cast_fp16)[name = tensor("input_207_cast_fp16")]; tensor reshape_92_shape_0 = const()[name = tensor("reshape_92_shape_0"), val = tensor([2, 32, 40, 8, 8])]; tensor reshape_92_cast_fp16 = reshape(shape = reshape_92_shape_0, x = input_207_cast_fp16)[name = tensor("reshape_92_cast_fp16")]; tensor reduce_mean_69_axes_0 = const()[name = tensor("reduce_mean_69_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_69_keep_dims_0 = const()[name = tensor("reduce_mean_69_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_69_cast_fp16 = reduce_mean(axes = reduce_mean_69_axes_0, keep_dims = reduce_mean_69_keep_dims_0, x = reshape_92_cast_fp16)[name = tensor("reduce_mean_69_cast_fp16")]; tensor sub_46_cast_fp16 = sub(x = reshape_92_cast_fp16, y = reduce_mean_69_cast_fp16)[name = tensor("sub_46_cast_fp16")]; tensor square_23_cast_fp16 = square(x = sub_46_cast_fp16)[name = tensor("square_23_cast_fp16")]; tensor reduce_mean_71_axes_0 = const()[name = tensor("reduce_mean_71_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_71_keep_dims_0 = const()[name = tensor("reduce_mean_71_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_71_cast_fp16 = reduce_mean(axes = reduce_mean_71_axes_0, keep_dims = reduce_mean_71_keep_dims_0, x = square_23_cast_fp16)[name = tensor("reduce_mean_71_cast_fp16")]; tensor add_46_y_0_to_fp16 = const()[name = tensor("add_46_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_46_cast_fp16 = add(x = reduce_mean_71_cast_fp16, y = add_46_y_0_to_fp16)[name = tensor("add_46_cast_fp16")]; tensor sqrt_23_cast_fp16 = sqrt(x = add_46_cast_fp16)[name = tensor("sqrt_23_cast_fp16")]; tensor real_div_23_cast_fp16 = real_div(x = sub_46_cast_fp16, y = sqrt_23_cast_fp16)[name = tensor("real_div_23_cast_fp16")]; tensor reshape_93_shape_0 = const()[name = tensor("reshape_93_shape_0"), val = tensor([2, 1280, 8, 8])]; tensor reshape_93_cast_fp16 = reshape(shape = reshape_93_shape_0, x = real_div_23_cast_fp16)[name = tensor("reshape_93_cast_fp16")]; tensor add_47_gamma_0_to_fp16 = const()[name = tensor("add_47_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536154304)))]; tensor add_47_beta_0_to_fp16 = const()[name = tensor("add_47_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536156928)))]; tensor add_47_epsilon_0_to_fp16 = const()[name = tensor("add_47_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_47_cast_fp16 = batch_norm(beta = add_47_beta_0_to_fp16, epsilon = add_47_epsilon_0_to_fp16, gamma = add_47_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_93_cast_fp16)[name = tensor("add_47_cast_fp16")]; tensor input_211_cast_fp16 = silu(x = add_47_cast_fp16)[name = tensor("input_211_cast_fp16")]; tensor var_5800 = const()[name = tensor("op_5800"), val = tensor([1, 1])]; tensor var_5802 = const()[name = tensor("op_5802"), val = tensor([1, 1])]; tensor hidden_states_119_pad_type_0 = const()[name = tensor("hidden_states_119_pad_type_0"), val = tensor("custom")]; tensor hidden_states_119_pad_0 = const()[name = tensor("hidden_states_119_pad_0"), val = tensor([1, 1, 1, 1])]; tensor mid_block_resnets_0_conv2_weight_to_fp16 = const()[name = tensor("mid_block_resnets_0_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(536159552)))]; tensor mid_block_resnets_0_conv2_bias_to_fp16 = const()[name = tensor("mid_block_resnets_0_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565650816)))]; tensor hidden_states_119_cast_fp16 = conv(bias = mid_block_resnets_0_conv2_bias_to_fp16, dilations = var_5802, groups = var_5762, pad = hidden_states_119_pad_0, pad_type = hidden_states_119_pad_type_0, strides = var_5800, weight = mid_block_resnets_0_conv2_weight_to_fp16, x = input_211_cast_fp16)[name = tensor("hidden_states_119_cast_fp16")]; tensor hidden_states_121_cast_fp16 = add(x = input_199_cast_fp16, y = hidden_states_119_cast_fp16)[name = tensor("hidden_states_121_cast_fp16")]; tensor reshape_96_shape_0 = const()[name = tensor("reshape_96_shape_0"), val = tensor([2, 32, 40, 8, 8])]; tensor reshape_96_cast_fp16 = reshape(shape = reshape_96_shape_0, x = hidden_states_121_cast_fp16)[name = tensor("reshape_96_cast_fp16")]; tensor reduce_mean_72_axes_0 = const()[name = tensor("reduce_mean_72_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_72_keep_dims_0 = const()[name = tensor("reduce_mean_72_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_72_cast_fp16 = reduce_mean(axes = reduce_mean_72_axes_0, keep_dims = reduce_mean_72_keep_dims_0, x = reshape_96_cast_fp16)[name = tensor("reduce_mean_72_cast_fp16")]; tensor sub_48_cast_fp16 = sub(x = reshape_96_cast_fp16, y = reduce_mean_72_cast_fp16)[name = tensor("sub_48_cast_fp16")]; tensor square_24_cast_fp16 = square(x = sub_48_cast_fp16)[name = tensor("square_24_cast_fp16")]; tensor reduce_mean_74_axes_0 = const()[name = tensor("reduce_mean_74_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_74_keep_dims_0 = const()[name = tensor("reduce_mean_74_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_74_cast_fp16 = reduce_mean(axes = reduce_mean_74_axes_0, keep_dims = reduce_mean_74_keep_dims_0, x = square_24_cast_fp16)[name = tensor("reduce_mean_74_cast_fp16")]; tensor add_48_y_0_to_fp16 = const()[name = tensor("add_48_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_48_cast_fp16 = add(x = reduce_mean_74_cast_fp16, y = add_48_y_0_to_fp16)[name = tensor("add_48_cast_fp16")]; tensor sqrt_24_cast_fp16 = sqrt(x = add_48_cast_fp16)[name = tensor("sqrt_24_cast_fp16")]; tensor real_div_24_cast_fp16 = real_div(x = sub_48_cast_fp16, y = sqrt_24_cast_fp16)[name = tensor("real_div_24_cast_fp16")]; tensor reshape_97_shape_0 = const()[name = tensor("reshape_97_shape_0"), val = tensor([2, 1280, 8, 8])]; tensor reshape_97_cast_fp16 = reshape(shape = reshape_97_shape_0, x = real_div_24_cast_fp16)[name = tensor("reshape_97_cast_fp16")]; tensor add_49_gamma_0_to_fp16 = const()[name = tensor("add_49_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565653440)))]; tensor add_49_beta_0_to_fp16 = const()[name = tensor("add_49_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565656064)))]; tensor add_49_epsilon_0_to_fp16 = const()[name = tensor("add_49_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_49_cast_fp16 = batch_norm(beta = add_49_beta_0_to_fp16, epsilon = add_49_epsilon_0_to_fp16, gamma = add_49_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_97_cast_fp16)[name = tensor("add_49_cast_fp16")]; tensor var_5822 = const()[name = tensor("op_5822"), val = tensor([1, 1])]; tensor var_5824 = const()[name = tensor("op_5824"), val = tensor([1, 1])]; tensor hidden_states_123_pad_type_0 = const()[name = tensor("hidden_states_123_pad_type_0"), val = tensor("custom")]; tensor hidden_states_123_pad_0 = const()[name = tensor("hidden_states_123_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mid_block_attentions_0_proj_in_weight_to_fp16 = const()[name = tensor("mid_block_attentions_0_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565658688)))]; tensor mid_block_attentions_0_proj_in_bias_to_fp16 = const()[name = tensor("mid_block_attentions_0_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(568935552)))]; tensor hidden_states_123_cast_fp16 = conv(bias = mid_block_attentions_0_proj_in_bias_to_fp16, dilations = var_5824, groups = var_5762, pad = hidden_states_123_pad_0, pad_type = hidden_states_123_pad_type_0, strides = var_5822, weight = mid_block_attentions_0_proj_in_weight_to_fp16, x = add_49_cast_fp16)[name = tensor("hidden_states_123_cast_fp16")]; tensor var_5829 = const()[name = tensor("op_5829"), val = tensor([2, 1280, 1, 64])]; tensor inputs_37_cast_fp16 = reshape(shape = var_5829, x = hidden_states_123_cast_fp16)[name = tensor("inputs_37_cast_fp16")]; tensor hidden_states_125_axes_0 = const()[name = tensor("hidden_states_125_axes_0"), val = tensor([1])]; tensor hidden_states_125_gamma_0_to_fp16 = const()[name = tensor("hidden_states_125_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(568938176)))]; tensor hidden_states_125_beta_0_to_fp16 = const()[name = tensor("hidden_states_125_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(568940800)))]; tensor var_5845_to_fp16 = const()[name = tensor("op_5845_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_125_cast_fp16 = layer_norm(axes = hidden_states_125_axes_0, beta = hidden_states_125_beta_0_to_fp16, epsilon = var_5845_to_fp16, gamma = hidden_states_125_gamma_0_to_fp16, x = inputs_37_cast_fp16)[name = tensor("hidden_states_125_cast_fp16")]; tensor var_5860 = const()[name = tensor("op_5860"), val = tensor([1, 1])]; tensor var_5862 = const()[name = tensor("op_5862"), val = tensor([1, 1])]; tensor q_25_pad_type_0 = const()[name = tensor("q_25_pad_type_0"), val = tensor("custom")]; tensor q_25_pad_0 = const()[name = tensor("q_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mid_block_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("mid_block_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(568943424)))]; tensor q_25_cast_fp16 = conv(dilations = var_5862, groups = var_5762, pad = q_25_pad_0, pad_type = q_25_pad_type_0, strides = var_5860, weight = mid_block_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_125_cast_fp16)[name = tensor("q_25_cast_fp16")]; tensor var_5866 = const()[name = tensor("op_5866"), val = tensor([1, 1])]; tensor var_5868 = const()[name = tensor("op_5868"), val = tensor([1, 1])]; tensor k_49_pad_type_0 = const()[name = tensor("k_49_pad_type_0"), val = tensor("custom")]; tensor k_49_pad_0 = const()[name = tensor("k_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mid_block_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("mid_block_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(572220288)))]; tensor k_49_cast_fp16 = conv(dilations = var_5868, groups = var_5762, pad = k_49_pad_0, pad_type = k_49_pad_type_0, strides = var_5866, weight = mid_block_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_125_cast_fp16)[name = tensor("k_49_cast_fp16")]; tensor var_5872 = const()[name = tensor("op_5872"), val = tensor([1, 1])]; tensor var_5874 = const()[name = tensor("op_5874"), val = tensor([1, 1])]; tensor v_25_pad_type_0 = const()[name = tensor("v_25_pad_type_0"), val = tensor("custom")]; tensor v_25_pad_0 = const()[name = tensor("v_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mid_block_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("mid_block_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(575497152)))]; tensor v_25_cast_fp16 = conv(dilations = var_5874, groups = var_5762, pad = v_25_pad_0, pad_type = v_25_pad_type_0, strides = var_5872, weight = mid_block_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_125_cast_fp16)[name = tensor("v_25_cast_fp16")]; tensor var_5878_begin_0 = const()[name = tensor("op_5878_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5878_end_0 = const()[name = tensor("op_5878_end_0"), val = tensor([2, 160, 1, 64])]; tensor var_5878_end_mask_0 = const()[name = tensor("op_5878_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5878_cast_fp16 = slice_by_index(begin = var_5878_begin_0, end = var_5878_end_0, end_mask = var_5878_end_mask_0, x = q_25_cast_fp16)[name = tensor("op_5878_cast_fp16")]; tensor var_5882_begin_0 = const()[name = tensor("op_5882_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_5882_end_0 = const()[name = tensor("op_5882_end_0"), val = tensor([2, 320, 1, 64])]; tensor var_5882_end_mask_0 = const()[name = tensor("op_5882_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5882_cast_fp16 = slice_by_index(begin = var_5882_begin_0, end = var_5882_end_0, end_mask = var_5882_end_mask_0, x = q_25_cast_fp16)[name = tensor("op_5882_cast_fp16")]; tensor var_5886_begin_0 = const()[name = tensor("op_5886_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_5886_end_0 = const()[name = tensor("op_5886_end_0"), val = tensor([2, 480, 1, 64])]; tensor var_5886_end_mask_0 = const()[name = tensor("op_5886_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5886_cast_fp16 = slice_by_index(begin = var_5886_begin_0, end = var_5886_end_0, end_mask = var_5886_end_mask_0, x = q_25_cast_fp16)[name = tensor("op_5886_cast_fp16")]; tensor var_5890_begin_0 = const()[name = tensor("op_5890_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_5890_end_0 = const()[name = tensor("op_5890_end_0"), val = tensor([2, 640, 1, 64])]; tensor var_5890_end_mask_0 = const()[name = tensor("op_5890_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5890_cast_fp16 = slice_by_index(begin = var_5890_begin_0, end = var_5890_end_0, end_mask = var_5890_end_mask_0, x = q_25_cast_fp16)[name = tensor("op_5890_cast_fp16")]; tensor var_5894_begin_0 = const()[name = tensor("op_5894_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_5894_end_0 = const()[name = tensor("op_5894_end_0"), val = tensor([2, 800, 1, 64])]; tensor var_5894_end_mask_0 = const()[name = tensor("op_5894_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5894_cast_fp16 = slice_by_index(begin = var_5894_begin_0, end = var_5894_end_0, end_mask = var_5894_end_mask_0, x = q_25_cast_fp16)[name = tensor("op_5894_cast_fp16")]; tensor var_5898_begin_0 = const()[name = tensor("op_5898_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_5898_end_0 = const()[name = tensor("op_5898_end_0"), val = tensor([2, 960, 1, 64])]; tensor var_5898_end_mask_0 = const()[name = tensor("op_5898_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5898_cast_fp16 = slice_by_index(begin = var_5898_begin_0, end = var_5898_end_0, end_mask = var_5898_end_mask_0, x = q_25_cast_fp16)[name = tensor("op_5898_cast_fp16")]; tensor var_5902_begin_0 = const()[name = tensor("op_5902_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_5902_end_0 = const()[name = tensor("op_5902_end_0"), val = tensor([2, 1120, 1, 64])]; tensor var_5902_end_mask_0 = const()[name = tensor("op_5902_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5902_cast_fp16 = slice_by_index(begin = var_5902_begin_0, end = var_5902_end_0, end_mask = var_5902_end_mask_0, x = q_25_cast_fp16)[name = tensor("op_5902_cast_fp16")]; tensor var_5906_begin_0 = const()[name = tensor("op_5906_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_5906_end_0 = const()[name = tensor("op_5906_end_0"), val = tensor([2, 1280, 1, 64])]; tensor var_5906_end_mask_0 = const()[name = tensor("op_5906_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5906_cast_fp16 = slice_by_index(begin = var_5906_begin_0, end = var_5906_end_0, end_mask = var_5906_end_mask_0, x = q_25_cast_fp16)[name = tensor("op_5906_cast_fp16")]; tensor k_51_perm_0 = const()[name = tensor("k_51_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_5913_begin_0 = const()[name = tensor("op_5913_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5913_end_0 = const()[name = tensor("op_5913_end_0"), val = tensor([2, 64, 1, 160])]; tensor var_5913_end_mask_0 = const()[name = tensor("op_5913_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_19 = transpose(perm = k_51_perm_0, x = k_49_cast_fp16)[name = tensor("transpose_19")]; tensor var_5913_cast_fp16 = slice_by_index(begin = var_5913_begin_0, end = var_5913_end_0, end_mask = var_5913_end_mask_0, x = transpose_19)[name = tensor("op_5913_cast_fp16")]; tensor var_5917_begin_0 = const()[name = tensor("op_5917_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_5917_end_0 = const()[name = tensor("op_5917_end_0"), val = tensor([2, 64, 1, 320])]; tensor var_5917_end_mask_0 = const()[name = tensor("op_5917_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5917_cast_fp16 = slice_by_index(begin = var_5917_begin_0, end = var_5917_end_0, end_mask = var_5917_end_mask_0, x = transpose_19)[name = tensor("op_5917_cast_fp16")]; tensor var_5921_begin_0 = const()[name = tensor("op_5921_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_5921_end_0 = const()[name = tensor("op_5921_end_0"), val = tensor([2, 64, 1, 480])]; tensor var_5921_end_mask_0 = const()[name = tensor("op_5921_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5921_cast_fp16 = slice_by_index(begin = var_5921_begin_0, end = var_5921_end_0, end_mask = var_5921_end_mask_0, x = transpose_19)[name = tensor("op_5921_cast_fp16")]; tensor var_5925_begin_0 = const()[name = tensor("op_5925_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_5925_end_0 = const()[name = tensor("op_5925_end_0"), val = tensor([2, 64, 1, 640])]; tensor var_5925_end_mask_0 = const()[name = tensor("op_5925_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5925_cast_fp16 = slice_by_index(begin = var_5925_begin_0, end = var_5925_end_0, end_mask = var_5925_end_mask_0, x = transpose_19)[name = tensor("op_5925_cast_fp16")]; tensor var_5929_begin_0 = const()[name = tensor("op_5929_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_5929_end_0 = const()[name = tensor("op_5929_end_0"), val = tensor([2, 64, 1, 800])]; tensor var_5929_end_mask_0 = const()[name = tensor("op_5929_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5929_cast_fp16 = slice_by_index(begin = var_5929_begin_0, end = var_5929_end_0, end_mask = var_5929_end_mask_0, x = transpose_19)[name = tensor("op_5929_cast_fp16")]; tensor var_5933_begin_0 = const()[name = tensor("op_5933_begin_0"), val = tensor([0, 0, 0, 800])]; tensor var_5933_end_0 = const()[name = tensor("op_5933_end_0"), val = tensor([2, 64, 1, 960])]; tensor var_5933_end_mask_0 = const()[name = tensor("op_5933_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5933_cast_fp16 = slice_by_index(begin = var_5933_begin_0, end = var_5933_end_0, end_mask = var_5933_end_mask_0, x = transpose_19)[name = tensor("op_5933_cast_fp16")]; tensor var_5937_begin_0 = const()[name = tensor("op_5937_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_5937_end_0 = const()[name = tensor("op_5937_end_0"), val = tensor([2, 64, 1, 1120])]; tensor var_5937_end_mask_0 = const()[name = tensor("op_5937_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5937_cast_fp16 = slice_by_index(begin = var_5937_begin_0, end = var_5937_end_0, end_mask = var_5937_end_mask_0, x = transpose_19)[name = tensor("op_5937_cast_fp16")]; tensor var_5941_begin_0 = const()[name = tensor("op_5941_begin_0"), val = tensor([0, 0, 0, 1120])]; tensor var_5941_end_0 = const()[name = tensor("op_5941_end_0"), val = tensor([2, 64, 1, 1280])]; tensor var_5941_end_mask_0 = const()[name = tensor("op_5941_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5941_cast_fp16 = slice_by_index(begin = var_5941_begin_0, end = var_5941_end_0, end_mask = var_5941_end_mask_0, x = transpose_19)[name = tensor("op_5941_cast_fp16")]; tensor var_5943_begin_0 = const()[name = tensor("op_5943_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5943_end_0 = const()[name = tensor("op_5943_end_0"), val = tensor([2, 160, 1, 64])]; tensor var_5943_end_mask_0 = const()[name = tensor("op_5943_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5943_cast_fp16 = slice_by_index(begin = var_5943_begin_0, end = var_5943_end_0, end_mask = var_5943_end_mask_0, x = v_25_cast_fp16)[name = tensor("op_5943_cast_fp16")]; tensor var_5947_begin_0 = const()[name = tensor("op_5947_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_5947_end_0 = const()[name = tensor("op_5947_end_0"), val = tensor([2, 320, 1, 64])]; tensor var_5947_end_mask_0 = const()[name = tensor("op_5947_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5947_cast_fp16 = slice_by_index(begin = var_5947_begin_0, end = var_5947_end_0, end_mask = var_5947_end_mask_0, x = v_25_cast_fp16)[name = tensor("op_5947_cast_fp16")]; tensor var_5951_begin_0 = const()[name = tensor("op_5951_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_5951_end_0 = const()[name = tensor("op_5951_end_0"), val = tensor([2, 480, 1, 64])]; tensor var_5951_end_mask_0 = const()[name = tensor("op_5951_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5951_cast_fp16 = slice_by_index(begin = var_5951_begin_0, end = var_5951_end_0, end_mask = var_5951_end_mask_0, x = v_25_cast_fp16)[name = tensor("op_5951_cast_fp16")]; tensor var_5955_begin_0 = const()[name = tensor("op_5955_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_5955_end_0 = const()[name = tensor("op_5955_end_0"), val = tensor([2, 640, 1, 64])]; tensor var_5955_end_mask_0 = const()[name = tensor("op_5955_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5955_cast_fp16 = slice_by_index(begin = var_5955_begin_0, end = var_5955_end_0, end_mask = var_5955_end_mask_0, x = v_25_cast_fp16)[name = tensor("op_5955_cast_fp16")]; tensor var_5959_begin_0 = const()[name = tensor("op_5959_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_5959_end_0 = const()[name = tensor("op_5959_end_0"), val = tensor([2, 800, 1, 64])]; tensor var_5959_end_mask_0 = const()[name = tensor("op_5959_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5959_cast_fp16 = slice_by_index(begin = var_5959_begin_0, end = var_5959_end_0, end_mask = var_5959_end_mask_0, x = v_25_cast_fp16)[name = tensor("op_5959_cast_fp16")]; tensor var_5963_begin_0 = const()[name = tensor("op_5963_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_5963_end_0 = const()[name = tensor("op_5963_end_0"), val = tensor([2, 960, 1, 64])]; tensor var_5963_end_mask_0 = const()[name = tensor("op_5963_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5963_cast_fp16 = slice_by_index(begin = var_5963_begin_0, end = var_5963_end_0, end_mask = var_5963_end_mask_0, x = v_25_cast_fp16)[name = tensor("op_5963_cast_fp16")]; tensor var_5967_begin_0 = const()[name = tensor("op_5967_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_5967_end_0 = const()[name = tensor("op_5967_end_0"), val = tensor([2, 1120, 1, 64])]; tensor var_5967_end_mask_0 = const()[name = tensor("op_5967_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5967_cast_fp16 = slice_by_index(begin = var_5967_begin_0, end = var_5967_end_0, end_mask = var_5967_end_mask_0, x = v_25_cast_fp16)[name = tensor("op_5967_cast_fp16")]; tensor var_5971_begin_0 = const()[name = tensor("op_5971_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_5971_end_0 = const()[name = tensor("op_5971_end_0"), val = tensor([2, 1280, 1, 64])]; tensor var_5971_end_mask_0 = const()[name = tensor("op_5971_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5971_cast_fp16 = slice_by_index(begin = var_5971_begin_0, end = var_5971_end_0, end_mask = var_5971_end_mask_0, x = v_25_cast_fp16)[name = tensor("op_5971_cast_fp16")]; tensor var_5975_equation_0 = const()[name = tensor("op_5975_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5975_cast_fp16 = einsum(equation = var_5975_equation_0, values = (var_5913_cast_fp16, var_5878_cast_fp16))[name = tensor("op_5975_cast_fp16")]; tensor var_5976_to_fp16 = const()[name = tensor("op_5976_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_65_cast_fp16 = mul(x = var_5975_cast_fp16, y = var_5976_to_fp16)[name = tensor("aw_65_cast_fp16")]; tensor var_5979_equation_0 = const()[name = tensor("op_5979_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5979_cast_fp16 = einsum(equation = var_5979_equation_0, values = (var_5917_cast_fp16, var_5882_cast_fp16))[name = tensor("op_5979_cast_fp16")]; tensor var_5980_to_fp16 = const()[name = tensor("op_5980_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_67_cast_fp16 = mul(x = var_5979_cast_fp16, y = var_5980_to_fp16)[name = tensor("aw_67_cast_fp16")]; tensor var_5983_equation_0 = const()[name = tensor("op_5983_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5983_cast_fp16 = einsum(equation = var_5983_equation_0, values = (var_5921_cast_fp16, var_5886_cast_fp16))[name = tensor("op_5983_cast_fp16")]; tensor var_5984_to_fp16 = const()[name = tensor("op_5984_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_69_cast_fp16 = mul(x = var_5983_cast_fp16, y = var_5984_to_fp16)[name = tensor("aw_69_cast_fp16")]; tensor var_5987_equation_0 = const()[name = tensor("op_5987_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5987_cast_fp16 = einsum(equation = var_5987_equation_0, values = (var_5925_cast_fp16, var_5890_cast_fp16))[name = tensor("op_5987_cast_fp16")]; tensor var_5988_to_fp16 = const()[name = tensor("op_5988_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_71_cast_fp16 = mul(x = var_5987_cast_fp16, y = var_5988_to_fp16)[name = tensor("aw_71_cast_fp16")]; tensor var_5991_equation_0 = const()[name = tensor("op_5991_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5991_cast_fp16 = einsum(equation = var_5991_equation_0, values = (var_5929_cast_fp16, var_5894_cast_fp16))[name = tensor("op_5991_cast_fp16")]; tensor var_5992_to_fp16 = const()[name = tensor("op_5992_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_73_cast_fp16 = mul(x = var_5991_cast_fp16, y = var_5992_to_fp16)[name = tensor("aw_73_cast_fp16")]; tensor var_5995_equation_0 = const()[name = tensor("op_5995_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5995_cast_fp16 = einsum(equation = var_5995_equation_0, values = (var_5933_cast_fp16, var_5898_cast_fp16))[name = tensor("op_5995_cast_fp16")]; tensor var_5996_to_fp16 = const()[name = tensor("op_5996_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_75_cast_fp16 = mul(x = var_5995_cast_fp16, y = var_5996_to_fp16)[name = tensor("aw_75_cast_fp16")]; tensor var_5999_equation_0 = const()[name = tensor("op_5999_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_5999_cast_fp16 = einsum(equation = var_5999_equation_0, values = (var_5937_cast_fp16, var_5902_cast_fp16))[name = tensor("op_5999_cast_fp16")]; tensor var_6000_to_fp16 = const()[name = tensor("op_6000_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_77_cast_fp16 = mul(x = var_5999_cast_fp16, y = var_6000_to_fp16)[name = tensor("aw_77_cast_fp16")]; tensor var_6003_equation_0 = const()[name = tensor("op_6003_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6003_cast_fp16 = einsum(equation = var_6003_equation_0, values = (var_5941_cast_fp16, var_5906_cast_fp16))[name = tensor("op_6003_cast_fp16")]; tensor var_6004_to_fp16 = const()[name = tensor("op_6004_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_79_cast_fp16 = mul(x = var_6003_cast_fp16, y = var_6004_to_fp16)[name = tensor("aw_79_cast_fp16")]; tensor var_6006_cast_fp16 = softmax(axis = var_5762, x = aw_65_cast_fp16)[name = tensor("op_6006_cast_fp16")]; tensor var_6007_cast_fp16 = softmax(axis = var_5762, x = aw_67_cast_fp16)[name = tensor("op_6007_cast_fp16")]; tensor var_6008_cast_fp16 = softmax(axis = var_5762, x = aw_69_cast_fp16)[name = tensor("op_6008_cast_fp16")]; tensor var_6009_cast_fp16 = softmax(axis = var_5762, x = aw_71_cast_fp16)[name = tensor("op_6009_cast_fp16")]; tensor var_6010_cast_fp16 = softmax(axis = var_5762, x = aw_73_cast_fp16)[name = tensor("op_6010_cast_fp16")]; tensor var_6011_cast_fp16 = softmax(axis = var_5762, x = aw_75_cast_fp16)[name = tensor("op_6011_cast_fp16")]; tensor var_6012_cast_fp16 = softmax(axis = var_5762, x = aw_77_cast_fp16)[name = tensor("op_6012_cast_fp16")]; tensor var_6013_cast_fp16 = softmax(axis = var_5762, x = aw_79_cast_fp16)[name = tensor("op_6013_cast_fp16")]; tensor var_6015_equation_0 = const()[name = tensor("op_6015_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6015_cast_fp16 = einsum(equation = var_6015_equation_0, values = (var_5943_cast_fp16, var_6006_cast_fp16))[name = tensor("op_6015_cast_fp16")]; tensor var_6017_equation_0 = const()[name = tensor("op_6017_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6017_cast_fp16 = einsum(equation = var_6017_equation_0, values = (var_5947_cast_fp16, var_6007_cast_fp16))[name = tensor("op_6017_cast_fp16")]; tensor var_6019_equation_0 = const()[name = tensor("op_6019_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6019_cast_fp16 = einsum(equation = var_6019_equation_0, values = (var_5951_cast_fp16, var_6008_cast_fp16))[name = tensor("op_6019_cast_fp16")]; tensor var_6021_equation_0 = const()[name = tensor("op_6021_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6021_cast_fp16 = einsum(equation = var_6021_equation_0, values = (var_5955_cast_fp16, var_6009_cast_fp16))[name = tensor("op_6021_cast_fp16")]; tensor var_6023_equation_0 = const()[name = tensor("op_6023_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6023_cast_fp16 = einsum(equation = var_6023_equation_0, values = (var_5959_cast_fp16, var_6010_cast_fp16))[name = tensor("op_6023_cast_fp16")]; tensor var_6025_equation_0 = const()[name = tensor("op_6025_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6025_cast_fp16 = einsum(equation = var_6025_equation_0, values = (var_5963_cast_fp16, var_6011_cast_fp16))[name = tensor("op_6025_cast_fp16")]; tensor var_6027_equation_0 = const()[name = tensor("op_6027_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6027_cast_fp16 = einsum(equation = var_6027_equation_0, values = (var_5967_cast_fp16, var_6012_cast_fp16))[name = tensor("op_6027_cast_fp16")]; tensor var_6029_equation_0 = const()[name = tensor("op_6029_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6029_cast_fp16 = einsum(equation = var_6029_equation_0, values = (var_5971_cast_fp16, var_6013_cast_fp16))[name = tensor("op_6029_cast_fp16")]; tensor input_215_interleave_0 = const()[name = tensor("input_215_interleave_0"), val = tensor(false)]; tensor input_215_cast_fp16 = concat(axis = var_5762, interleave = input_215_interleave_0, values = (var_6015_cast_fp16, var_6017_cast_fp16, var_6019_cast_fp16, var_6021_cast_fp16, var_6023_cast_fp16, var_6025_cast_fp16, var_6027_cast_fp16, var_6029_cast_fp16))[name = tensor("input_215_cast_fp16")]; tensor var_6035 = const()[name = tensor("op_6035"), val = tensor([1, 1])]; tensor var_6037 = const()[name = tensor("op_6037"), val = tensor([1, 1])]; tensor var_6039_pad_type_0 = const()[name = tensor("op_6039_pad_type_0"), val = tensor("custom")]; tensor var_6039_pad_0 = const()[name = tensor("op_6039_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mid_block_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("mid_block_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578774016)))]; tensor mid_block_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("mid_block_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582050880)))]; tensor var_6039_cast_fp16 = conv(bias = mid_block_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_6037, groups = var_5762, pad = var_6039_pad_0, pad_type = var_6039_pad_type_0, strides = var_6035, weight = mid_block_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_215_cast_fp16)[name = tensor("op_6039_cast_fp16")]; tensor inputs_39_cast_fp16 = add(x = var_6039_cast_fp16, y = inputs_37_cast_fp16)[name = tensor("inputs_39_cast_fp16")]; tensor hidden_states_127_axes_0 = const()[name = tensor("hidden_states_127_axes_0"), val = tensor([1])]; tensor hidden_states_127_gamma_0_to_fp16 = const()[name = tensor("hidden_states_127_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582053504)))]; tensor hidden_states_127_beta_0_to_fp16 = const()[name = tensor("hidden_states_127_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582056128)))]; tensor var_6049_to_fp16 = const()[name = tensor("op_6049_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_127_cast_fp16 = layer_norm(axes = hidden_states_127_axes_0, beta = hidden_states_127_beta_0_to_fp16, epsilon = var_6049_to_fp16, gamma = hidden_states_127_gamma_0_to_fp16, x = inputs_39_cast_fp16)[name = tensor("hidden_states_127_cast_fp16")]; tensor var_6064 = const()[name = tensor("op_6064"), val = tensor([1, 1])]; tensor var_6066 = const()[name = tensor("op_6066"), val = tensor([1, 1])]; tensor q_27_pad_type_0 = const()[name = tensor("q_27_pad_type_0"), val = tensor("custom")]; tensor q_27_pad_0 = const()[name = tensor("q_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mid_block_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("mid_block_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(582058752)))]; tensor q_27_cast_fp16 = conv(dilations = var_6066, groups = var_5762, pad = q_27_pad_0, pad_type = q_27_pad_type_0, strides = var_6064, weight = mid_block_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_127_cast_fp16)[name = tensor("q_27_cast_fp16")]; tensor var_6070 = const()[name = tensor("op_6070"), val = tensor([1, 1])]; tensor var_6072 = const()[name = tensor("op_6072"), val = tensor([1, 1])]; tensor k_53_pad_type_0 = const()[name = tensor("k_53_pad_type_0"), val = tensor("custom")]; tensor k_53_pad_0 = const()[name = tensor("k_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mid_block_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("mid_block_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(585335616)))]; tensor k_53_cast_fp16 = conv(dilations = var_6072, groups = var_5762, pad = k_53_pad_0, pad_type = k_53_pad_type_0, strides = var_6070, weight = mid_block_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_53_cast_fp16")]; tensor var_6076 = const()[name = tensor("op_6076"), val = tensor([1, 1])]; tensor var_6078 = const()[name = tensor("op_6078"), val = tensor([1, 1])]; tensor v_27_pad_type_0 = const()[name = tensor("v_27_pad_type_0"), val = tensor("custom")]; tensor v_27_pad_0 = const()[name = tensor("v_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mid_block_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("mid_block_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(587301760)))]; tensor v_27_cast_fp16 = conv(dilations = var_6078, groups = var_5762, pad = v_27_pad_0, pad_type = v_27_pad_type_0, strides = var_6076, weight = mid_block_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_27_cast_fp16")]; tensor var_6082_begin_0 = const()[name = tensor("op_6082_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6082_end_0 = const()[name = tensor("op_6082_end_0"), val = tensor([2, 160, 1, 64])]; tensor var_6082_end_mask_0 = const()[name = tensor("op_6082_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6082_cast_fp16 = slice_by_index(begin = var_6082_begin_0, end = var_6082_end_0, end_mask = var_6082_end_mask_0, x = q_27_cast_fp16)[name = tensor("op_6082_cast_fp16")]; tensor var_6086_begin_0 = const()[name = tensor("op_6086_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_6086_end_0 = const()[name = tensor("op_6086_end_0"), val = tensor([2, 320, 1, 64])]; tensor var_6086_end_mask_0 = const()[name = tensor("op_6086_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6086_cast_fp16 = slice_by_index(begin = var_6086_begin_0, end = var_6086_end_0, end_mask = var_6086_end_mask_0, x = q_27_cast_fp16)[name = tensor("op_6086_cast_fp16")]; tensor var_6090_begin_0 = const()[name = tensor("op_6090_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_6090_end_0 = const()[name = tensor("op_6090_end_0"), val = tensor([2, 480, 1, 64])]; tensor var_6090_end_mask_0 = const()[name = tensor("op_6090_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6090_cast_fp16 = slice_by_index(begin = var_6090_begin_0, end = var_6090_end_0, end_mask = var_6090_end_mask_0, x = q_27_cast_fp16)[name = tensor("op_6090_cast_fp16")]; tensor var_6094_begin_0 = const()[name = tensor("op_6094_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_6094_end_0 = const()[name = tensor("op_6094_end_0"), val = tensor([2, 640, 1, 64])]; tensor var_6094_end_mask_0 = const()[name = tensor("op_6094_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6094_cast_fp16 = slice_by_index(begin = var_6094_begin_0, end = var_6094_end_0, end_mask = var_6094_end_mask_0, x = q_27_cast_fp16)[name = tensor("op_6094_cast_fp16")]; tensor var_6098_begin_0 = const()[name = tensor("op_6098_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_6098_end_0 = const()[name = tensor("op_6098_end_0"), val = tensor([2, 800, 1, 64])]; tensor var_6098_end_mask_0 = const()[name = tensor("op_6098_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6098_cast_fp16 = slice_by_index(begin = var_6098_begin_0, end = var_6098_end_0, end_mask = var_6098_end_mask_0, x = q_27_cast_fp16)[name = tensor("op_6098_cast_fp16")]; tensor var_6102_begin_0 = const()[name = tensor("op_6102_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_6102_end_0 = const()[name = tensor("op_6102_end_0"), val = tensor([2, 960, 1, 64])]; tensor var_6102_end_mask_0 = const()[name = tensor("op_6102_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6102_cast_fp16 = slice_by_index(begin = var_6102_begin_0, end = var_6102_end_0, end_mask = var_6102_end_mask_0, x = q_27_cast_fp16)[name = tensor("op_6102_cast_fp16")]; tensor var_6106_begin_0 = const()[name = tensor("op_6106_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_6106_end_0 = const()[name = tensor("op_6106_end_0"), val = tensor([2, 1120, 1, 64])]; tensor var_6106_end_mask_0 = const()[name = tensor("op_6106_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6106_cast_fp16 = slice_by_index(begin = var_6106_begin_0, end = var_6106_end_0, end_mask = var_6106_end_mask_0, x = q_27_cast_fp16)[name = tensor("op_6106_cast_fp16")]; tensor var_6110_begin_0 = const()[name = tensor("op_6110_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_6110_end_0 = const()[name = tensor("op_6110_end_0"), val = tensor([2, 1280, 1, 64])]; tensor var_6110_end_mask_0 = const()[name = tensor("op_6110_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6110_cast_fp16 = slice_by_index(begin = var_6110_begin_0, end = var_6110_end_0, end_mask = var_6110_end_mask_0, x = q_27_cast_fp16)[name = tensor("op_6110_cast_fp16")]; tensor k_55_perm_0 = const()[name = tensor("k_55_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_6117_begin_0 = const()[name = tensor("op_6117_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6117_end_0 = const()[name = tensor("op_6117_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_6117_end_mask_0 = const()[name = tensor("op_6117_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_18 = transpose(perm = k_55_perm_0, x = k_53_cast_fp16)[name = tensor("transpose_18")]; tensor var_6117_cast_fp16 = slice_by_index(begin = var_6117_begin_0, end = var_6117_end_0, end_mask = var_6117_end_mask_0, x = transpose_18)[name = tensor("op_6117_cast_fp16")]; tensor var_6121_begin_0 = const()[name = tensor("op_6121_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_6121_end_0 = const()[name = tensor("op_6121_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_6121_end_mask_0 = const()[name = tensor("op_6121_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6121_cast_fp16 = slice_by_index(begin = var_6121_begin_0, end = var_6121_end_0, end_mask = var_6121_end_mask_0, x = transpose_18)[name = tensor("op_6121_cast_fp16")]; tensor var_6125_begin_0 = const()[name = tensor("op_6125_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_6125_end_0 = const()[name = tensor("op_6125_end_0"), val = tensor([2, 77, 1, 480])]; tensor var_6125_end_mask_0 = const()[name = tensor("op_6125_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6125_cast_fp16 = slice_by_index(begin = var_6125_begin_0, end = var_6125_end_0, end_mask = var_6125_end_mask_0, x = transpose_18)[name = tensor("op_6125_cast_fp16")]; tensor var_6129_begin_0 = const()[name = tensor("op_6129_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_6129_end_0 = const()[name = tensor("op_6129_end_0"), val = tensor([2, 77, 1, 640])]; tensor var_6129_end_mask_0 = const()[name = tensor("op_6129_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6129_cast_fp16 = slice_by_index(begin = var_6129_begin_0, end = var_6129_end_0, end_mask = var_6129_end_mask_0, x = transpose_18)[name = tensor("op_6129_cast_fp16")]; tensor var_6133_begin_0 = const()[name = tensor("op_6133_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_6133_end_0 = const()[name = tensor("op_6133_end_0"), val = tensor([2, 77, 1, 800])]; tensor var_6133_end_mask_0 = const()[name = tensor("op_6133_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6133_cast_fp16 = slice_by_index(begin = var_6133_begin_0, end = var_6133_end_0, end_mask = var_6133_end_mask_0, x = transpose_18)[name = tensor("op_6133_cast_fp16")]; tensor var_6137_begin_0 = const()[name = tensor("op_6137_begin_0"), val = tensor([0, 0, 0, 800])]; tensor var_6137_end_0 = const()[name = tensor("op_6137_end_0"), val = tensor([2, 77, 1, 960])]; tensor var_6137_end_mask_0 = const()[name = tensor("op_6137_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6137_cast_fp16 = slice_by_index(begin = var_6137_begin_0, end = var_6137_end_0, end_mask = var_6137_end_mask_0, x = transpose_18)[name = tensor("op_6137_cast_fp16")]; tensor var_6141_begin_0 = const()[name = tensor("op_6141_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_6141_end_0 = const()[name = tensor("op_6141_end_0"), val = tensor([2, 77, 1, 1120])]; tensor var_6141_end_mask_0 = const()[name = tensor("op_6141_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6141_cast_fp16 = slice_by_index(begin = var_6141_begin_0, end = var_6141_end_0, end_mask = var_6141_end_mask_0, x = transpose_18)[name = tensor("op_6141_cast_fp16")]; tensor var_6145_begin_0 = const()[name = tensor("op_6145_begin_0"), val = tensor([0, 0, 0, 1120])]; tensor var_6145_end_0 = const()[name = tensor("op_6145_end_0"), val = tensor([2, 77, 1, 1280])]; tensor var_6145_end_mask_0 = const()[name = tensor("op_6145_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6145_cast_fp16 = slice_by_index(begin = var_6145_begin_0, end = var_6145_end_0, end_mask = var_6145_end_mask_0, x = transpose_18)[name = tensor("op_6145_cast_fp16")]; tensor var_6147_begin_0 = const()[name = tensor("op_6147_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6147_end_0 = const()[name = tensor("op_6147_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_6147_end_mask_0 = const()[name = tensor("op_6147_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6147_cast_fp16 = slice_by_index(begin = var_6147_begin_0, end = var_6147_end_0, end_mask = var_6147_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_6147_cast_fp16")]; tensor var_6151_begin_0 = const()[name = tensor("op_6151_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_6151_end_0 = const()[name = tensor("op_6151_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_6151_end_mask_0 = const()[name = tensor("op_6151_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6151_cast_fp16 = slice_by_index(begin = var_6151_begin_0, end = var_6151_end_0, end_mask = var_6151_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_6151_cast_fp16")]; tensor var_6155_begin_0 = const()[name = tensor("op_6155_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_6155_end_0 = const()[name = tensor("op_6155_end_0"), val = tensor([2, 480, 1, 77])]; tensor var_6155_end_mask_0 = const()[name = tensor("op_6155_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6155_cast_fp16 = slice_by_index(begin = var_6155_begin_0, end = var_6155_end_0, end_mask = var_6155_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_6155_cast_fp16")]; tensor var_6159_begin_0 = const()[name = tensor("op_6159_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_6159_end_0 = const()[name = tensor("op_6159_end_0"), val = tensor([2, 640, 1, 77])]; tensor var_6159_end_mask_0 = const()[name = tensor("op_6159_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6159_cast_fp16 = slice_by_index(begin = var_6159_begin_0, end = var_6159_end_0, end_mask = var_6159_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_6159_cast_fp16")]; tensor var_6163_begin_0 = const()[name = tensor("op_6163_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_6163_end_0 = const()[name = tensor("op_6163_end_0"), val = tensor([2, 800, 1, 77])]; tensor var_6163_end_mask_0 = const()[name = tensor("op_6163_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6163_cast_fp16 = slice_by_index(begin = var_6163_begin_0, end = var_6163_end_0, end_mask = var_6163_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_6163_cast_fp16")]; tensor var_6167_begin_0 = const()[name = tensor("op_6167_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_6167_end_0 = const()[name = tensor("op_6167_end_0"), val = tensor([2, 960, 1, 77])]; tensor var_6167_end_mask_0 = const()[name = tensor("op_6167_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6167_cast_fp16 = slice_by_index(begin = var_6167_begin_0, end = var_6167_end_0, end_mask = var_6167_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_6167_cast_fp16")]; tensor var_6171_begin_0 = const()[name = tensor("op_6171_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_6171_end_0 = const()[name = tensor("op_6171_end_0"), val = tensor([2, 1120, 1, 77])]; tensor var_6171_end_mask_0 = const()[name = tensor("op_6171_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6171_cast_fp16 = slice_by_index(begin = var_6171_begin_0, end = var_6171_end_0, end_mask = var_6171_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_6171_cast_fp16")]; tensor var_6175_begin_0 = const()[name = tensor("op_6175_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_6175_end_0 = const()[name = tensor("op_6175_end_0"), val = tensor([2, 1280, 1, 77])]; tensor var_6175_end_mask_0 = const()[name = tensor("op_6175_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6175_cast_fp16 = slice_by_index(begin = var_6175_begin_0, end = var_6175_end_0, end_mask = var_6175_end_mask_0, x = v_27_cast_fp16)[name = tensor("op_6175_cast_fp16")]; tensor var_6179_equation_0 = const()[name = tensor("op_6179_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6179_cast_fp16 = einsum(equation = var_6179_equation_0, values = (var_6117_cast_fp16, var_6082_cast_fp16))[name = tensor("op_6179_cast_fp16")]; tensor var_6180_to_fp16 = const()[name = tensor("op_6180_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_81_cast_fp16 = mul(x = var_6179_cast_fp16, y = var_6180_to_fp16)[name = tensor("aw_81_cast_fp16")]; tensor var_6183_equation_0 = const()[name = tensor("op_6183_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6183_cast_fp16 = einsum(equation = var_6183_equation_0, values = (var_6121_cast_fp16, var_6086_cast_fp16))[name = tensor("op_6183_cast_fp16")]; tensor var_6184_to_fp16 = const()[name = tensor("op_6184_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_83_cast_fp16 = mul(x = var_6183_cast_fp16, y = var_6184_to_fp16)[name = tensor("aw_83_cast_fp16")]; tensor var_6187_equation_0 = const()[name = tensor("op_6187_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6187_cast_fp16 = einsum(equation = var_6187_equation_0, values = (var_6125_cast_fp16, var_6090_cast_fp16))[name = tensor("op_6187_cast_fp16")]; tensor var_6188_to_fp16 = const()[name = tensor("op_6188_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_85_cast_fp16 = mul(x = var_6187_cast_fp16, y = var_6188_to_fp16)[name = tensor("aw_85_cast_fp16")]; tensor var_6191_equation_0 = const()[name = tensor("op_6191_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6191_cast_fp16 = einsum(equation = var_6191_equation_0, values = (var_6129_cast_fp16, var_6094_cast_fp16))[name = tensor("op_6191_cast_fp16")]; tensor var_6192_to_fp16 = const()[name = tensor("op_6192_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_87_cast_fp16 = mul(x = var_6191_cast_fp16, y = var_6192_to_fp16)[name = tensor("aw_87_cast_fp16")]; tensor var_6195_equation_0 = const()[name = tensor("op_6195_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6195_cast_fp16 = einsum(equation = var_6195_equation_0, values = (var_6133_cast_fp16, var_6098_cast_fp16))[name = tensor("op_6195_cast_fp16")]; tensor var_6196_to_fp16 = const()[name = tensor("op_6196_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_89_cast_fp16 = mul(x = var_6195_cast_fp16, y = var_6196_to_fp16)[name = tensor("aw_89_cast_fp16")]; tensor var_6199_equation_0 = const()[name = tensor("op_6199_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6199_cast_fp16 = einsum(equation = var_6199_equation_0, values = (var_6137_cast_fp16, var_6102_cast_fp16))[name = tensor("op_6199_cast_fp16")]; tensor var_6200_to_fp16 = const()[name = tensor("op_6200_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_91_cast_fp16 = mul(x = var_6199_cast_fp16, y = var_6200_to_fp16)[name = tensor("aw_91_cast_fp16")]; tensor var_6203_equation_0 = const()[name = tensor("op_6203_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6203_cast_fp16 = einsum(equation = var_6203_equation_0, values = (var_6141_cast_fp16, var_6106_cast_fp16))[name = tensor("op_6203_cast_fp16")]; tensor var_6204_to_fp16 = const()[name = tensor("op_6204_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_93_cast_fp16 = mul(x = var_6203_cast_fp16, y = var_6204_to_fp16)[name = tensor("aw_93_cast_fp16")]; tensor var_6207_equation_0 = const()[name = tensor("op_6207_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6207_cast_fp16 = einsum(equation = var_6207_equation_0, values = (var_6145_cast_fp16, var_6110_cast_fp16))[name = tensor("op_6207_cast_fp16")]; tensor var_6208_to_fp16 = const()[name = tensor("op_6208_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_95_cast_fp16 = mul(x = var_6207_cast_fp16, y = var_6208_to_fp16)[name = tensor("aw_95_cast_fp16")]; tensor var_6210_cast_fp16 = softmax(axis = var_5762, x = aw_81_cast_fp16)[name = tensor("op_6210_cast_fp16")]; tensor var_6211_cast_fp16 = softmax(axis = var_5762, x = aw_83_cast_fp16)[name = tensor("op_6211_cast_fp16")]; tensor var_6212_cast_fp16 = softmax(axis = var_5762, x = aw_85_cast_fp16)[name = tensor("op_6212_cast_fp16")]; tensor var_6213_cast_fp16 = softmax(axis = var_5762, x = aw_87_cast_fp16)[name = tensor("op_6213_cast_fp16")]; tensor var_6214_cast_fp16 = softmax(axis = var_5762, x = aw_89_cast_fp16)[name = tensor("op_6214_cast_fp16")]; tensor var_6215_cast_fp16 = softmax(axis = var_5762, x = aw_91_cast_fp16)[name = tensor("op_6215_cast_fp16")]; tensor var_6216_cast_fp16 = softmax(axis = var_5762, x = aw_93_cast_fp16)[name = tensor("op_6216_cast_fp16")]; tensor var_6217_cast_fp16 = softmax(axis = var_5762, x = aw_95_cast_fp16)[name = tensor("op_6217_cast_fp16")]; tensor var_6219_equation_0 = const()[name = tensor("op_6219_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6219_cast_fp16 = einsum(equation = var_6219_equation_0, values = (var_6147_cast_fp16, var_6210_cast_fp16))[name = tensor("op_6219_cast_fp16")]; tensor var_6221_equation_0 = const()[name = tensor("op_6221_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6221_cast_fp16 = einsum(equation = var_6221_equation_0, values = (var_6151_cast_fp16, var_6211_cast_fp16))[name = tensor("op_6221_cast_fp16")]; tensor var_6223_equation_0 = const()[name = tensor("op_6223_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6223_cast_fp16 = einsum(equation = var_6223_equation_0, values = (var_6155_cast_fp16, var_6212_cast_fp16))[name = tensor("op_6223_cast_fp16")]; tensor var_6225_equation_0 = const()[name = tensor("op_6225_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6225_cast_fp16 = einsum(equation = var_6225_equation_0, values = (var_6159_cast_fp16, var_6213_cast_fp16))[name = tensor("op_6225_cast_fp16")]; tensor var_6227_equation_0 = const()[name = tensor("op_6227_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6227_cast_fp16 = einsum(equation = var_6227_equation_0, values = (var_6163_cast_fp16, var_6214_cast_fp16))[name = tensor("op_6227_cast_fp16")]; tensor var_6229_equation_0 = const()[name = tensor("op_6229_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6229_cast_fp16 = einsum(equation = var_6229_equation_0, values = (var_6167_cast_fp16, var_6215_cast_fp16))[name = tensor("op_6229_cast_fp16")]; tensor var_6231_equation_0 = const()[name = tensor("op_6231_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6231_cast_fp16 = einsum(equation = var_6231_equation_0, values = (var_6171_cast_fp16, var_6216_cast_fp16))[name = tensor("op_6231_cast_fp16")]; tensor var_6233_equation_0 = const()[name = tensor("op_6233_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6233_cast_fp16 = einsum(equation = var_6233_equation_0, values = (var_6175_cast_fp16, var_6217_cast_fp16))[name = tensor("op_6233_cast_fp16")]; tensor input_217_interleave_0 = const()[name = tensor("input_217_interleave_0"), val = tensor(false)]; tensor input_217_cast_fp16 = concat(axis = var_5762, interleave = input_217_interleave_0, values = (var_6219_cast_fp16, var_6221_cast_fp16, var_6223_cast_fp16, var_6225_cast_fp16, var_6227_cast_fp16, var_6229_cast_fp16, var_6231_cast_fp16, var_6233_cast_fp16))[name = tensor("input_217_cast_fp16")]; tensor var_6239 = const()[name = tensor("op_6239"), val = tensor([1, 1])]; tensor var_6241 = const()[name = tensor("op_6241"), val = tensor([1, 1])]; tensor var_6243_pad_type_0 = const()[name = tensor("op_6243_pad_type_0"), val = tensor("custom")]; tensor var_6243_pad_0 = const()[name = tensor("op_6243_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mid_block_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("mid_block_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(589267904)))]; tensor mid_block_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("mid_block_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(592544768)))]; tensor var_6243_cast_fp16 = conv(bias = mid_block_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_6241, groups = var_5762, pad = var_6243_pad_0, pad_type = var_6243_pad_type_0, strides = var_6239, weight = mid_block_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_217_cast_fp16)[name = tensor("op_6243_cast_fp16")]; tensor inputs_41_cast_fp16 = add(x = var_6243_cast_fp16, y = inputs_39_cast_fp16)[name = tensor("inputs_41_cast_fp16")]; tensor input_219_axes_0 = const()[name = tensor("input_219_axes_0"), val = tensor([1])]; tensor input_219_gamma_0_to_fp16 = const()[name = tensor("input_219_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(592547392)))]; tensor input_219_beta_0_to_fp16 = const()[name = tensor("input_219_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(592550016)))]; tensor var_6253_to_fp16 = const()[name = tensor("op_6253_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_219_cast_fp16 = layer_norm(axes = input_219_axes_0, beta = input_219_beta_0_to_fp16, epsilon = var_6253_to_fp16, gamma = input_219_gamma_0_to_fp16, x = inputs_41_cast_fp16)[name = tensor("input_219_cast_fp16")]; tensor var_6269 = const()[name = tensor("op_6269"), val = tensor([1, 1])]; tensor var_6271 = const()[name = tensor("op_6271"), val = tensor([1, 1])]; tensor var_6273_pad_type_0 = const()[name = tensor("op_6273_pad_type_0"), val = tensor("custom")]; tensor var_6273_pad_0 = const()[name = tensor("op_6273_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mid_block_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("mid_block_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(592552640)))]; tensor mid_block_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("mid_block_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(618767104)))]; tensor var_6273_cast_fp16 = conv(bias = mid_block_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_6271, groups = var_5762, pad = var_6273_pad_0, pad_type = var_6273_pad_type_0, strides = var_6269, weight = mid_block_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_219_cast_fp16)[name = tensor("op_6273_cast_fp16")]; tensor var_6274_split_sizes_0 = const()[name = tensor("op_6274_split_sizes_0"), val = tensor([5120, 5120])]; tensor var_6274_axis_0 = const()[name = tensor("op_6274_axis_0"), val = tensor(1)]; tensor var_6274_cast_fp16_0, tensor var_6274_cast_fp16_1 = split(axis = var_6274_axis_0, split_sizes = var_6274_split_sizes_0, x = var_6273_cast_fp16)[name = tensor("op_6274_cast_fp16")]; tensor var_6276_mode_0 = const()[name = tensor("op_6276_mode_0"), val = tensor("EXACT")]; tensor var_6276_cast_fp16 = gelu(mode = var_6276_mode_0, x = var_6274_cast_fp16_1)[name = tensor("op_6276_cast_fp16")]; tensor input_221_cast_fp16 = mul(x = var_6274_cast_fp16_0, y = var_6276_cast_fp16)[name = tensor("input_221_cast_fp16")]; tensor var_6280 = const()[name = tensor("op_6280"), val = tensor([1, 1])]; tensor var_6282 = const()[name = tensor("op_6282"), val = tensor([1, 1])]; tensor var_6284_pad_type_0 = const()[name = tensor("op_6284_pad_type_0"), val = tensor("custom")]; tensor var_6284_pad_0 = const()[name = tensor("op_6284_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mid_block_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("mid_block_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(618787648)))]; tensor mid_block_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("mid_block_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(631894912)))]; tensor var_6284_cast_fp16 = conv(bias = mid_block_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_6282, groups = var_5762, pad = var_6284_pad_0, pad_type = var_6284_pad_type_0, strides = var_6280, weight = mid_block_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_221_cast_fp16)[name = tensor("op_6284_cast_fp16")]; tensor hidden_states_131_cast_fp16 = add(x = var_6284_cast_fp16, y = inputs_41_cast_fp16)[name = tensor("hidden_states_131_cast_fp16")]; tensor var_6286 = const()[name = tensor("op_6286"), val = tensor([2, 1280, 8, 8])]; tensor input_223_cast_fp16 = reshape(shape = var_6286, x = hidden_states_131_cast_fp16)[name = tensor("input_223_cast_fp16")]; tensor var_6290 = const()[name = tensor("op_6290"), val = tensor([1, 1])]; tensor var_6292 = const()[name = tensor("op_6292"), val = tensor([1, 1])]; tensor hidden_states_133_pad_type_0 = const()[name = tensor("hidden_states_133_pad_type_0"), val = tensor("custom")]; tensor hidden_states_133_pad_0 = const()[name = tensor("hidden_states_133_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mid_block_attentions_0_proj_out_weight_to_fp16 = const()[name = tensor("mid_block_attentions_0_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(631897536)))]; tensor mid_block_attentions_0_proj_out_bias_to_fp16 = const()[name = tensor("mid_block_attentions_0_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(635174400)))]; tensor hidden_states_133_cast_fp16 = conv(bias = mid_block_attentions_0_proj_out_bias_to_fp16, dilations = var_6292, groups = var_5762, pad = hidden_states_133_pad_0, pad_type = hidden_states_133_pad_type_0, strides = var_6290, weight = mid_block_attentions_0_proj_out_weight_to_fp16, x = input_223_cast_fp16)[name = tensor("hidden_states_133_cast_fp16")]; tensor input_225_cast_fp16 = add(x = hidden_states_133_cast_fp16, y = hidden_states_121_cast_fp16)[name = tensor("input_225_cast_fp16")]; tensor reshape_100_shape_0 = const()[name = tensor("reshape_100_shape_0"), val = tensor([2, 32, 40, 8, 8])]; tensor reshape_100_cast_fp16 = reshape(shape = reshape_100_shape_0, x = input_225_cast_fp16)[name = tensor("reshape_100_cast_fp16")]; tensor reduce_mean_75_axes_0 = const()[name = tensor("reduce_mean_75_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_75_keep_dims_0 = const()[name = tensor("reduce_mean_75_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_75_cast_fp16 = reduce_mean(axes = reduce_mean_75_axes_0, keep_dims = reduce_mean_75_keep_dims_0, x = reshape_100_cast_fp16)[name = tensor("reduce_mean_75_cast_fp16")]; tensor sub_50_cast_fp16 = sub(x = reshape_100_cast_fp16, y = reduce_mean_75_cast_fp16)[name = tensor("sub_50_cast_fp16")]; tensor square_25_cast_fp16 = square(x = sub_50_cast_fp16)[name = tensor("square_25_cast_fp16")]; tensor reduce_mean_77_axes_0 = const()[name = tensor("reduce_mean_77_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_77_keep_dims_0 = const()[name = tensor("reduce_mean_77_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_77_cast_fp16 = reduce_mean(axes = reduce_mean_77_axes_0, keep_dims = reduce_mean_77_keep_dims_0, x = square_25_cast_fp16)[name = tensor("reduce_mean_77_cast_fp16")]; tensor add_50_y_0_to_fp16 = const()[name = tensor("add_50_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_50_cast_fp16 = add(x = reduce_mean_77_cast_fp16, y = add_50_y_0_to_fp16)[name = tensor("add_50_cast_fp16")]; tensor sqrt_25_cast_fp16 = sqrt(x = add_50_cast_fp16)[name = tensor("sqrt_25_cast_fp16")]; tensor real_div_25_cast_fp16 = real_div(x = sub_50_cast_fp16, y = sqrt_25_cast_fp16)[name = tensor("real_div_25_cast_fp16")]; tensor reshape_101_shape_0 = const()[name = tensor("reshape_101_shape_0"), val = tensor([2, 1280, 8, 8])]; tensor reshape_101_cast_fp16 = reshape(shape = reshape_101_shape_0, x = real_div_25_cast_fp16)[name = tensor("reshape_101_cast_fp16")]; tensor add_51_gamma_0_to_fp16 = const()[name = tensor("add_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(635177024)))]; tensor add_51_beta_0_to_fp16 = const()[name = tensor("add_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(635179648)))]; tensor add_51_epsilon_0_to_fp16 = const()[name = tensor("add_51_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_51_cast_fp16 = batch_norm(beta = add_51_beta_0_to_fp16, epsilon = add_51_epsilon_0_to_fp16, gamma = add_51_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_101_cast_fp16)[name = tensor("add_51_cast_fp16")]; tensor input_229_cast_fp16 = silu(x = add_51_cast_fp16)[name = tensor("input_229_cast_fp16")]; tensor var_6307 = const()[name = tensor("op_6307"), val = tensor([1, 1])]; tensor var_6309 = const()[name = tensor("op_6309"), val = tensor([1, 1])]; tensor hidden_states_135_pad_type_0 = const()[name = tensor("hidden_states_135_pad_type_0"), val = tensor("custom")]; tensor hidden_states_135_pad_0 = const()[name = tensor("hidden_states_135_pad_0"), val = tensor([1, 1, 1, 1])]; tensor mid_block_resnets_1_conv1_weight_to_fp16 = const()[name = tensor("mid_block_resnets_1_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(635182272)))]; tensor mid_block_resnets_1_conv1_bias_to_fp16 = const()[name = tensor("mid_block_resnets_1_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(664673536)))]; tensor hidden_states_135_cast_fp16 = conv(bias = mid_block_resnets_1_conv1_bias_to_fp16, dilations = var_6309, groups = var_5762, pad = hidden_states_135_pad_0, pad_type = hidden_states_135_pad_type_0, strides = var_6307, weight = mid_block_resnets_1_conv1_weight_to_fp16, x = input_229_cast_fp16)[name = tensor("hidden_states_135_cast_fp16")]; tensor var_6315 = const()[name = tensor("op_6315"), val = tensor([1, 1])]; tensor var_6317 = const()[name = tensor("op_6317"), val = tensor([1, 1])]; tensor temb_19_pad_type_0 = const()[name = tensor("temb_19_pad_type_0"), val = tensor("custom")]; tensor temb_19_pad_0 = const()[name = tensor("temb_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor mid_block_resnets_1_time_emb_proj_weight_to_fp16 = const()[name = tensor("mid_block_resnets_1_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(664676160)))]; tensor mid_block_resnets_1_time_emb_proj_bias_to_fp16 = const()[name = tensor("mid_block_resnets_1_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(667953024)))]; tensor temb_19_cast_fp16 = conv(bias = mid_block_resnets_1_time_emb_proj_bias_to_fp16, dilations = var_6317, groups = var_5762, pad = temb_19_pad_0, pad_type = temb_19_pad_type_0, strides = var_6315, weight = mid_block_resnets_1_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_19_cast_fp16")]; tensor input_233_cast_fp16 = add(x = hidden_states_135_cast_fp16, y = temb_19_cast_fp16)[name = tensor("input_233_cast_fp16")]; tensor reshape_104_shape_0 = const()[name = tensor("reshape_104_shape_0"), val = tensor([2, 32, 40, 8, 8])]; tensor reshape_104_cast_fp16 = reshape(shape = reshape_104_shape_0, x = input_233_cast_fp16)[name = tensor("reshape_104_cast_fp16")]; tensor reduce_mean_78_axes_0 = const()[name = tensor("reduce_mean_78_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_78_keep_dims_0 = const()[name = tensor("reduce_mean_78_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_78_cast_fp16 = reduce_mean(axes = reduce_mean_78_axes_0, keep_dims = reduce_mean_78_keep_dims_0, x = reshape_104_cast_fp16)[name = tensor("reduce_mean_78_cast_fp16")]; tensor sub_52_cast_fp16 = sub(x = reshape_104_cast_fp16, y = reduce_mean_78_cast_fp16)[name = tensor("sub_52_cast_fp16")]; tensor square_26_cast_fp16 = square(x = sub_52_cast_fp16)[name = tensor("square_26_cast_fp16")]; tensor reduce_mean_80_axes_0 = const()[name = tensor("reduce_mean_80_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_80_keep_dims_0 = const()[name = tensor("reduce_mean_80_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_80_cast_fp16 = reduce_mean(axes = reduce_mean_80_axes_0, keep_dims = reduce_mean_80_keep_dims_0, x = square_26_cast_fp16)[name = tensor("reduce_mean_80_cast_fp16")]; tensor add_52_y_0_to_fp16 = const()[name = tensor("add_52_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_52_cast_fp16 = add(x = reduce_mean_80_cast_fp16, y = add_52_y_0_to_fp16)[name = tensor("add_52_cast_fp16")]; tensor sqrt_26_cast_fp16 = sqrt(x = add_52_cast_fp16)[name = tensor("sqrt_26_cast_fp16")]; tensor real_div_26_cast_fp16 = real_div(x = sub_52_cast_fp16, y = sqrt_26_cast_fp16)[name = tensor("real_div_26_cast_fp16")]; tensor reshape_105_shape_0 = const()[name = tensor("reshape_105_shape_0"), val = tensor([2, 1280, 8, 8])]; tensor reshape_105_cast_fp16 = reshape(shape = reshape_105_shape_0, x = real_div_26_cast_fp16)[name = tensor("reshape_105_cast_fp16")]; tensor add_53_gamma_0_to_fp16 = const()[name = tensor("add_53_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(667955648)))]; tensor add_53_beta_0_to_fp16 = const()[name = tensor("add_53_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(667958272)))]; tensor add_53_epsilon_0_to_fp16 = const()[name = tensor("add_53_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_53_cast_fp16 = batch_norm(beta = add_53_beta_0_to_fp16, epsilon = add_53_epsilon_0_to_fp16, gamma = add_53_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_105_cast_fp16)[name = tensor("add_53_cast_fp16")]; tensor input_237_cast_fp16 = silu(x = add_53_cast_fp16)[name = tensor("input_237_cast_fp16")]; tensor var_6327 = const()[name = tensor("op_6327"), val = tensor([1, 1])]; tensor var_6329 = const()[name = tensor("op_6329"), val = tensor([1, 1])]; tensor hidden_states_137_pad_type_0 = const()[name = tensor("hidden_states_137_pad_type_0"), val = tensor("custom")]; tensor hidden_states_137_pad_0 = const()[name = tensor("hidden_states_137_pad_0"), val = tensor([1, 1, 1, 1])]; tensor mid_block_resnets_1_conv2_weight_to_fp16 = const()[name = tensor("mid_block_resnets_1_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(667960896)))]; tensor mid_block_resnets_1_conv2_bias_to_fp16 = const()[name = tensor("mid_block_resnets_1_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(697452160)))]; tensor hidden_states_137_cast_fp16 = conv(bias = mid_block_resnets_1_conv2_bias_to_fp16, dilations = var_6329, groups = var_5762, pad = hidden_states_137_pad_0, pad_type = hidden_states_137_pad_type_0, strides = var_6327, weight = mid_block_resnets_1_conv2_weight_to_fp16, x = input_237_cast_fp16)[name = tensor("hidden_states_137_cast_fp16")]; tensor hidden_states_139_cast_fp16 = add(x = input_225_cast_fp16, y = hidden_states_137_cast_fp16)[name = tensor("hidden_states_139_cast_fp16")]; tensor var_6340 = const()[name = tensor("op_6340"), val = tensor(1)]; tensor input_239_interleave_0 = const()[name = tensor("input_239_interleave_0"), val = tensor(false)]; tensor input_239_cast_fp16 = concat(axis = var_6340, interleave = input_239_interleave_0, values = (hidden_states_139_cast_fp16, input_199_cast_fp16))[name = tensor("input_239_cast_fp16")]; tensor reshape_108_shape_0 = const()[name = tensor("reshape_108_shape_0"), val = tensor([2, 32, 80, 8, 8])]; tensor reshape_108_cast_fp16 = reshape(shape = reshape_108_shape_0, x = input_239_cast_fp16)[name = tensor("reshape_108_cast_fp16")]; tensor reduce_mean_81_axes_0 = const()[name = tensor("reduce_mean_81_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_81_keep_dims_0 = const()[name = tensor("reduce_mean_81_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_81_cast_fp16 = reduce_mean(axes = reduce_mean_81_axes_0, keep_dims = reduce_mean_81_keep_dims_0, x = reshape_108_cast_fp16)[name = tensor("reduce_mean_81_cast_fp16")]; tensor sub_54_cast_fp16 = sub(x = reshape_108_cast_fp16, y = reduce_mean_81_cast_fp16)[name = tensor("sub_54_cast_fp16")]; tensor square_27_cast_fp16 = square(x = sub_54_cast_fp16)[name = tensor("square_27_cast_fp16")]; tensor reduce_mean_83_axes_0 = const()[name = tensor("reduce_mean_83_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_83_keep_dims_0 = const()[name = tensor("reduce_mean_83_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_83_cast_fp16 = reduce_mean(axes = reduce_mean_83_axes_0, keep_dims = reduce_mean_83_keep_dims_0, x = square_27_cast_fp16)[name = tensor("reduce_mean_83_cast_fp16")]; tensor add_54_y_0_to_fp16 = const()[name = tensor("add_54_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_54_cast_fp16 = add(x = reduce_mean_83_cast_fp16, y = add_54_y_0_to_fp16)[name = tensor("add_54_cast_fp16")]; tensor sqrt_27_cast_fp16 = sqrt(x = add_54_cast_fp16)[name = tensor("sqrt_27_cast_fp16")]; tensor real_div_27_cast_fp16 = real_div(x = sub_54_cast_fp16, y = sqrt_27_cast_fp16)[name = tensor("real_div_27_cast_fp16")]; tensor reshape_109_shape_0 = const()[name = tensor("reshape_109_shape_0"), val = tensor([2, 2560, 8, 8])]; tensor reshape_109_cast_fp16 = reshape(shape = reshape_109_shape_0, x = real_div_27_cast_fp16)[name = tensor("reshape_109_cast_fp16")]; tensor add_55_mean_0_to_fp16 = const()[name = tensor("add_55_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(697454784)))]; tensor add_55_variance_0_to_fp16 = const()[name = tensor("add_55_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(697459968)))]; tensor add_55_gamma_0_to_fp16 = const()[name = tensor("add_55_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(697465152)))]; tensor add_55_beta_0_to_fp16 = const()[name = tensor("add_55_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(697470336)))]; tensor add_55_epsilon_0_to_fp16 = const()[name = tensor("add_55_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_55_cast_fp16 = batch_norm(beta = add_55_beta_0_to_fp16, epsilon = add_55_epsilon_0_to_fp16, gamma = add_55_gamma_0_to_fp16, mean = add_55_mean_0_to_fp16, variance = add_55_variance_0_to_fp16, x = reshape_109_cast_fp16)[name = tensor("add_55_cast_fp16")]; tensor input_243_cast_fp16 = silu(x = add_55_cast_fp16)[name = tensor("input_243_cast_fp16")]; tensor var_6363 = const()[name = tensor("op_6363"), val = tensor([1, 1])]; tensor var_6365 = const()[name = tensor("op_6365"), val = tensor([1, 1])]; tensor hidden_states_141_pad_type_0 = const()[name = tensor("hidden_states_141_pad_type_0"), val = tensor("custom")]; tensor hidden_states_141_pad_0 = const()[name = tensor("hidden_states_141_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_0_resnets_0_conv1_weight_to_fp16 = const()[name = tensor("up_blocks_0_resnets_0_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(697475520)))]; tensor up_blocks_0_resnets_0_conv1_bias_to_fp16 = const()[name = tensor("up_blocks_0_resnets_0_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(756457984)))]; tensor hidden_states_141_cast_fp16 = conv(bias = up_blocks_0_resnets_0_conv1_bias_to_fp16, dilations = var_6365, groups = var_6340, pad = hidden_states_141_pad_0, pad_type = hidden_states_141_pad_type_0, strides = var_6363, weight = up_blocks_0_resnets_0_conv1_weight_to_fp16, x = input_243_cast_fp16)[name = tensor("hidden_states_141_cast_fp16")]; tensor var_6371 = const()[name = tensor("op_6371"), val = tensor([1, 1])]; tensor var_6373 = const()[name = tensor("op_6373"), val = tensor([1, 1])]; tensor temb_21_pad_type_0 = const()[name = tensor("temb_21_pad_type_0"), val = tensor("custom")]; tensor temb_21_pad_0 = const()[name = tensor("temb_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_0_resnets_0_time_emb_proj_weight_to_fp16 = const()[name = tensor("up_blocks_0_resnets_0_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(756460608)))]; tensor up_blocks_0_resnets_0_time_emb_proj_bias_to_fp16 = const()[name = tensor("up_blocks_0_resnets_0_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(759737472)))]; tensor temb_21_cast_fp16 = conv(bias = up_blocks_0_resnets_0_time_emb_proj_bias_to_fp16, dilations = var_6373, groups = var_6340, pad = temb_21_pad_0, pad_type = temb_21_pad_type_0, strides = var_6371, weight = up_blocks_0_resnets_0_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_21_cast_fp16")]; tensor input_247_cast_fp16 = add(x = hidden_states_141_cast_fp16, y = temb_21_cast_fp16)[name = tensor("input_247_cast_fp16")]; tensor reshape_112_shape_0 = const()[name = tensor("reshape_112_shape_0"), val = tensor([2, 32, 40, 8, 8])]; tensor reshape_112_cast_fp16 = reshape(shape = reshape_112_shape_0, x = input_247_cast_fp16)[name = tensor("reshape_112_cast_fp16")]; tensor reduce_mean_84_axes_0 = const()[name = tensor("reduce_mean_84_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_84_keep_dims_0 = const()[name = tensor("reduce_mean_84_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_84_cast_fp16 = reduce_mean(axes = reduce_mean_84_axes_0, keep_dims = reduce_mean_84_keep_dims_0, x = reshape_112_cast_fp16)[name = tensor("reduce_mean_84_cast_fp16")]; tensor sub_56_cast_fp16 = sub(x = reshape_112_cast_fp16, y = reduce_mean_84_cast_fp16)[name = tensor("sub_56_cast_fp16")]; tensor square_28_cast_fp16 = square(x = sub_56_cast_fp16)[name = tensor("square_28_cast_fp16")]; tensor reduce_mean_86_axes_0 = const()[name = tensor("reduce_mean_86_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_86_keep_dims_0 = const()[name = tensor("reduce_mean_86_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_86_cast_fp16 = reduce_mean(axes = reduce_mean_86_axes_0, keep_dims = reduce_mean_86_keep_dims_0, x = square_28_cast_fp16)[name = tensor("reduce_mean_86_cast_fp16")]; tensor add_56_y_0_to_fp16 = const()[name = tensor("add_56_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_56_cast_fp16 = add(x = reduce_mean_86_cast_fp16, y = add_56_y_0_to_fp16)[name = tensor("add_56_cast_fp16")]; tensor sqrt_28_cast_fp16 = sqrt(x = add_56_cast_fp16)[name = tensor("sqrt_28_cast_fp16")]; tensor real_div_28_cast_fp16 = real_div(x = sub_56_cast_fp16, y = sqrt_28_cast_fp16)[name = tensor("real_div_28_cast_fp16")]; tensor reshape_113_shape_0 = const()[name = tensor("reshape_113_shape_0"), val = tensor([2, 1280, 8, 8])]; tensor reshape_113_cast_fp16 = reshape(shape = reshape_113_shape_0, x = real_div_28_cast_fp16)[name = tensor("reshape_113_cast_fp16")]; tensor add_57_gamma_0_to_fp16 = const()[name = tensor("add_57_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(759740096)))]; tensor add_57_beta_0_to_fp16 = const()[name = tensor("add_57_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(759742720)))]; tensor add_57_epsilon_0_to_fp16 = const()[name = tensor("add_57_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_57_cast_fp16 = batch_norm(beta = add_57_beta_0_to_fp16, epsilon = add_57_epsilon_0_to_fp16, gamma = add_57_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_113_cast_fp16)[name = tensor("add_57_cast_fp16")]; tensor input_251_cast_fp16 = silu(x = add_57_cast_fp16)[name = tensor("input_251_cast_fp16")]; tensor var_6383 = const()[name = tensor("op_6383"), val = tensor([1, 1])]; tensor var_6385 = const()[name = tensor("op_6385"), val = tensor([1, 1])]; tensor hidden_states_143_pad_type_0 = const()[name = tensor("hidden_states_143_pad_type_0"), val = tensor("custom")]; tensor hidden_states_143_pad_0 = const()[name = tensor("hidden_states_143_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_0_resnets_0_conv2_weight_to_fp16 = const()[name = tensor("up_blocks_0_resnets_0_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(759745344)))]; tensor up_blocks_0_resnets_0_conv2_bias_to_fp16 = const()[name = tensor("up_blocks_0_resnets_0_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(789236608)))]; tensor hidden_states_143_cast_fp16 = conv(bias = up_blocks_0_resnets_0_conv2_bias_to_fp16, dilations = var_6385, groups = var_6340, pad = hidden_states_143_pad_0, pad_type = hidden_states_143_pad_type_0, strides = var_6383, weight = up_blocks_0_resnets_0_conv2_weight_to_fp16, x = input_251_cast_fp16)[name = tensor("hidden_states_143_cast_fp16")]; tensor var_6390 = const()[name = tensor("op_6390"), val = tensor([1, 1])]; tensor var_6392 = const()[name = tensor("op_6392"), val = tensor([1, 1])]; tensor x_5_pad_type_0 = const()[name = tensor("x_5_pad_type_0"), val = tensor("custom")]; tensor x_5_pad_0 = const()[name = tensor("x_5_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_0_resnets_0_conv_shortcut_weight_to_fp16 = const()[name = tensor("up_blocks_0_resnets_0_conv_shortcut_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(789239232)))]; tensor up_blocks_0_resnets_0_conv_shortcut_bias_to_fp16 = const()[name = tensor("up_blocks_0_resnets_0_conv_shortcut_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(795792896)))]; tensor x_5_cast_fp16 = conv(bias = up_blocks_0_resnets_0_conv_shortcut_bias_to_fp16, dilations = var_6392, groups = var_6340, pad = x_5_pad_0, pad_type = x_5_pad_type_0, strides = var_6390, weight = up_blocks_0_resnets_0_conv_shortcut_weight_to_fp16, x = input_239_cast_fp16)[name = tensor("x_5_cast_fp16")]; tensor hidden_states_145_cast_fp16 = add(x = x_5_cast_fp16, y = hidden_states_143_cast_fp16)[name = tensor("hidden_states_145_cast_fp16")]; tensor input_253_interleave_0 = const()[name = tensor("input_253_interleave_0"), val = tensor(false)]; tensor input_253_cast_fp16 = concat(axis = var_6340, interleave = input_253_interleave_0, values = (hidden_states_145_cast_fp16, input_185_cast_fp16))[name = tensor("input_253_cast_fp16")]; tensor reshape_116_shape_0 = const()[name = tensor("reshape_116_shape_0"), val = tensor([2, 32, 80, 8, 8])]; tensor reshape_116_cast_fp16 = reshape(shape = reshape_116_shape_0, x = input_253_cast_fp16)[name = tensor("reshape_116_cast_fp16")]; tensor reduce_mean_87_axes_0 = const()[name = tensor("reduce_mean_87_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_87_keep_dims_0 = const()[name = tensor("reduce_mean_87_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_87_cast_fp16 = reduce_mean(axes = reduce_mean_87_axes_0, keep_dims = reduce_mean_87_keep_dims_0, x = reshape_116_cast_fp16)[name = tensor("reduce_mean_87_cast_fp16")]; tensor sub_58_cast_fp16 = sub(x = reshape_116_cast_fp16, y = reduce_mean_87_cast_fp16)[name = tensor("sub_58_cast_fp16")]; tensor square_29_cast_fp16 = square(x = sub_58_cast_fp16)[name = tensor("square_29_cast_fp16")]; tensor reduce_mean_89_axes_0 = const()[name = tensor("reduce_mean_89_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_89_keep_dims_0 = const()[name = tensor("reduce_mean_89_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_89_cast_fp16 = reduce_mean(axes = reduce_mean_89_axes_0, keep_dims = reduce_mean_89_keep_dims_0, x = square_29_cast_fp16)[name = tensor("reduce_mean_89_cast_fp16")]; tensor add_58_y_0_to_fp16 = const()[name = tensor("add_58_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_58_cast_fp16 = add(x = reduce_mean_89_cast_fp16, y = add_58_y_0_to_fp16)[name = tensor("add_58_cast_fp16")]; tensor sqrt_29_cast_fp16 = sqrt(x = add_58_cast_fp16)[name = tensor("sqrt_29_cast_fp16")]; tensor real_div_29_cast_fp16 = real_div(x = sub_58_cast_fp16, y = sqrt_29_cast_fp16)[name = tensor("real_div_29_cast_fp16")]; tensor reshape_117_shape_0 = const()[name = tensor("reshape_117_shape_0"), val = tensor([2, 2560, 8, 8])]; tensor reshape_117_cast_fp16 = reshape(shape = reshape_117_shape_0, x = real_div_29_cast_fp16)[name = tensor("reshape_117_cast_fp16")]; tensor add_59_gamma_0_to_fp16 = const()[name = tensor("add_59_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(795795520)))]; tensor add_59_beta_0_to_fp16 = const()[name = tensor("add_59_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(795800704)))]; tensor add_59_epsilon_0_to_fp16 = const()[name = tensor("add_59_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_59_cast_fp16 = batch_norm(beta = add_59_beta_0_to_fp16, epsilon = add_59_epsilon_0_to_fp16, gamma = add_59_gamma_0_to_fp16, mean = add_55_mean_0_to_fp16, variance = add_55_variance_0_to_fp16, x = reshape_117_cast_fp16)[name = tensor("add_59_cast_fp16")]; tensor input_257_cast_fp16 = silu(x = add_59_cast_fp16)[name = tensor("input_257_cast_fp16")]; tensor var_6410 = const()[name = tensor("op_6410"), val = tensor([1, 1])]; tensor var_6412 = const()[name = tensor("op_6412"), val = tensor([1, 1])]; tensor hidden_states_147_pad_type_0 = const()[name = tensor("hidden_states_147_pad_type_0"), val = tensor("custom")]; tensor hidden_states_147_pad_0 = const()[name = tensor("hidden_states_147_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_0_resnets_1_conv1_weight_to_fp16 = const()[name = tensor("up_blocks_0_resnets_1_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(795805888)))]; tensor up_blocks_0_resnets_1_conv1_bias_to_fp16 = const()[name = tensor("up_blocks_0_resnets_1_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(854788352)))]; tensor hidden_states_147_cast_fp16 = conv(bias = up_blocks_0_resnets_1_conv1_bias_to_fp16, dilations = var_6412, groups = var_6340, pad = hidden_states_147_pad_0, pad_type = hidden_states_147_pad_type_0, strides = var_6410, weight = up_blocks_0_resnets_1_conv1_weight_to_fp16, x = input_257_cast_fp16)[name = tensor("hidden_states_147_cast_fp16")]; tensor var_6418 = const()[name = tensor("op_6418"), val = tensor([1, 1])]; tensor var_6420 = const()[name = tensor("op_6420"), val = tensor([1, 1])]; tensor temb_23_pad_type_0 = const()[name = tensor("temb_23_pad_type_0"), val = tensor("custom")]; tensor temb_23_pad_0 = const()[name = tensor("temb_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_0_resnets_1_time_emb_proj_weight_to_fp16 = const()[name = tensor("up_blocks_0_resnets_1_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(854790976)))]; tensor up_blocks_0_resnets_1_time_emb_proj_bias_to_fp16 = const()[name = tensor("up_blocks_0_resnets_1_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(858067840)))]; tensor temb_23_cast_fp16 = conv(bias = up_blocks_0_resnets_1_time_emb_proj_bias_to_fp16, dilations = var_6420, groups = var_6340, pad = temb_23_pad_0, pad_type = temb_23_pad_type_0, strides = var_6418, weight = up_blocks_0_resnets_1_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_23_cast_fp16")]; tensor input_261_cast_fp16 = add(x = hidden_states_147_cast_fp16, y = temb_23_cast_fp16)[name = tensor("input_261_cast_fp16")]; tensor reshape_120_shape_0 = const()[name = tensor("reshape_120_shape_0"), val = tensor([2, 32, 40, 8, 8])]; tensor reshape_120_cast_fp16 = reshape(shape = reshape_120_shape_0, x = input_261_cast_fp16)[name = tensor("reshape_120_cast_fp16")]; tensor reduce_mean_90_axes_0 = const()[name = tensor("reduce_mean_90_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_90_keep_dims_0 = const()[name = tensor("reduce_mean_90_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_90_cast_fp16 = reduce_mean(axes = reduce_mean_90_axes_0, keep_dims = reduce_mean_90_keep_dims_0, x = reshape_120_cast_fp16)[name = tensor("reduce_mean_90_cast_fp16")]; tensor sub_60_cast_fp16 = sub(x = reshape_120_cast_fp16, y = reduce_mean_90_cast_fp16)[name = tensor("sub_60_cast_fp16")]; tensor square_30_cast_fp16 = square(x = sub_60_cast_fp16)[name = tensor("square_30_cast_fp16")]; tensor reduce_mean_92_axes_0 = const()[name = tensor("reduce_mean_92_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_92_keep_dims_0 = const()[name = tensor("reduce_mean_92_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_92_cast_fp16 = reduce_mean(axes = reduce_mean_92_axes_0, keep_dims = reduce_mean_92_keep_dims_0, x = square_30_cast_fp16)[name = tensor("reduce_mean_92_cast_fp16")]; tensor add_60_y_0_to_fp16 = const()[name = tensor("add_60_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_60_cast_fp16 = add(x = reduce_mean_92_cast_fp16, y = add_60_y_0_to_fp16)[name = tensor("add_60_cast_fp16")]; tensor sqrt_30_cast_fp16 = sqrt(x = add_60_cast_fp16)[name = tensor("sqrt_30_cast_fp16")]; tensor real_div_30_cast_fp16 = real_div(x = sub_60_cast_fp16, y = sqrt_30_cast_fp16)[name = tensor("real_div_30_cast_fp16")]; tensor reshape_121_shape_0 = const()[name = tensor("reshape_121_shape_0"), val = tensor([2, 1280, 8, 8])]; tensor reshape_121_cast_fp16 = reshape(shape = reshape_121_shape_0, x = real_div_30_cast_fp16)[name = tensor("reshape_121_cast_fp16")]; tensor add_61_gamma_0_to_fp16 = const()[name = tensor("add_61_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(858070464)))]; tensor add_61_beta_0_to_fp16 = const()[name = tensor("add_61_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(858073088)))]; tensor add_61_epsilon_0_to_fp16 = const()[name = tensor("add_61_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_61_cast_fp16 = batch_norm(beta = add_61_beta_0_to_fp16, epsilon = add_61_epsilon_0_to_fp16, gamma = add_61_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_121_cast_fp16)[name = tensor("add_61_cast_fp16")]; tensor input_265_cast_fp16 = silu(x = add_61_cast_fp16)[name = tensor("input_265_cast_fp16")]; tensor var_6430 = const()[name = tensor("op_6430"), val = tensor([1, 1])]; tensor var_6432 = const()[name = tensor("op_6432"), val = tensor([1, 1])]; tensor hidden_states_149_pad_type_0 = const()[name = tensor("hidden_states_149_pad_type_0"), val = tensor("custom")]; tensor hidden_states_149_pad_0 = const()[name = tensor("hidden_states_149_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_0_resnets_1_conv2_weight_to_fp16 = const()[name = tensor("up_blocks_0_resnets_1_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(858075712)))]; tensor up_blocks_0_resnets_1_conv2_bias_to_fp16 = const()[name = tensor("up_blocks_0_resnets_1_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(887566976)))]; tensor hidden_states_149_cast_fp16 = conv(bias = up_blocks_0_resnets_1_conv2_bias_to_fp16, dilations = var_6432, groups = var_6340, pad = hidden_states_149_pad_0, pad_type = hidden_states_149_pad_type_0, strides = var_6430, weight = up_blocks_0_resnets_1_conv2_weight_to_fp16, x = input_265_cast_fp16)[name = tensor("hidden_states_149_cast_fp16")]; tensor var_6437 = const()[name = tensor("op_6437"), val = tensor([1, 1])]; tensor var_6439 = const()[name = tensor("op_6439"), val = tensor([1, 1])]; tensor x_7_pad_type_0 = const()[name = tensor("x_7_pad_type_0"), val = tensor("custom")]; tensor x_7_pad_0 = const()[name = tensor("x_7_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_0_resnets_1_conv_shortcut_weight_to_fp16 = const()[name = tensor("up_blocks_0_resnets_1_conv_shortcut_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(887569600)))]; tensor up_blocks_0_resnets_1_conv_shortcut_bias_to_fp16 = const()[name = tensor("up_blocks_0_resnets_1_conv_shortcut_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(894123264)))]; tensor x_7_cast_fp16 = conv(bias = up_blocks_0_resnets_1_conv_shortcut_bias_to_fp16, dilations = var_6439, groups = var_6340, pad = x_7_pad_0, pad_type = x_7_pad_type_0, strides = var_6437, weight = up_blocks_0_resnets_1_conv_shortcut_weight_to_fp16, x = input_253_cast_fp16)[name = tensor("x_7_cast_fp16")]; tensor hidden_states_151_cast_fp16 = add(x = x_7_cast_fp16, y = hidden_states_149_cast_fp16)[name = tensor("hidden_states_151_cast_fp16")]; tensor input_267_interleave_0 = const()[name = tensor("input_267_interleave_0"), val = tensor(false)]; tensor input_267_cast_fp16 = concat(axis = var_6340, interleave = input_267_interleave_0, values = (hidden_states_151_cast_fp16, input_171_cast_fp16))[name = tensor("input_267_cast_fp16")]; tensor reshape_124_shape_0 = const()[name = tensor("reshape_124_shape_0"), val = tensor([2, 32, 80, 8, 8])]; tensor reshape_124_cast_fp16 = reshape(shape = reshape_124_shape_0, x = input_267_cast_fp16)[name = tensor("reshape_124_cast_fp16")]; tensor reduce_mean_93_axes_0 = const()[name = tensor("reduce_mean_93_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_93_keep_dims_0 = const()[name = tensor("reduce_mean_93_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_93_cast_fp16 = reduce_mean(axes = reduce_mean_93_axes_0, keep_dims = reduce_mean_93_keep_dims_0, x = reshape_124_cast_fp16)[name = tensor("reduce_mean_93_cast_fp16")]; tensor sub_62_cast_fp16 = sub(x = reshape_124_cast_fp16, y = reduce_mean_93_cast_fp16)[name = tensor("sub_62_cast_fp16")]; tensor square_31_cast_fp16 = square(x = sub_62_cast_fp16)[name = tensor("square_31_cast_fp16")]; tensor reduce_mean_95_axes_0 = const()[name = tensor("reduce_mean_95_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_95_keep_dims_0 = const()[name = tensor("reduce_mean_95_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_95_cast_fp16 = reduce_mean(axes = reduce_mean_95_axes_0, keep_dims = reduce_mean_95_keep_dims_0, x = square_31_cast_fp16)[name = tensor("reduce_mean_95_cast_fp16")]; tensor add_62_y_0_to_fp16 = const()[name = tensor("add_62_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_62_cast_fp16 = add(x = reduce_mean_95_cast_fp16, y = add_62_y_0_to_fp16)[name = tensor("add_62_cast_fp16")]; tensor sqrt_31_cast_fp16 = sqrt(x = add_62_cast_fp16)[name = tensor("sqrt_31_cast_fp16")]; tensor real_div_31_cast_fp16 = real_div(x = sub_62_cast_fp16, y = sqrt_31_cast_fp16)[name = tensor("real_div_31_cast_fp16")]; tensor reshape_125_shape_0 = const()[name = tensor("reshape_125_shape_0"), val = tensor([2, 2560, 8, 8])]; tensor reshape_125_cast_fp16 = reshape(shape = reshape_125_shape_0, x = real_div_31_cast_fp16)[name = tensor("reshape_125_cast_fp16")]; tensor add_63_gamma_0_to_fp16 = const()[name = tensor("add_63_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(894125888)))]; tensor add_63_beta_0_to_fp16 = const()[name = tensor("add_63_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(894131072)))]; tensor add_63_epsilon_0_to_fp16 = const()[name = tensor("add_63_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_63_cast_fp16 = batch_norm(beta = add_63_beta_0_to_fp16, epsilon = add_63_epsilon_0_to_fp16, gamma = add_63_gamma_0_to_fp16, mean = add_55_mean_0_to_fp16, variance = add_55_variance_0_to_fp16, x = reshape_125_cast_fp16)[name = tensor("add_63_cast_fp16")]; tensor input_271_cast_fp16 = silu(x = add_63_cast_fp16)[name = tensor("input_271_cast_fp16")]; tensor var_6457 = const()[name = tensor("op_6457"), val = tensor([1, 1])]; tensor var_6459 = const()[name = tensor("op_6459"), val = tensor([1, 1])]; tensor hidden_states_153_pad_type_0 = const()[name = tensor("hidden_states_153_pad_type_0"), val = tensor("custom")]; tensor hidden_states_153_pad_0 = const()[name = tensor("hidden_states_153_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_0_resnets_2_conv1_weight_to_fp16 = const()[name = tensor("up_blocks_0_resnets_2_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(894136256)))]; tensor up_blocks_0_resnets_2_conv1_bias_to_fp16 = const()[name = tensor("up_blocks_0_resnets_2_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(953118720)))]; tensor hidden_states_153_cast_fp16 = conv(bias = up_blocks_0_resnets_2_conv1_bias_to_fp16, dilations = var_6459, groups = var_6340, pad = hidden_states_153_pad_0, pad_type = hidden_states_153_pad_type_0, strides = var_6457, weight = up_blocks_0_resnets_2_conv1_weight_to_fp16, x = input_271_cast_fp16)[name = tensor("hidden_states_153_cast_fp16")]; tensor var_6465 = const()[name = tensor("op_6465"), val = tensor([1, 1])]; tensor var_6467 = const()[name = tensor("op_6467"), val = tensor([1, 1])]; tensor temb_25_pad_type_0 = const()[name = tensor("temb_25_pad_type_0"), val = tensor("custom")]; tensor temb_25_pad_0 = const()[name = tensor("temb_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_0_resnets_2_time_emb_proj_weight_to_fp16 = const()[name = tensor("up_blocks_0_resnets_2_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(953121344)))]; tensor up_blocks_0_resnets_2_time_emb_proj_bias_to_fp16 = const()[name = tensor("up_blocks_0_resnets_2_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(956398208)))]; tensor temb_25_cast_fp16 = conv(bias = up_blocks_0_resnets_2_time_emb_proj_bias_to_fp16, dilations = var_6467, groups = var_6340, pad = temb_25_pad_0, pad_type = temb_25_pad_type_0, strides = var_6465, weight = up_blocks_0_resnets_2_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_25_cast_fp16")]; tensor input_275_cast_fp16 = add(x = hidden_states_153_cast_fp16, y = temb_25_cast_fp16)[name = tensor("input_275_cast_fp16")]; tensor reshape_128_shape_0 = const()[name = tensor("reshape_128_shape_0"), val = tensor([2, 32, 40, 8, 8])]; tensor reshape_128_cast_fp16 = reshape(shape = reshape_128_shape_0, x = input_275_cast_fp16)[name = tensor("reshape_128_cast_fp16")]; tensor reduce_mean_96_axes_0 = const()[name = tensor("reduce_mean_96_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_96_keep_dims_0 = const()[name = tensor("reduce_mean_96_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_96_cast_fp16 = reduce_mean(axes = reduce_mean_96_axes_0, keep_dims = reduce_mean_96_keep_dims_0, x = reshape_128_cast_fp16)[name = tensor("reduce_mean_96_cast_fp16")]; tensor sub_64_cast_fp16 = sub(x = reshape_128_cast_fp16, y = reduce_mean_96_cast_fp16)[name = tensor("sub_64_cast_fp16")]; tensor square_32_cast_fp16 = square(x = sub_64_cast_fp16)[name = tensor("square_32_cast_fp16")]; tensor reduce_mean_98_axes_0 = const()[name = tensor("reduce_mean_98_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_98_keep_dims_0 = const()[name = tensor("reduce_mean_98_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_98_cast_fp16 = reduce_mean(axes = reduce_mean_98_axes_0, keep_dims = reduce_mean_98_keep_dims_0, x = square_32_cast_fp16)[name = tensor("reduce_mean_98_cast_fp16")]; tensor add_64_y_0_to_fp16 = const()[name = tensor("add_64_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_64_cast_fp16 = add(x = reduce_mean_98_cast_fp16, y = add_64_y_0_to_fp16)[name = tensor("add_64_cast_fp16")]; tensor sqrt_32_cast_fp16 = sqrt(x = add_64_cast_fp16)[name = tensor("sqrt_32_cast_fp16")]; tensor real_div_32_cast_fp16 = real_div(x = sub_64_cast_fp16, y = sqrt_32_cast_fp16)[name = tensor("real_div_32_cast_fp16")]; tensor reshape_129_shape_0 = const()[name = tensor("reshape_129_shape_0"), val = tensor([2, 1280, 8, 8])]; tensor reshape_129_cast_fp16 = reshape(shape = reshape_129_shape_0, x = real_div_32_cast_fp16)[name = tensor("reshape_129_cast_fp16")]; tensor add_65_gamma_0_to_fp16 = const()[name = tensor("add_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(956400832)))]; tensor add_65_beta_0_to_fp16 = const()[name = tensor("add_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(956403456)))]; tensor add_65_epsilon_0_to_fp16 = const()[name = tensor("add_65_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_65_cast_fp16 = batch_norm(beta = add_65_beta_0_to_fp16, epsilon = add_65_epsilon_0_to_fp16, gamma = add_65_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_129_cast_fp16)[name = tensor("add_65_cast_fp16")]; tensor input_279_cast_fp16 = silu(x = add_65_cast_fp16)[name = tensor("input_279_cast_fp16")]; tensor var_6477 = const()[name = tensor("op_6477"), val = tensor([1, 1])]; tensor var_6479 = const()[name = tensor("op_6479"), val = tensor([1, 1])]; tensor hidden_states_155_pad_type_0 = const()[name = tensor("hidden_states_155_pad_type_0"), val = tensor("custom")]; tensor hidden_states_155_pad_0 = const()[name = tensor("hidden_states_155_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_0_resnets_2_conv2_weight_to_fp16 = const()[name = tensor("up_blocks_0_resnets_2_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(956406080)))]; tensor up_blocks_0_resnets_2_conv2_bias_to_fp16 = const()[name = tensor("up_blocks_0_resnets_2_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(985897344)))]; tensor hidden_states_155_cast_fp16 = conv(bias = up_blocks_0_resnets_2_conv2_bias_to_fp16, dilations = var_6479, groups = var_6340, pad = hidden_states_155_pad_0, pad_type = hidden_states_155_pad_type_0, strides = var_6477, weight = up_blocks_0_resnets_2_conv2_weight_to_fp16, x = input_279_cast_fp16)[name = tensor("hidden_states_155_cast_fp16")]; tensor var_6484 = const()[name = tensor("op_6484"), val = tensor([1, 1])]; tensor var_6486 = const()[name = tensor("op_6486"), val = tensor([1, 1])]; tensor x_9_pad_type_0 = const()[name = tensor("x_9_pad_type_0"), val = tensor("custom")]; tensor x_9_pad_0 = const()[name = tensor("x_9_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_0_resnets_2_conv_shortcut_weight_to_fp16 = const()[name = tensor("up_blocks_0_resnets_2_conv_shortcut_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(985899968)))]; tensor up_blocks_0_resnets_2_conv_shortcut_bias_to_fp16 = const()[name = tensor("up_blocks_0_resnets_2_conv_shortcut_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(992453632)))]; tensor x_9_cast_fp16 = conv(bias = up_blocks_0_resnets_2_conv_shortcut_bias_to_fp16, dilations = var_6486, groups = var_6340, pad = x_9_pad_0, pad_type = x_9_pad_type_0, strides = var_6484, weight = up_blocks_0_resnets_2_conv_shortcut_weight_to_fp16, x = input_267_cast_fp16)[name = tensor("x_9_cast_fp16")]; tensor input_281_cast_fp16 = add(x = x_9_cast_fp16, y = hidden_states_155_cast_fp16)[name = tensor("input_281_cast_fp16")]; tensor input_283_scale_factor_height_0 = const()[name = tensor("input_283_scale_factor_height_0"), val = tensor(0x1p+1)]; tensor input_283_scale_factor_width_0 = const()[name = tensor("input_283_scale_factor_width_0"), val = tensor(0x1p+1)]; tensor input_283_cast_fp16 = upsample_nearest_neighbor(scale_factor_height = input_283_scale_factor_height_0, scale_factor_width = input_283_scale_factor_width_0, x = input_281_cast_fp16)[name = tensor("input_283_cast_fp16")]; tensor var_6495 = const()[name = tensor("op_6495"), val = tensor([1, 1])]; tensor var_6497 = const()[name = tensor("op_6497"), val = tensor([1, 1])]; tensor hidden_states_157_pad_type_0 = const()[name = tensor("hidden_states_157_pad_type_0"), val = tensor("custom")]; tensor hidden_states_157_pad_0 = const()[name = tensor("hidden_states_157_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_0_upsamplers_0_conv_weight_to_fp16 = const()[name = tensor("up_blocks_0_upsamplers_0_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(992456256)))]; tensor up_blocks_0_upsamplers_0_conv_bias_to_fp16 = const()[name = tensor("up_blocks_0_upsamplers_0_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1021947520)))]; tensor hidden_states_157_cast_fp16 = conv(bias = up_blocks_0_upsamplers_0_conv_bias_to_fp16, dilations = var_6497, groups = var_6340, pad = hidden_states_157_pad_0, pad_type = hidden_states_157_pad_type_0, strides = var_6495, weight = up_blocks_0_upsamplers_0_conv_weight_to_fp16, x = input_283_cast_fp16)[name = tensor("hidden_states_157_cast_fp16")]; tensor var_6522 = const()[name = tensor("op_6522"), val = tensor(1)]; tensor input_285_interleave_0 = const()[name = tensor("input_285_interleave_0"), val = tensor(false)]; tensor input_285_cast_fp16 = concat(axis = var_6522, interleave = input_285_interleave_0, values = (hidden_states_157_cast_fp16, input_169_cast_fp16))[name = tensor("input_285_cast_fp16")]; tensor reshape_132_shape_0 = const()[name = tensor("reshape_132_shape_0"), val = tensor([2, 32, 80, 16, 16])]; tensor reshape_132_cast_fp16 = reshape(shape = reshape_132_shape_0, x = input_285_cast_fp16)[name = tensor("reshape_132_cast_fp16")]; tensor reduce_mean_99_axes_0 = const()[name = tensor("reduce_mean_99_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_99_keep_dims_0 = const()[name = tensor("reduce_mean_99_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_99_cast_fp16 = reduce_mean(axes = reduce_mean_99_axes_0, keep_dims = reduce_mean_99_keep_dims_0, x = reshape_132_cast_fp16)[name = tensor("reduce_mean_99_cast_fp16")]; tensor sub_66_cast_fp16 = sub(x = reshape_132_cast_fp16, y = reduce_mean_99_cast_fp16)[name = tensor("sub_66_cast_fp16")]; tensor square_33_cast_fp16 = square(x = sub_66_cast_fp16)[name = tensor("square_33_cast_fp16")]; tensor reduce_mean_101_axes_0 = const()[name = tensor("reduce_mean_101_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_101_keep_dims_0 = const()[name = tensor("reduce_mean_101_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_101_cast_fp16 = reduce_mean(axes = reduce_mean_101_axes_0, keep_dims = reduce_mean_101_keep_dims_0, x = square_33_cast_fp16)[name = tensor("reduce_mean_101_cast_fp16")]; tensor add_66_y_0_to_fp16 = const()[name = tensor("add_66_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_66_cast_fp16 = add(x = reduce_mean_101_cast_fp16, y = add_66_y_0_to_fp16)[name = tensor("add_66_cast_fp16")]; tensor sqrt_33_cast_fp16 = sqrt(x = add_66_cast_fp16)[name = tensor("sqrt_33_cast_fp16")]; tensor real_div_33_cast_fp16 = real_div(x = sub_66_cast_fp16, y = sqrt_33_cast_fp16)[name = tensor("real_div_33_cast_fp16")]; tensor reshape_133_shape_0 = const()[name = tensor("reshape_133_shape_0"), val = tensor([2, 2560, 16, 16])]; tensor reshape_133_cast_fp16 = reshape(shape = reshape_133_shape_0, x = real_div_33_cast_fp16)[name = tensor("reshape_133_cast_fp16")]; tensor add_67_gamma_0_to_fp16 = const()[name = tensor("add_67_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1021950144)))]; tensor add_67_beta_0_to_fp16 = const()[name = tensor("add_67_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1021955328)))]; tensor add_67_epsilon_0_to_fp16 = const()[name = tensor("add_67_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_67_cast_fp16 = batch_norm(beta = add_67_beta_0_to_fp16, epsilon = add_67_epsilon_0_to_fp16, gamma = add_67_gamma_0_to_fp16, mean = add_55_mean_0_to_fp16, variance = add_55_variance_0_to_fp16, x = reshape_133_cast_fp16)[name = tensor("add_67_cast_fp16")]; tensor input_289_cast_fp16 = silu(x = add_67_cast_fp16)[name = tensor("input_289_cast_fp16")]; tensor var_6551 = const()[name = tensor("op_6551"), val = tensor([1, 1])]; tensor var_6553 = const()[name = tensor("op_6553"), val = tensor([1, 1])]; tensor hidden_states_159_pad_type_0 = const()[name = tensor("hidden_states_159_pad_type_0"), val = tensor("custom")]; tensor hidden_states_159_pad_0 = const()[name = tensor("hidden_states_159_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_1_resnets_0_conv1_weight_to_fp16 = const()[name = tensor("up_blocks_1_resnets_0_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1021960512)))]; tensor up_blocks_1_resnets_0_conv1_bias_to_fp16 = const()[name = tensor("up_blocks_1_resnets_0_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1080942976)))]; tensor hidden_states_159_cast_fp16 = conv(bias = up_blocks_1_resnets_0_conv1_bias_to_fp16, dilations = var_6553, groups = var_6522, pad = hidden_states_159_pad_0, pad_type = hidden_states_159_pad_type_0, strides = var_6551, weight = up_blocks_1_resnets_0_conv1_weight_to_fp16, x = input_289_cast_fp16)[name = tensor("hidden_states_159_cast_fp16")]; tensor var_6559 = const()[name = tensor("op_6559"), val = tensor([1, 1])]; tensor var_6561 = const()[name = tensor("op_6561"), val = tensor([1, 1])]; tensor temb_27_pad_type_0 = const()[name = tensor("temb_27_pad_type_0"), val = tensor("custom")]; tensor temb_27_pad_0 = const()[name = tensor("temb_27_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_resnets_0_time_emb_proj_weight_to_fp16 = const()[name = tensor("up_blocks_1_resnets_0_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1080945600)))]; tensor up_blocks_1_resnets_0_time_emb_proj_bias_to_fp16 = const()[name = tensor("up_blocks_1_resnets_0_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1084222464)))]; tensor temb_27_cast_fp16 = conv(bias = up_blocks_1_resnets_0_time_emb_proj_bias_to_fp16, dilations = var_6561, groups = var_6522, pad = temb_27_pad_0, pad_type = temb_27_pad_type_0, strides = var_6559, weight = up_blocks_1_resnets_0_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_27_cast_fp16")]; tensor input_293_cast_fp16 = add(x = hidden_states_159_cast_fp16, y = temb_27_cast_fp16)[name = tensor("input_293_cast_fp16")]; tensor reshape_136_shape_0 = const()[name = tensor("reshape_136_shape_0"), val = tensor([2, 32, 40, 16, 16])]; tensor reshape_136_cast_fp16 = reshape(shape = reshape_136_shape_0, x = input_293_cast_fp16)[name = tensor("reshape_136_cast_fp16")]; tensor reduce_mean_102_axes_0 = const()[name = tensor("reduce_mean_102_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_102_keep_dims_0 = const()[name = tensor("reduce_mean_102_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_102_cast_fp16 = reduce_mean(axes = reduce_mean_102_axes_0, keep_dims = reduce_mean_102_keep_dims_0, x = reshape_136_cast_fp16)[name = tensor("reduce_mean_102_cast_fp16")]; tensor sub_68_cast_fp16 = sub(x = reshape_136_cast_fp16, y = reduce_mean_102_cast_fp16)[name = tensor("sub_68_cast_fp16")]; tensor square_34_cast_fp16 = square(x = sub_68_cast_fp16)[name = tensor("square_34_cast_fp16")]; tensor reduce_mean_104_axes_0 = const()[name = tensor("reduce_mean_104_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_104_keep_dims_0 = const()[name = tensor("reduce_mean_104_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_104_cast_fp16 = reduce_mean(axes = reduce_mean_104_axes_0, keep_dims = reduce_mean_104_keep_dims_0, x = square_34_cast_fp16)[name = tensor("reduce_mean_104_cast_fp16")]; tensor add_68_y_0_to_fp16 = const()[name = tensor("add_68_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_68_cast_fp16 = add(x = reduce_mean_104_cast_fp16, y = add_68_y_0_to_fp16)[name = tensor("add_68_cast_fp16")]; tensor sqrt_34_cast_fp16 = sqrt(x = add_68_cast_fp16)[name = tensor("sqrt_34_cast_fp16")]; tensor real_div_34_cast_fp16 = real_div(x = sub_68_cast_fp16, y = sqrt_34_cast_fp16)[name = tensor("real_div_34_cast_fp16")]; tensor reshape_137_shape_0 = const()[name = tensor("reshape_137_shape_0"), val = tensor([2, 1280, 16, 16])]; tensor reshape_137_cast_fp16 = reshape(shape = reshape_137_shape_0, x = real_div_34_cast_fp16)[name = tensor("reshape_137_cast_fp16")]; tensor add_69_gamma_0_to_fp16 = const()[name = tensor("add_69_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1084225088)))]; tensor add_69_beta_0_to_fp16 = const()[name = tensor("add_69_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1084227712)))]; tensor add_69_epsilon_0_to_fp16 = const()[name = tensor("add_69_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_69_cast_fp16 = batch_norm(beta = add_69_beta_0_to_fp16, epsilon = add_69_epsilon_0_to_fp16, gamma = add_69_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_137_cast_fp16)[name = tensor("add_69_cast_fp16")]; tensor input_297_cast_fp16 = silu(x = add_69_cast_fp16)[name = tensor("input_297_cast_fp16")]; tensor var_6571 = const()[name = tensor("op_6571"), val = tensor([1, 1])]; tensor var_6573 = const()[name = tensor("op_6573"), val = tensor([1, 1])]; tensor hidden_states_161_pad_type_0 = const()[name = tensor("hidden_states_161_pad_type_0"), val = tensor("custom")]; tensor hidden_states_161_pad_0 = const()[name = tensor("hidden_states_161_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_1_resnets_0_conv2_weight_to_fp16 = const()[name = tensor("up_blocks_1_resnets_0_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1084230336)))]; tensor up_blocks_1_resnets_0_conv2_bias_to_fp16 = const()[name = tensor("up_blocks_1_resnets_0_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1113721600)))]; tensor hidden_states_161_cast_fp16 = conv(bias = up_blocks_1_resnets_0_conv2_bias_to_fp16, dilations = var_6573, groups = var_6522, pad = hidden_states_161_pad_0, pad_type = hidden_states_161_pad_type_0, strides = var_6571, weight = up_blocks_1_resnets_0_conv2_weight_to_fp16, x = input_297_cast_fp16)[name = tensor("hidden_states_161_cast_fp16")]; tensor var_6578 = const()[name = tensor("op_6578"), val = tensor([1, 1])]; tensor var_6580 = const()[name = tensor("op_6580"), val = tensor([1, 1])]; tensor x_11_pad_type_0 = const()[name = tensor("x_11_pad_type_0"), val = tensor("custom")]; tensor x_11_pad_0 = const()[name = tensor("x_11_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_resnets_0_conv_shortcut_weight_to_fp16 = const()[name = tensor("up_blocks_1_resnets_0_conv_shortcut_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1113724224)))]; tensor up_blocks_1_resnets_0_conv_shortcut_bias_to_fp16 = const()[name = tensor("up_blocks_1_resnets_0_conv_shortcut_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1120277888)))]; tensor x_11_cast_fp16 = conv(bias = up_blocks_1_resnets_0_conv_shortcut_bias_to_fp16, dilations = var_6580, groups = var_6522, pad = x_11_pad_0, pad_type = x_11_pad_type_0, strides = var_6578, weight = up_blocks_1_resnets_0_conv_shortcut_weight_to_fp16, x = input_285_cast_fp16)[name = tensor("x_11_cast_fp16")]; tensor hidden_states_163_cast_fp16 = add(x = x_11_cast_fp16, y = hidden_states_161_cast_fp16)[name = tensor("hidden_states_163_cast_fp16")]; tensor reshape_140_shape_0 = const()[name = tensor("reshape_140_shape_0"), val = tensor([2, 32, 40, 16, 16])]; tensor reshape_140_cast_fp16 = reshape(shape = reshape_140_shape_0, x = hidden_states_163_cast_fp16)[name = tensor("reshape_140_cast_fp16")]; tensor reduce_mean_105_axes_0 = const()[name = tensor("reduce_mean_105_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_105_keep_dims_0 = const()[name = tensor("reduce_mean_105_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_105_cast_fp16 = reduce_mean(axes = reduce_mean_105_axes_0, keep_dims = reduce_mean_105_keep_dims_0, x = reshape_140_cast_fp16)[name = tensor("reduce_mean_105_cast_fp16")]; tensor sub_70_cast_fp16 = sub(x = reshape_140_cast_fp16, y = reduce_mean_105_cast_fp16)[name = tensor("sub_70_cast_fp16")]; tensor square_35_cast_fp16 = square(x = sub_70_cast_fp16)[name = tensor("square_35_cast_fp16")]; tensor reduce_mean_107_axes_0 = const()[name = tensor("reduce_mean_107_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_107_keep_dims_0 = const()[name = tensor("reduce_mean_107_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_107_cast_fp16 = reduce_mean(axes = reduce_mean_107_axes_0, keep_dims = reduce_mean_107_keep_dims_0, x = square_35_cast_fp16)[name = tensor("reduce_mean_107_cast_fp16")]; tensor add_70_y_0_to_fp16 = const()[name = tensor("add_70_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_70_cast_fp16 = add(x = reduce_mean_107_cast_fp16, y = add_70_y_0_to_fp16)[name = tensor("add_70_cast_fp16")]; tensor sqrt_35_cast_fp16 = sqrt(x = add_70_cast_fp16)[name = tensor("sqrt_35_cast_fp16")]; tensor real_div_35_cast_fp16 = real_div(x = sub_70_cast_fp16, y = sqrt_35_cast_fp16)[name = tensor("real_div_35_cast_fp16")]; tensor reshape_141_shape_0 = const()[name = tensor("reshape_141_shape_0"), val = tensor([2, 1280, 16, 16])]; tensor reshape_141_cast_fp16 = reshape(shape = reshape_141_shape_0, x = real_div_35_cast_fp16)[name = tensor("reshape_141_cast_fp16")]; tensor add_71_gamma_0_to_fp16 = const()[name = tensor("add_71_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1120280512)))]; tensor add_71_beta_0_to_fp16 = const()[name = tensor("add_71_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1120283136)))]; tensor add_71_epsilon_0_to_fp16 = const()[name = tensor("add_71_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_71_cast_fp16 = batch_norm(beta = add_71_beta_0_to_fp16, epsilon = add_71_epsilon_0_to_fp16, gamma = add_71_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_141_cast_fp16)[name = tensor("add_71_cast_fp16")]; tensor var_6600 = const()[name = tensor("op_6600"), val = tensor([1, 1])]; tensor var_6602 = const()[name = tensor("op_6602"), val = tensor([1, 1])]; tensor hidden_states_165_pad_type_0 = const()[name = tensor("hidden_states_165_pad_type_0"), val = tensor("custom")]; tensor hidden_states_165_pad_0 = const()[name = tensor("hidden_states_165_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_0_proj_in_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1120285760)))]; tensor up_blocks_1_attentions_0_proj_in_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1123562624)))]; tensor hidden_states_165_cast_fp16 = conv(bias = up_blocks_1_attentions_0_proj_in_bias_to_fp16, dilations = var_6602, groups = var_6522, pad = hidden_states_165_pad_0, pad_type = hidden_states_165_pad_type_0, strides = var_6600, weight = up_blocks_1_attentions_0_proj_in_weight_to_fp16, x = add_71_cast_fp16)[name = tensor("hidden_states_165_cast_fp16")]; tensor var_6607 = const()[name = tensor("op_6607"), val = tensor([2, 1280, 1, 256])]; tensor inputs_43_cast_fp16 = reshape(shape = var_6607, x = hidden_states_165_cast_fp16)[name = tensor("inputs_43_cast_fp16")]; tensor hidden_states_167_axes_0 = const()[name = tensor("hidden_states_167_axes_0"), val = tensor([1])]; tensor hidden_states_167_gamma_0_to_fp16 = const()[name = tensor("hidden_states_167_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1123565248)))]; tensor hidden_states_167_beta_0_to_fp16 = const()[name = tensor("hidden_states_167_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1123567872)))]; tensor var_6623_to_fp16 = const()[name = tensor("op_6623_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_167_cast_fp16 = layer_norm(axes = hidden_states_167_axes_0, beta = hidden_states_167_beta_0_to_fp16, epsilon = var_6623_to_fp16, gamma = hidden_states_167_gamma_0_to_fp16, x = inputs_43_cast_fp16)[name = tensor("hidden_states_167_cast_fp16")]; tensor var_6638 = const()[name = tensor("op_6638"), val = tensor([1, 1])]; tensor var_6640 = const()[name = tensor("op_6640"), val = tensor([1, 1])]; tensor q_29_pad_type_0 = const()[name = tensor("q_29_pad_type_0"), val = tensor("custom")]; tensor q_29_pad_0 = const()[name = tensor("q_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1123570496)))]; tensor q_29_cast_fp16 = conv(dilations = var_6640, groups = var_6522, pad = q_29_pad_0, pad_type = q_29_pad_type_0, strides = var_6638, weight = up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_167_cast_fp16)[name = tensor("q_29_cast_fp16")]; tensor var_6644 = const()[name = tensor("op_6644"), val = tensor([1, 1])]; tensor var_6646 = const()[name = tensor("op_6646"), val = tensor([1, 1])]; tensor k_57_pad_type_0 = const()[name = tensor("k_57_pad_type_0"), val = tensor("custom")]; tensor k_57_pad_0 = const()[name = tensor("k_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1126847360)))]; tensor k_57_cast_fp16 = conv(dilations = var_6646, groups = var_6522, pad = k_57_pad_0, pad_type = k_57_pad_type_0, strides = var_6644, weight = up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_167_cast_fp16)[name = tensor("k_57_cast_fp16")]; tensor var_6650 = const()[name = tensor("op_6650"), val = tensor([1, 1])]; tensor var_6652 = const()[name = tensor("op_6652"), val = tensor([1, 1])]; tensor v_29_pad_type_0 = const()[name = tensor("v_29_pad_type_0"), val = tensor("custom")]; tensor v_29_pad_0 = const()[name = tensor("v_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1130124224)))]; tensor v_29_cast_fp16 = conv(dilations = var_6652, groups = var_6522, pad = v_29_pad_0, pad_type = v_29_pad_type_0, strides = var_6650, weight = up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_167_cast_fp16)[name = tensor("v_29_cast_fp16")]; tensor var_6656_begin_0 = const()[name = tensor("op_6656_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6656_end_0 = const()[name = tensor("op_6656_end_0"), val = tensor([2, 160, 1, 256])]; tensor var_6656_end_mask_0 = const()[name = tensor("op_6656_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6656_cast_fp16 = slice_by_index(begin = var_6656_begin_0, end = var_6656_end_0, end_mask = var_6656_end_mask_0, x = q_29_cast_fp16)[name = tensor("op_6656_cast_fp16")]; tensor var_6660_begin_0 = const()[name = tensor("op_6660_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_6660_end_0 = const()[name = tensor("op_6660_end_0"), val = tensor([2, 320, 1, 256])]; tensor var_6660_end_mask_0 = const()[name = tensor("op_6660_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6660_cast_fp16 = slice_by_index(begin = var_6660_begin_0, end = var_6660_end_0, end_mask = var_6660_end_mask_0, x = q_29_cast_fp16)[name = tensor("op_6660_cast_fp16")]; tensor var_6664_begin_0 = const()[name = tensor("op_6664_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_6664_end_0 = const()[name = tensor("op_6664_end_0"), val = tensor([2, 480, 1, 256])]; tensor var_6664_end_mask_0 = const()[name = tensor("op_6664_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6664_cast_fp16 = slice_by_index(begin = var_6664_begin_0, end = var_6664_end_0, end_mask = var_6664_end_mask_0, x = q_29_cast_fp16)[name = tensor("op_6664_cast_fp16")]; tensor var_6668_begin_0 = const()[name = tensor("op_6668_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_6668_end_0 = const()[name = tensor("op_6668_end_0"), val = tensor([2, 640, 1, 256])]; tensor var_6668_end_mask_0 = const()[name = tensor("op_6668_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6668_cast_fp16 = slice_by_index(begin = var_6668_begin_0, end = var_6668_end_0, end_mask = var_6668_end_mask_0, x = q_29_cast_fp16)[name = tensor("op_6668_cast_fp16")]; tensor var_6672_begin_0 = const()[name = tensor("op_6672_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_6672_end_0 = const()[name = tensor("op_6672_end_0"), val = tensor([2, 800, 1, 256])]; tensor var_6672_end_mask_0 = const()[name = tensor("op_6672_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6672_cast_fp16 = slice_by_index(begin = var_6672_begin_0, end = var_6672_end_0, end_mask = var_6672_end_mask_0, x = q_29_cast_fp16)[name = tensor("op_6672_cast_fp16")]; tensor var_6676_begin_0 = const()[name = tensor("op_6676_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_6676_end_0 = const()[name = tensor("op_6676_end_0"), val = tensor([2, 960, 1, 256])]; tensor var_6676_end_mask_0 = const()[name = tensor("op_6676_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6676_cast_fp16 = slice_by_index(begin = var_6676_begin_0, end = var_6676_end_0, end_mask = var_6676_end_mask_0, x = q_29_cast_fp16)[name = tensor("op_6676_cast_fp16")]; tensor var_6680_begin_0 = const()[name = tensor("op_6680_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_6680_end_0 = const()[name = tensor("op_6680_end_0"), val = tensor([2, 1120, 1, 256])]; tensor var_6680_end_mask_0 = const()[name = tensor("op_6680_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6680_cast_fp16 = slice_by_index(begin = var_6680_begin_0, end = var_6680_end_0, end_mask = var_6680_end_mask_0, x = q_29_cast_fp16)[name = tensor("op_6680_cast_fp16")]; tensor var_6684_begin_0 = const()[name = tensor("op_6684_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_6684_end_0 = const()[name = tensor("op_6684_end_0"), val = tensor([2, 1280, 1, 256])]; tensor var_6684_end_mask_0 = const()[name = tensor("op_6684_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6684_cast_fp16 = slice_by_index(begin = var_6684_begin_0, end = var_6684_end_0, end_mask = var_6684_end_mask_0, x = q_29_cast_fp16)[name = tensor("op_6684_cast_fp16")]; tensor k_59_perm_0 = const()[name = tensor("k_59_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_6691_begin_0 = const()[name = tensor("op_6691_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6691_end_0 = const()[name = tensor("op_6691_end_0"), val = tensor([2, 256, 1, 160])]; tensor var_6691_end_mask_0 = const()[name = tensor("op_6691_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_17 = transpose(perm = k_59_perm_0, x = k_57_cast_fp16)[name = tensor("transpose_17")]; tensor var_6691_cast_fp16 = slice_by_index(begin = var_6691_begin_0, end = var_6691_end_0, end_mask = var_6691_end_mask_0, x = transpose_17)[name = tensor("op_6691_cast_fp16")]; tensor var_6695_begin_0 = const()[name = tensor("op_6695_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_6695_end_0 = const()[name = tensor("op_6695_end_0"), val = tensor([2, 256, 1, 320])]; tensor var_6695_end_mask_0 = const()[name = tensor("op_6695_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6695_cast_fp16 = slice_by_index(begin = var_6695_begin_0, end = var_6695_end_0, end_mask = var_6695_end_mask_0, x = transpose_17)[name = tensor("op_6695_cast_fp16")]; tensor var_6699_begin_0 = const()[name = tensor("op_6699_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_6699_end_0 = const()[name = tensor("op_6699_end_0"), val = tensor([2, 256, 1, 480])]; tensor var_6699_end_mask_0 = const()[name = tensor("op_6699_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6699_cast_fp16 = slice_by_index(begin = var_6699_begin_0, end = var_6699_end_0, end_mask = var_6699_end_mask_0, x = transpose_17)[name = tensor("op_6699_cast_fp16")]; tensor var_6703_begin_0 = const()[name = tensor("op_6703_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_6703_end_0 = const()[name = tensor("op_6703_end_0"), val = tensor([2, 256, 1, 640])]; tensor var_6703_end_mask_0 = const()[name = tensor("op_6703_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6703_cast_fp16 = slice_by_index(begin = var_6703_begin_0, end = var_6703_end_0, end_mask = var_6703_end_mask_0, x = transpose_17)[name = tensor("op_6703_cast_fp16")]; tensor var_6707_begin_0 = const()[name = tensor("op_6707_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_6707_end_0 = const()[name = tensor("op_6707_end_0"), val = tensor([2, 256, 1, 800])]; tensor var_6707_end_mask_0 = const()[name = tensor("op_6707_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6707_cast_fp16 = slice_by_index(begin = var_6707_begin_0, end = var_6707_end_0, end_mask = var_6707_end_mask_0, x = transpose_17)[name = tensor("op_6707_cast_fp16")]; tensor var_6711_begin_0 = const()[name = tensor("op_6711_begin_0"), val = tensor([0, 0, 0, 800])]; tensor var_6711_end_0 = const()[name = tensor("op_6711_end_0"), val = tensor([2, 256, 1, 960])]; tensor var_6711_end_mask_0 = const()[name = tensor("op_6711_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6711_cast_fp16 = slice_by_index(begin = var_6711_begin_0, end = var_6711_end_0, end_mask = var_6711_end_mask_0, x = transpose_17)[name = tensor("op_6711_cast_fp16")]; tensor var_6715_begin_0 = const()[name = tensor("op_6715_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_6715_end_0 = const()[name = tensor("op_6715_end_0"), val = tensor([2, 256, 1, 1120])]; tensor var_6715_end_mask_0 = const()[name = tensor("op_6715_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6715_cast_fp16 = slice_by_index(begin = var_6715_begin_0, end = var_6715_end_0, end_mask = var_6715_end_mask_0, x = transpose_17)[name = tensor("op_6715_cast_fp16")]; tensor var_6719_begin_0 = const()[name = tensor("op_6719_begin_0"), val = tensor([0, 0, 0, 1120])]; tensor var_6719_end_0 = const()[name = tensor("op_6719_end_0"), val = tensor([2, 256, 1, 1280])]; tensor var_6719_end_mask_0 = const()[name = tensor("op_6719_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6719_cast_fp16 = slice_by_index(begin = var_6719_begin_0, end = var_6719_end_0, end_mask = var_6719_end_mask_0, x = transpose_17)[name = tensor("op_6719_cast_fp16")]; tensor var_6721_begin_0 = const()[name = tensor("op_6721_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6721_end_0 = const()[name = tensor("op_6721_end_0"), val = tensor([2, 160, 1, 256])]; tensor var_6721_end_mask_0 = const()[name = tensor("op_6721_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6721_cast_fp16 = slice_by_index(begin = var_6721_begin_0, end = var_6721_end_0, end_mask = var_6721_end_mask_0, x = v_29_cast_fp16)[name = tensor("op_6721_cast_fp16")]; tensor var_6725_begin_0 = const()[name = tensor("op_6725_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_6725_end_0 = const()[name = tensor("op_6725_end_0"), val = tensor([2, 320, 1, 256])]; tensor var_6725_end_mask_0 = const()[name = tensor("op_6725_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6725_cast_fp16 = slice_by_index(begin = var_6725_begin_0, end = var_6725_end_0, end_mask = var_6725_end_mask_0, x = v_29_cast_fp16)[name = tensor("op_6725_cast_fp16")]; tensor var_6729_begin_0 = const()[name = tensor("op_6729_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_6729_end_0 = const()[name = tensor("op_6729_end_0"), val = tensor([2, 480, 1, 256])]; tensor var_6729_end_mask_0 = const()[name = tensor("op_6729_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6729_cast_fp16 = slice_by_index(begin = var_6729_begin_0, end = var_6729_end_0, end_mask = var_6729_end_mask_0, x = v_29_cast_fp16)[name = tensor("op_6729_cast_fp16")]; tensor var_6733_begin_0 = const()[name = tensor("op_6733_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_6733_end_0 = const()[name = tensor("op_6733_end_0"), val = tensor([2, 640, 1, 256])]; tensor var_6733_end_mask_0 = const()[name = tensor("op_6733_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6733_cast_fp16 = slice_by_index(begin = var_6733_begin_0, end = var_6733_end_0, end_mask = var_6733_end_mask_0, x = v_29_cast_fp16)[name = tensor("op_6733_cast_fp16")]; tensor var_6737_begin_0 = const()[name = tensor("op_6737_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_6737_end_0 = const()[name = tensor("op_6737_end_0"), val = tensor([2, 800, 1, 256])]; tensor var_6737_end_mask_0 = const()[name = tensor("op_6737_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6737_cast_fp16 = slice_by_index(begin = var_6737_begin_0, end = var_6737_end_0, end_mask = var_6737_end_mask_0, x = v_29_cast_fp16)[name = tensor("op_6737_cast_fp16")]; tensor var_6741_begin_0 = const()[name = tensor("op_6741_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_6741_end_0 = const()[name = tensor("op_6741_end_0"), val = tensor([2, 960, 1, 256])]; tensor var_6741_end_mask_0 = const()[name = tensor("op_6741_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6741_cast_fp16 = slice_by_index(begin = var_6741_begin_0, end = var_6741_end_0, end_mask = var_6741_end_mask_0, x = v_29_cast_fp16)[name = tensor("op_6741_cast_fp16")]; tensor var_6745_begin_0 = const()[name = tensor("op_6745_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_6745_end_0 = const()[name = tensor("op_6745_end_0"), val = tensor([2, 1120, 1, 256])]; tensor var_6745_end_mask_0 = const()[name = tensor("op_6745_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6745_cast_fp16 = slice_by_index(begin = var_6745_begin_0, end = var_6745_end_0, end_mask = var_6745_end_mask_0, x = v_29_cast_fp16)[name = tensor("op_6745_cast_fp16")]; tensor var_6749_begin_0 = const()[name = tensor("op_6749_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_6749_end_0 = const()[name = tensor("op_6749_end_0"), val = tensor([2, 1280, 1, 256])]; tensor var_6749_end_mask_0 = const()[name = tensor("op_6749_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6749_cast_fp16 = slice_by_index(begin = var_6749_begin_0, end = var_6749_end_0, end_mask = var_6749_end_mask_0, x = v_29_cast_fp16)[name = tensor("op_6749_cast_fp16")]; tensor var_6753_equation_0 = const()[name = tensor("op_6753_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6753_cast_fp16 = einsum(equation = var_6753_equation_0, values = (var_6691_cast_fp16, var_6656_cast_fp16))[name = tensor("op_6753_cast_fp16")]; tensor var_6754_to_fp16 = const()[name = tensor("op_6754_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_97_cast_fp16 = mul(x = var_6753_cast_fp16, y = var_6754_to_fp16)[name = tensor("aw_97_cast_fp16")]; tensor var_6757_equation_0 = const()[name = tensor("op_6757_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6757_cast_fp16 = einsum(equation = var_6757_equation_0, values = (var_6695_cast_fp16, var_6660_cast_fp16))[name = tensor("op_6757_cast_fp16")]; tensor var_6758_to_fp16 = const()[name = tensor("op_6758_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_99_cast_fp16 = mul(x = var_6757_cast_fp16, y = var_6758_to_fp16)[name = tensor("aw_99_cast_fp16")]; tensor var_6761_equation_0 = const()[name = tensor("op_6761_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6761_cast_fp16 = einsum(equation = var_6761_equation_0, values = (var_6699_cast_fp16, var_6664_cast_fp16))[name = tensor("op_6761_cast_fp16")]; tensor var_6762_to_fp16 = const()[name = tensor("op_6762_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_101_cast_fp16 = mul(x = var_6761_cast_fp16, y = var_6762_to_fp16)[name = tensor("aw_101_cast_fp16")]; tensor var_6765_equation_0 = const()[name = tensor("op_6765_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6765_cast_fp16 = einsum(equation = var_6765_equation_0, values = (var_6703_cast_fp16, var_6668_cast_fp16))[name = tensor("op_6765_cast_fp16")]; tensor var_6766_to_fp16 = const()[name = tensor("op_6766_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_103_cast_fp16 = mul(x = var_6765_cast_fp16, y = var_6766_to_fp16)[name = tensor("aw_103_cast_fp16")]; tensor var_6769_equation_0 = const()[name = tensor("op_6769_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6769_cast_fp16 = einsum(equation = var_6769_equation_0, values = (var_6707_cast_fp16, var_6672_cast_fp16))[name = tensor("op_6769_cast_fp16")]; tensor var_6770_to_fp16 = const()[name = tensor("op_6770_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_105_cast_fp16 = mul(x = var_6769_cast_fp16, y = var_6770_to_fp16)[name = tensor("aw_105_cast_fp16")]; tensor var_6773_equation_0 = const()[name = tensor("op_6773_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6773_cast_fp16 = einsum(equation = var_6773_equation_0, values = (var_6711_cast_fp16, var_6676_cast_fp16))[name = tensor("op_6773_cast_fp16")]; tensor var_6774_to_fp16 = const()[name = tensor("op_6774_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_107_cast_fp16 = mul(x = var_6773_cast_fp16, y = var_6774_to_fp16)[name = tensor("aw_107_cast_fp16")]; tensor var_6777_equation_0 = const()[name = tensor("op_6777_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6777_cast_fp16 = einsum(equation = var_6777_equation_0, values = (var_6715_cast_fp16, var_6680_cast_fp16))[name = tensor("op_6777_cast_fp16")]; tensor var_6778_to_fp16 = const()[name = tensor("op_6778_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_109_cast_fp16 = mul(x = var_6777_cast_fp16, y = var_6778_to_fp16)[name = tensor("aw_109_cast_fp16")]; tensor var_6781_equation_0 = const()[name = tensor("op_6781_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6781_cast_fp16 = einsum(equation = var_6781_equation_0, values = (var_6719_cast_fp16, var_6684_cast_fp16))[name = tensor("op_6781_cast_fp16")]; tensor var_6782_to_fp16 = const()[name = tensor("op_6782_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_111_cast_fp16 = mul(x = var_6781_cast_fp16, y = var_6782_to_fp16)[name = tensor("aw_111_cast_fp16")]; tensor var_6784_cast_fp16 = softmax(axis = var_6522, x = aw_97_cast_fp16)[name = tensor("op_6784_cast_fp16")]; tensor var_6785_cast_fp16 = softmax(axis = var_6522, x = aw_99_cast_fp16)[name = tensor("op_6785_cast_fp16")]; tensor var_6786_cast_fp16 = softmax(axis = var_6522, x = aw_101_cast_fp16)[name = tensor("op_6786_cast_fp16")]; tensor var_6787_cast_fp16 = softmax(axis = var_6522, x = aw_103_cast_fp16)[name = tensor("op_6787_cast_fp16")]; tensor var_6788_cast_fp16 = softmax(axis = var_6522, x = aw_105_cast_fp16)[name = tensor("op_6788_cast_fp16")]; tensor var_6789_cast_fp16 = softmax(axis = var_6522, x = aw_107_cast_fp16)[name = tensor("op_6789_cast_fp16")]; tensor var_6790_cast_fp16 = softmax(axis = var_6522, x = aw_109_cast_fp16)[name = tensor("op_6790_cast_fp16")]; tensor var_6791_cast_fp16 = softmax(axis = var_6522, x = aw_111_cast_fp16)[name = tensor("op_6791_cast_fp16")]; tensor var_6793_equation_0 = const()[name = tensor("op_6793_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6793_cast_fp16 = einsum(equation = var_6793_equation_0, values = (var_6721_cast_fp16, var_6784_cast_fp16))[name = tensor("op_6793_cast_fp16")]; tensor var_6795_equation_0 = const()[name = tensor("op_6795_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6795_cast_fp16 = einsum(equation = var_6795_equation_0, values = (var_6725_cast_fp16, var_6785_cast_fp16))[name = tensor("op_6795_cast_fp16")]; tensor var_6797_equation_0 = const()[name = tensor("op_6797_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6797_cast_fp16 = einsum(equation = var_6797_equation_0, values = (var_6729_cast_fp16, var_6786_cast_fp16))[name = tensor("op_6797_cast_fp16")]; tensor var_6799_equation_0 = const()[name = tensor("op_6799_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6799_cast_fp16 = einsum(equation = var_6799_equation_0, values = (var_6733_cast_fp16, var_6787_cast_fp16))[name = tensor("op_6799_cast_fp16")]; tensor var_6801_equation_0 = const()[name = tensor("op_6801_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6801_cast_fp16 = einsum(equation = var_6801_equation_0, values = (var_6737_cast_fp16, var_6788_cast_fp16))[name = tensor("op_6801_cast_fp16")]; tensor var_6803_equation_0 = const()[name = tensor("op_6803_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6803_cast_fp16 = einsum(equation = var_6803_equation_0, values = (var_6741_cast_fp16, var_6789_cast_fp16))[name = tensor("op_6803_cast_fp16")]; tensor var_6805_equation_0 = const()[name = tensor("op_6805_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6805_cast_fp16 = einsum(equation = var_6805_equation_0, values = (var_6745_cast_fp16, var_6790_cast_fp16))[name = tensor("op_6805_cast_fp16")]; tensor var_6807_equation_0 = const()[name = tensor("op_6807_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6807_cast_fp16 = einsum(equation = var_6807_equation_0, values = (var_6749_cast_fp16, var_6791_cast_fp16))[name = tensor("op_6807_cast_fp16")]; tensor input_301_interleave_0 = const()[name = tensor("input_301_interleave_0"), val = tensor(false)]; tensor input_301_cast_fp16 = concat(axis = var_6522, interleave = input_301_interleave_0, values = (var_6793_cast_fp16, var_6795_cast_fp16, var_6797_cast_fp16, var_6799_cast_fp16, var_6801_cast_fp16, var_6803_cast_fp16, var_6805_cast_fp16, var_6807_cast_fp16))[name = tensor("input_301_cast_fp16")]; tensor var_6813 = const()[name = tensor("op_6813"), val = tensor([1, 1])]; tensor var_6815 = const()[name = tensor("op_6815"), val = tensor([1, 1])]; tensor var_6817_pad_type_0 = const()[name = tensor("op_6817_pad_type_0"), val = tensor("custom")]; tensor var_6817_pad_0 = const()[name = tensor("op_6817_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1133401088)))]; tensor up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1136677952)))]; tensor var_6817_cast_fp16 = conv(bias = up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_6815, groups = var_6522, pad = var_6817_pad_0, pad_type = var_6817_pad_type_0, strides = var_6813, weight = up_blocks_1_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_301_cast_fp16)[name = tensor("op_6817_cast_fp16")]; tensor inputs_45_cast_fp16 = add(x = var_6817_cast_fp16, y = inputs_43_cast_fp16)[name = tensor("inputs_45_cast_fp16")]; tensor hidden_states_169_axes_0 = const()[name = tensor("hidden_states_169_axes_0"), val = tensor([1])]; tensor hidden_states_169_gamma_0_to_fp16 = const()[name = tensor("hidden_states_169_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1136680576)))]; tensor hidden_states_169_beta_0_to_fp16 = const()[name = tensor("hidden_states_169_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1136683200)))]; tensor var_6827_to_fp16 = const()[name = tensor("op_6827_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_169_cast_fp16 = layer_norm(axes = hidden_states_169_axes_0, beta = hidden_states_169_beta_0_to_fp16, epsilon = var_6827_to_fp16, gamma = hidden_states_169_gamma_0_to_fp16, x = inputs_45_cast_fp16)[name = tensor("hidden_states_169_cast_fp16")]; tensor var_6842 = const()[name = tensor("op_6842"), val = tensor([1, 1])]; tensor var_6844 = const()[name = tensor("op_6844"), val = tensor([1, 1])]; tensor q_31_pad_type_0 = const()[name = tensor("q_31_pad_type_0"), val = tensor("custom")]; tensor q_31_pad_0 = const()[name = tensor("q_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1136685824)))]; tensor q_31_cast_fp16 = conv(dilations = var_6844, groups = var_6522, pad = q_31_pad_0, pad_type = q_31_pad_type_0, strides = var_6842, weight = up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_169_cast_fp16)[name = tensor("q_31_cast_fp16")]; tensor var_6848 = const()[name = tensor("op_6848"), val = tensor([1, 1])]; tensor var_6850 = const()[name = tensor("op_6850"), val = tensor([1, 1])]; tensor k_61_pad_type_0 = const()[name = tensor("k_61_pad_type_0"), val = tensor("custom")]; tensor k_61_pad_0 = const()[name = tensor("k_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1139962688)))]; tensor k_61_cast_fp16 = conv(dilations = var_6850, groups = var_6522, pad = k_61_pad_0, pad_type = k_61_pad_type_0, strides = var_6848, weight = up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_61_cast_fp16")]; tensor var_6854 = const()[name = tensor("op_6854"), val = tensor([1, 1])]; tensor var_6856 = const()[name = tensor("op_6856"), val = tensor([1, 1])]; tensor v_31_pad_type_0 = const()[name = tensor("v_31_pad_type_0"), val = tensor("custom")]; tensor v_31_pad_0 = const()[name = tensor("v_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1141928832)))]; tensor v_31_cast_fp16 = conv(dilations = var_6856, groups = var_6522, pad = v_31_pad_0, pad_type = v_31_pad_type_0, strides = var_6854, weight = up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_31_cast_fp16")]; tensor var_6860_begin_0 = const()[name = tensor("op_6860_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6860_end_0 = const()[name = tensor("op_6860_end_0"), val = tensor([2, 160, 1, 256])]; tensor var_6860_end_mask_0 = const()[name = tensor("op_6860_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6860_cast_fp16 = slice_by_index(begin = var_6860_begin_0, end = var_6860_end_0, end_mask = var_6860_end_mask_0, x = q_31_cast_fp16)[name = tensor("op_6860_cast_fp16")]; tensor var_6864_begin_0 = const()[name = tensor("op_6864_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_6864_end_0 = const()[name = tensor("op_6864_end_0"), val = tensor([2, 320, 1, 256])]; tensor var_6864_end_mask_0 = const()[name = tensor("op_6864_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6864_cast_fp16 = slice_by_index(begin = var_6864_begin_0, end = var_6864_end_0, end_mask = var_6864_end_mask_0, x = q_31_cast_fp16)[name = tensor("op_6864_cast_fp16")]; tensor var_6868_begin_0 = const()[name = tensor("op_6868_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_6868_end_0 = const()[name = tensor("op_6868_end_0"), val = tensor([2, 480, 1, 256])]; tensor var_6868_end_mask_0 = const()[name = tensor("op_6868_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6868_cast_fp16 = slice_by_index(begin = var_6868_begin_0, end = var_6868_end_0, end_mask = var_6868_end_mask_0, x = q_31_cast_fp16)[name = tensor("op_6868_cast_fp16")]; tensor var_6872_begin_0 = const()[name = tensor("op_6872_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_6872_end_0 = const()[name = tensor("op_6872_end_0"), val = tensor([2, 640, 1, 256])]; tensor var_6872_end_mask_0 = const()[name = tensor("op_6872_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6872_cast_fp16 = slice_by_index(begin = var_6872_begin_0, end = var_6872_end_0, end_mask = var_6872_end_mask_0, x = q_31_cast_fp16)[name = tensor("op_6872_cast_fp16")]; tensor var_6876_begin_0 = const()[name = tensor("op_6876_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_6876_end_0 = const()[name = tensor("op_6876_end_0"), val = tensor([2, 800, 1, 256])]; tensor var_6876_end_mask_0 = const()[name = tensor("op_6876_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6876_cast_fp16 = slice_by_index(begin = var_6876_begin_0, end = var_6876_end_0, end_mask = var_6876_end_mask_0, x = q_31_cast_fp16)[name = tensor("op_6876_cast_fp16")]; tensor var_6880_begin_0 = const()[name = tensor("op_6880_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_6880_end_0 = const()[name = tensor("op_6880_end_0"), val = tensor([2, 960, 1, 256])]; tensor var_6880_end_mask_0 = const()[name = tensor("op_6880_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6880_cast_fp16 = slice_by_index(begin = var_6880_begin_0, end = var_6880_end_0, end_mask = var_6880_end_mask_0, x = q_31_cast_fp16)[name = tensor("op_6880_cast_fp16")]; tensor var_6884_begin_0 = const()[name = tensor("op_6884_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_6884_end_0 = const()[name = tensor("op_6884_end_0"), val = tensor([2, 1120, 1, 256])]; tensor var_6884_end_mask_0 = const()[name = tensor("op_6884_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6884_cast_fp16 = slice_by_index(begin = var_6884_begin_0, end = var_6884_end_0, end_mask = var_6884_end_mask_0, x = q_31_cast_fp16)[name = tensor("op_6884_cast_fp16")]; tensor var_6888_begin_0 = const()[name = tensor("op_6888_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_6888_end_0 = const()[name = tensor("op_6888_end_0"), val = tensor([2, 1280, 1, 256])]; tensor var_6888_end_mask_0 = const()[name = tensor("op_6888_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6888_cast_fp16 = slice_by_index(begin = var_6888_begin_0, end = var_6888_end_0, end_mask = var_6888_end_mask_0, x = q_31_cast_fp16)[name = tensor("op_6888_cast_fp16")]; tensor k_63_perm_0 = const()[name = tensor("k_63_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_6895_begin_0 = const()[name = tensor("op_6895_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6895_end_0 = const()[name = tensor("op_6895_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_6895_end_mask_0 = const()[name = tensor("op_6895_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_16 = transpose(perm = k_63_perm_0, x = k_61_cast_fp16)[name = tensor("transpose_16")]; tensor var_6895_cast_fp16 = slice_by_index(begin = var_6895_begin_0, end = var_6895_end_0, end_mask = var_6895_end_mask_0, x = transpose_16)[name = tensor("op_6895_cast_fp16")]; tensor var_6899_begin_0 = const()[name = tensor("op_6899_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_6899_end_0 = const()[name = tensor("op_6899_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_6899_end_mask_0 = const()[name = tensor("op_6899_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6899_cast_fp16 = slice_by_index(begin = var_6899_begin_0, end = var_6899_end_0, end_mask = var_6899_end_mask_0, x = transpose_16)[name = tensor("op_6899_cast_fp16")]; tensor var_6903_begin_0 = const()[name = tensor("op_6903_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_6903_end_0 = const()[name = tensor("op_6903_end_0"), val = tensor([2, 77, 1, 480])]; tensor var_6903_end_mask_0 = const()[name = tensor("op_6903_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6903_cast_fp16 = slice_by_index(begin = var_6903_begin_0, end = var_6903_end_0, end_mask = var_6903_end_mask_0, x = transpose_16)[name = tensor("op_6903_cast_fp16")]; tensor var_6907_begin_0 = const()[name = tensor("op_6907_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_6907_end_0 = const()[name = tensor("op_6907_end_0"), val = tensor([2, 77, 1, 640])]; tensor var_6907_end_mask_0 = const()[name = tensor("op_6907_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6907_cast_fp16 = slice_by_index(begin = var_6907_begin_0, end = var_6907_end_0, end_mask = var_6907_end_mask_0, x = transpose_16)[name = tensor("op_6907_cast_fp16")]; tensor var_6911_begin_0 = const()[name = tensor("op_6911_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_6911_end_0 = const()[name = tensor("op_6911_end_0"), val = tensor([2, 77, 1, 800])]; tensor var_6911_end_mask_0 = const()[name = tensor("op_6911_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6911_cast_fp16 = slice_by_index(begin = var_6911_begin_0, end = var_6911_end_0, end_mask = var_6911_end_mask_0, x = transpose_16)[name = tensor("op_6911_cast_fp16")]; tensor var_6915_begin_0 = const()[name = tensor("op_6915_begin_0"), val = tensor([0, 0, 0, 800])]; tensor var_6915_end_0 = const()[name = tensor("op_6915_end_0"), val = tensor([2, 77, 1, 960])]; tensor var_6915_end_mask_0 = const()[name = tensor("op_6915_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6915_cast_fp16 = slice_by_index(begin = var_6915_begin_0, end = var_6915_end_0, end_mask = var_6915_end_mask_0, x = transpose_16)[name = tensor("op_6915_cast_fp16")]; tensor var_6919_begin_0 = const()[name = tensor("op_6919_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_6919_end_0 = const()[name = tensor("op_6919_end_0"), val = tensor([2, 77, 1, 1120])]; tensor var_6919_end_mask_0 = const()[name = tensor("op_6919_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6919_cast_fp16 = slice_by_index(begin = var_6919_begin_0, end = var_6919_end_0, end_mask = var_6919_end_mask_0, x = transpose_16)[name = tensor("op_6919_cast_fp16")]; tensor var_6923_begin_0 = const()[name = tensor("op_6923_begin_0"), val = tensor([0, 0, 0, 1120])]; tensor var_6923_end_0 = const()[name = tensor("op_6923_end_0"), val = tensor([2, 77, 1, 1280])]; tensor var_6923_end_mask_0 = const()[name = tensor("op_6923_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6923_cast_fp16 = slice_by_index(begin = var_6923_begin_0, end = var_6923_end_0, end_mask = var_6923_end_mask_0, x = transpose_16)[name = tensor("op_6923_cast_fp16")]; tensor var_6925_begin_0 = const()[name = tensor("op_6925_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6925_end_0 = const()[name = tensor("op_6925_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_6925_end_mask_0 = const()[name = tensor("op_6925_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6925_cast_fp16 = slice_by_index(begin = var_6925_begin_0, end = var_6925_end_0, end_mask = var_6925_end_mask_0, x = v_31_cast_fp16)[name = tensor("op_6925_cast_fp16")]; tensor var_6929_begin_0 = const()[name = tensor("op_6929_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_6929_end_0 = const()[name = tensor("op_6929_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_6929_end_mask_0 = const()[name = tensor("op_6929_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6929_cast_fp16 = slice_by_index(begin = var_6929_begin_0, end = var_6929_end_0, end_mask = var_6929_end_mask_0, x = v_31_cast_fp16)[name = tensor("op_6929_cast_fp16")]; tensor var_6933_begin_0 = const()[name = tensor("op_6933_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_6933_end_0 = const()[name = tensor("op_6933_end_0"), val = tensor([2, 480, 1, 77])]; tensor var_6933_end_mask_0 = const()[name = tensor("op_6933_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6933_cast_fp16 = slice_by_index(begin = var_6933_begin_0, end = var_6933_end_0, end_mask = var_6933_end_mask_0, x = v_31_cast_fp16)[name = tensor("op_6933_cast_fp16")]; tensor var_6937_begin_0 = const()[name = tensor("op_6937_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_6937_end_0 = const()[name = tensor("op_6937_end_0"), val = tensor([2, 640, 1, 77])]; tensor var_6937_end_mask_0 = const()[name = tensor("op_6937_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6937_cast_fp16 = slice_by_index(begin = var_6937_begin_0, end = var_6937_end_0, end_mask = var_6937_end_mask_0, x = v_31_cast_fp16)[name = tensor("op_6937_cast_fp16")]; tensor var_6941_begin_0 = const()[name = tensor("op_6941_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_6941_end_0 = const()[name = tensor("op_6941_end_0"), val = tensor([2, 800, 1, 77])]; tensor var_6941_end_mask_0 = const()[name = tensor("op_6941_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6941_cast_fp16 = slice_by_index(begin = var_6941_begin_0, end = var_6941_end_0, end_mask = var_6941_end_mask_0, x = v_31_cast_fp16)[name = tensor("op_6941_cast_fp16")]; tensor var_6945_begin_0 = const()[name = tensor("op_6945_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_6945_end_0 = const()[name = tensor("op_6945_end_0"), val = tensor([2, 960, 1, 77])]; tensor var_6945_end_mask_0 = const()[name = tensor("op_6945_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6945_cast_fp16 = slice_by_index(begin = var_6945_begin_0, end = var_6945_end_0, end_mask = var_6945_end_mask_0, x = v_31_cast_fp16)[name = tensor("op_6945_cast_fp16")]; tensor var_6949_begin_0 = const()[name = tensor("op_6949_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_6949_end_0 = const()[name = tensor("op_6949_end_0"), val = tensor([2, 1120, 1, 77])]; tensor var_6949_end_mask_0 = const()[name = tensor("op_6949_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6949_cast_fp16 = slice_by_index(begin = var_6949_begin_0, end = var_6949_end_0, end_mask = var_6949_end_mask_0, x = v_31_cast_fp16)[name = tensor("op_6949_cast_fp16")]; tensor var_6953_begin_0 = const()[name = tensor("op_6953_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_6953_end_0 = const()[name = tensor("op_6953_end_0"), val = tensor([2, 1280, 1, 77])]; tensor var_6953_end_mask_0 = const()[name = tensor("op_6953_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6953_cast_fp16 = slice_by_index(begin = var_6953_begin_0, end = var_6953_end_0, end_mask = var_6953_end_mask_0, x = v_31_cast_fp16)[name = tensor("op_6953_cast_fp16")]; tensor var_6957_equation_0 = const()[name = tensor("op_6957_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6957_cast_fp16 = einsum(equation = var_6957_equation_0, values = (var_6895_cast_fp16, var_6860_cast_fp16))[name = tensor("op_6957_cast_fp16")]; tensor var_6958_to_fp16 = const()[name = tensor("op_6958_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_113_cast_fp16 = mul(x = var_6957_cast_fp16, y = var_6958_to_fp16)[name = tensor("aw_113_cast_fp16")]; tensor var_6961_equation_0 = const()[name = tensor("op_6961_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6961_cast_fp16 = einsum(equation = var_6961_equation_0, values = (var_6899_cast_fp16, var_6864_cast_fp16))[name = tensor("op_6961_cast_fp16")]; tensor var_6962_to_fp16 = const()[name = tensor("op_6962_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_115_cast_fp16 = mul(x = var_6961_cast_fp16, y = var_6962_to_fp16)[name = tensor("aw_115_cast_fp16")]; tensor var_6965_equation_0 = const()[name = tensor("op_6965_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6965_cast_fp16 = einsum(equation = var_6965_equation_0, values = (var_6903_cast_fp16, var_6868_cast_fp16))[name = tensor("op_6965_cast_fp16")]; tensor var_6966_to_fp16 = const()[name = tensor("op_6966_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_117_cast_fp16 = mul(x = var_6965_cast_fp16, y = var_6966_to_fp16)[name = tensor("aw_117_cast_fp16")]; tensor var_6969_equation_0 = const()[name = tensor("op_6969_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6969_cast_fp16 = einsum(equation = var_6969_equation_0, values = (var_6907_cast_fp16, var_6872_cast_fp16))[name = tensor("op_6969_cast_fp16")]; tensor var_6970_to_fp16 = const()[name = tensor("op_6970_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_119_cast_fp16 = mul(x = var_6969_cast_fp16, y = var_6970_to_fp16)[name = tensor("aw_119_cast_fp16")]; tensor var_6973_equation_0 = const()[name = tensor("op_6973_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6973_cast_fp16 = einsum(equation = var_6973_equation_0, values = (var_6911_cast_fp16, var_6876_cast_fp16))[name = tensor("op_6973_cast_fp16")]; tensor var_6974_to_fp16 = const()[name = tensor("op_6974_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_121_cast_fp16 = mul(x = var_6973_cast_fp16, y = var_6974_to_fp16)[name = tensor("aw_121_cast_fp16")]; tensor var_6977_equation_0 = const()[name = tensor("op_6977_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6977_cast_fp16 = einsum(equation = var_6977_equation_0, values = (var_6915_cast_fp16, var_6880_cast_fp16))[name = tensor("op_6977_cast_fp16")]; tensor var_6978_to_fp16 = const()[name = tensor("op_6978_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_123_cast_fp16 = mul(x = var_6977_cast_fp16, y = var_6978_to_fp16)[name = tensor("aw_123_cast_fp16")]; tensor var_6981_equation_0 = const()[name = tensor("op_6981_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6981_cast_fp16 = einsum(equation = var_6981_equation_0, values = (var_6919_cast_fp16, var_6884_cast_fp16))[name = tensor("op_6981_cast_fp16")]; tensor var_6982_to_fp16 = const()[name = tensor("op_6982_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_125_cast_fp16 = mul(x = var_6981_cast_fp16, y = var_6982_to_fp16)[name = tensor("aw_125_cast_fp16")]; tensor var_6985_equation_0 = const()[name = tensor("op_6985_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_6985_cast_fp16 = einsum(equation = var_6985_equation_0, values = (var_6923_cast_fp16, var_6888_cast_fp16))[name = tensor("op_6985_cast_fp16")]; tensor var_6986_to_fp16 = const()[name = tensor("op_6986_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_127_cast_fp16 = mul(x = var_6985_cast_fp16, y = var_6986_to_fp16)[name = tensor("aw_127_cast_fp16")]; tensor var_6988_cast_fp16 = softmax(axis = var_6522, x = aw_113_cast_fp16)[name = tensor("op_6988_cast_fp16")]; tensor var_6989_cast_fp16 = softmax(axis = var_6522, x = aw_115_cast_fp16)[name = tensor("op_6989_cast_fp16")]; tensor var_6990_cast_fp16 = softmax(axis = var_6522, x = aw_117_cast_fp16)[name = tensor("op_6990_cast_fp16")]; tensor var_6991_cast_fp16 = softmax(axis = var_6522, x = aw_119_cast_fp16)[name = tensor("op_6991_cast_fp16")]; tensor var_6992_cast_fp16 = softmax(axis = var_6522, x = aw_121_cast_fp16)[name = tensor("op_6992_cast_fp16")]; tensor var_6993_cast_fp16 = softmax(axis = var_6522, x = aw_123_cast_fp16)[name = tensor("op_6993_cast_fp16")]; tensor var_6994_cast_fp16 = softmax(axis = var_6522, x = aw_125_cast_fp16)[name = tensor("op_6994_cast_fp16")]; tensor var_6995_cast_fp16 = softmax(axis = var_6522, x = aw_127_cast_fp16)[name = tensor("op_6995_cast_fp16")]; tensor var_6997_equation_0 = const()[name = tensor("op_6997_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6997_cast_fp16 = einsum(equation = var_6997_equation_0, values = (var_6925_cast_fp16, var_6988_cast_fp16))[name = tensor("op_6997_cast_fp16")]; tensor var_6999_equation_0 = const()[name = tensor("op_6999_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6999_cast_fp16 = einsum(equation = var_6999_equation_0, values = (var_6929_cast_fp16, var_6989_cast_fp16))[name = tensor("op_6999_cast_fp16")]; tensor var_7001_equation_0 = const()[name = tensor("op_7001_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7001_cast_fp16 = einsum(equation = var_7001_equation_0, values = (var_6933_cast_fp16, var_6990_cast_fp16))[name = tensor("op_7001_cast_fp16")]; tensor var_7003_equation_0 = const()[name = tensor("op_7003_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7003_cast_fp16 = einsum(equation = var_7003_equation_0, values = (var_6937_cast_fp16, var_6991_cast_fp16))[name = tensor("op_7003_cast_fp16")]; tensor var_7005_equation_0 = const()[name = tensor("op_7005_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7005_cast_fp16 = einsum(equation = var_7005_equation_0, values = (var_6941_cast_fp16, var_6992_cast_fp16))[name = tensor("op_7005_cast_fp16")]; tensor var_7007_equation_0 = const()[name = tensor("op_7007_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7007_cast_fp16 = einsum(equation = var_7007_equation_0, values = (var_6945_cast_fp16, var_6993_cast_fp16))[name = tensor("op_7007_cast_fp16")]; tensor var_7009_equation_0 = const()[name = tensor("op_7009_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7009_cast_fp16 = einsum(equation = var_7009_equation_0, values = (var_6949_cast_fp16, var_6994_cast_fp16))[name = tensor("op_7009_cast_fp16")]; tensor var_7011_equation_0 = const()[name = tensor("op_7011_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7011_cast_fp16 = einsum(equation = var_7011_equation_0, values = (var_6953_cast_fp16, var_6995_cast_fp16))[name = tensor("op_7011_cast_fp16")]; tensor input_303_interleave_0 = const()[name = tensor("input_303_interleave_0"), val = tensor(false)]; tensor input_303_cast_fp16 = concat(axis = var_6522, interleave = input_303_interleave_0, values = (var_6997_cast_fp16, var_6999_cast_fp16, var_7001_cast_fp16, var_7003_cast_fp16, var_7005_cast_fp16, var_7007_cast_fp16, var_7009_cast_fp16, var_7011_cast_fp16))[name = tensor("input_303_cast_fp16")]; tensor var_7017 = const()[name = tensor("op_7017"), val = tensor([1, 1])]; tensor var_7019 = const()[name = tensor("op_7019"), val = tensor([1, 1])]; tensor var_7021_pad_type_0 = const()[name = tensor("op_7021_pad_type_0"), val = tensor("custom")]; tensor var_7021_pad_0 = const()[name = tensor("op_7021_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1143894976)))]; tensor up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1147171840)))]; tensor var_7021_cast_fp16 = conv(bias = up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_7019, groups = var_6522, pad = var_7021_pad_0, pad_type = var_7021_pad_type_0, strides = var_7017, weight = up_blocks_1_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_303_cast_fp16)[name = tensor("op_7021_cast_fp16")]; tensor inputs_47_cast_fp16 = add(x = var_7021_cast_fp16, y = inputs_45_cast_fp16)[name = tensor("inputs_47_cast_fp16")]; tensor input_305_axes_0 = const()[name = tensor("input_305_axes_0"), val = tensor([1])]; tensor input_305_gamma_0_to_fp16 = const()[name = tensor("input_305_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1147174464)))]; tensor input_305_beta_0_to_fp16 = const()[name = tensor("input_305_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1147177088)))]; tensor var_7031_to_fp16 = const()[name = tensor("op_7031_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_305_cast_fp16 = layer_norm(axes = input_305_axes_0, beta = input_305_beta_0_to_fp16, epsilon = var_7031_to_fp16, gamma = input_305_gamma_0_to_fp16, x = inputs_47_cast_fp16)[name = tensor("input_305_cast_fp16")]; tensor var_7047 = const()[name = tensor("op_7047"), val = tensor([1, 1])]; tensor var_7049 = const()[name = tensor("op_7049"), val = tensor([1, 1])]; tensor var_7051_pad_type_0 = const()[name = tensor("op_7051_pad_type_0"), val = tensor("custom")]; tensor var_7051_pad_0 = const()[name = tensor("op_7051_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1147179712)))]; tensor up_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1173394176)))]; tensor var_7051_cast_fp16 = conv(bias = up_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_7049, groups = var_6522, pad = var_7051_pad_0, pad_type = var_7051_pad_type_0, strides = var_7047, weight = up_blocks_1_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_305_cast_fp16)[name = tensor("op_7051_cast_fp16")]; tensor var_7052_split_sizes_0 = const()[name = tensor("op_7052_split_sizes_0"), val = tensor([5120, 5120])]; tensor var_7052_axis_0 = const()[name = tensor("op_7052_axis_0"), val = tensor(1)]; tensor var_7052_cast_fp16_0, tensor var_7052_cast_fp16_1 = split(axis = var_7052_axis_0, split_sizes = var_7052_split_sizes_0, x = var_7051_cast_fp16)[name = tensor("op_7052_cast_fp16")]; tensor var_7054_mode_0 = const()[name = tensor("op_7054_mode_0"), val = tensor("EXACT")]; tensor var_7054_cast_fp16 = gelu(mode = var_7054_mode_0, x = var_7052_cast_fp16_1)[name = tensor("op_7054_cast_fp16")]; tensor input_307_cast_fp16 = mul(x = var_7052_cast_fp16_0, y = var_7054_cast_fp16)[name = tensor("input_307_cast_fp16")]; tensor var_7058 = const()[name = tensor("op_7058"), val = tensor([1, 1])]; tensor var_7060 = const()[name = tensor("op_7060"), val = tensor([1, 1])]; tensor var_7062_pad_type_0 = const()[name = tensor("op_7062_pad_type_0"), val = tensor("custom")]; tensor var_7062_pad_0 = const()[name = tensor("op_7062_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1173414720)))]; tensor up_blocks_1_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1186521984)))]; tensor var_7062_cast_fp16 = conv(bias = up_blocks_1_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_7060, groups = var_6522, pad = var_7062_pad_0, pad_type = var_7062_pad_type_0, strides = var_7058, weight = up_blocks_1_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_307_cast_fp16)[name = tensor("op_7062_cast_fp16")]; tensor hidden_states_173_cast_fp16 = add(x = var_7062_cast_fp16, y = inputs_47_cast_fp16)[name = tensor("hidden_states_173_cast_fp16")]; tensor var_7064 = const()[name = tensor("op_7064"), val = tensor([2, 1280, 16, 16])]; tensor input_309_cast_fp16 = reshape(shape = var_7064, x = hidden_states_173_cast_fp16)[name = tensor("input_309_cast_fp16")]; tensor var_7068 = const()[name = tensor("op_7068"), val = tensor([1, 1])]; tensor var_7070 = const()[name = tensor("op_7070"), val = tensor([1, 1])]; tensor hidden_states_175_pad_type_0 = const()[name = tensor("hidden_states_175_pad_type_0"), val = tensor("custom")]; tensor hidden_states_175_pad_0 = const()[name = tensor("hidden_states_175_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_0_proj_out_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1186524608)))]; tensor up_blocks_1_attentions_0_proj_out_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_0_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1189801472)))]; tensor hidden_states_175_cast_fp16 = conv(bias = up_blocks_1_attentions_0_proj_out_bias_to_fp16, dilations = var_7070, groups = var_6522, pad = hidden_states_175_pad_0, pad_type = hidden_states_175_pad_type_0, strides = var_7068, weight = up_blocks_1_attentions_0_proj_out_weight_to_fp16, x = input_309_cast_fp16)[name = tensor("hidden_states_175_cast_fp16")]; tensor hidden_states_177_cast_fp16 = add(x = hidden_states_175_cast_fp16, y = hidden_states_163_cast_fp16)[name = tensor("hidden_states_177_cast_fp16")]; tensor input_311_interleave_0 = const()[name = tensor("input_311_interleave_0"), val = tensor(false)]; tensor input_311_cast_fp16 = concat(axis = var_6522, interleave = input_311_interleave_0, values = (hidden_states_177_cast_fp16, input_143_cast_fp16))[name = tensor("input_311_cast_fp16")]; tensor reshape_144_shape_0 = const()[name = tensor("reshape_144_shape_0"), val = tensor([2, 32, 80, 16, 16])]; tensor reshape_144_cast_fp16 = reshape(shape = reshape_144_shape_0, x = input_311_cast_fp16)[name = tensor("reshape_144_cast_fp16")]; tensor reduce_mean_108_axes_0 = const()[name = tensor("reduce_mean_108_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_108_keep_dims_0 = const()[name = tensor("reduce_mean_108_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_108_cast_fp16 = reduce_mean(axes = reduce_mean_108_axes_0, keep_dims = reduce_mean_108_keep_dims_0, x = reshape_144_cast_fp16)[name = tensor("reduce_mean_108_cast_fp16")]; tensor sub_72_cast_fp16 = sub(x = reshape_144_cast_fp16, y = reduce_mean_108_cast_fp16)[name = tensor("sub_72_cast_fp16")]; tensor square_36_cast_fp16 = square(x = sub_72_cast_fp16)[name = tensor("square_36_cast_fp16")]; tensor reduce_mean_110_axes_0 = const()[name = tensor("reduce_mean_110_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_110_keep_dims_0 = const()[name = tensor("reduce_mean_110_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_110_cast_fp16 = reduce_mean(axes = reduce_mean_110_axes_0, keep_dims = reduce_mean_110_keep_dims_0, x = square_36_cast_fp16)[name = tensor("reduce_mean_110_cast_fp16")]; tensor add_72_y_0_to_fp16 = const()[name = tensor("add_72_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_72_cast_fp16 = add(x = reduce_mean_110_cast_fp16, y = add_72_y_0_to_fp16)[name = tensor("add_72_cast_fp16")]; tensor sqrt_36_cast_fp16 = sqrt(x = add_72_cast_fp16)[name = tensor("sqrt_36_cast_fp16")]; tensor real_div_36_cast_fp16 = real_div(x = sub_72_cast_fp16, y = sqrt_36_cast_fp16)[name = tensor("real_div_36_cast_fp16")]; tensor reshape_145_shape_0 = const()[name = tensor("reshape_145_shape_0"), val = tensor([2, 2560, 16, 16])]; tensor reshape_145_cast_fp16 = reshape(shape = reshape_145_shape_0, x = real_div_36_cast_fp16)[name = tensor("reshape_145_cast_fp16")]; tensor add_73_gamma_0_to_fp16 = const()[name = tensor("add_73_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1189804096)))]; tensor add_73_beta_0_to_fp16 = const()[name = tensor("add_73_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1189809280)))]; tensor add_73_epsilon_0_to_fp16 = const()[name = tensor("add_73_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_73_cast_fp16 = batch_norm(beta = add_73_beta_0_to_fp16, epsilon = add_73_epsilon_0_to_fp16, gamma = add_73_gamma_0_to_fp16, mean = add_55_mean_0_to_fp16, variance = add_55_variance_0_to_fp16, x = reshape_145_cast_fp16)[name = tensor("add_73_cast_fp16")]; tensor input_315_cast_fp16 = silu(x = add_73_cast_fp16)[name = tensor("input_315_cast_fp16")]; tensor var_7088 = const()[name = tensor("op_7088"), val = tensor([1, 1])]; tensor var_7090 = const()[name = tensor("op_7090"), val = tensor([1, 1])]; tensor hidden_states_179_pad_type_0 = const()[name = tensor("hidden_states_179_pad_type_0"), val = tensor("custom")]; tensor hidden_states_179_pad_0 = const()[name = tensor("hidden_states_179_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_1_resnets_1_conv1_weight_to_fp16 = const()[name = tensor("up_blocks_1_resnets_1_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1189814464)))]; tensor up_blocks_1_resnets_1_conv1_bias_to_fp16 = const()[name = tensor("up_blocks_1_resnets_1_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1248796928)))]; tensor hidden_states_179_cast_fp16 = conv(bias = up_blocks_1_resnets_1_conv1_bias_to_fp16, dilations = var_7090, groups = var_6522, pad = hidden_states_179_pad_0, pad_type = hidden_states_179_pad_type_0, strides = var_7088, weight = up_blocks_1_resnets_1_conv1_weight_to_fp16, x = input_315_cast_fp16)[name = tensor("hidden_states_179_cast_fp16")]; tensor var_7096 = const()[name = tensor("op_7096"), val = tensor([1, 1])]; tensor var_7098 = const()[name = tensor("op_7098"), val = tensor([1, 1])]; tensor temb_29_pad_type_0 = const()[name = tensor("temb_29_pad_type_0"), val = tensor("custom")]; tensor temb_29_pad_0 = const()[name = tensor("temb_29_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_resnets_1_time_emb_proj_weight_to_fp16 = const()[name = tensor("up_blocks_1_resnets_1_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1248799552)))]; tensor up_blocks_1_resnets_1_time_emb_proj_bias_to_fp16 = const()[name = tensor("up_blocks_1_resnets_1_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1252076416)))]; tensor temb_29_cast_fp16 = conv(bias = up_blocks_1_resnets_1_time_emb_proj_bias_to_fp16, dilations = var_7098, groups = var_6522, pad = temb_29_pad_0, pad_type = temb_29_pad_type_0, strides = var_7096, weight = up_blocks_1_resnets_1_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_29_cast_fp16")]; tensor input_319_cast_fp16 = add(x = hidden_states_179_cast_fp16, y = temb_29_cast_fp16)[name = tensor("input_319_cast_fp16")]; tensor reshape_148_shape_0 = const()[name = tensor("reshape_148_shape_0"), val = tensor([2, 32, 40, 16, 16])]; tensor reshape_148_cast_fp16 = reshape(shape = reshape_148_shape_0, x = input_319_cast_fp16)[name = tensor("reshape_148_cast_fp16")]; tensor reduce_mean_111_axes_0 = const()[name = tensor("reduce_mean_111_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_111_keep_dims_0 = const()[name = tensor("reduce_mean_111_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_111_cast_fp16 = reduce_mean(axes = reduce_mean_111_axes_0, keep_dims = reduce_mean_111_keep_dims_0, x = reshape_148_cast_fp16)[name = tensor("reduce_mean_111_cast_fp16")]; tensor sub_74_cast_fp16 = sub(x = reshape_148_cast_fp16, y = reduce_mean_111_cast_fp16)[name = tensor("sub_74_cast_fp16")]; tensor square_37_cast_fp16 = square(x = sub_74_cast_fp16)[name = tensor("square_37_cast_fp16")]; tensor reduce_mean_113_axes_0 = const()[name = tensor("reduce_mean_113_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_113_keep_dims_0 = const()[name = tensor("reduce_mean_113_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_113_cast_fp16 = reduce_mean(axes = reduce_mean_113_axes_0, keep_dims = reduce_mean_113_keep_dims_0, x = square_37_cast_fp16)[name = tensor("reduce_mean_113_cast_fp16")]; tensor add_74_y_0_to_fp16 = const()[name = tensor("add_74_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_74_cast_fp16 = add(x = reduce_mean_113_cast_fp16, y = add_74_y_0_to_fp16)[name = tensor("add_74_cast_fp16")]; tensor sqrt_37_cast_fp16 = sqrt(x = add_74_cast_fp16)[name = tensor("sqrt_37_cast_fp16")]; tensor real_div_37_cast_fp16 = real_div(x = sub_74_cast_fp16, y = sqrt_37_cast_fp16)[name = tensor("real_div_37_cast_fp16")]; tensor reshape_149_shape_0 = const()[name = tensor("reshape_149_shape_0"), val = tensor([2, 1280, 16, 16])]; tensor reshape_149_cast_fp16 = reshape(shape = reshape_149_shape_0, x = real_div_37_cast_fp16)[name = tensor("reshape_149_cast_fp16")]; tensor add_75_gamma_0_to_fp16 = const()[name = tensor("add_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1252079040)))]; tensor add_75_beta_0_to_fp16 = const()[name = tensor("add_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1252081664)))]; tensor add_75_epsilon_0_to_fp16 = const()[name = tensor("add_75_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_75_cast_fp16 = batch_norm(beta = add_75_beta_0_to_fp16, epsilon = add_75_epsilon_0_to_fp16, gamma = add_75_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_149_cast_fp16)[name = tensor("add_75_cast_fp16")]; tensor input_323_cast_fp16 = silu(x = add_75_cast_fp16)[name = tensor("input_323_cast_fp16")]; tensor var_7108 = const()[name = tensor("op_7108"), val = tensor([1, 1])]; tensor var_7110 = const()[name = tensor("op_7110"), val = tensor([1, 1])]; tensor hidden_states_181_pad_type_0 = const()[name = tensor("hidden_states_181_pad_type_0"), val = tensor("custom")]; tensor hidden_states_181_pad_0 = const()[name = tensor("hidden_states_181_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_1_resnets_1_conv2_weight_to_fp16 = const()[name = tensor("up_blocks_1_resnets_1_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1252084288)))]; tensor up_blocks_1_resnets_1_conv2_bias_to_fp16 = const()[name = tensor("up_blocks_1_resnets_1_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1281575552)))]; tensor hidden_states_181_cast_fp16 = conv(bias = up_blocks_1_resnets_1_conv2_bias_to_fp16, dilations = var_7110, groups = var_6522, pad = hidden_states_181_pad_0, pad_type = hidden_states_181_pad_type_0, strides = var_7108, weight = up_blocks_1_resnets_1_conv2_weight_to_fp16, x = input_323_cast_fp16)[name = tensor("hidden_states_181_cast_fp16")]; tensor var_7115 = const()[name = tensor("op_7115"), val = tensor([1, 1])]; tensor var_7117 = const()[name = tensor("op_7117"), val = tensor([1, 1])]; tensor x_13_pad_type_0 = const()[name = tensor("x_13_pad_type_0"), val = tensor("custom")]; tensor x_13_pad_0 = const()[name = tensor("x_13_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_resnets_1_conv_shortcut_weight_to_fp16 = const()[name = tensor("up_blocks_1_resnets_1_conv_shortcut_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1281578176)))]; tensor up_blocks_1_resnets_1_conv_shortcut_bias_to_fp16 = const()[name = tensor("up_blocks_1_resnets_1_conv_shortcut_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1288131840)))]; tensor x_13_cast_fp16 = conv(bias = up_blocks_1_resnets_1_conv_shortcut_bias_to_fp16, dilations = var_7117, groups = var_6522, pad = x_13_pad_0, pad_type = x_13_pad_type_0, strides = var_7115, weight = up_blocks_1_resnets_1_conv_shortcut_weight_to_fp16, x = input_311_cast_fp16)[name = tensor("x_13_cast_fp16")]; tensor hidden_states_183_cast_fp16 = add(x = x_13_cast_fp16, y = hidden_states_181_cast_fp16)[name = tensor("hidden_states_183_cast_fp16")]; tensor reshape_152_shape_0 = const()[name = tensor("reshape_152_shape_0"), val = tensor([2, 32, 40, 16, 16])]; tensor reshape_152_cast_fp16 = reshape(shape = reshape_152_shape_0, x = hidden_states_183_cast_fp16)[name = tensor("reshape_152_cast_fp16")]; tensor reduce_mean_114_axes_0 = const()[name = tensor("reduce_mean_114_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_114_keep_dims_0 = const()[name = tensor("reduce_mean_114_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_114_cast_fp16 = reduce_mean(axes = reduce_mean_114_axes_0, keep_dims = reduce_mean_114_keep_dims_0, x = reshape_152_cast_fp16)[name = tensor("reduce_mean_114_cast_fp16")]; tensor sub_76_cast_fp16 = sub(x = reshape_152_cast_fp16, y = reduce_mean_114_cast_fp16)[name = tensor("sub_76_cast_fp16")]; tensor square_38_cast_fp16 = square(x = sub_76_cast_fp16)[name = tensor("square_38_cast_fp16")]; tensor reduce_mean_116_axes_0 = const()[name = tensor("reduce_mean_116_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_116_keep_dims_0 = const()[name = tensor("reduce_mean_116_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_116_cast_fp16 = reduce_mean(axes = reduce_mean_116_axes_0, keep_dims = reduce_mean_116_keep_dims_0, x = square_38_cast_fp16)[name = tensor("reduce_mean_116_cast_fp16")]; tensor add_76_y_0_to_fp16 = const()[name = tensor("add_76_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_76_cast_fp16 = add(x = reduce_mean_116_cast_fp16, y = add_76_y_0_to_fp16)[name = tensor("add_76_cast_fp16")]; tensor sqrt_38_cast_fp16 = sqrt(x = add_76_cast_fp16)[name = tensor("sqrt_38_cast_fp16")]; tensor real_div_38_cast_fp16 = real_div(x = sub_76_cast_fp16, y = sqrt_38_cast_fp16)[name = tensor("real_div_38_cast_fp16")]; tensor reshape_153_shape_0 = const()[name = tensor("reshape_153_shape_0"), val = tensor([2, 1280, 16, 16])]; tensor reshape_153_cast_fp16 = reshape(shape = reshape_153_shape_0, x = real_div_38_cast_fp16)[name = tensor("reshape_153_cast_fp16")]; tensor add_77_gamma_0_to_fp16 = const()[name = tensor("add_77_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1288134464)))]; tensor add_77_beta_0_to_fp16 = const()[name = tensor("add_77_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1288137088)))]; tensor add_77_epsilon_0_to_fp16 = const()[name = tensor("add_77_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_77_cast_fp16 = batch_norm(beta = add_77_beta_0_to_fp16, epsilon = add_77_epsilon_0_to_fp16, gamma = add_77_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_153_cast_fp16)[name = tensor("add_77_cast_fp16")]; tensor var_7137 = const()[name = tensor("op_7137"), val = tensor([1, 1])]; tensor var_7139 = const()[name = tensor("op_7139"), val = tensor([1, 1])]; tensor hidden_states_185_pad_type_0 = const()[name = tensor("hidden_states_185_pad_type_0"), val = tensor("custom")]; tensor hidden_states_185_pad_0 = const()[name = tensor("hidden_states_185_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_1_proj_in_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1288139712)))]; tensor up_blocks_1_attentions_1_proj_in_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1291416576)))]; tensor hidden_states_185_cast_fp16 = conv(bias = up_blocks_1_attentions_1_proj_in_bias_to_fp16, dilations = var_7139, groups = var_6522, pad = hidden_states_185_pad_0, pad_type = hidden_states_185_pad_type_0, strides = var_7137, weight = up_blocks_1_attentions_1_proj_in_weight_to_fp16, x = add_77_cast_fp16)[name = tensor("hidden_states_185_cast_fp16")]; tensor var_7144 = const()[name = tensor("op_7144"), val = tensor([2, 1280, 1, 256])]; tensor inputs_49_cast_fp16 = reshape(shape = var_7144, x = hidden_states_185_cast_fp16)[name = tensor("inputs_49_cast_fp16")]; tensor hidden_states_187_axes_0 = const()[name = tensor("hidden_states_187_axes_0"), val = tensor([1])]; tensor hidden_states_187_gamma_0_to_fp16 = const()[name = tensor("hidden_states_187_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1291419200)))]; tensor hidden_states_187_beta_0_to_fp16 = const()[name = tensor("hidden_states_187_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1291421824)))]; tensor var_7160_to_fp16 = const()[name = tensor("op_7160_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_187_cast_fp16 = layer_norm(axes = hidden_states_187_axes_0, beta = hidden_states_187_beta_0_to_fp16, epsilon = var_7160_to_fp16, gamma = hidden_states_187_gamma_0_to_fp16, x = inputs_49_cast_fp16)[name = tensor("hidden_states_187_cast_fp16")]; tensor var_7175 = const()[name = tensor("op_7175"), val = tensor([1, 1])]; tensor var_7177 = const()[name = tensor("op_7177"), val = tensor([1, 1])]; tensor q_33_pad_type_0 = const()[name = tensor("q_33_pad_type_0"), val = tensor("custom")]; tensor q_33_pad_0 = const()[name = tensor("q_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1291424448)))]; tensor q_33_cast_fp16 = conv(dilations = var_7177, groups = var_6522, pad = q_33_pad_0, pad_type = q_33_pad_type_0, strides = var_7175, weight = up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_187_cast_fp16)[name = tensor("q_33_cast_fp16")]; tensor var_7181 = const()[name = tensor("op_7181"), val = tensor([1, 1])]; tensor var_7183 = const()[name = tensor("op_7183"), val = tensor([1, 1])]; tensor k_65_pad_type_0 = const()[name = tensor("k_65_pad_type_0"), val = tensor("custom")]; tensor k_65_pad_0 = const()[name = tensor("k_65_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1294701312)))]; tensor k_65_cast_fp16 = conv(dilations = var_7183, groups = var_6522, pad = k_65_pad_0, pad_type = k_65_pad_type_0, strides = var_7181, weight = up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_187_cast_fp16)[name = tensor("k_65_cast_fp16")]; tensor var_7187 = const()[name = tensor("op_7187"), val = tensor([1, 1])]; tensor var_7189 = const()[name = tensor("op_7189"), val = tensor([1, 1])]; tensor v_33_pad_type_0 = const()[name = tensor("v_33_pad_type_0"), val = tensor("custom")]; tensor v_33_pad_0 = const()[name = tensor("v_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1297978176)))]; tensor v_33_cast_fp16 = conv(dilations = var_7189, groups = var_6522, pad = v_33_pad_0, pad_type = v_33_pad_type_0, strides = var_7187, weight = up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_187_cast_fp16)[name = tensor("v_33_cast_fp16")]; tensor var_7193_begin_0 = const()[name = tensor("op_7193_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7193_end_0 = const()[name = tensor("op_7193_end_0"), val = tensor([2, 160, 1, 256])]; tensor var_7193_end_mask_0 = const()[name = tensor("op_7193_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7193_cast_fp16 = slice_by_index(begin = var_7193_begin_0, end = var_7193_end_0, end_mask = var_7193_end_mask_0, x = q_33_cast_fp16)[name = tensor("op_7193_cast_fp16")]; tensor var_7197_begin_0 = const()[name = tensor("op_7197_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_7197_end_0 = const()[name = tensor("op_7197_end_0"), val = tensor([2, 320, 1, 256])]; tensor var_7197_end_mask_0 = const()[name = tensor("op_7197_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7197_cast_fp16 = slice_by_index(begin = var_7197_begin_0, end = var_7197_end_0, end_mask = var_7197_end_mask_0, x = q_33_cast_fp16)[name = tensor("op_7197_cast_fp16")]; tensor var_7201_begin_0 = const()[name = tensor("op_7201_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_7201_end_0 = const()[name = tensor("op_7201_end_0"), val = tensor([2, 480, 1, 256])]; tensor var_7201_end_mask_0 = const()[name = tensor("op_7201_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7201_cast_fp16 = slice_by_index(begin = var_7201_begin_0, end = var_7201_end_0, end_mask = var_7201_end_mask_0, x = q_33_cast_fp16)[name = tensor("op_7201_cast_fp16")]; tensor var_7205_begin_0 = const()[name = tensor("op_7205_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_7205_end_0 = const()[name = tensor("op_7205_end_0"), val = tensor([2, 640, 1, 256])]; tensor var_7205_end_mask_0 = const()[name = tensor("op_7205_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7205_cast_fp16 = slice_by_index(begin = var_7205_begin_0, end = var_7205_end_0, end_mask = var_7205_end_mask_0, x = q_33_cast_fp16)[name = tensor("op_7205_cast_fp16")]; tensor var_7209_begin_0 = const()[name = tensor("op_7209_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_7209_end_0 = const()[name = tensor("op_7209_end_0"), val = tensor([2, 800, 1, 256])]; tensor var_7209_end_mask_0 = const()[name = tensor("op_7209_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7209_cast_fp16 = slice_by_index(begin = var_7209_begin_0, end = var_7209_end_0, end_mask = var_7209_end_mask_0, x = q_33_cast_fp16)[name = tensor("op_7209_cast_fp16")]; tensor var_7213_begin_0 = const()[name = tensor("op_7213_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_7213_end_0 = const()[name = tensor("op_7213_end_0"), val = tensor([2, 960, 1, 256])]; tensor var_7213_end_mask_0 = const()[name = tensor("op_7213_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7213_cast_fp16 = slice_by_index(begin = var_7213_begin_0, end = var_7213_end_0, end_mask = var_7213_end_mask_0, x = q_33_cast_fp16)[name = tensor("op_7213_cast_fp16")]; tensor var_7217_begin_0 = const()[name = tensor("op_7217_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_7217_end_0 = const()[name = tensor("op_7217_end_0"), val = tensor([2, 1120, 1, 256])]; tensor var_7217_end_mask_0 = const()[name = tensor("op_7217_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7217_cast_fp16 = slice_by_index(begin = var_7217_begin_0, end = var_7217_end_0, end_mask = var_7217_end_mask_0, x = q_33_cast_fp16)[name = tensor("op_7217_cast_fp16")]; tensor var_7221_begin_0 = const()[name = tensor("op_7221_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_7221_end_0 = const()[name = tensor("op_7221_end_0"), val = tensor([2, 1280, 1, 256])]; tensor var_7221_end_mask_0 = const()[name = tensor("op_7221_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7221_cast_fp16 = slice_by_index(begin = var_7221_begin_0, end = var_7221_end_0, end_mask = var_7221_end_mask_0, x = q_33_cast_fp16)[name = tensor("op_7221_cast_fp16")]; tensor k_67_perm_0 = const()[name = tensor("k_67_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_7228_begin_0 = const()[name = tensor("op_7228_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7228_end_0 = const()[name = tensor("op_7228_end_0"), val = tensor([2, 256, 1, 160])]; tensor var_7228_end_mask_0 = const()[name = tensor("op_7228_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_15 = transpose(perm = k_67_perm_0, x = k_65_cast_fp16)[name = tensor("transpose_15")]; tensor var_7228_cast_fp16 = slice_by_index(begin = var_7228_begin_0, end = var_7228_end_0, end_mask = var_7228_end_mask_0, x = transpose_15)[name = tensor("op_7228_cast_fp16")]; tensor var_7232_begin_0 = const()[name = tensor("op_7232_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_7232_end_0 = const()[name = tensor("op_7232_end_0"), val = tensor([2, 256, 1, 320])]; tensor var_7232_end_mask_0 = const()[name = tensor("op_7232_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7232_cast_fp16 = slice_by_index(begin = var_7232_begin_0, end = var_7232_end_0, end_mask = var_7232_end_mask_0, x = transpose_15)[name = tensor("op_7232_cast_fp16")]; tensor var_7236_begin_0 = const()[name = tensor("op_7236_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_7236_end_0 = const()[name = tensor("op_7236_end_0"), val = tensor([2, 256, 1, 480])]; tensor var_7236_end_mask_0 = const()[name = tensor("op_7236_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7236_cast_fp16 = slice_by_index(begin = var_7236_begin_0, end = var_7236_end_0, end_mask = var_7236_end_mask_0, x = transpose_15)[name = tensor("op_7236_cast_fp16")]; tensor var_7240_begin_0 = const()[name = tensor("op_7240_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_7240_end_0 = const()[name = tensor("op_7240_end_0"), val = tensor([2, 256, 1, 640])]; tensor var_7240_end_mask_0 = const()[name = tensor("op_7240_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7240_cast_fp16 = slice_by_index(begin = var_7240_begin_0, end = var_7240_end_0, end_mask = var_7240_end_mask_0, x = transpose_15)[name = tensor("op_7240_cast_fp16")]; tensor var_7244_begin_0 = const()[name = tensor("op_7244_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_7244_end_0 = const()[name = tensor("op_7244_end_0"), val = tensor([2, 256, 1, 800])]; tensor var_7244_end_mask_0 = const()[name = tensor("op_7244_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7244_cast_fp16 = slice_by_index(begin = var_7244_begin_0, end = var_7244_end_0, end_mask = var_7244_end_mask_0, x = transpose_15)[name = tensor("op_7244_cast_fp16")]; tensor var_7248_begin_0 = const()[name = tensor("op_7248_begin_0"), val = tensor([0, 0, 0, 800])]; tensor var_7248_end_0 = const()[name = tensor("op_7248_end_0"), val = tensor([2, 256, 1, 960])]; tensor var_7248_end_mask_0 = const()[name = tensor("op_7248_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7248_cast_fp16 = slice_by_index(begin = var_7248_begin_0, end = var_7248_end_0, end_mask = var_7248_end_mask_0, x = transpose_15)[name = tensor("op_7248_cast_fp16")]; tensor var_7252_begin_0 = const()[name = tensor("op_7252_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_7252_end_0 = const()[name = tensor("op_7252_end_0"), val = tensor([2, 256, 1, 1120])]; tensor var_7252_end_mask_0 = const()[name = tensor("op_7252_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7252_cast_fp16 = slice_by_index(begin = var_7252_begin_0, end = var_7252_end_0, end_mask = var_7252_end_mask_0, x = transpose_15)[name = tensor("op_7252_cast_fp16")]; tensor var_7256_begin_0 = const()[name = tensor("op_7256_begin_0"), val = tensor([0, 0, 0, 1120])]; tensor var_7256_end_0 = const()[name = tensor("op_7256_end_0"), val = tensor([2, 256, 1, 1280])]; tensor var_7256_end_mask_0 = const()[name = tensor("op_7256_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7256_cast_fp16 = slice_by_index(begin = var_7256_begin_0, end = var_7256_end_0, end_mask = var_7256_end_mask_0, x = transpose_15)[name = tensor("op_7256_cast_fp16")]; tensor var_7258_begin_0 = const()[name = tensor("op_7258_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7258_end_0 = const()[name = tensor("op_7258_end_0"), val = tensor([2, 160, 1, 256])]; tensor var_7258_end_mask_0 = const()[name = tensor("op_7258_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7258_cast_fp16 = slice_by_index(begin = var_7258_begin_0, end = var_7258_end_0, end_mask = var_7258_end_mask_0, x = v_33_cast_fp16)[name = tensor("op_7258_cast_fp16")]; tensor var_7262_begin_0 = const()[name = tensor("op_7262_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_7262_end_0 = const()[name = tensor("op_7262_end_0"), val = tensor([2, 320, 1, 256])]; tensor var_7262_end_mask_0 = const()[name = tensor("op_7262_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7262_cast_fp16 = slice_by_index(begin = var_7262_begin_0, end = var_7262_end_0, end_mask = var_7262_end_mask_0, x = v_33_cast_fp16)[name = tensor("op_7262_cast_fp16")]; tensor var_7266_begin_0 = const()[name = tensor("op_7266_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_7266_end_0 = const()[name = tensor("op_7266_end_0"), val = tensor([2, 480, 1, 256])]; tensor var_7266_end_mask_0 = const()[name = tensor("op_7266_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7266_cast_fp16 = slice_by_index(begin = var_7266_begin_0, end = var_7266_end_0, end_mask = var_7266_end_mask_0, x = v_33_cast_fp16)[name = tensor("op_7266_cast_fp16")]; tensor var_7270_begin_0 = const()[name = tensor("op_7270_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_7270_end_0 = const()[name = tensor("op_7270_end_0"), val = tensor([2, 640, 1, 256])]; tensor var_7270_end_mask_0 = const()[name = tensor("op_7270_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7270_cast_fp16 = slice_by_index(begin = var_7270_begin_0, end = var_7270_end_0, end_mask = var_7270_end_mask_0, x = v_33_cast_fp16)[name = tensor("op_7270_cast_fp16")]; tensor var_7274_begin_0 = const()[name = tensor("op_7274_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_7274_end_0 = const()[name = tensor("op_7274_end_0"), val = tensor([2, 800, 1, 256])]; tensor var_7274_end_mask_0 = const()[name = tensor("op_7274_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7274_cast_fp16 = slice_by_index(begin = var_7274_begin_0, end = var_7274_end_0, end_mask = var_7274_end_mask_0, x = v_33_cast_fp16)[name = tensor("op_7274_cast_fp16")]; tensor var_7278_begin_0 = const()[name = tensor("op_7278_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_7278_end_0 = const()[name = tensor("op_7278_end_0"), val = tensor([2, 960, 1, 256])]; tensor var_7278_end_mask_0 = const()[name = tensor("op_7278_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7278_cast_fp16 = slice_by_index(begin = var_7278_begin_0, end = var_7278_end_0, end_mask = var_7278_end_mask_0, x = v_33_cast_fp16)[name = tensor("op_7278_cast_fp16")]; tensor var_7282_begin_0 = const()[name = tensor("op_7282_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_7282_end_0 = const()[name = tensor("op_7282_end_0"), val = tensor([2, 1120, 1, 256])]; tensor var_7282_end_mask_0 = const()[name = tensor("op_7282_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7282_cast_fp16 = slice_by_index(begin = var_7282_begin_0, end = var_7282_end_0, end_mask = var_7282_end_mask_0, x = v_33_cast_fp16)[name = tensor("op_7282_cast_fp16")]; tensor var_7286_begin_0 = const()[name = tensor("op_7286_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_7286_end_0 = const()[name = tensor("op_7286_end_0"), val = tensor([2, 1280, 1, 256])]; tensor var_7286_end_mask_0 = const()[name = tensor("op_7286_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7286_cast_fp16 = slice_by_index(begin = var_7286_begin_0, end = var_7286_end_0, end_mask = var_7286_end_mask_0, x = v_33_cast_fp16)[name = tensor("op_7286_cast_fp16")]; tensor var_7290_equation_0 = const()[name = tensor("op_7290_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7290_cast_fp16 = einsum(equation = var_7290_equation_0, values = (var_7228_cast_fp16, var_7193_cast_fp16))[name = tensor("op_7290_cast_fp16")]; tensor var_7291_to_fp16 = const()[name = tensor("op_7291_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_129_cast_fp16 = mul(x = var_7290_cast_fp16, y = var_7291_to_fp16)[name = tensor("aw_129_cast_fp16")]; tensor var_7294_equation_0 = const()[name = tensor("op_7294_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7294_cast_fp16 = einsum(equation = var_7294_equation_0, values = (var_7232_cast_fp16, var_7197_cast_fp16))[name = tensor("op_7294_cast_fp16")]; tensor var_7295_to_fp16 = const()[name = tensor("op_7295_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_131_cast_fp16 = mul(x = var_7294_cast_fp16, y = var_7295_to_fp16)[name = tensor("aw_131_cast_fp16")]; tensor var_7298_equation_0 = const()[name = tensor("op_7298_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7298_cast_fp16 = einsum(equation = var_7298_equation_0, values = (var_7236_cast_fp16, var_7201_cast_fp16))[name = tensor("op_7298_cast_fp16")]; tensor var_7299_to_fp16 = const()[name = tensor("op_7299_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_133_cast_fp16 = mul(x = var_7298_cast_fp16, y = var_7299_to_fp16)[name = tensor("aw_133_cast_fp16")]; tensor var_7302_equation_0 = const()[name = tensor("op_7302_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7302_cast_fp16 = einsum(equation = var_7302_equation_0, values = (var_7240_cast_fp16, var_7205_cast_fp16))[name = tensor("op_7302_cast_fp16")]; tensor var_7303_to_fp16 = const()[name = tensor("op_7303_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_135_cast_fp16 = mul(x = var_7302_cast_fp16, y = var_7303_to_fp16)[name = tensor("aw_135_cast_fp16")]; tensor var_7306_equation_0 = const()[name = tensor("op_7306_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7306_cast_fp16 = einsum(equation = var_7306_equation_0, values = (var_7244_cast_fp16, var_7209_cast_fp16))[name = tensor("op_7306_cast_fp16")]; tensor var_7307_to_fp16 = const()[name = tensor("op_7307_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_137_cast_fp16 = mul(x = var_7306_cast_fp16, y = var_7307_to_fp16)[name = tensor("aw_137_cast_fp16")]; tensor var_7310_equation_0 = const()[name = tensor("op_7310_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7310_cast_fp16 = einsum(equation = var_7310_equation_0, values = (var_7248_cast_fp16, var_7213_cast_fp16))[name = tensor("op_7310_cast_fp16")]; tensor var_7311_to_fp16 = const()[name = tensor("op_7311_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_139_cast_fp16 = mul(x = var_7310_cast_fp16, y = var_7311_to_fp16)[name = tensor("aw_139_cast_fp16")]; tensor var_7314_equation_0 = const()[name = tensor("op_7314_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7314_cast_fp16 = einsum(equation = var_7314_equation_0, values = (var_7252_cast_fp16, var_7217_cast_fp16))[name = tensor("op_7314_cast_fp16")]; tensor var_7315_to_fp16 = const()[name = tensor("op_7315_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_141_cast_fp16 = mul(x = var_7314_cast_fp16, y = var_7315_to_fp16)[name = tensor("aw_141_cast_fp16")]; tensor var_7318_equation_0 = const()[name = tensor("op_7318_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7318_cast_fp16 = einsum(equation = var_7318_equation_0, values = (var_7256_cast_fp16, var_7221_cast_fp16))[name = tensor("op_7318_cast_fp16")]; tensor var_7319_to_fp16 = const()[name = tensor("op_7319_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_143_cast_fp16 = mul(x = var_7318_cast_fp16, y = var_7319_to_fp16)[name = tensor("aw_143_cast_fp16")]; tensor var_7321_cast_fp16 = softmax(axis = var_6522, x = aw_129_cast_fp16)[name = tensor("op_7321_cast_fp16")]; tensor var_7322_cast_fp16 = softmax(axis = var_6522, x = aw_131_cast_fp16)[name = tensor("op_7322_cast_fp16")]; tensor var_7323_cast_fp16 = softmax(axis = var_6522, x = aw_133_cast_fp16)[name = tensor("op_7323_cast_fp16")]; tensor var_7324_cast_fp16 = softmax(axis = var_6522, x = aw_135_cast_fp16)[name = tensor("op_7324_cast_fp16")]; tensor var_7325_cast_fp16 = softmax(axis = var_6522, x = aw_137_cast_fp16)[name = tensor("op_7325_cast_fp16")]; tensor var_7326_cast_fp16 = softmax(axis = var_6522, x = aw_139_cast_fp16)[name = tensor("op_7326_cast_fp16")]; tensor var_7327_cast_fp16 = softmax(axis = var_6522, x = aw_141_cast_fp16)[name = tensor("op_7327_cast_fp16")]; tensor var_7328_cast_fp16 = softmax(axis = var_6522, x = aw_143_cast_fp16)[name = tensor("op_7328_cast_fp16")]; tensor var_7330_equation_0 = const()[name = tensor("op_7330_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7330_cast_fp16 = einsum(equation = var_7330_equation_0, values = (var_7258_cast_fp16, var_7321_cast_fp16))[name = tensor("op_7330_cast_fp16")]; tensor var_7332_equation_0 = const()[name = tensor("op_7332_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7332_cast_fp16 = einsum(equation = var_7332_equation_0, values = (var_7262_cast_fp16, var_7322_cast_fp16))[name = tensor("op_7332_cast_fp16")]; tensor var_7334_equation_0 = const()[name = tensor("op_7334_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7334_cast_fp16 = einsum(equation = var_7334_equation_0, values = (var_7266_cast_fp16, var_7323_cast_fp16))[name = tensor("op_7334_cast_fp16")]; tensor var_7336_equation_0 = const()[name = tensor("op_7336_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7336_cast_fp16 = einsum(equation = var_7336_equation_0, values = (var_7270_cast_fp16, var_7324_cast_fp16))[name = tensor("op_7336_cast_fp16")]; tensor var_7338_equation_0 = const()[name = tensor("op_7338_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7338_cast_fp16 = einsum(equation = var_7338_equation_0, values = (var_7274_cast_fp16, var_7325_cast_fp16))[name = tensor("op_7338_cast_fp16")]; tensor var_7340_equation_0 = const()[name = tensor("op_7340_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7340_cast_fp16 = einsum(equation = var_7340_equation_0, values = (var_7278_cast_fp16, var_7326_cast_fp16))[name = tensor("op_7340_cast_fp16")]; tensor var_7342_equation_0 = const()[name = tensor("op_7342_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7342_cast_fp16 = einsum(equation = var_7342_equation_0, values = (var_7282_cast_fp16, var_7327_cast_fp16))[name = tensor("op_7342_cast_fp16")]; tensor var_7344_equation_0 = const()[name = tensor("op_7344_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7344_cast_fp16 = einsum(equation = var_7344_equation_0, values = (var_7286_cast_fp16, var_7328_cast_fp16))[name = tensor("op_7344_cast_fp16")]; tensor input_327_interleave_0 = const()[name = tensor("input_327_interleave_0"), val = tensor(false)]; tensor input_327_cast_fp16 = concat(axis = var_6522, interleave = input_327_interleave_0, values = (var_7330_cast_fp16, var_7332_cast_fp16, var_7334_cast_fp16, var_7336_cast_fp16, var_7338_cast_fp16, var_7340_cast_fp16, var_7342_cast_fp16, var_7344_cast_fp16))[name = tensor("input_327_cast_fp16")]; tensor var_7350 = const()[name = tensor("op_7350"), val = tensor([1, 1])]; tensor var_7352 = const()[name = tensor("op_7352"), val = tensor([1, 1])]; tensor var_7354_pad_type_0 = const()[name = tensor("op_7354_pad_type_0"), val = tensor("custom")]; tensor var_7354_pad_0 = const()[name = tensor("op_7354_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1301255040)))]; tensor up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1304531904)))]; tensor var_7354_cast_fp16 = conv(bias = up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_7352, groups = var_6522, pad = var_7354_pad_0, pad_type = var_7354_pad_type_0, strides = var_7350, weight = up_blocks_1_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_327_cast_fp16)[name = tensor("op_7354_cast_fp16")]; tensor inputs_51_cast_fp16 = add(x = var_7354_cast_fp16, y = inputs_49_cast_fp16)[name = tensor("inputs_51_cast_fp16")]; tensor hidden_states_189_axes_0 = const()[name = tensor("hidden_states_189_axes_0"), val = tensor([1])]; tensor hidden_states_189_gamma_0_to_fp16 = const()[name = tensor("hidden_states_189_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1304534528)))]; tensor hidden_states_189_beta_0_to_fp16 = const()[name = tensor("hidden_states_189_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1304537152)))]; tensor var_7364_to_fp16 = const()[name = tensor("op_7364_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_189_cast_fp16 = layer_norm(axes = hidden_states_189_axes_0, beta = hidden_states_189_beta_0_to_fp16, epsilon = var_7364_to_fp16, gamma = hidden_states_189_gamma_0_to_fp16, x = inputs_51_cast_fp16)[name = tensor("hidden_states_189_cast_fp16")]; tensor var_7379 = const()[name = tensor("op_7379"), val = tensor([1, 1])]; tensor var_7381 = const()[name = tensor("op_7381"), val = tensor([1, 1])]; tensor q_35_pad_type_0 = const()[name = tensor("q_35_pad_type_0"), val = tensor("custom")]; tensor q_35_pad_0 = const()[name = tensor("q_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1304539776)))]; tensor q_35_cast_fp16 = conv(dilations = var_7381, groups = var_6522, pad = q_35_pad_0, pad_type = q_35_pad_type_0, strides = var_7379, weight = up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_189_cast_fp16)[name = tensor("q_35_cast_fp16")]; tensor var_7385 = const()[name = tensor("op_7385"), val = tensor([1, 1])]; tensor var_7387 = const()[name = tensor("op_7387"), val = tensor([1, 1])]; tensor k_69_pad_type_0 = const()[name = tensor("k_69_pad_type_0"), val = tensor("custom")]; tensor k_69_pad_0 = const()[name = tensor("k_69_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1307816640)))]; tensor k_69_cast_fp16 = conv(dilations = var_7387, groups = var_6522, pad = k_69_pad_0, pad_type = k_69_pad_type_0, strides = var_7385, weight = up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_69_cast_fp16")]; tensor var_7391 = const()[name = tensor("op_7391"), val = tensor([1, 1])]; tensor var_7393 = const()[name = tensor("op_7393"), val = tensor([1, 1])]; tensor v_35_pad_type_0 = const()[name = tensor("v_35_pad_type_0"), val = tensor("custom")]; tensor v_35_pad_0 = const()[name = tensor("v_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1309782784)))]; tensor v_35_cast_fp16 = conv(dilations = var_7393, groups = var_6522, pad = v_35_pad_0, pad_type = v_35_pad_type_0, strides = var_7391, weight = up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_35_cast_fp16")]; tensor var_7397_begin_0 = const()[name = tensor("op_7397_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7397_end_0 = const()[name = tensor("op_7397_end_0"), val = tensor([2, 160, 1, 256])]; tensor var_7397_end_mask_0 = const()[name = tensor("op_7397_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7397_cast_fp16 = slice_by_index(begin = var_7397_begin_0, end = var_7397_end_0, end_mask = var_7397_end_mask_0, x = q_35_cast_fp16)[name = tensor("op_7397_cast_fp16")]; tensor var_7401_begin_0 = const()[name = tensor("op_7401_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_7401_end_0 = const()[name = tensor("op_7401_end_0"), val = tensor([2, 320, 1, 256])]; tensor var_7401_end_mask_0 = const()[name = tensor("op_7401_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7401_cast_fp16 = slice_by_index(begin = var_7401_begin_0, end = var_7401_end_0, end_mask = var_7401_end_mask_0, x = q_35_cast_fp16)[name = tensor("op_7401_cast_fp16")]; tensor var_7405_begin_0 = const()[name = tensor("op_7405_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_7405_end_0 = const()[name = tensor("op_7405_end_0"), val = tensor([2, 480, 1, 256])]; tensor var_7405_end_mask_0 = const()[name = tensor("op_7405_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7405_cast_fp16 = slice_by_index(begin = var_7405_begin_0, end = var_7405_end_0, end_mask = var_7405_end_mask_0, x = q_35_cast_fp16)[name = tensor("op_7405_cast_fp16")]; tensor var_7409_begin_0 = const()[name = tensor("op_7409_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_7409_end_0 = const()[name = tensor("op_7409_end_0"), val = tensor([2, 640, 1, 256])]; tensor var_7409_end_mask_0 = const()[name = tensor("op_7409_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7409_cast_fp16 = slice_by_index(begin = var_7409_begin_0, end = var_7409_end_0, end_mask = var_7409_end_mask_0, x = q_35_cast_fp16)[name = tensor("op_7409_cast_fp16")]; tensor var_7413_begin_0 = const()[name = tensor("op_7413_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_7413_end_0 = const()[name = tensor("op_7413_end_0"), val = tensor([2, 800, 1, 256])]; tensor var_7413_end_mask_0 = const()[name = tensor("op_7413_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7413_cast_fp16 = slice_by_index(begin = var_7413_begin_0, end = var_7413_end_0, end_mask = var_7413_end_mask_0, x = q_35_cast_fp16)[name = tensor("op_7413_cast_fp16")]; tensor var_7417_begin_0 = const()[name = tensor("op_7417_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_7417_end_0 = const()[name = tensor("op_7417_end_0"), val = tensor([2, 960, 1, 256])]; tensor var_7417_end_mask_0 = const()[name = tensor("op_7417_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7417_cast_fp16 = slice_by_index(begin = var_7417_begin_0, end = var_7417_end_0, end_mask = var_7417_end_mask_0, x = q_35_cast_fp16)[name = tensor("op_7417_cast_fp16")]; tensor var_7421_begin_0 = const()[name = tensor("op_7421_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_7421_end_0 = const()[name = tensor("op_7421_end_0"), val = tensor([2, 1120, 1, 256])]; tensor var_7421_end_mask_0 = const()[name = tensor("op_7421_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7421_cast_fp16 = slice_by_index(begin = var_7421_begin_0, end = var_7421_end_0, end_mask = var_7421_end_mask_0, x = q_35_cast_fp16)[name = tensor("op_7421_cast_fp16")]; tensor var_7425_begin_0 = const()[name = tensor("op_7425_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_7425_end_0 = const()[name = tensor("op_7425_end_0"), val = tensor([2, 1280, 1, 256])]; tensor var_7425_end_mask_0 = const()[name = tensor("op_7425_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7425_cast_fp16 = slice_by_index(begin = var_7425_begin_0, end = var_7425_end_0, end_mask = var_7425_end_mask_0, x = q_35_cast_fp16)[name = tensor("op_7425_cast_fp16")]; tensor k_71_perm_0 = const()[name = tensor("k_71_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_7432_begin_0 = const()[name = tensor("op_7432_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7432_end_0 = const()[name = tensor("op_7432_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_7432_end_mask_0 = const()[name = tensor("op_7432_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_14 = transpose(perm = k_71_perm_0, x = k_69_cast_fp16)[name = tensor("transpose_14")]; tensor var_7432_cast_fp16 = slice_by_index(begin = var_7432_begin_0, end = var_7432_end_0, end_mask = var_7432_end_mask_0, x = transpose_14)[name = tensor("op_7432_cast_fp16")]; tensor var_7436_begin_0 = const()[name = tensor("op_7436_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_7436_end_0 = const()[name = tensor("op_7436_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_7436_end_mask_0 = const()[name = tensor("op_7436_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7436_cast_fp16 = slice_by_index(begin = var_7436_begin_0, end = var_7436_end_0, end_mask = var_7436_end_mask_0, x = transpose_14)[name = tensor("op_7436_cast_fp16")]; tensor var_7440_begin_0 = const()[name = tensor("op_7440_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_7440_end_0 = const()[name = tensor("op_7440_end_0"), val = tensor([2, 77, 1, 480])]; tensor var_7440_end_mask_0 = const()[name = tensor("op_7440_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7440_cast_fp16 = slice_by_index(begin = var_7440_begin_0, end = var_7440_end_0, end_mask = var_7440_end_mask_0, x = transpose_14)[name = tensor("op_7440_cast_fp16")]; tensor var_7444_begin_0 = const()[name = tensor("op_7444_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_7444_end_0 = const()[name = tensor("op_7444_end_0"), val = tensor([2, 77, 1, 640])]; tensor var_7444_end_mask_0 = const()[name = tensor("op_7444_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7444_cast_fp16 = slice_by_index(begin = var_7444_begin_0, end = var_7444_end_0, end_mask = var_7444_end_mask_0, x = transpose_14)[name = tensor("op_7444_cast_fp16")]; tensor var_7448_begin_0 = const()[name = tensor("op_7448_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_7448_end_0 = const()[name = tensor("op_7448_end_0"), val = tensor([2, 77, 1, 800])]; tensor var_7448_end_mask_0 = const()[name = tensor("op_7448_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7448_cast_fp16 = slice_by_index(begin = var_7448_begin_0, end = var_7448_end_0, end_mask = var_7448_end_mask_0, x = transpose_14)[name = tensor("op_7448_cast_fp16")]; tensor var_7452_begin_0 = const()[name = tensor("op_7452_begin_0"), val = tensor([0, 0, 0, 800])]; tensor var_7452_end_0 = const()[name = tensor("op_7452_end_0"), val = tensor([2, 77, 1, 960])]; tensor var_7452_end_mask_0 = const()[name = tensor("op_7452_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7452_cast_fp16 = slice_by_index(begin = var_7452_begin_0, end = var_7452_end_0, end_mask = var_7452_end_mask_0, x = transpose_14)[name = tensor("op_7452_cast_fp16")]; tensor var_7456_begin_0 = const()[name = tensor("op_7456_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_7456_end_0 = const()[name = tensor("op_7456_end_0"), val = tensor([2, 77, 1, 1120])]; tensor var_7456_end_mask_0 = const()[name = tensor("op_7456_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7456_cast_fp16 = slice_by_index(begin = var_7456_begin_0, end = var_7456_end_0, end_mask = var_7456_end_mask_0, x = transpose_14)[name = tensor("op_7456_cast_fp16")]; tensor var_7460_begin_0 = const()[name = tensor("op_7460_begin_0"), val = tensor([0, 0, 0, 1120])]; tensor var_7460_end_0 = const()[name = tensor("op_7460_end_0"), val = tensor([2, 77, 1, 1280])]; tensor var_7460_end_mask_0 = const()[name = tensor("op_7460_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7460_cast_fp16 = slice_by_index(begin = var_7460_begin_0, end = var_7460_end_0, end_mask = var_7460_end_mask_0, x = transpose_14)[name = tensor("op_7460_cast_fp16")]; tensor var_7462_begin_0 = const()[name = tensor("op_7462_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7462_end_0 = const()[name = tensor("op_7462_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_7462_end_mask_0 = const()[name = tensor("op_7462_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7462_cast_fp16 = slice_by_index(begin = var_7462_begin_0, end = var_7462_end_0, end_mask = var_7462_end_mask_0, x = v_35_cast_fp16)[name = tensor("op_7462_cast_fp16")]; tensor var_7466_begin_0 = const()[name = tensor("op_7466_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_7466_end_0 = const()[name = tensor("op_7466_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_7466_end_mask_0 = const()[name = tensor("op_7466_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7466_cast_fp16 = slice_by_index(begin = var_7466_begin_0, end = var_7466_end_0, end_mask = var_7466_end_mask_0, x = v_35_cast_fp16)[name = tensor("op_7466_cast_fp16")]; tensor var_7470_begin_0 = const()[name = tensor("op_7470_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_7470_end_0 = const()[name = tensor("op_7470_end_0"), val = tensor([2, 480, 1, 77])]; tensor var_7470_end_mask_0 = const()[name = tensor("op_7470_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7470_cast_fp16 = slice_by_index(begin = var_7470_begin_0, end = var_7470_end_0, end_mask = var_7470_end_mask_0, x = v_35_cast_fp16)[name = tensor("op_7470_cast_fp16")]; tensor var_7474_begin_0 = const()[name = tensor("op_7474_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_7474_end_0 = const()[name = tensor("op_7474_end_0"), val = tensor([2, 640, 1, 77])]; tensor var_7474_end_mask_0 = const()[name = tensor("op_7474_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7474_cast_fp16 = slice_by_index(begin = var_7474_begin_0, end = var_7474_end_0, end_mask = var_7474_end_mask_0, x = v_35_cast_fp16)[name = tensor("op_7474_cast_fp16")]; tensor var_7478_begin_0 = const()[name = tensor("op_7478_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_7478_end_0 = const()[name = tensor("op_7478_end_0"), val = tensor([2, 800, 1, 77])]; tensor var_7478_end_mask_0 = const()[name = tensor("op_7478_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7478_cast_fp16 = slice_by_index(begin = var_7478_begin_0, end = var_7478_end_0, end_mask = var_7478_end_mask_0, x = v_35_cast_fp16)[name = tensor("op_7478_cast_fp16")]; tensor var_7482_begin_0 = const()[name = tensor("op_7482_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_7482_end_0 = const()[name = tensor("op_7482_end_0"), val = tensor([2, 960, 1, 77])]; tensor var_7482_end_mask_0 = const()[name = tensor("op_7482_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7482_cast_fp16 = slice_by_index(begin = var_7482_begin_0, end = var_7482_end_0, end_mask = var_7482_end_mask_0, x = v_35_cast_fp16)[name = tensor("op_7482_cast_fp16")]; tensor var_7486_begin_0 = const()[name = tensor("op_7486_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_7486_end_0 = const()[name = tensor("op_7486_end_0"), val = tensor([2, 1120, 1, 77])]; tensor var_7486_end_mask_0 = const()[name = tensor("op_7486_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7486_cast_fp16 = slice_by_index(begin = var_7486_begin_0, end = var_7486_end_0, end_mask = var_7486_end_mask_0, x = v_35_cast_fp16)[name = tensor("op_7486_cast_fp16")]; tensor var_7490_begin_0 = const()[name = tensor("op_7490_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_7490_end_0 = const()[name = tensor("op_7490_end_0"), val = tensor([2, 1280, 1, 77])]; tensor var_7490_end_mask_0 = const()[name = tensor("op_7490_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7490_cast_fp16 = slice_by_index(begin = var_7490_begin_0, end = var_7490_end_0, end_mask = var_7490_end_mask_0, x = v_35_cast_fp16)[name = tensor("op_7490_cast_fp16")]; tensor var_7494_equation_0 = const()[name = tensor("op_7494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7494_cast_fp16 = einsum(equation = var_7494_equation_0, values = (var_7432_cast_fp16, var_7397_cast_fp16))[name = tensor("op_7494_cast_fp16")]; tensor var_7495_to_fp16 = const()[name = tensor("op_7495_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_145_cast_fp16 = mul(x = var_7494_cast_fp16, y = var_7495_to_fp16)[name = tensor("aw_145_cast_fp16")]; tensor var_7498_equation_0 = const()[name = tensor("op_7498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7498_cast_fp16 = einsum(equation = var_7498_equation_0, values = (var_7436_cast_fp16, var_7401_cast_fp16))[name = tensor("op_7498_cast_fp16")]; tensor var_7499_to_fp16 = const()[name = tensor("op_7499_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_147_cast_fp16 = mul(x = var_7498_cast_fp16, y = var_7499_to_fp16)[name = tensor("aw_147_cast_fp16")]; tensor var_7502_equation_0 = const()[name = tensor("op_7502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7502_cast_fp16 = einsum(equation = var_7502_equation_0, values = (var_7440_cast_fp16, var_7405_cast_fp16))[name = tensor("op_7502_cast_fp16")]; tensor var_7503_to_fp16 = const()[name = tensor("op_7503_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_149_cast_fp16 = mul(x = var_7502_cast_fp16, y = var_7503_to_fp16)[name = tensor("aw_149_cast_fp16")]; tensor var_7506_equation_0 = const()[name = tensor("op_7506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7506_cast_fp16 = einsum(equation = var_7506_equation_0, values = (var_7444_cast_fp16, var_7409_cast_fp16))[name = tensor("op_7506_cast_fp16")]; tensor var_7507_to_fp16 = const()[name = tensor("op_7507_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_151_cast_fp16 = mul(x = var_7506_cast_fp16, y = var_7507_to_fp16)[name = tensor("aw_151_cast_fp16")]; tensor var_7510_equation_0 = const()[name = tensor("op_7510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7510_cast_fp16 = einsum(equation = var_7510_equation_0, values = (var_7448_cast_fp16, var_7413_cast_fp16))[name = tensor("op_7510_cast_fp16")]; tensor var_7511_to_fp16 = const()[name = tensor("op_7511_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_153_cast_fp16 = mul(x = var_7510_cast_fp16, y = var_7511_to_fp16)[name = tensor("aw_153_cast_fp16")]; tensor var_7514_equation_0 = const()[name = tensor("op_7514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7514_cast_fp16 = einsum(equation = var_7514_equation_0, values = (var_7452_cast_fp16, var_7417_cast_fp16))[name = tensor("op_7514_cast_fp16")]; tensor var_7515_to_fp16 = const()[name = tensor("op_7515_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_155_cast_fp16 = mul(x = var_7514_cast_fp16, y = var_7515_to_fp16)[name = tensor("aw_155_cast_fp16")]; tensor var_7518_equation_0 = const()[name = tensor("op_7518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7518_cast_fp16 = einsum(equation = var_7518_equation_0, values = (var_7456_cast_fp16, var_7421_cast_fp16))[name = tensor("op_7518_cast_fp16")]; tensor var_7519_to_fp16 = const()[name = tensor("op_7519_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_157_cast_fp16 = mul(x = var_7518_cast_fp16, y = var_7519_to_fp16)[name = tensor("aw_157_cast_fp16")]; tensor var_7522_equation_0 = const()[name = tensor("op_7522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7522_cast_fp16 = einsum(equation = var_7522_equation_0, values = (var_7460_cast_fp16, var_7425_cast_fp16))[name = tensor("op_7522_cast_fp16")]; tensor var_7523_to_fp16 = const()[name = tensor("op_7523_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_159_cast_fp16 = mul(x = var_7522_cast_fp16, y = var_7523_to_fp16)[name = tensor("aw_159_cast_fp16")]; tensor var_7525_cast_fp16 = softmax(axis = var_6522, x = aw_145_cast_fp16)[name = tensor("op_7525_cast_fp16")]; tensor var_7526_cast_fp16 = softmax(axis = var_6522, x = aw_147_cast_fp16)[name = tensor("op_7526_cast_fp16")]; tensor var_7527_cast_fp16 = softmax(axis = var_6522, x = aw_149_cast_fp16)[name = tensor("op_7527_cast_fp16")]; tensor var_7528_cast_fp16 = softmax(axis = var_6522, x = aw_151_cast_fp16)[name = tensor("op_7528_cast_fp16")]; tensor var_7529_cast_fp16 = softmax(axis = var_6522, x = aw_153_cast_fp16)[name = tensor("op_7529_cast_fp16")]; tensor var_7530_cast_fp16 = softmax(axis = var_6522, x = aw_155_cast_fp16)[name = tensor("op_7530_cast_fp16")]; tensor var_7531_cast_fp16 = softmax(axis = var_6522, x = aw_157_cast_fp16)[name = tensor("op_7531_cast_fp16")]; tensor var_7532_cast_fp16 = softmax(axis = var_6522, x = aw_159_cast_fp16)[name = tensor("op_7532_cast_fp16")]; tensor var_7534_equation_0 = const()[name = tensor("op_7534_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7534_cast_fp16 = einsum(equation = var_7534_equation_0, values = (var_7462_cast_fp16, var_7525_cast_fp16))[name = tensor("op_7534_cast_fp16")]; tensor var_7536_equation_0 = const()[name = tensor("op_7536_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7536_cast_fp16 = einsum(equation = var_7536_equation_0, values = (var_7466_cast_fp16, var_7526_cast_fp16))[name = tensor("op_7536_cast_fp16")]; tensor var_7538_equation_0 = const()[name = tensor("op_7538_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7538_cast_fp16 = einsum(equation = var_7538_equation_0, values = (var_7470_cast_fp16, var_7527_cast_fp16))[name = tensor("op_7538_cast_fp16")]; tensor var_7540_equation_0 = const()[name = tensor("op_7540_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7540_cast_fp16 = einsum(equation = var_7540_equation_0, values = (var_7474_cast_fp16, var_7528_cast_fp16))[name = tensor("op_7540_cast_fp16")]; tensor var_7542_equation_0 = const()[name = tensor("op_7542_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7542_cast_fp16 = einsum(equation = var_7542_equation_0, values = (var_7478_cast_fp16, var_7529_cast_fp16))[name = tensor("op_7542_cast_fp16")]; tensor var_7544_equation_0 = const()[name = tensor("op_7544_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7544_cast_fp16 = einsum(equation = var_7544_equation_0, values = (var_7482_cast_fp16, var_7530_cast_fp16))[name = tensor("op_7544_cast_fp16")]; tensor var_7546_equation_0 = const()[name = tensor("op_7546_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7546_cast_fp16 = einsum(equation = var_7546_equation_0, values = (var_7486_cast_fp16, var_7531_cast_fp16))[name = tensor("op_7546_cast_fp16")]; tensor var_7548_equation_0 = const()[name = tensor("op_7548_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7548_cast_fp16 = einsum(equation = var_7548_equation_0, values = (var_7490_cast_fp16, var_7532_cast_fp16))[name = tensor("op_7548_cast_fp16")]; tensor input_329_interleave_0 = const()[name = tensor("input_329_interleave_0"), val = tensor(false)]; tensor input_329_cast_fp16 = concat(axis = var_6522, interleave = input_329_interleave_0, values = (var_7534_cast_fp16, var_7536_cast_fp16, var_7538_cast_fp16, var_7540_cast_fp16, var_7542_cast_fp16, var_7544_cast_fp16, var_7546_cast_fp16, var_7548_cast_fp16))[name = tensor("input_329_cast_fp16")]; tensor var_7554 = const()[name = tensor("op_7554"), val = tensor([1, 1])]; tensor var_7556 = const()[name = tensor("op_7556"), val = tensor([1, 1])]; tensor var_7558_pad_type_0 = const()[name = tensor("op_7558_pad_type_0"), val = tensor("custom")]; tensor var_7558_pad_0 = const()[name = tensor("op_7558_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1311748928)))]; tensor up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1315025792)))]; tensor var_7558_cast_fp16 = conv(bias = up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_7556, groups = var_6522, pad = var_7558_pad_0, pad_type = var_7558_pad_type_0, strides = var_7554, weight = up_blocks_1_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_329_cast_fp16)[name = tensor("op_7558_cast_fp16")]; tensor inputs_53_cast_fp16 = add(x = var_7558_cast_fp16, y = inputs_51_cast_fp16)[name = tensor("inputs_53_cast_fp16")]; tensor input_331_axes_0 = const()[name = tensor("input_331_axes_0"), val = tensor([1])]; tensor input_331_gamma_0_to_fp16 = const()[name = tensor("input_331_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1315028416)))]; tensor input_331_beta_0_to_fp16 = const()[name = tensor("input_331_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1315031040)))]; tensor var_7568_to_fp16 = const()[name = tensor("op_7568_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_331_cast_fp16 = layer_norm(axes = input_331_axes_0, beta = input_331_beta_0_to_fp16, epsilon = var_7568_to_fp16, gamma = input_331_gamma_0_to_fp16, x = inputs_53_cast_fp16)[name = tensor("input_331_cast_fp16")]; tensor var_7584 = const()[name = tensor("op_7584"), val = tensor([1, 1])]; tensor var_7586 = const()[name = tensor("op_7586"), val = tensor([1, 1])]; tensor var_7588_pad_type_0 = const()[name = tensor("op_7588_pad_type_0"), val = tensor("custom")]; tensor var_7588_pad_0 = const()[name = tensor("op_7588_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1315033664)))]; tensor up_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1341248128)))]; tensor var_7588_cast_fp16 = conv(bias = up_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_7586, groups = var_6522, pad = var_7588_pad_0, pad_type = var_7588_pad_type_0, strides = var_7584, weight = up_blocks_1_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_331_cast_fp16)[name = tensor("op_7588_cast_fp16")]; tensor var_7589_split_sizes_0 = const()[name = tensor("op_7589_split_sizes_0"), val = tensor([5120, 5120])]; tensor var_7589_axis_0 = const()[name = tensor("op_7589_axis_0"), val = tensor(1)]; tensor var_7589_cast_fp16_0, tensor var_7589_cast_fp16_1 = split(axis = var_7589_axis_0, split_sizes = var_7589_split_sizes_0, x = var_7588_cast_fp16)[name = tensor("op_7589_cast_fp16")]; tensor var_7591_mode_0 = const()[name = tensor("op_7591_mode_0"), val = tensor("EXACT")]; tensor var_7591_cast_fp16 = gelu(mode = var_7591_mode_0, x = var_7589_cast_fp16_1)[name = tensor("op_7591_cast_fp16")]; tensor input_333_cast_fp16 = mul(x = var_7589_cast_fp16_0, y = var_7591_cast_fp16)[name = tensor("input_333_cast_fp16")]; tensor var_7595 = const()[name = tensor("op_7595"), val = tensor([1, 1])]; tensor var_7597 = const()[name = tensor("op_7597"), val = tensor([1, 1])]; tensor var_7599_pad_type_0 = const()[name = tensor("op_7599_pad_type_0"), val = tensor("custom")]; tensor var_7599_pad_0 = const()[name = tensor("op_7599_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1341268672)))]; tensor up_blocks_1_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1354375936)))]; tensor var_7599_cast_fp16 = conv(bias = up_blocks_1_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_7597, groups = var_6522, pad = var_7599_pad_0, pad_type = var_7599_pad_type_0, strides = var_7595, weight = up_blocks_1_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_333_cast_fp16)[name = tensor("op_7599_cast_fp16")]; tensor hidden_states_193_cast_fp16 = add(x = var_7599_cast_fp16, y = inputs_53_cast_fp16)[name = tensor("hidden_states_193_cast_fp16")]; tensor var_7601 = const()[name = tensor("op_7601"), val = tensor([2, 1280, 16, 16])]; tensor input_335_cast_fp16 = reshape(shape = var_7601, x = hidden_states_193_cast_fp16)[name = tensor("input_335_cast_fp16")]; tensor var_7605 = const()[name = tensor("op_7605"), val = tensor([1, 1])]; tensor var_7607 = const()[name = tensor("op_7607"), val = tensor([1, 1])]; tensor hidden_states_195_pad_type_0 = const()[name = tensor("hidden_states_195_pad_type_0"), val = tensor("custom")]; tensor hidden_states_195_pad_0 = const()[name = tensor("hidden_states_195_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_1_proj_out_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1354378560)))]; tensor up_blocks_1_attentions_1_proj_out_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_1_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1357655424)))]; tensor hidden_states_195_cast_fp16 = conv(bias = up_blocks_1_attentions_1_proj_out_bias_to_fp16, dilations = var_7607, groups = var_6522, pad = hidden_states_195_pad_0, pad_type = hidden_states_195_pad_type_0, strides = var_7605, weight = up_blocks_1_attentions_1_proj_out_weight_to_fp16, x = input_335_cast_fp16)[name = tensor("hidden_states_195_cast_fp16")]; tensor hidden_states_197_cast_fp16 = add(x = hidden_states_195_cast_fp16, y = hidden_states_183_cast_fp16)[name = tensor("hidden_states_197_cast_fp16")]; tensor input_337_interleave_0 = const()[name = tensor("input_337_interleave_0"), val = tensor(false)]; tensor input_337_cast_fp16 = concat(axis = var_6522, interleave = input_337_interleave_0, values = (hidden_states_197_cast_fp16, input_117_cast_fp16))[name = tensor("input_337_cast_fp16")]; tensor reshape_156_shape_0 = const()[name = tensor("reshape_156_shape_0"), val = tensor([2, 32, 60, 16, 16])]; tensor reshape_156_cast_fp16 = reshape(shape = reshape_156_shape_0, x = input_337_cast_fp16)[name = tensor("reshape_156_cast_fp16")]; tensor reduce_mean_117_axes_0 = const()[name = tensor("reduce_mean_117_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_117_keep_dims_0 = const()[name = tensor("reduce_mean_117_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_117_cast_fp16 = reduce_mean(axes = reduce_mean_117_axes_0, keep_dims = reduce_mean_117_keep_dims_0, x = reshape_156_cast_fp16)[name = tensor("reduce_mean_117_cast_fp16")]; tensor sub_78_cast_fp16 = sub(x = reshape_156_cast_fp16, y = reduce_mean_117_cast_fp16)[name = tensor("sub_78_cast_fp16")]; tensor square_39_cast_fp16 = square(x = sub_78_cast_fp16)[name = tensor("square_39_cast_fp16")]; tensor reduce_mean_119_axes_0 = const()[name = tensor("reduce_mean_119_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_119_keep_dims_0 = const()[name = tensor("reduce_mean_119_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_119_cast_fp16 = reduce_mean(axes = reduce_mean_119_axes_0, keep_dims = reduce_mean_119_keep_dims_0, x = square_39_cast_fp16)[name = tensor("reduce_mean_119_cast_fp16")]; tensor add_78_y_0_to_fp16 = const()[name = tensor("add_78_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_78_cast_fp16 = add(x = reduce_mean_119_cast_fp16, y = add_78_y_0_to_fp16)[name = tensor("add_78_cast_fp16")]; tensor sqrt_39_cast_fp16 = sqrt(x = add_78_cast_fp16)[name = tensor("sqrt_39_cast_fp16")]; tensor real_div_39_cast_fp16 = real_div(x = sub_78_cast_fp16, y = sqrt_39_cast_fp16)[name = tensor("real_div_39_cast_fp16")]; tensor reshape_157_shape_0 = const()[name = tensor("reshape_157_shape_0"), val = tensor([2, 1920, 16, 16])]; tensor reshape_157_cast_fp16 = reshape(shape = reshape_157_shape_0, x = real_div_39_cast_fp16)[name = tensor("reshape_157_cast_fp16")]; tensor add_79_mean_0_to_fp16 = const()[name = tensor("add_79_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1357658048)))]; tensor add_79_variance_0_to_fp16 = const()[name = tensor("add_79_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1357661952)))]; tensor add_79_gamma_0_to_fp16 = const()[name = tensor("add_79_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1357665856)))]; tensor add_79_beta_0_to_fp16 = const()[name = tensor("add_79_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1357669760)))]; tensor add_79_epsilon_0_to_fp16 = const()[name = tensor("add_79_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_79_cast_fp16 = batch_norm(beta = add_79_beta_0_to_fp16, epsilon = add_79_epsilon_0_to_fp16, gamma = add_79_gamma_0_to_fp16, mean = add_79_mean_0_to_fp16, variance = add_79_variance_0_to_fp16, x = reshape_157_cast_fp16)[name = tensor("add_79_cast_fp16")]; tensor input_341_cast_fp16 = silu(x = add_79_cast_fp16)[name = tensor("input_341_cast_fp16")]; tensor var_7625 = const()[name = tensor("op_7625"), val = tensor([1, 1])]; tensor var_7627 = const()[name = tensor("op_7627"), val = tensor([1, 1])]; tensor hidden_states_199_pad_type_0 = const()[name = tensor("hidden_states_199_pad_type_0"), val = tensor("custom")]; tensor hidden_states_199_pad_0 = const()[name = tensor("hidden_states_199_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_1_resnets_2_conv1_weight_to_fp16 = const()[name = tensor("up_blocks_1_resnets_2_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1357673664)))]; tensor up_blocks_1_resnets_2_conv1_bias_to_fp16 = const()[name = tensor("up_blocks_1_resnets_2_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1401910528)))]; tensor hidden_states_199_cast_fp16 = conv(bias = up_blocks_1_resnets_2_conv1_bias_to_fp16, dilations = var_7627, groups = var_6522, pad = hidden_states_199_pad_0, pad_type = hidden_states_199_pad_type_0, strides = var_7625, weight = up_blocks_1_resnets_2_conv1_weight_to_fp16, x = input_341_cast_fp16)[name = tensor("hidden_states_199_cast_fp16")]; tensor var_7633 = const()[name = tensor("op_7633"), val = tensor([1, 1])]; tensor var_7635 = const()[name = tensor("op_7635"), val = tensor([1, 1])]; tensor temb_31_pad_type_0 = const()[name = tensor("temb_31_pad_type_0"), val = tensor("custom")]; tensor temb_31_pad_0 = const()[name = tensor("temb_31_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_resnets_2_time_emb_proj_weight_to_fp16 = const()[name = tensor("up_blocks_1_resnets_2_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1401913152)))]; tensor up_blocks_1_resnets_2_time_emb_proj_bias_to_fp16 = const()[name = tensor("up_blocks_1_resnets_2_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1405190016)))]; tensor temb_31_cast_fp16 = conv(bias = up_blocks_1_resnets_2_time_emb_proj_bias_to_fp16, dilations = var_7635, groups = var_6522, pad = temb_31_pad_0, pad_type = temb_31_pad_type_0, strides = var_7633, weight = up_blocks_1_resnets_2_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_31_cast_fp16")]; tensor input_345_cast_fp16 = add(x = hidden_states_199_cast_fp16, y = temb_31_cast_fp16)[name = tensor("input_345_cast_fp16")]; tensor reshape_160_shape_0 = const()[name = tensor("reshape_160_shape_0"), val = tensor([2, 32, 40, 16, 16])]; tensor reshape_160_cast_fp16 = reshape(shape = reshape_160_shape_0, x = input_345_cast_fp16)[name = tensor("reshape_160_cast_fp16")]; tensor reduce_mean_120_axes_0 = const()[name = tensor("reduce_mean_120_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_120_keep_dims_0 = const()[name = tensor("reduce_mean_120_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_120_cast_fp16 = reduce_mean(axes = reduce_mean_120_axes_0, keep_dims = reduce_mean_120_keep_dims_0, x = reshape_160_cast_fp16)[name = tensor("reduce_mean_120_cast_fp16")]; tensor sub_80_cast_fp16 = sub(x = reshape_160_cast_fp16, y = reduce_mean_120_cast_fp16)[name = tensor("sub_80_cast_fp16")]; tensor square_40_cast_fp16 = square(x = sub_80_cast_fp16)[name = tensor("square_40_cast_fp16")]; tensor reduce_mean_122_axes_0 = const()[name = tensor("reduce_mean_122_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_122_keep_dims_0 = const()[name = tensor("reduce_mean_122_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_122_cast_fp16 = reduce_mean(axes = reduce_mean_122_axes_0, keep_dims = reduce_mean_122_keep_dims_0, x = square_40_cast_fp16)[name = tensor("reduce_mean_122_cast_fp16")]; tensor add_80_y_0_to_fp16 = const()[name = tensor("add_80_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_80_cast_fp16 = add(x = reduce_mean_122_cast_fp16, y = add_80_y_0_to_fp16)[name = tensor("add_80_cast_fp16")]; tensor sqrt_40_cast_fp16 = sqrt(x = add_80_cast_fp16)[name = tensor("sqrt_40_cast_fp16")]; tensor real_div_40_cast_fp16 = real_div(x = sub_80_cast_fp16, y = sqrt_40_cast_fp16)[name = tensor("real_div_40_cast_fp16")]; tensor reshape_161_shape_0 = const()[name = tensor("reshape_161_shape_0"), val = tensor([2, 1280, 16, 16])]; tensor reshape_161_cast_fp16 = reshape(shape = reshape_161_shape_0, x = real_div_40_cast_fp16)[name = tensor("reshape_161_cast_fp16")]; tensor add_81_gamma_0_to_fp16 = const()[name = tensor("add_81_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1405192640)))]; tensor add_81_beta_0_to_fp16 = const()[name = tensor("add_81_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1405195264)))]; tensor add_81_epsilon_0_to_fp16 = const()[name = tensor("add_81_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_81_cast_fp16 = batch_norm(beta = add_81_beta_0_to_fp16, epsilon = add_81_epsilon_0_to_fp16, gamma = add_81_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_161_cast_fp16)[name = tensor("add_81_cast_fp16")]; tensor input_349_cast_fp16 = silu(x = add_81_cast_fp16)[name = tensor("input_349_cast_fp16")]; tensor var_7645 = const()[name = tensor("op_7645"), val = tensor([1, 1])]; tensor var_7647 = const()[name = tensor("op_7647"), val = tensor([1, 1])]; tensor hidden_states_201_pad_type_0 = const()[name = tensor("hidden_states_201_pad_type_0"), val = tensor("custom")]; tensor hidden_states_201_pad_0 = const()[name = tensor("hidden_states_201_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_1_resnets_2_conv2_weight_to_fp16 = const()[name = tensor("up_blocks_1_resnets_2_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1405197888)))]; tensor up_blocks_1_resnets_2_conv2_bias_to_fp16 = const()[name = tensor("up_blocks_1_resnets_2_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1434689152)))]; tensor hidden_states_201_cast_fp16 = conv(bias = up_blocks_1_resnets_2_conv2_bias_to_fp16, dilations = var_7647, groups = var_6522, pad = hidden_states_201_pad_0, pad_type = hidden_states_201_pad_type_0, strides = var_7645, weight = up_blocks_1_resnets_2_conv2_weight_to_fp16, x = input_349_cast_fp16)[name = tensor("hidden_states_201_cast_fp16")]; tensor var_7652 = const()[name = tensor("op_7652"), val = tensor([1, 1])]; tensor var_7654 = const()[name = tensor("op_7654"), val = tensor([1, 1])]; tensor x_15_pad_type_0 = const()[name = tensor("x_15_pad_type_0"), val = tensor("custom")]; tensor x_15_pad_0 = const()[name = tensor("x_15_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_resnets_2_conv_shortcut_weight_to_fp16 = const()[name = tensor("up_blocks_1_resnets_2_conv_shortcut_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1434691776)))]; tensor up_blocks_1_resnets_2_conv_shortcut_bias_to_fp16 = const()[name = tensor("up_blocks_1_resnets_2_conv_shortcut_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1439607040)))]; tensor x_15_cast_fp16 = conv(bias = up_blocks_1_resnets_2_conv_shortcut_bias_to_fp16, dilations = var_7654, groups = var_6522, pad = x_15_pad_0, pad_type = x_15_pad_type_0, strides = var_7652, weight = up_blocks_1_resnets_2_conv_shortcut_weight_to_fp16, x = input_337_cast_fp16)[name = tensor("x_15_cast_fp16")]; tensor hidden_states_203_cast_fp16 = add(x = x_15_cast_fp16, y = hidden_states_201_cast_fp16)[name = tensor("hidden_states_203_cast_fp16")]; tensor reshape_164_shape_0 = const()[name = tensor("reshape_164_shape_0"), val = tensor([2, 32, 40, 16, 16])]; tensor reshape_164_cast_fp16 = reshape(shape = reshape_164_shape_0, x = hidden_states_203_cast_fp16)[name = tensor("reshape_164_cast_fp16")]; tensor reduce_mean_123_axes_0 = const()[name = tensor("reduce_mean_123_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_123_keep_dims_0 = const()[name = tensor("reduce_mean_123_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_123_cast_fp16 = reduce_mean(axes = reduce_mean_123_axes_0, keep_dims = reduce_mean_123_keep_dims_0, x = reshape_164_cast_fp16)[name = tensor("reduce_mean_123_cast_fp16")]; tensor sub_82_cast_fp16 = sub(x = reshape_164_cast_fp16, y = reduce_mean_123_cast_fp16)[name = tensor("sub_82_cast_fp16")]; tensor square_41_cast_fp16 = square(x = sub_82_cast_fp16)[name = tensor("square_41_cast_fp16")]; tensor reduce_mean_125_axes_0 = const()[name = tensor("reduce_mean_125_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_125_keep_dims_0 = const()[name = tensor("reduce_mean_125_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_125_cast_fp16 = reduce_mean(axes = reduce_mean_125_axes_0, keep_dims = reduce_mean_125_keep_dims_0, x = square_41_cast_fp16)[name = tensor("reduce_mean_125_cast_fp16")]; tensor add_82_y_0_to_fp16 = const()[name = tensor("add_82_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_82_cast_fp16 = add(x = reduce_mean_125_cast_fp16, y = add_82_y_0_to_fp16)[name = tensor("add_82_cast_fp16")]; tensor sqrt_41_cast_fp16 = sqrt(x = add_82_cast_fp16)[name = tensor("sqrt_41_cast_fp16")]; tensor real_div_41_cast_fp16 = real_div(x = sub_82_cast_fp16, y = sqrt_41_cast_fp16)[name = tensor("real_div_41_cast_fp16")]; tensor reshape_165_shape_0 = const()[name = tensor("reshape_165_shape_0"), val = tensor([2, 1280, 16, 16])]; tensor reshape_165_cast_fp16 = reshape(shape = reshape_165_shape_0, x = real_div_41_cast_fp16)[name = tensor("reshape_165_cast_fp16")]; tensor add_83_gamma_0_to_fp16 = const()[name = tensor("add_83_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1439609664)))]; tensor add_83_beta_0_to_fp16 = const()[name = tensor("add_83_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1439612288)))]; tensor add_83_epsilon_0_to_fp16 = const()[name = tensor("add_83_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_83_cast_fp16 = batch_norm(beta = add_83_beta_0_to_fp16, epsilon = add_83_epsilon_0_to_fp16, gamma = add_83_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_165_cast_fp16)[name = tensor("add_83_cast_fp16")]; tensor var_7674 = const()[name = tensor("op_7674"), val = tensor([1, 1])]; tensor var_7676 = const()[name = tensor("op_7676"), val = tensor([1, 1])]; tensor hidden_states_205_pad_type_0 = const()[name = tensor("hidden_states_205_pad_type_0"), val = tensor("custom")]; tensor hidden_states_205_pad_0 = const()[name = tensor("hidden_states_205_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_2_proj_in_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1439614912)))]; tensor up_blocks_1_attentions_2_proj_in_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1442891776)))]; tensor hidden_states_205_cast_fp16 = conv(bias = up_blocks_1_attentions_2_proj_in_bias_to_fp16, dilations = var_7676, groups = var_6522, pad = hidden_states_205_pad_0, pad_type = hidden_states_205_pad_type_0, strides = var_7674, weight = up_blocks_1_attentions_2_proj_in_weight_to_fp16, x = add_83_cast_fp16)[name = tensor("hidden_states_205_cast_fp16")]; tensor var_7681 = const()[name = tensor("op_7681"), val = tensor([2, 1280, 1, 256])]; tensor inputs_55_cast_fp16 = reshape(shape = var_7681, x = hidden_states_205_cast_fp16)[name = tensor("inputs_55_cast_fp16")]; tensor hidden_states_207_axes_0 = const()[name = tensor("hidden_states_207_axes_0"), val = tensor([1])]; tensor hidden_states_207_gamma_0_to_fp16 = const()[name = tensor("hidden_states_207_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1442894400)))]; tensor hidden_states_207_beta_0_to_fp16 = const()[name = tensor("hidden_states_207_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1442897024)))]; tensor var_7697_to_fp16 = const()[name = tensor("op_7697_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_207_cast_fp16 = layer_norm(axes = hidden_states_207_axes_0, beta = hidden_states_207_beta_0_to_fp16, epsilon = var_7697_to_fp16, gamma = hidden_states_207_gamma_0_to_fp16, x = inputs_55_cast_fp16)[name = tensor("hidden_states_207_cast_fp16")]; tensor var_7712 = const()[name = tensor("op_7712"), val = tensor([1, 1])]; tensor var_7714 = const()[name = tensor("op_7714"), val = tensor([1, 1])]; tensor q_37_pad_type_0 = const()[name = tensor("q_37_pad_type_0"), val = tensor("custom")]; tensor q_37_pad_0 = const()[name = tensor("q_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1442899648)))]; tensor q_37_cast_fp16 = conv(dilations = var_7714, groups = var_6522, pad = q_37_pad_0, pad_type = q_37_pad_type_0, strides = var_7712, weight = up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_207_cast_fp16)[name = tensor("q_37_cast_fp16")]; tensor var_7718 = const()[name = tensor("op_7718"), val = tensor([1, 1])]; tensor var_7720 = const()[name = tensor("op_7720"), val = tensor([1, 1])]; tensor k_73_pad_type_0 = const()[name = tensor("k_73_pad_type_0"), val = tensor("custom")]; tensor k_73_pad_0 = const()[name = tensor("k_73_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1446176512)))]; tensor k_73_cast_fp16 = conv(dilations = var_7720, groups = var_6522, pad = k_73_pad_0, pad_type = k_73_pad_type_0, strides = var_7718, weight = up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_207_cast_fp16)[name = tensor("k_73_cast_fp16")]; tensor var_7724 = const()[name = tensor("op_7724"), val = tensor([1, 1])]; tensor var_7726 = const()[name = tensor("op_7726"), val = tensor([1, 1])]; tensor v_37_pad_type_0 = const()[name = tensor("v_37_pad_type_0"), val = tensor("custom")]; tensor v_37_pad_0 = const()[name = tensor("v_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1449453376)))]; tensor v_37_cast_fp16 = conv(dilations = var_7726, groups = var_6522, pad = v_37_pad_0, pad_type = v_37_pad_type_0, strides = var_7724, weight = up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_207_cast_fp16)[name = tensor("v_37_cast_fp16")]; tensor var_7730_begin_0 = const()[name = tensor("op_7730_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7730_end_0 = const()[name = tensor("op_7730_end_0"), val = tensor([2, 160, 1, 256])]; tensor var_7730_end_mask_0 = const()[name = tensor("op_7730_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7730_cast_fp16 = slice_by_index(begin = var_7730_begin_0, end = var_7730_end_0, end_mask = var_7730_end_mask_0, x = q_37_cast_fp16)[name = tensor("op_7730_cast_fp16")]; tensor var_7734_begin_0 = const()[name = tensor("op_7734_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_7734_end_0 = const()[name = tensor("op_7734_end_0"), val = tensor([2, 320, 1, 256])]; tensor var_7734_end_mask_0 = const()[name = tensor("op_7734_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7734_cast_fp16 = slice_by_index(begin = var_7734_begin_0, end = var_7734_end_0, end_mask = var_7734_end_mask_0, x = q_37_cast_fp16)[name = tensor("op_7734_cast_fp16")]; tensor var_7738_begin_0 = const()[name = tensor("op_7738_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_7738_end_0 = const()[name = tensor("op_7738_end_0"), val = tensor([2, 480, 1, 256])]; tensor var_7738_end_mask_0 = const()[name = tensor("op_7738_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7738_cast_fp16 = slice_by_index(begin = var_7738_begin_0, end = var_7738_end_0, end_mask = var_7738_end_mask_0, x = q_37_cast_fp16)[name = tensor("op_7738_cast_fp16")]; tensor var_7742_begin_0 = const()[name = tensor("op_7742_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_7742_end_0 = const()[name = tensor("op_7742_end_0"), val = tensor([2, 640, 1, 256])]; tensor var_7742_end_mask_0 = const()[name = tensor("op_7742_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7742_cast_fp16 = slice_by_index(begin = var_7742_begin_0, end = var_7742_end_0, end_mask = var_7742_end_mask_0, x = q_37_cast_fp16)[name = tensor("op_7742_cast_fp16")]; tensor var_7746_begin_0 = const()[name = tensor("op_7746_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_7746_end_0 = const()[name = tensor("op_7746_end_0"), val = tensor([2, 800, 1, 256])]; tensor var_7746_end_mask_0 = const()[name = tensor("op_7746_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7746_cast_fp16 = slice_by_index(begin = var_7746_begin_0, end = var_7746_end_0, end_mask = var_7746_end_mask_0, x = q_37_cast_fp16)[name = tensor("op_7746_cast_fp16")]; tensor var_7750_begin_0 = const()[name = tensor("op_7750_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_7750_end_0 = const()[name = tensor("op_7750_end_0"), val = tensor([2, 960, 1, 256])]; tensor var_7750_end_mask_0 = const()[name = tensor("op_7750_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7750_cast_fp16 = slice_by_index(begin = var_7750_begin_0, end = var_7750_end_0, end_mask = var_7750_end_mask_0, x = q_37_cast_fp16)[name = tensor("op_7750_cast_fp16")]; tensor var_7754_begin_0 = const()[name = tensor("op_7754_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_7754_end_0 = const()[name = tensor("op_7754_end_0"), val = tensor([2, 1120, 1, 256])]; tensor var_7754_end_mask_0 = const()[name = tensor("op_7754_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7754_cast_fp16 = slice_by_index(begin = var_7754_begin_0, end = var_7754_end_0, end_mask = var_7754_end_mask_0, x = q_37_cast_fp16)[name = tensor("op_7754_cast_fp16")]; tensor var_7758_begin_0 = const()[name = tensor("op_7758_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_7758_end_0 = const()[name = tensor("op_7758_end_0"), val = tensor([2, 1280, 1, 256])]; tensor var_7758_end_mask_0 = const()[name = tensor("op_7758_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7758_cast_fp16 = slice_by_index(begin = var_7758_begin_0, end = var_7758_end_0, end_mask = var_7758_end_mask_0, x = q_37_cast_fp16)[name = tensor("op_7758_cast_fp16")]; tensor k_75_perm_0 = const()[name = tensor("k_75_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_7765_begin_0 = const()[name = tensor("op_7765_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7765_end_0 = const()[name = tensor("op_7765_end_0"), val = tensor([2, 256, 1, 160])]; tensor var_7765_end_mask_0 = const()[name = tensor("op_7765_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_13 = transpose(perm = k_75_perm_0, x = k_73_cast_fp16)[name = tensor("transpose_13")]; tensor var_7765_cast_fp16 = slice_by_index(begin = var_7765_begin_0, end = var_7765_end_0, end_mask = var_7765_end_mask_0, x = transpose_13)[name = tensor("op_7765_cast_fp16")]; tensor var_7769_begin_0 = const()[name = tensor("op_7769_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_7769_end_0 = const()[name = tensor("op_7769_end_0"), val = tensor([2, 256, 1, 320])]; tensor var_7769_end_mask_0 = const()[name = tensor("op_7769_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7769_cast_fp16 = slice_by_index(begin = var_7769_begin_0, end = var_7769_end_0, end_mask = var_7769_end_mask_0, x = transpose_13)[name = tensor("op_7769_cast_fp16")]; tensor var_7773_begin_0 = const()[name = tensor("op_7773_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_7773_end_0 = const()[name = tensor("op_7773_end_0"), val = tensor([2, 256, 1, 480])]; tensor var_7773_end_mask_0 = const()[name = tensor("op_7773_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7773_cast_fp16 = slice_by_index(begin = var_7773_begin_0, end = var_7773_end_0, end_mask = var_7773_end_mask_0, x = transpose_13)[name = tensor("op_7773_cast_fp16")]; tensor var_7777_begin_0 = const()[name = tensor("op_7777_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_7777_end_0 = const()[name = tensor("op_7777_end_0"), val = tensor([2, 256, 1, 640])]; tensor var_7777_end_mask_0 = const()[name = tensor("op_7777_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7777_cast_fp16 = slice_by_index(begin = var_7777_begin_0, end = var_7777_end_0, end_mask = var_7777_end_mask_0, x = transpose_13)[name = tensor("op_7777_cast_fp16")]; tensor var_7781_begin_0 = const()[name = tensor("op_7781_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_7781_end_0 = const()[name = tensor("op_7781_end_0"), val = tensor([2, 256, 1, 800])]; tensor var_7781_end_mask_0 = const()[name = tensor("op_7781_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7781_cast_fp16 = slice_by_index(begin = var_7781_begin_0, end = var_7781_end_0, end_mask = var_7781_end_mask_0, x = transpose_13)[name = tensor("op_7781_cast_fp16")]; tensor var_7785_begin_0 = const()[name = tensor("op_7785_begin_0"), val = tensor([0, 0, 0, 800])]; tensor var_7785_end_0 = const()[name = tensor("op_7785_end_0"), val = tensor([2, 256, 1, 960])]; tensor var_7785_end_mask_0 = const()[name = tensor("op_7785_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7785_cast_fp16 = slice_by_index(begin = var_7785_begin_0, end = var_7785_end_0, end_mask = var_7785_end_mask_0, x = transpose_13)[name = tensor("op_7785_cast_fp16")]; tensor var_7789_begin_0 = const()[name = tensor("op_7789_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_7789_end_0 = const()[name = tensor("op_7789_end_0"), val = tensor([2, 256, 1, 1120])]; tensor var_7789_end_mask_0 = const()[name = tensor("op_7789_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7789_cast_fp16 = slice_by_index(begin = var_7789_begin_0, end = var_7789_end_0, end_mask = var_7789_end_mask_0, x = transpose_13)[name = tensor("op_7789_cast_fp16")]; tensor var_7793_begin_0 = const()[name = tensor("op_7793_begin_0"), val = tensor([0, 0, 0, 1120])]; tensor var_7793_end_0 = const()[name = tensor("op_7793_end_0"), val = tensor([2, 256, 1, 1280])]; tensor var_7793_end_mask_0 = const()[name = tensor("op_7793_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7793_cast_fp16 = slice_by_index(begin = var_7793_begin_0, end = var_7793_end_0, end_mask = var_7793_end_mask_0, x = transpose_13)[name = tensor("op_7793_cast_fp16")]; tensor var_7795_begin_0 = const()[name = tensor("op_7795_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7795_end_0 = const()[name = tensor("op_7795_end_0"), val = tensor([2, 160, 1, 256])]; tensor var_7795_end_mask_0 = const()[name = tensor("op_7795_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7795_cast_fp16 = slice_by_index(begin = var_7795_begin_0, end = var_7795_end_0, end_mask = var_7795_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_7795_cast_fp16")]; tensor var_7799_begin_0 = const()[name = tensor("op_7799_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_7799_end_0 = const()[name = tensor("op_7799_end_0"), val = tensor([2, 320, 1, 256])]; tensor var_7799_end_mask_0 = const()[name = tensor("op_7799_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7799_cast_fp16 = slice_by_index(begin = var_7799_begin_0, end = var_7799_end_0, end_mask = var_7799_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_7799_cast_fp16")]; tensor var_7803_begin_0 = const()[name = tensor("op_7803_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_7803_end_0 = const()[name = tensor("op_7803_end_0"), val = tensor([2, 480, 1, 256])]; tensor var_7803_end_mask_0 = const()[name = tensor("op_7803_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7803_cast_fp16 = slice_by_index(begin = var_7803_begin_0, end = var_7803_end_0, end_mask = var_7803_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_7803_cast_fp16")]; tensor var_7807_begin_0 = const()[name = tensor("op_7807_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_7807_end_0 = const()[name = tensor("op_7807_end_0"), val = tensor([2, 640, 1, 256])]; tensor var_7807_end_mask_0 = const()[name = tensor("op_7807_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7807_cast_fp16 = slice_by_index(begin = var_7807_begin_0, end = var_7807_end_0, end_mask = var_7807_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_7807_cast_fp16")]; tensor var_7811_begin_0 = const()[name = tensor("op_7811_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_7811_end_0 = const()[name = tensor("op_7811_end_0"), val = tensor([2, 800, 1, 256])]; tensor var_7811_end_mask_0 = const()[name = tensor("op_7811_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7811_cast_fp16 = slice_by_index(begin = var_7811_begin_0, end = var_7811_end_0, end_mask = var_7811_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_7811_cast_fp16")]; tensor var_7815_begin_0 = const()[name = tensor("op_7815_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_7815_end_0 = const()[name = tensor("op_7815_end_0"), val = tensor([2, 960, 1, 256])]; tensor var_7815_end_mask_0 = const()[name = tensor("op_7815_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7815_cast_fp16 = slice_by_index(begin = var_7815_begin_0, end = var_7815_end_0, end_mask = var_7815_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_7815_cast_fp16")]; tensor var_7819_begin_0 = const()[name = tensor("op_7819_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_7819_end_0 = const()[name = tensor("op_7819_end_0"), val = tensor([2, 1120, 1, 256])]; tensor var_7819_end_mask_0 = const()[name = tensor("op_7819_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7819_cast_fp16 = slice_by_index(begin = var_7819_begin_0, end = var_7819_end_0, end_mask = var_7819_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_7819_cast_fp16")]; tensor var_7823_begin_0 = const()[name = tensor("op_7823_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_7823_end_0 = const()[name = tensor("op_7823_end_0"), val = tensor([2, 1280, 1, 256])]; tensor var_7823_end_mask_0 = const()[name = tensor("op_7823_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7823_cast_fp16 = slice_by_index(begin = var_7823_begin_0, end = var_7823_end_0, end_mask = var_7823_end_mask_0, x = v_37_cast_fp16)[name = tensor("op_7823_cast_fp16")]; tensor var_7827_equation_0 = const()[name = tensor("op_7827_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7827_cast_fp16 = einsum(equation = var_7827_equation_0, values = (var_7765_cast_fp16, var_7730_cast_fp16))[name = tensor("op_7827_cast_fp16")]; tensor var_7828_to_fp16 = const()[name = tensor("op_7828_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_161_cast_fp16 = mul(x = var_7827_cast_fp16, y = var_7828_to_fp16)[name = tensor("aw_161_cast_fp16")]; tensor var_7831_equation_0 = const()[name = tensor("op_7831_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7831_cast_fp16 = einsum(equation = var_7831_equation_0, values = (var_7769_cast_fp16, var_7734_cast_fp16))[name = tensor("op_7831_cast_fp16")]; tensor var_7832_to_fp16 = const()[name = tensor("op_7832_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_163_cast_fp16 = mul(x = var_7831_cast_fp16, y = var_7832_to_fp16)[name = tensor("aw_163_cast_fp16")]; tensor var_7835_equation_0 = const()[name = tensor("op_7835_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7835_cast_fp16 = einsum(equation = var_7835_equation_0, values = (var_7773_cast_fp16, var_7738_cast_fp16))[name = tensor("op_7835_cast_fp16")]; tensor var_7836_to_fp16 = const()[name = tensor("op_7836_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_165_cast_fp16 = mul(x = var_7835_cast_fp16, y = var_7836_to_fp16)[name = tensor("aw_165_cast_fp16")]; tensor var_7839_equation_0 = const()[name = tensor("op_7839_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7839_cast_fp16 = einsum(equation = var_7839_equation_0, values = (var_7777_cast_fp16, var_7742_cast_fp16))[name = tensor("op_7839_cast_fp16")]; tensor var_7840_to_fp16 = const()[name = tensor("op_7840_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_167_cast_fp16 = mul(x = var_7839_cast_fp16, y = var_7840_to_fp16)[name = tensor("aw_167_cast_fp16")]; tensor var_7843_equation_0 = const()[name = tensor("op_7843_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7843_cast_fp16 = einsum(equation = var_7843_equation_0, values = (var_7781_cast_fp16, var_7746_cast_fp16))[name = tensor("op_7843_cast_fp16")]; tensor var_7844_to_fp16 = const()[name = tensor("op_7844_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_169_cast_fp16 = mul(x = var_7843_cast_fp16, y = var_7844_to_fp16)[name = tensor("aw_169_cast_fp16")]; tensor var_7847_equation_0 = const()[name = tensor("op_7847_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7847_cast_fp16 = einsum(equation = var_7847_equation_0, values = (var_7785_cast_fp16, var_7750_cast_fp16))[name = tensor("op_7847_cast_fp16")]; tensor var_7848_to_fp16 = const()[name = tensor("op_7848_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_171_cast_fp16 = mul(x = var_7847_cast_fp16, y = var_7848_to_fp16)[name = tensor("aw_171_cast_fp16")]; tensor var_7851_equation_0 = const()[name = tensor("op_7851_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7851_cast_fp16 = einsum(equation = var_7851_equation_0, values = (var_7789_cast_fp16, var_7754_cast_fp16))[name = tensor("op_7851_cast_fp16")]; tensor var_7852_to_fp16 = const()[name = tensor("op_7852_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_173_cast_fp16 = mul(x = var_7851_cast_fp16, y = var_7852_to_fp16)[name = tensor("aw_173_cast_fp16")]; tensor var_7855_equation_0 = const()[name = tensor("op_7855_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_7855_cast_fp16 = einsum(equation = var_7855_equation_0, values = (var_7793_cast_fp16, var_7758_cast_fp16))[name = tensor("op_7855_cast_fp16")]; tensor var_7856_to_fp16 = const()[name = tensor("op_7856_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_175_cast_fp16 = mul(x = var_7855_cast_fp16, y = var_7856_to_fp16)[name = tensor("aw_175_cast_fp16")]; tensor var_7858_cast_fp16 = softmax(axis = var_6522, x = aw_161_cast_fp16)[name = tensor("op_7858_cast_fp16")]; tensor var_7859_cast_fp16 = softmax(axis = var_6522, x = aw_163_cast_fp16)[name = tensor("op_7859_cast_fp16")]; tensor var_7860_cast_fp16 = softmax(axis = var_6522, x = aw_165_cast_fp16)[name = tensor("op_7860_cast_fp16")]; tensor var_7861_cast_fp16 = softmax(axis = var_6522, x = aw_167_cast_fp16)[name = tensor("op_7861_cast_fp16")]; tensor var_7862_cast_fp16 = softmax(axis = var_6522, x = aw_169_cast_fp16)[name = tensor("op_7862_cast_fp16")]; tensor var_7863_cast_fp16 = softmax(axis = var_6522, x = aw_171_cast_fp16)[name = tensor("op_7863_cast_fp16")]; tensor var_7864_cast_fp16 = softmax(axis = var_6522, x = aw_173_cast_fp16)[name = tensor("op_7864_cast_fp16")]; tensor var_7865_cast_fp16 = softmax(axis = var_6522, x = aw_175_cast_fp16)[name = tensor("op_7865_cast_fp16")]; tensor var_7867_equation_0 = const()[name = tensor("op_7867_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7867_cast_fp16 = einsum(equation = var_7867_equation_0, values = (var_7795_cast_fp16, var_7858_cast_fp16))[name = tensor("op_7867_cast_fp16")]; tensor var_7869_equation_0 = const()[name = tensor("op_7869_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7869_cast_fp16 = einsum(equation = var_7869_equation_0, values = (var_7799_cast_fp16, var_7859_cast_fp16))[name = tensor("op_7869_cast_fp16")]; tensor var_7871_equation_0 = const()[name = tensor("op_7871_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7871_cast_fp16 = einsum(equation = var_7871_equation_0, values = (var_7803_cast_fp16, var_7860_cast_fp16))[name = tensor("op_7871_cast_fp16")]; tensor var_7873_equation_0 = const()[name = tensor("op_7873_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7873_cast_fp16 = einsum(equation = var_7873_equation_0, values = (var_7807_cast_fp16, var_7861_cast_fp16))[name = tensor("op_7873_cast_fp16")]; tensor var_7875_equation_0 = const()[name = tensor("op_7875_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7875_cast_fp16 = einsum(equation = var_7875_equation_0, values = (var_7811_cast_fp16, var_7862_cast_fp16))[name = tensor("op_7875_cast_fp16")]; tensor var_7877_equation_0 = const()[name = tensor("op_7877_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7877_cast_fp16 = einsum(equation = var_7877_equation_0, values = (var_7815_cast_fp16, var_7863_cast_fp16))[name = tensor("op_7877_cast_fp16")]; tensor var_7879_equation_0 = const()[name = tensor("op_7879_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7879_cast_fp16 = einsum(equation = var_7879_equation_0, values = (var_7819_cast_fp16, var_7864_cast_fp16))[name = tensor("op_7879_cast_fp16")]; tensor var_7881_equation_0 = const()[name = tensor("op_7881_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7881_cast_fp16 = einsum(equation = var_7881_equation_0, values = (var_7823_cast_fp16, var_7865_cast_fp16))[name = tensor("op_7881_cast_fp16")]; tensor input_353_interleave_0 = const()[name = tensor("input_353_interleave_0"), val = tensor(false)]; tensor input_353_cast_fp16 = concat(axis = var_6522, interleave = input_353_interleave_0, values = (var_7867_cast_fp16, var_7869_cast_fp16, var_7871_cast_fp16, var_7873_cast_fp16, var_7875_cast_fp16, var_7877_cast_fp16, var_7879_cast_fp16, var_7881_cast_fp16))[name = tensor("input_353_cast_fp16")]; tensor var_7887 = const()[name = tensor("op_7887"), val = tensor([1, 1])]; tensor var_7889 = const()[name = tensor("op_7889"), val = tensor([1, 1])]; tensor var_7891_pad_type_0 = const()[name = tensor("op_7891_pad_type_0"), val = tensor("custom")]; tensor var_7891_pad_0 = const()[name = tensor("op_7891_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1452730240)))]; tensor up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1456007104)))]; tensor var_7891_cast_fp16 = conv(bias = up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_7889, groups = var_6522, pad = var_7891_pad_0, pad_type = var_7891_pad_type_0, strides = var_7887, weight = up_blocks_1_attentions_2_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_353_cast_fp16)[name = tensor("op_7891_cast_fp16")]; tensor inputs_57_cast_fp16 = add(x = var_7891_cast_fp16, y = inputs_55_cast_fp16)[name = tensor("inputs_57_cast_fp16")]; tensor hidden_states_209_axes_0 = const()[name = tensor("hidden_states_209_axes_0"), val = tensor([1])]; tensor hidden_states_209_gamma_0_to_fp16 = const()[name = tensor("hidden_states_209_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1456009728)))]; tensor hidden_states_209_beta_0_to_fp16 = const()[name = tensor("hidden_states_209_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1456012352)))]; tensor var_7901_to_fp16 = const()[name = tensor("op_7901_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_209_cast_fp16 = layer_norm(axes = hidden_states_209_axes_0, beta = hidden_states_209_beta_0_to_fp16, epsilon = var_7901_to_fp16, gamma = hidden_states_209_gamma_0_to_fp16, x = inputs_57_cast_fp16)[name = tensor("hidden_states_209_cast_fp16")]; tensor var_7916 = const()[name = tensor("op_7916"), val = tensor([1, 1])]; tensor var_7918 = const()[name = tensor("op_7918"), val = tensor([1, 1])]; tensor q_39_pad_type_0 = const()[name = tensor("q_39_pad_type_0"), val = tensor("custom")]; tensor q_39_pad_0 = const()[name = tensor("q_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1456014976)))]; tensor q_39_cast_fp16 = conv(dilations = var_7918, groups = var_6522, pad = q_39_pad_0, pad_type = q_39_pad_type_0, strides = var_7916, weight = up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_209_cast_fp16)[name = tensor("q_39_cast_fp16")]; tensor var_7922 = const()[name = tensor("op_7922"), val = tensor([1, 1])]; tensor var_7924 = const()[name = tensor("op_7924"), val = tensor([1, 1])]; tensor k_77_pad_type_0 = const()[name = tensor("k_77_pad_type_0"), val = tensor("custom")]; tensor k_77_pad_0 = const()[name = tensor("k_77_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1459291840)))]; tensor k_77_cast_fp16 = conv(dilations = var_7924, groups = var_6522, pad = k_77_pad_0, pad_type = k_77_pad_type_0, strides = var_7922, weight = up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_77_cast_fp16")]; tensor var_7928 = const()[name = tensor("op_7928"), val = tensor([1, 1])]; tensor var_7930 = const()[name = tensor("op_7930"), val = tensor([1, 1])]; tensor v_39_pad_type_0 = const()[name = tensor("v_39_pad_type_0"), val = tensor("custom")]; tensor v_39_pad_0 = const()[name = tensor("v_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1461257984)))]; tensor v_39_cast_fp16 = conv(dilations = var_7930, groups = var_6522, pad = v_39_pad_0, pad_type = v_39_pad_type_0, strides = var_7928, weight = up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_39_cast_fp16")]; tensor var_7934_begin_0 = const()[name = tensor("op_7934_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7934_end_0 = const()[name = tensor("op_7934_end_0"), val = tensor([2, 160, 1, 256])]; tensor var_7934_end_mask_0 = const()[name = tensor("op_7934_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7934_cast_fp16 = slice_by_index(begin = var_7934_begin_0, end = var_7934_end_0, end_mask = var_7934_end_mask_0, x = q_39_cast_fp16)[name = tensor("op_7934_cast_fp16")]; tensor var_7938_begin_0 = const()[name = tensor("op_7938_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_7938_end_0 = const()[name = tensor("op_7938_end_0"), val = tensor([2, 320, 1, 256])]; tensor var_7938_end_mask_0 = const()[name = tensor("op_7938_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7938_cast_fp16 = slice_by_index(begin = var_7938_begin_0, end = var_7938_end_0, end_mask = var_7938_end_mask_0, x = q_39_cast_fp16)[name = tensor("op_7938_cast_fp16")]; tensor var_7942_begin_0 = const()[name = tensor("op_7942_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_7942_end_0 = const()[name = tensor("op_7942_end_0"), val = tensor([2, 480, 1, 256])]; tensor var_7942_end_mask_0 = const()[name = tensor("op_7942_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7942_cast_fp16 = slice_by_index(begin = var_7942_begin_0, end = var_7942_end_0, end_mask = var_7942_end_mask_0, x = q_39_cast_fp16)[name = tensor("op_7942_cast_fp16")]; tensor var_7946_begin_0 = const()[name = tensor("op_7946_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_7946_end_0 = const()[name = tensor("op_7946_end_0"), val = tensor([2, 640, 1, 256])]; tensor var_7946_end_mask_0 = const()[name = tensor("op_7946_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7946_cast_fp16 = slice_by_index(begin = var_7946_begin_0, end = var_7946_end_0, end_mask = var_7946_end_mask_0, x = q_39_cast_fp16)[name = tensor("op_7946_cast_fp16")]; tensor var_7950_begin_0 = const()[name = tensor("op_7950_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_7950_end_0 = const()[name = tensor("op_7950_end_0"), val = tensor([2, 800, 1, 256])]; tensor var_7950_end_mask_0 = const()[name = tensor("op_7950_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7950_cast_fp16 = slice_by_index(begin = var_7950_begin_0, end = var_7950_end_0, end_mask = var_7950_end_mask_0, x = q_39_cast_fp16)[name = tensor("op_7950_cast_fp16")]; tensor var_7954_begin_0 = const()[name = tensor("op_7954_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_7954_end_0 = const()[name = tensor("op_7954_end_0"), val = tensor([2, 960, 1, 256])]; tensor var_7954_end_mask_0 = const()[name = tensor("op_7954_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7954_cast_fp16 = slice_by_index(begin = var_7954_begin_0, end = var_7954_end_0, end_mask = var_7954_end_mask_0, x = q_39_cast_fp16)[name = tensor("op_7954_cast_fp16")]; tensor var_7958_begin_0 = const()[name = tensor("op_7958_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_7958_end_0 = const()[name = tensor("op_7958_end_0"), val = tensor([2, 1120, 1, 256])]; tensor var_7958_end_mask_0 = const()[name = tensor("op_7958_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7958_cast_fp16 = slice_by_index(begin = var_7958_begin_0, end = var_7958_end_0, end_mask = var_7958_end_mask_0, x = q_39_cast_fp16)[name = tensor("op_7958_cast_fp16")]; tensor var_7962_begin_0 = const()[name = tensor("op_7962_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_7962_end_0 = const()[name = tensor("op_7962_end_0"), val = tensor([2, 1280, 1, 256])]; tensor var_7962_end_mask_0 = const()[name = tensor("op_7962_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7962_cast_fp16 = slice_by_index(begin = var_7962_begin_0, end = var_7962_end_0, end_mask = var_7962_end_mask_0, x = q_39_cast_fp16)[name = tensor("op_7962_cast_fp16")]; tensor k_79_perm_0 = const()[name = tensor("k_79_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_7969_begin_0 = const()[name = tensor("op_7969_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7969_end_0 = const()[name = tensor("op_7969_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_7969_end_mask_0 = const()[name = tensor("op_7969_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_12 = transpose(perm = k_79_perm_0, x = k_77_cast_fp16)[name = tensor("transpose_12")]; tensor var_7969_cast_fp16 = slice_by_index(begin = var_7969_begin_0, end = var_7969_end_0, end_mask = var_7969_end_mask_0, x = transpose_12)[name = tensor("op_7969_cast_fp16")]; tensor var_7973_begin_0 = const()[name = tensor("op_7973_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_7973_end_0 = const()[name = tensor("op_7973_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_7973_end_mask_0 = const()[name = tensor("op_7973_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7973_cast_fp16 = slice_by_index(begin = var_7973_begin_0, end = var_7973_end_0, end_mask = var_7973_end_mask_0, x = transpose_12)[name = tensor("op_7973_cast_fp16")]; tensor var_7977_begin_0 = const()[name = tensor("op_7977_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_7977_end_0 = const()[name = tensor("op_7977_end_0"), val = tensor([2, 77, 1, 480])]; tensor var_7977_end_mask_0 = const()[name = tensor("op_7977_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7977_cast_fp16 = slice_by_index(begin = var_7977_begin_0, end = var_7977_end_0, end_mask = var_7977_end_mask_0, x = transpose_12)[name = tensor("op_7977_cast_fp16")]; tensor var_7981_begin_0 = const()[name = tensor("op_7981_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_7981_end_0 = const()[name = tensor("op_7981_end_0"), val = tensor([2, 77, 1, 640])]; tensor var_7981_end_mask_0 = const()[name = tensor("op_7981_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7981_cast_fp16 = slice_by_index(begin = var_7981_begin_0, end = var_7981_end_0, end_mask = var_7981_end_mask_0, x = transpose_12)[name = tensor("op_7981_cast_fp16")]; tensor var_7985_begin_0 = const()[name = tensor("op_7985_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_7985_end_0 = const()[name = tensor("op_7985_end_0"), val = tensor([2, 77, 1, 800])]; tensor var_7985_end_mask_0 = const()[name = tensor("op_7985_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7985_cast_fp16 = slice_by_index(begin = var_7985_begin_0, end = var_7985_end_0, end_mask = var_7985_end_mask_0, x = transpose_12)[name = tensor("op_7985_cast_fp16")]; tensor var_7989_begin_0 = const()[name = tensor("op_7989_begin_0"), val = tensor([0, 0, 0, 800])]; tensor var_7989_end_0 = const()[name = tensor("op_7989_end_0"), val = tensor([2, 77, 1, 960])]; tensor var_7989_end_mask_0 = const()[name = tensor("op_7989_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7989_cast_fp16 = slice_by_index(begin = var_7989_begin_0, end = var_7989_end_0, end_mask = var_7989_end_mask_0, x = transpose_12)[name = tensor("op_7989_cast_fp16")]; tensor var_7993_begin_0 = const()[name = tensor("op_7993_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_7993_end_0 = const()[name = tensor("op_7993_end_0"), val = tensor([2, 77, 1, 1120])]; tensor var_7993_end_mask_0 = const()[name = tensor("op_7993_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7993_cast_fp16 = slice_by_index(begin = var_7993_begin_0, end = var_7993_end_0, end_mask = var_7993_end_mask_0, x = transpose_12)[name = tensor("op_7993_cast_fp16")]; tensor var_7997_begin_0 = const()[name = tensor("op_7997_begin_0"), val = tensor([0, 0, 0, 1120])]; tensor var_7997_end_0 = const()[name = tensor("op_7997_end_0"), val = tensor([2, 77, 1, 1280])]; tensor var_7997_end_mask_0 = const()[name = tensor("op_7997_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7997_cast_fp16 = slice_by_index(begin = var_7997_begin_0, end = var_7997_end_0, end_mask = var_7997_end_mask_0, x = transpose_12)[name = tensor("op_7997_cast_fp16")]; tensor var_7999_begin_0 = const()[name = tensor("op_7999_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7999_end_0 = const()[name = tensor("op_7999_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_7999_end_mask_0 = const()[name = tensor("op_7999_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7999_cast_fp16 = slice_by_index(begin = var_7999_begin_0, end = var_7999_end_0, end_mask = var_7999_end_mask_0, x = v_39_cast_fp16)[name = tensor("op_7999_cast_fp16")]; tensor var_8003_begin_0 = const()[name = tensor("op_8003_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_8003_end_0 = const()[name = tensor("op_8003_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_8003_end_mask_0 = const()[name = tensor("op_8003_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8003_cast_fp16 = slice_by_index(begin = var_8003_begin_0, end = var_8003_end_0, end_mask = var_8003_end_mask_0, x = v_39_cast_fp16)[name = tensor("op_8003_cast_fp16")]; tensor var_8007_begin_0 = const()[name = tensor("op_8007_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_8007_end_0 = const()[name = tensor("op_8007_end_0"), val = tensor([2, 480, 1, 77])]; tensor var_8007_end_mask_0 = const()[name = tensor("op_8007_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8007_cast_fp16 = slice_by_index(begin = var_8007_begin_0, end = var_8007_end_0, end_mask = var_8007_end_mask_0, x = v_39_cast_fp16)[name = tensor("op_8007_cast_fp16")]; tensor var_8011_begin_0 = const()[name = tensor("op_8011_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_8011_end_0 = const()[name = tensor("op_8011_end_0"), val = tensor([2, 640, 1, 77])]; tensor var_8011_end_mask_0 = const()[name = tensor("op_8011_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8011_cast_fp16 = slice_by_index(begin = var_8011_begin_0, end = var_8011_end_0, end_mask = var_8011_end_mask_0, x = v_39_cast_fp16)[name = tensor("op_8011_cast_fp16")]; tensor var_8015_begin_0 = const()[name = tensor("op_8015_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_8015_end_0 = const()[name = tensor("op_8015_end_0"), val = tensor([2, 800, 1, 77])]; tensor var_8015_end_mask_0 = const()[name = tensor("op_8015_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8015_cast_fp16 = slice_by_index(begin = var_8015_begin_0, end = var_8015_end_0, end_mask = var_8015_end_mask_0, x = v_39_cast_fp16)[name = tensor("op_8015_cast_fp16")]; tensor var_8019_begin_0 = const()[name = tensor("op_8019_begin_0"), val = tensor([0, 800, 0, 0])]; tensor var_8019_end_0 = const()[name = tensor("op_8019_end_0"), val = tensor([2, 960, 1, 77])]; tensor var_8019_end_mask_0 = const()[name = tensor("op_8019_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8019_cast_fp16 = slice_by_index(begin = var_8019_begin_0, end = var_8019_end_0, end_mask = var_8019_end_mask_0, x = v_39_cast_fp16)[name = tensor("op_8019_cast_fp16")]; tensor var_8023_begin_0 = const()[name = tensor("op_8023_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_8023_end_0 = const()[name = tensor("op_8023_end_0"), val = tensor([2, 1120, 1, 77])]; tensor var_8023_end_mask_0 = const()[name = tensor("op_8023_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8023_cast_fp16 = slice_by_index(begin = var_8023_begin_0, end = var_8023_end_0, end_mask = var_8023_end_mask_0, x = v_39_cast_fp16)[name = tensor("op_8023_cast_fp16")]; tensor var_8027_begin_0 = const()[name = tensor("op_8027_begin_0"), val = tensor([0, 1120, 0, 0])]; tensor var_8027_end_0 = const()[name = tensor("op_8027_end_0"), val = tensor([2, 1280, 1, 77])]; tensor var_8027_end_mask_0 = const()[name = tensor("op_8027_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8027_cast_fp16 = slice_by_index(begin = var_8027_begin_0, end = var_8027_end_0, end_mask = var_8027_end_mask_0, x = v_39_cast_fp16)[name = tensor("op_8027_cast_fp16")]; tensor var_8031_equation_0 = const()[name = tensor("op_8031_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8031_cast_fp16 = einsum(equation = var_8031_equation_0, values = (var_7969_cast_fp16, var_7934_cast_fp16))[name = tensor("op_8031_cast_fp16")]; tensor var_8032_to_fp16 = const()[name = tensor("op_8032_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_177_cast_fp16 = mul(x = var_8031_cast_fp16, y = var_8032_to_fp16)[name = tensor("aw_177_cast_fp16")]; tensor var_8035_equation_0 = const()[name = tensor("op_8035_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8035_cast_fp16 = einsum(equation = var_8035_equation_0, values = (var_7973_cast_fp16, var_7938_cast_fp16))[name = tensor("op_8035_cast_fp16")]; tensor var_8036_to_fp16 = const()[name = tensor("op_8036_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_179_cast_fp16 = mul(x = var_8035_cast_fp16, y = var_8036_to_fp16)[name = tensor("aw_179_cast_fp16")]; tensor var_8039_equation_0 = const()[name = tensor("op_8039_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8039_cast_fp16 = einsum(equation = var_8039_equation_0, values = (var_7977_cast_fp16, var_7942_cast_fp16))[name = tensor("op_8039_cast_fp16")]; tensor var_8040_to_fp16 = const()[name = tensor("op_8040_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_181_cast_fp16 = mul(x = var_8039_cast_fp16, y = var_8040_to_fp16)[name = tensor("aw_181_cast_fp16")]; tensor var_8043_equation_0 = const()[name = tensor("op_8043_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8043_cast_fp16 = einsum(equation = var_8043_equation_0, values = (var_7981_cast_fp16, var_7946_cast_fp16))[name = tensor("op_8043_cast_fp16")]; tensor var_8044_to_fp16 = const()[name = tensor("op_8044_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_183_cast_fp16 = mul(x = var_8043_cast_fp16, y = var_8044_to_fp16)[name = tensor("aw_183_cast_fp16")]; tensor var_8047_equation_0 = const()[name = tensor("op_8047_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8047_cast_fp16 = einsum(equation = var_8047_equation_0, values = (var_7985_cast_fp16, var_7950_cast_fp16))[name = tensor("op_8047_cast_fp16")]; tensor var_8048_to_fp16 = const()[name = tensor("op_8048_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_185_cast_fp16 = mul(x = var_8047_cast_fp16, y = var_8048_to_fp16)[name = tensor("aw_185_cast_fp16")]; tensor var_8051_equation_0 = const()[name = tensor("op_8051_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8051_cast_fp16 = einsum(equation = var_8051_equation_0, values = (var_7989_cast_fp16, var_7954_cast_fp16))[name = tensor("op_8051_cast_fp16")]; tensor var_8052_to_fp16 = const()[name = tensor("op_8052_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_187_cast_fp16 = mul(x = var_8051_cast_fp16, y = var_8052_to_fp16)[name = tensor("aw_187_cast_fp16")]; tensor var_8055_equation_0 = const()[name = tensor("op_8055_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8055_cast_fp16 = einsum(equation = var_8055_equation_0, values = (var_7993_cast_fp16, var_7958_cast_fp16))[name = tensor("op_8055_cast_fp16")]; tensor var_8056_to_fp16 = const()[name = tensor("op_8056_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_189_cast_fp16 = mul(x = var_8055_cast_fp16, y = var_8056_to_fp16)[name = tensor("aw_189_cast_fp16")]; tensor var_8059_equation_0 = const()[name = tensor("op_8059_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8059_cast_fp16 = einsum(equation = var_8059_equation_0, values = (var_7997_cast_fp16, var_7962_cast_fp16))[name = tensor("op_8059_cast_fp16")]; tensor var_8060_to_fp16 = const()[name = tensor("op_8060_to_fp16"), val = tensor(0x1.43cp-4)]; tensor aw_cast_fp16 = mul(x = var_8059_cast_fp16, y = var_8060_to_fp16)[name = tensor("aw_cast_fp16")]; tensor var_8062_cast_fp16 = softmax(axis = var_6522, x = aw_177_cast_fp16)[name = tensor("op_8062_cast_fp16")]; tensor var_8063_cast_fp16 = softmax(axis = var_6522, x = aw_179_cast_fp16)[name = tensor("op_8063_cast_fp16")]; tensor var_8064_cast_fp16 = softmax(axis = var_6522, x = aw_181_cast_fp16)[name = tensor("op_8064_cast_fp16")]; tensor var_8065_cast_fp16 = softmax(axis = var_6522, x = aw_183_cast_fp16)[name = tensor("op_8065_cast_fp16")]; tensor var_8066_cast_fp16 = softmax(axis = var_6522, x = aw_185_cast_fp16)[name = tensor("op_8066_cast_fp16")]; tensor var_8067_cast_fp16 = softmax(axis = var_6522, x = aw_187_cast_fp16)[name = tensor("op_8067_cast_fp16")]; tensor var_8068_cast_fp16 = softmax(axis = var_6522, x = aw_189_cast_fp16)[name = tensor("op_8068_cast_fp16")]; tensor var_8069_cast_fp16 = softmax(axis = var_6522, x = aw_cast_fp16)[name = tensor("op_8069_cast_fp16")]; tensor var_8071_equation_0 = const()[name = tensor("op_8071_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8071_cast_fp16 = einsum(equation = var_8071_equation_0, values = (var_7999_cast_fp16, var_8062_cast_fp16))[name = tensor("op_8071_cast_fp16")]; tensor var_8073_equation_0 = const()[name = tensor("op_8073_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8073_cast_fp16 = einsum(equation = var_8073_equation_0, values = (var_8003_cast_fp16, var_8063_cast_fp16))[name = tensor("op_8073_cast_fp16")]; tensor var_8075_equation_0 = const()[name = tensor("op_8075_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8075_cast_fp16 = einsum(equation = var_8075_equation_0, values = (var_8007_cast_fp16, var_8064_cast_fp16))[name = tensor("op_8075_cast_fp16")]; tensor var_8077_equation_0 = const()[name = tensor("op_8077_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8077_cast_fp16 = einsum(equation = var_8077_equation_0, values = (var_8011_cast_fp16, var_8065_cast_fp16))[name = tensor("op_8077_cast_fp16")]; tensor var_8079_equation_0 = const()[name = tensor("op_8079_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8079_cast_fp16 = einsum(equation = var_8079_equation_0, values = (var_8015_cast_fp16, var_8066_cast_fp16))[name = tensor("op_8079_cast_fp16")]; tensor var_8081_equation_0 = const()[name = tensor("op_8081_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8081_cast_fp16 = einsum(equation = var_8081_equation_0, values = (var_8019_cast_fp16, var_8067_cast_fp16))[name = tensor("op_8081_cast_fp16")]; tensor var_8083_equation_0 = const()[name = tensor("op_8083_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8083_cast_fp16 = einsum(equation = var_8083_equation_0, values = (var_8023_cast_fp16, var_8068_cast_fp16))[name = tensor("op_8083_cast_fp16")]; tensor var_8085_equation_0 = const()[name = tensor("op_8085_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8085_cast_fp16 = einsum(equation = var_8085_equation_0, values = (var_8027_cast_fp16, var_8069_cast_fp16))[name = tensor("op_8085_cast_fp16")]; tensor input_355_interleave_0 = const()[name = tensor("input_355_interleave_0"), val = tensor(false)]; tensor input_355_cast_fp16 = concat(axis = var_6522, interleave = input_355_interleave_0, values = (var_8071_cast_fp16, var_8073_cast_fp16, var_8075_cast_fp16, var_8077_cast_fp16, var_8079_cast_fp16, var_8081_cast_fp16, var_8083_cast_fp16, var_8085_cast_fp16))[name = tensor("input_355_cast_fp16")]; tensor var_8091 = const()[name = tensor("op_8091"), val = tensor([1, 1])]; tensor var_8093 = const()[name = tensor("op_8093"), val = tensor([1, 1])]; tensor var_8095_pad_type_0 = const()[name = tensor("op_8095_pad_type_0"), val = tensor("custom")]; tensor var_8095_pad_0 = const()[name = tensor("op_8095_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1463224128)))]; tensor up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1466500992)))]; tensor var_8095_cast_fp16 = conv(bias = up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_8093, groups = var_6522, pad = var_8095_pad_0, pad_type = var_8095_pad_type_0, strides = var_8091, weight = up_blocks_1_attentions_2_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_355_cast_fp16)[name = tensor("op_8095_cast_fp16")]; tensor inputs_59_cast_fp16 = add(x = var_8095_cast_fp16, y = inputs_57_cast_fp16)[name = tensor("inputs_59_cast_fp16")]; tensor input_357_axes_0 = const()[name = tensor("input_357_axes_0"), val = tensor([1])]; tensor input_357_gamma_0_to_fp16 = const()[name = tensor("input_357_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1466503616)))]; tensor input_357_beta_0_to_fp16 = const()[name = tensor("input_357_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1466506240)))]; tensor var_8105_to_fp16 = const()[name = tensor("op_8105_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_357_cast_fp16 = layer_norm(axes = input_357_axes_0, beta = input_357_beta_0_to_fp16, epsilon = var_8105_to_fp16, gamma = input_357_gamma_0_to_fp16, x = inputs_59_cast_fp16)[name = tensor("input_357_cast_fp16")]; tensor var_8121 = const()[name = tensor("op_8121"), val = tensor([1, 1])]; tensor var_8123 = const()[name = tensor("op_8123"), val = tensor([1, 1])]; tensor var_8125_pad_type_0 = const()[name = tensor("op_8125_pad_type_0"), val = tensor("custom")]; tensor var_8125_pad_0 = const()[name = tensor("op_8125_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_2_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1466508864)))]; tensor up_blocks_1_attentions_2_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1492723328)))]; tensor var_8125_cast_fp16 = conv(bias = up_blocks_1_attentions_2_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_8123, groups = var_6522, pad = var_8125_pad_0, pad_type = var_8125_pad_type_0, strides = var_8121, weight = up_blocks_1_attentions_2_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_357_cast_fp16)[name = tensor("op_8125_cast_fp16")]; tensor var_8126_split_sizes_0 = const()[name = tensor("op_8126_split_sizes_0"), val = tensor([5120, 5120])]; tensor var_8126_axis_0 = const()[name = tensor("op_8126_axis_0"), val = tensor(1)]; tensor var_8126_cast_fp16_0, tensor var_8126_cast_fp16_1 = split(axis = var_8126_axis_0, split_sizes = var_8126_split_sizes_0, x = var_8125_cast_fp16)[name = tensor("op_8126_cast_fp16")]; tensor var_8128_mode_0 = const()[name = tensor("op_8128_mode_0"), val = tensor("EXACT")]; tensor var_8128_cast_fp16 = gelu(mode = var_8128_mode_0, x = var_8126_cast_fp16_1)[name = tensor("op_8128_cast_fp16")]; tensor input_359_cast_fp16 = mul(x = var_8126_cast_fp16_0, y = var_8128_cast_fp16)[name = tensor("input_359_cast_fp16")]; tensor var_8132 = const()[name = tensor("op_8132"), val = tensor([1, 1])]; tensor var_8134 = const()[name = tensor("op_8134"), val = tensor([1, 1])]; tensor var_8136_pad_type_0 = const()[name = tensor("op_8136_pad_type_0"), val = tensor("custom")]; tensor var_8136_pad_0 = const()[name = tensor("op_8136_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_2_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1492743872)))]; tensor up_blocks_1_attentions_2_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1505851136)))]; tensor var_8136_cast_fp16 = conv(bias = up_blocks_1_attentions_2_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_8134, groups = var_6522, pad = var_8136_pad_0, pad_type = var_8136_pad_type_0, strides = var_8132, weight = up_blocks_1_attentions_2_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_359_cast_fp16)[name = tensor("op_8136_cast_fp16")]; tensor hidden_states_213_cast_fp16 = add(x = var_8136_cast_fp16, y = inputs_59_cast_fp16)[name = tensor("hidden_states_213_cast_fp16")]; tensor var_8138 = const()[name = tensor("op_8138"), val = tensor([2, 1280, 16, 16])]; tensor input_361_cast_fp16 = reshape(shape = var_8138, x = hidden_states_213_cast_fp16)[name = tensor("input_361_cast_fp16")]; tensor var_8142 = const()[name = tensor("op_8142"), val = tensor([1, 1])]; tensor var_8144 = const()[name = tensor("op_8144"), val = tensor([1, 1])]; tensor hidden_states_215_pad_type_0 = const()[name = tensor("hidden_states_215_pad_type_0"), val = tensor("custom")]; tensor hidden_states_215_pad_0 = const()[name = tensor("hidden_states_215_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_1_attentions_2_proj_out_weight_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1505853760)))]; tensor up_blocks_1_attentions_2_proj_out_bias_to_fp16 = const()[name = tensor("up_blocks_1_attentions_2_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1509130624)))]; tensor hidden_states_215_cast_fp16 = conv(bias = up_blocks_1_attentions_2_proj_out_bias_to_fp16, dilations = var_8144, groups = var_6522, pad = hidden_states_215_pad_0, pad_type = hidden_states_215_pad_type_0, strides = var_8142, weight = up_blocks_1_attentions_2_proj_out_weight_to_fp16, x = input_361_cast_fp16)[name = tensor("hidden_states_215_cast_fp16")]; tensor input_363_cast_fp16 = add(x = hidden_states_215_cast_fp16, y = hidden_states_203_cast_fp16)[name = tensor("input_363_cast_fp16")]; tensor input_365_scale_factor_height_0 = const()[name = tensor("input_365_scale_factor_height_0"), val = tensor(0x1p+1)]; tensor input_365_scale_factor_width_0 = const()[name = tensor("input_365_scale_factor_width_0"), val = tensor(0x1p+1)]; tensor input_365_cast_fp16 = upsample_nearest_neighbor(scale_factor_height = input_365_scale_factor_height_0, scale_factor_width = input_365_scale_factor_width_0, x = input_363_cast_fp16)[name = tensor("input_365_cast_fp16")]; tensor var_8153 = const()[name = tensor("op_8153"), val = tensor([1, 1])]; tensor var_8155 = const()[name = tensor("op_8155"), val = tensor([1, 1])]; tensor hidden_states_217_pad_type_0 = const()[name = tensor("hidden_states_217_pad_type_0"), val = tensor("custom")]; tensor hidden_states_217_pad_0 = const()[name = tensor("hidden_states_217_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_1_upsamplers_0_conv_weight_to_fp16 = const()[name = tensor("up_blocks_1_upsamplers_0_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1509133248)))]; tensor up_blocks_1_upsamplers_0_conv_bias_to_fp16 = const()[name = tensor("up_blocks_1_upsamplers_0_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1538624512)))]; tensor hidden_states_217_cast_fp16 = conv(bias = up_blocks_1_upsamplers_0_conv_bias_to_fp16, dilations = var_8155, groups = var_6522, pad = hidden_states_217_pad_0, pad_type = hidden_states_217_pad_type_0, strides = var_8153, weight = up_blocks_1_upsamplers_0_conv_weight_to_fp16, x = input_365_cast_fp16)[name = tensor("hidden_states_217_cast_fp16")]; tensor var_8160 = const()[name = tensor("op_8160"), val = tensor(3)]; tensor var_8182 = const()[name = tensor("op_8182"), val = tensor(1)]; tensor input_367_interleave_0 = const()[name = tensor("input_367_interleave_0"), val = tensor(false)]; tensor input_367_cast_fp16 = concat(axis = var_8182, interleave = input_367_interleave_0, values = (hidden_states_217_cast_fp16, input_115_cast_fp16))[name = tensor("input_367_cast_fp16")]; tensor reshape_168_shape_0 = const()[name = tensor("reshape_168_shape_0"), val = tensor([2, 32, 60, 32, 32])]; tensor reshape_168_cast_fp16 = reshape(shape = reshape_168_shape_0, x = input_367_cast_fp16)[name = tensor("reshape_168_cast_fp16")]; tensor reduce_mean_126_axes_0 = const()[name = tensor("reduce_mean_126_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_126_keep_dims_0 = const()[name = tensor("reduce_mean_126_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_126_cast_fp16 = reduce_mean(axes = reduce_mean_126_axes_0, keep_dims = reduce_mean_126_keep_dims_0, x = reshape_168_cast_fp16)[name = tensor("reduce_mean_126_cast_fp16")]; tensor sub_84_cast_fp16 = sub(x = reshape_168_cast_fp16, y = reduce_mean_126_cast_fp16)[name = tensor("sub_84_cast_fp16")]; tensor square_42_cast_fp16 = square(x = sub_84_cast_fp16)[name = tensor("square_42_cast_fp16")]; tensor reduce_mean_128_axes_0 = const()[name = tensor("reduce_mean_128_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_128_keep_dims_0 = const()[name = tensor("reduce_mean_128_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_128_cast_fp16 = reduce_mean(axes = reduce_mean_128_axes_0, keep_dims = reduce_mean_128_keep_dims_0, x = square_42_cast_fp16)[name = tensor("reduce_mean_128_cast_fp16")]; tensor add_84_y_0_to_fp16 = const()[name = tensor("add_84_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_84_cast_fp16 = add(x = reduce_mean_128_cast_fp16, y = add_84_y_0_to_fp16)[name = tensor("add_84_cast_fp16")]; tensor sqrt_42_cast_fp16 = sqrt(x = add_84_cast_fp16)[name = tensor("sqrt_42_cast_fp16")]; tensor real_div_42_cast_fp16 = real_div(x = sub_84_cast_fp16, y = sqrt_42_cast_fp16)[name = tensor("real_div_42_cast_fp16")]; tensor reshape_169_shape_0 = const()[name = tensor("reshape_169_shape_0"), val = tensor([2, 1920, 32, 32])]; tensor reshape_169_cast_fp16 = reshape(shape = reshape_169_shape_0, x = real_div_42_cast_fp16)[name = tensor("reshape_169_cast_fp16")]; tensor add_85_gamma_0_to_fp16 = const()[name = tensor("add_85_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1538627136)))]; tensor add_85_beta_0_to_fp16 = const()[name = tensor("add_85_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1538631040)))]; tensor add_85_epsilon_0_to_fp16 = const()[name = tensor("add_85_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_85_cast_fp16 = batch_norm(beta = add_85_beta_0_to_fp16, epsilon = add_85_epsilon_0_to_fp16, gamma = add_85_gamma_0_to_fp16, mean = add_79_mean_0_to_fp16, variance = add_79_variance_0_to_fp16, x = reshape_169_cast_fp16)[name = tensor("add_85_cast_fp16")]; tensor input_371_cast_fp16 = silu(x = add_85_cast_fp16)[name = tensor("input_371_cast_fp16")]; tensor var_8211 = const()[name = tensor("op_8211"), val = tensor([1, 1])]; tensor var_8213 = const()[name = tensor("op_8213"), val = tensor([1, 1])]; tensor hidden_states_219_pad_type_0 = const()[name = tensor("hidden_states_219_pad_type_0"), val = tensor("custom")]; tensor hidden_states_219_pad_0 = const()[name = tensor("hidden_states_219_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_2_resnets_0_conv1_weight_to_fp16 = const()[name = tensor("up_blocks_2_resnets_0_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1538634944)))]; tensor up_blocks_2_resnets_0_conv1_bias_to_fp16 = const()[name = tensor("up_blocks_2_resnets_0_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1560753408)))]; tensor hidden_states_219_cast_fp16 = conv(bias = up_blocks_2_resnets_0_conv1_bias_to_fp16, dilations = var_8213, groups = var_8182, pad = hidden_states_219_pad_0, pad_type = hidden_states_219_pad_type_0, strides = var_8211, weight = up_blocks_2_resnets_0_conv1_weight_to_fp16, x = input_371_cast_fp16)[name = tensor("hidden_states_219_cast_fp16")]; tensor var_8219 = const()[name = tensor("op_8219"), val = tensor([1, 1])]; tensor var_8221 = const()[name = tensor("op_8221"), val = tensor([1, 1])]; tensor temb_33_pad_type_0 = const()[name = tensor("temb_33_pad_type_0"), val = tensor("custom")]; tensor temb_33_pad_0 = const()[name = tensor("temb_33_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_resnets_0_time_emb_proj_weight_to_fp16 = const()[name = tensor("up_blocks_2_resnets_0_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1560754752)))]; tensor up_blocks_2_resnets_0_time_emb_proj_bias_to_fp16 = const()[name = tensor("up_blocks_2_resnets_0_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1562393216)))]; tensor temb_33_cast_fp16 = conv(bias = up_blocks_2_resnets_0_time_emb_proj_bias_to_fp16, dilations = var_8221, groups = var_8182, pad = temb_33_pad_0, pad_type = temb_33_pad_type_0, strides = var_8219, weight = up_blocks_2_resnets_0_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_33_cast_fp16")]; tensor input_375_cast_fp16 = add(x = hidden_states_219_cast_fp16, y = temb_33_cast_fp16)[name = tensor("input_375_cast_fp16")]; tensor reshape_172_shape_0 = const()[name = tensor("reshape_172_shape_0"), val = tensor([2, 32, 20, 32, 32])]; tensor reshape_172_cast_fp16 = reshape(shape = reshape_172_shape_0, x = input_375_cast_fp16)[name = tensor("reshape_172_cast_fp16")]; tensor reduce_mean_129_axes_0 = const()[name = tensor("reduce_mean_129_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_129_keep_dims_0 = const()[name = tensor("reduce_mean_129_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_129_cast_fp16 = reduce_mean(axes = reduce_mean_129_axes_0, keep_dims = reduce_mean_129_keep_dims_0, x = reshape_172_cast_fp16)[name = tensor("reduce_mean_129_cast_fp16")]; tensor sub_86_cast_fp16 = sub(x = reshape_172_cast_fp16, y = reduce_mean_129_cast_fp16)[name = tensor("sub_86_cast_fp16")]; tensor square_43_cast_fp16 = square(x = sub_86_cast_fp16)[name = tensor("square_43_cast_fp16")]; tensor reduce_mean_131_axes_0 = const()[name = tensor("reduce_mean_131_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_131_keep_dims_0 = const()[name = tensor("reduce_mean_131_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_131_cast_fp16 = reduce_mean(axes = reduce_mean_131_axes_0, keep_dims = reduce_mean_131_keep_dims_0, x = square_43_cast_fp16)[name = tensor("reduce_mean_131_cast_fp16")]; tensor add_86_y_0_to_fp16 = const()[name = tensor("add_86_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_86_cast_fp16 = add(x = reduce_mean_131_cast_fp16, y = add_86_y_0_to_fp16)[name = tensor("add_86_cast_fp16")]; tensor sqrt_43_cast_fp16 = sqrt(x = add_86_cast_fp16)[name = tensor("sqrt_43_cast_fp16")]; tensor real_div_43_cast_fp16 = real_div(x = sub_86_cast_fp16, y = sqrt_43_cast_fp16)[name = tensor("real_div_43_cast_fp16")]; tensor reshape_173_shape_0 = const()[name = tensor("reshape_173_shape_0"), val = tensor([2, 640, 32, 32])]; tensor reshape_173_cast_fp16 = reshape(shape = reshape_173_shape_0, x = real_div_43_cast_fp16)[name = tensor("reshape_173_cast_fp16")]; tensor add_87_gamma_0_to_fp16 = const()[name = tensor("add_87_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1562394560)))]; tensor add_87_beta_0_to_fp16 = const()[name = tensor("add_87_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1562395904)))]; tensor add_87_epsilon_0_to_fp16 = const()[name = tensor("add_87_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_87_cast_fp16 = batch_norm(beta = add_87_beta_0_to_fp16, epsilon = add_87_epsilon_0_to_fp16, gamma = add_87_gamma_0_to_fp16, mean = add_15_mean_0_to_fp16, variance = add_15_variance_0_to_fp16, x = reshape_173_cast_fp16)[name = tensor("add_87_cast_fp16")]; tensor input_379_cast_fp16 = silu(x = add_87_cast_fp16)[name = tensor("input_379_cast_fp16")]; tensor var_8231 = const()[name = tensor("op_8231"), val = tensor([1, 1])]; tensor var_8233 = const()[name = tensor("op_8233"), val = tensor([1, 1])]; tensor hidden_states_221_pad_type_0 = const()[name = tensor("hidden_states_221_pad_type_0"), val = tensor("custom")]; tensor hidden_states_221_pad_0 = const()[name = tensor("hidden_states_221_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_2_resnets_0_conv2_weight_to_fp16 = const()[name = tensor("up_blocks_2_resnets_0_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1562397248)))]; tensor up_blocks_2_resnets_0_conv2_bias_to_fp16 = const()[name = tensor("up_blocks_2_resnets_0_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1569770112)))]; tensor hidden_states_221_cast_fp16 = conv(bias = up_blocks_2_resnets_0_conv2_bias_to_fp16, dilations = var_8233, groups = var_8182, pad = hidden_states_221_pad_0, pad_type = hidden_states_221_pad_type_0, strides = var_8231, weight = up_blocks_2_resnets_0_conv2_weight_to_fp16, x = input_379_cast_fp16)[name = tensor("hidden_states_221_cast_fp16")]; tensor var_8238 = const()[name = tensor("op_8238"), val = tensor([1, 1])]; tensor var_8240 = const()[name = tensor("op_8240"), val = tensor([1, 1])]; tensor x_17_pad_type_0 = const()[name = tensor("x_17_pad_type_0"), val = tensor("custom")]; tensor x_17_pad_0 = const()[name = tensor("x_17_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_resnets_0_conv_shortcut_weight_to_fp16 = const()[name = tensor("up_blocks_2_resnets_0_conv_shortcut_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1569771456)))]; tensor up_blocks_2_resnets_0_conv_shortcut_bias_to_fp16 = const()[name = tensor("up_blocks_2_resnets_0_conv_shortcut_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1572229120)))]; tensor x_17_cast_fp16 = conv(bias = up_blocks_2_resnets_0_conv_shortcut_bias_to_fp16, dilations = var_8240, groups = var_8182, pad = x_17_pad_0, pad_type = x_17_pad_type_0, strides = var_8238, weight = up_blocks_2_resnets_0_conv_shortcut_weight_to_fp16, x = input_367_cast_fp16)[name = tensor("x_17_cast_fp16")]; tensor hidden_states_223_cast_fp16 = add(x = x_17_cast_fp16, y = hidden_states_221_cast_fp16)[name = tensor("hidden_states_223_cast_fp16")]; tensor reshape_176_shape_0 = const()[name = tensor("reshape_176_shape_0"), val = tensor([2, 32, 20, 32, 32])]; tensor reshape_176_cast_fp16 = reshape(shape = reshape_176_shape_0, x = hidden_states_223_cast_fp16)[name = tensor("reshape_176_cast_fp16")]; tensor reduce_mean_132_axes_0 = const()[name = tensor("reduce_mean_132_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_132_keep_dims_0 = const()[name = tensor("reduce_mean_132_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_132_cast_fp16 = reduce_mean(axes = reduce_mean_132_axes_0, keep_dims = reduce_mean_132_keep_dims_0, x = reshape_176_cast_fp16)[name = tensor("reduce_mean_132_cast_fp16")]; tensor sub_88_cast_fp16 = sub(x = reshape_176_cast_fp16, y = reduce_mean_132_cast_fp16)[name = tensor("sub_88_cast_fp16")]; tensor square_44_cast_fp16 = square(x = sub_88_cast_fp16)[name = tensor("square_44_cast_fp16")]; tensor reduce_mean_134_axes_0 = const()[name = tensor("reduce_mean_134_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_134_keep_dims_0 = const()[name = tensor("reduce_mean_134_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_134_cast_fp16 = reduce_mean(axes = reduce_mean_134_axes_0, keep_dims = reduce_mean_134_keep_dims_0, x = square_44_cast_fp16)[name = tensor("reduce_mean_134_cast_fp16")]; tensor add_88_y_0_to_fp16 = const()[name = tensor("add_88_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_88_cast_fp16 = add(x = reduce_mean_134_cast_fp16, y = add_88_y_0_to_fp16)[name = tensor("add_88_cast_fp16")]; tensor sqrt_44_cast_fp16 = sqrt(x = add_88_cast_fp16)[name = tensor("sqrt_44_cast_fp16")]; tensor real_div_44_cast_fp16 = real_div(x = sub_88_cast_fp16, y = sqrt_44_cast_fp16)[name = tensor("real_div_44_cast_fp16")]; tensor reshape_177_shape_0 = const()[name = tensor("reshape_177_shape_0"), val = tensor([2, 640, 32, 32])]; tensor reshape_177_cast_fp16 = reshape(shape = reshape_177_shape_0, x = real_div_44_cast_fp16)[name = tensor("reshape_177_cast_fp16")]; tensor add_89_gamma_0_to_fp16 = const()[name = tensor("add_89_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1572230464)))]; tensor add_89_beta_0_to_fp16 = const()[name = tensor("add_89_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1572231808)))]; tensor add_89_epsilon_0_to_fp16 = const()[name = tensor("add_89_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_89_cast_fp16 = batch_norm(beta = add_89_beta_0_to_fp16, epsilon = add_89_epsilon_0_to_fp16, gamma = add_89_gamma_0_to_fp16, mean = add_15_mean_0_to_fp16, variance = add_15_variance_0_to_fp16, x = reshape_177_cast_fp16)[name = tensor("add_89_cast_fp16")]; tensor var_8260 = const()[name = tensor("op_8260"), val = tensor([1, 1])]; tensor var_8262 = const()[name = tensor("op_8262"), val = tensor([1, 1])]; tensor hidden_states_225_pad_type_0 = const()[name = tensor("hidden_states_225_pad_type_0"), val = tensor("custom")]; tensor hidden_states_225_pad_0 = const()[name = tensor("hidden_states_225_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_0_proj_in_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1572233152)))]; tensor up_blocks_2_attentions_0_proj_in_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1573052416)))]; tensor hidden_states_225_cast_fp16 = conv(bias = up_blocks_2_attentions_0_proj_in_bias_to_fp16, dilations = var_8262, groups = var_8182, pad = hidden_states_225_pad_0, pad_type = hidden_states_225_pad_type_0, strides = var_8260, weight = up_blocks_2_attentions_0_proj_in_weight_to_fp16, x = add_89_cast_fp16)[name = tensor("hidden_states_225_cast_fp16")]; tensor var_8267 = const()[name = tensor("op_8267"), val = tensor([2, 640, 1, 1024])]; tensor inputs_61_cast_fp16 = reshape(shape = var_8267, x = hidden_states_225_cast_fp16)[name = tensor("inputs_61_cast_fp16")]; tensor hidden_states_227_axes_0 = const()[name = tensor("hidden_states_227_axes_0"), val = tensor([1])]; tensor hidden_states_227_gamma_0_to_fp16 = const()[name = tensor("hidden_states_227_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1573053760)))]; tensor hidden_states_227_beta_0_to_fp16 = const()[name = tensor("hidden_states_227_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1573055104)))]; tensor var_8283_to_fp16 = const()[name = tensor("op_8283_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_227_cast_fp16 = layer_norm(axes = hidden_states_227_axes_0, beta = hidden_states_227_beta_0_to_fp16, epsilon = var_8283_to_fp16, gamma = hidden_states_227_gamma_0_to_fp16, x = inputs_61_cast_fp16)[name = tensor("hidden_states_227_cast_fp16")]; tensor var_8298 = const()[name = tensor("op_8298"), val = tensor([1, 1])]; tensor var_8300 = const()[name = tensor("op_8300"), val = tensor([1, 1])]; tensor q_41_pad_type_0 = const()[name = tensor("q_41_pad_type_0"), val = tensor("custom")]; tensor q_41_pad_0 = const()[name = tensor("q_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1573056448)))]; tensor q_41_cast_fp16 = conv(dilations = var_8300, groups = var_8182, pad = q_41_pad_0, pad_type = q_41_pad_type_0, strides = var_8298, weight = up_blocks_2_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_227_cast_fp16)[name = tensor("q_41_cast_fp16")]; tensor var_8304 = const()[name = tensor("op_8304"), val = tensor([1, 1])]; tensor var_8306 = const()[name = tensor("op_8306"), val = tensor([1, 1])]; tensor k_81_pad_type_0 = const()[name = tensor("k_81_pad_type_0"), val = tensor("custom")]; tensor k_81_pad_0 = const()[name = tensor("k_81_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1573875712)))]; tensor k_81_cast_fp16 = conv(dilations = var_8306, groups = var_8182, pad = k_81_pad_0, pad_type = k_81_pad_type_0, strides = var_8304, weight = up_blocks_2_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_227_cast_fp16)[name = tensor("k_81_cast_fp16")]; tensor var_8310 = const()[name = tensor("op_8310"), val = tensor([1, 1])]; tensor var_8312 = const()[name = tensor("op_8312"), val = tensor([1, 1])]; tensor v_41_pad_type_0 = const()[name = tensor("v_41_pad_type_0"), val = tensor("custom")]; tensor v_41_pad_0 = const()[name = tensor("v_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1574694976)))]; tensor v_41_cast_fp16 = conv(dilations = var_8312, groups = var_8182, pad = v_41_pad_0, pad_type = v_41_pad_type_0, strides = var_8310, weight = up_blocks_2_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_227_cast_fp16)[name = tensor("v_41_cast_fp16")]; tensor var_8316_begin_0 = const()[name = tensor("op_8316_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8316_end_0 = const()[name = tensor("op_8316_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8316_end_mask_0 = const()[name = tensor("op_8316_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8316_cast_fp16 = slice_by_index(begin = var_8316_begin_0, end = var_8316_end_0, end_mask = var_8316_end_mask_0, x = q_41_cast_fp16)[name = tensor("op_8316_cast_fp16")]; tensor var_8320_begin_0 = const()[name = tensor("op_8320_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_8320_end_0 = const()[name = tensor("op_8320_end_0"), val = tensor([2, 160, 1, 1024])]; tensor var_8320_end_mask_0 = const()[name = tensor("op_8320_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8320_cast_fp16 = slice_by_index(begin = var_8320_begin_0, end = var_8320_end_0, end_mask = var_8320_end_mask_0, x = q_41_cast_fp16)[name = tensor("op_8320_cast_fp16")]; tensor var_8324_begin_0 = const()[name = tensor("op_8324_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_8324_end_0 = const()[name = tensor("op_8324_end_0"), val = tensor([2, 240, 1, 1024])]; tensor var_8324_end_mask_0 = const()[name = tensor("op_8324_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8324_cast_fp16 = slice_by_index(begin = var_8324_begin_0, end = var_8324_end_0, end_mask = var_8324_end_mask_0, x = q_41_cast_fp16)[name = tensor("op_8324_cast_fp16")]; tensor var_8328_begin_0 = const()[name = tensor("op_8328_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_8328_end_0 = const()[name = tensor("op_8328_end_0"), val = tensor([2, 320, 1, 1024])]; tensor var_8328_end_mask_0 = const()[name = tensor("op_8328_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8328_cast_fp16 = slice_by_index(begin = var_8328_begin_0, end = var_8328_end_0, end_mask = var_8328_end_mask_0, x = q_41_cast_fp16)[name = tensor("op_8328_cast_fp16")]; tensor var_8332_begin_0 = const()[name = tensor("op_8332_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_8332_end_0 = const()[name = tensor("op_8332_end_0"), val = tensor([2, 400, 1, 1024])]; tensor var_8332_end_mask_0 = const()[name = tensor("op_8332_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8332_cast_fp16 = slice_by_index(begin = var_8332_begin_0, end = var_8332_end_0, end_mask = var_8332_end_mask_0, x = q_41_cast_fp16)[name = tensor("op_8332_cast_fp16")]; tensor var_8336_begin_0 = const()[name = tensor("op_8336_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_8336_end_0 = const()[name = tensor("op_8336_end_0"), val = tensor([2, 480, 1, 1024])]; tensor var_8336_end_mask_0 = const()[name = tensor("op_8336_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8336_cast_fp16 = slice_by_index(begin = var_8336_begin_0, end = var_8336_end_0, end_mask = var_8336_end_mask_0, x = q_41_cast_fp16)[name = tensor("op_8336_cast_fp16")]; tensor var_8340_begin_0 = const()[name = tensor("op_8340_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_8340_end_0 = const()[name = tensor("op_8340_end_0"), val = tensor([2, 560, 1, 1024])]; tensor var_8340_end_mask_0 = const()[name = tensor("op_8340_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8340_cast_fp16 = slice_by_index(begin = var_8340_begin_0, end = var_8340_end_0, end_mask = var_8340_end_mask_0, x = q_41_cast_fp16)[name = tensor("op_8340_cast_fp16")]; tensor var_8344_begin_0 = const()[name = tensor("op_8344_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_8344_end_0 = const()[name = tensor("op_8344_end_0"), val = tensor([2, 640, 1, 1024])]; tensor var_8344_end_mask_0 = const()[name = tensor("op_8344_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8344_cast_fp16 = slice_by_index(begin = var_8344_begin_0, end = var_8344_end_0, end_mask = var_8344_end_mask_0, x = q_41_cast_fp16)[name = tensor("op_8344_cast_fp16")]; tensor var_8347_begin_0 = const()[name = tensor("op_8347_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8347_end_0 = const()[name = tensor("op_8347_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8347_end_mask_0 = const()[name = tensor("op_8347_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8347_cast_fp16 = slice_by_index(begin = var_8347_begin_0, end = var_8347_end_0, end_mask = var_8347_end_mask_0, x = var_8316_cast_fp16)[name = tensor("op_8347_cast_fp16")]; tensor var_8348_begin_0 = const()[name = tensor("op_8348_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8348_end_0 = const()[name = tensor("op_8348_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8348_end_mask_0 = const()[name = tensor("op_8348_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8348_cast_fp16 = slice_by_index(begin = var_8348_begin_0, end = var_8348_end_0, end_mask = var_8348_end_mask_0, x = var_8316_cast_fp16)[name = tensor("op_8348_cast_fp16")]; tensor var_8349_begin_0 = const()[name = tensor("op_8349_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8349_end_0 = const()[name = tensor("op_8349_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8349_end_mask_0 = const()[name = tensor("op_8349_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8349_cast_fp16 = slice_by_index(begin = var_8349_begin_0, end = var_8349_end_0, end_mask = var_8349_end_mask_0, x = var_8320_cast_fp16)[name = tensor("op_8349_cast_fp16")]; tensor var_8350_begin_0 = const()[name = tensor("op_8350_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8350_end_0 = const()[name = tensor("op_8350_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8350_end_mask_0 = const()[name = tensor("op_8350_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8350_cast_fp16 = slice_by_index(begin = var_8350_begin_0, end = var_8350_end_0, end_mask = var_8350_end_mask_0, x = var_8320_cast_fp16)[name = tensor("op_8350_cast_fp16")]; tensor var_8351_begin_0 = const()[name = tensor("op_8351_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8351_end_0 = const()[name = tensor("op_8351_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8351_end_mask_0 = const()[name = tensor("op_8351_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8351_cast_fp16 = slice_by_index(begin = var_8351_begin_0, end = var_8351_end_0, end_mask = var_8351_end_mask_0, x = var_8324_cast_fp16)[name = tensor("op_8351_cast_fp16")]; tensor var_8352_begin_0 = const()[name = tensor("op_8352_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8352_end_0 = const()[name = tensor("op_8352_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8352_end_mask_0 = const()[name = tensor("op_8352_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8352_cast_fp16 = slice_by_index(begin = var_8352_begin_0, end = var_8352_end_0, end_mask = var_8352_end_mask_0, x = var_8324_cast_fp16)[name = tensor("op_8352_cast_fp16")]; tensor var_8353_begin_0 = const()[name = tensor("op_8353_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8353_end_0 = const()[name = tensor("op_8353_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8353_end_mask_0 = const()[name = tensor("op_8353_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8353_cast_fp16 = slice_by_index(begin = var_8353_begin_0, end = var_8353_end_0, end_mask = var_8353_end_mask_0, x = var_8328_cast_fp16)[name = tensor("op_8353_cast_fp16")]; tensor var_8354_begin_0 = const()[name = tensor("op_8354_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8354_end_0 = const()[name = tensor("op_8354_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8354_end_mask_0 = const()[name = tensor("op_8354_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8354_cast_fp16 = slice_by_index(begin = var_8354_begin_0, end = var_8354_end_0, end_mask = var_8354_end_mask_0, x = var_8328_cast_fp16)[name = tensor("op_8354_cast_fp16")]; tensor var_8355_begin_0 = const()[name = tensor("op_8355_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8355_end_0 = const()[name = tensor("op_8355_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8355_end_mask_0 = const()[name = tensor("op_8355_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8355_cast_fp16 = slice_by_index(begin = var_8355_begin_0, end = var_8355_end_0, end_mask = var_8355_end_mask_0, x = var_8332_cast_fp16)[name = tensor("op_8355_cast_fp16")]; tensor var_8356_begin_0 = const()[name = tensor("op_8356_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8356_end_0 = const()[name = tensor("op_8356_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8356_end_mask_0 = const()[name = tensor("op_8356_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8356_cast_fp16 = slice_by_index(begin = var_8356_begin_0, end = var_8356_end_0, end_mask = var_8356_end_mask_0, x = var_8332_cast_fp16)[name = tensor("op_8356_cast_fp16")]; tensor var_8357_begin_0 = const()[name = tensor("op_8357_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8357_end_0 = const()[name = tensor("op_8357_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8357_end_mask_0 = const()[name = tensor("op_8357_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8357_cast_fp16 = slice_by_index(begin = var_8357_begin_0, end = var_8357_end_0, end_mask = var_8357_end_mask_0, x = var_8336_cast_fp16)[name = tensor("op_8357_cast_fp16")]; tensor var_8358_begin_0 = const()[name = tensor("op_8358_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8358_end_0 = const()[name = tensor("op_8358_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8358_end_mask_0 = const()[name = tensor("op_8358_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8358_cast_fp16 = slice_by_index(begin = var_8358_begin_0, end = var_8358_end_0, end_mask = var_8358_end_mask_0, x = var_8336_cast_fp16)[name = tensor("op_8358_cast_fp16")]; tensor var_8359_begin_0 = const()[name = tensor("op_8359_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8359_end_0 = const()[name = tensor("op_8359_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8359_end_mask_0 = const()[name = tensor("op_8359_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8359_cast_fp16 = slice_by_index(begin = var_8359_begin_0, end = var_8359_end_0, end_mask = var_8359_end_mask_0, x = var_8340_cast_fp16)[name = tensor("op_8359_cast_fp16")]; tensor var_8360_begin_0 = const()[name = tensor("op_8360_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8360_end_0 = const()[name = tensor("op_8360_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8360_end_mask_0 = const()[name = tensor("op_8360_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8360_cast_fp16 = slice_by_index(begin = var_8360_begin_0, end = var_8360_end_0, end_mask = var_8360_end_mask_0, x = var_8340_cast_fp16)[name = tensor("op_8360_cast_fp16")]; tensor var_8361_begin_0 = const()[name = tensor("op_8361_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8361_end_0 = const()[name = tensor("op_8361_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8361_end_mask_0 = const()[name = tensor("op_8361_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8361_cast_fp16 = slice_by_index(begin = var_8361_begin_0, end = var_8361_end_0, end_mask = var_8361_end_mask_0, x = var_8344_cast_fp16)[name = tensor("op_8361_cast_fp16")]; tensor var_8362_begin_0 = const()[name = tensor("op_8362_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8362_end_0 = const()[name = tensor("op_8362_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8362_end_mask_0 = const()[name = tensor("op_8362_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8362_cast_fp16 = slice_by_index(begin = var_8362_begin_0, end = var_8362_end_0, end_mask = var_8362_end_mask_0, x = var_8344_cast_fp16)[name = tensor("op_8362_cast_fp16")]; tensor k_83_perm_0 = const()[name = tensor("k_83_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_8367_begin_0 = const()[name = tensor("op_8367_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8367_end_0 = const()[name = tensor("op_8367_end_0"), val = tensor([2, 1024, 1, 80])]; tensor var_8367_end_mask_0 = const()[name = tensor("op_8367_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_11 = transpose(perm = k_83_perm_0, x = k_81_cast_fp16)[name = tensor("transpose_11")]; tensor var_8367_cast_fp16 = slice_by_index(begin = var_8367_begin_0, end = var_8367_end_0, end_mask = var_8367_end_mask_0, x = transpose_11)[name = tensor("op_8367_cast_fp16")]; tensor var_8371_begin_0 = const()[name = tensor("op_8371_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_8371_end_0 = const()[name = tensor("op_8371_end_0"), val = tensor([2, 1024, 1, 160])]; tensor var_8371_end_mask_0 = const()[name = tensor("op_8371_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8371_cast_fp16 = slice_by_index(begin = var_8371_begin_0, end = var_8371_end_0, end_mask = var_8371_end_mask_0, x = transpose_11)[name = tensor("op_8371_cast_fp16")]; tensor var_8375_begin_0 = const()[name = tensor("op_8375_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_8375_end_0 = const()[name = tensor("op_8375_end_0"), val = tensor([2, 1024, 1, 240])]; tensor var_8375_end_mask_0 = const()[name = tensor("op_8375_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8375_cast_fp16 = slice_by_index(begin = var_8375_begin_0, end = var_8375_end_0, end_mask = var_8375_end_mask_0, x = transpose_11)[name = tensor("op_8375_cast_fp16")]; tensor var_8379_begin_0 = const()[name = tensor("op_8379_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_8379_end_0 = const()[name = tensor("op_8379_end_0"), val = tensor([2, 1024, 1, 320])]; tensor var_8379_end_mask_0 = const()[name = tensor("op_8379_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8379_cast_fp16 = slice_by_index(begin = var_8379_begin_0, end = var_8379_end_0, end_mask = var_8379_end_mask_0, x = transpose_11)[name = tensor("op_8379_cast_fp16")]; tensor var_8383_begin_0 = const()[name = tensor("op_8383_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_8383_end_0 = const()[name = tensor("op_8383_end_0"), val = tensor([2, 1024, 1, 400])]; tensor var_8383_end_mask_0 = const()[name = tensor("op_8383_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8383_cast_fp16 = slice_by_index(begin = var_8383_begin_0, end = var_8383_end_0, end_mask = var_8383_end_mask_0, x = transpose_11)[name = tensor("op_8383_cast_fp16")]; tensor var_8387_begin_0 = const()[name = tensor("op_8387_begin_0"), val = tensor([0, 0, 0, 400])]; tensor var_8387_end_0 = const()[name = tensor("op_8387_end_0"), val = tensor([2, 1024, 1, 480])]; tensor var_8387_end_mask_0 = const()[name = tensor("op_8387_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8387_cast_fp16 = slice_by_index(begin = var_8387_begin_0, end = var_8387_end_0, end_mask = var_8387_end_mask_0, x = transpose_11)[name = tensor("op_8387_cast_fp16")]; tensor var_8391_begin_0 = const()[name = tensor("op_8391_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_8391_end_0 = const()[name = tensor("op_8391_end_0"), val = tensor([2, 1024, 1, 560])]; tensor var_8391_end_mask_0 = const()[name = tensor("op_8391_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8391_cast_fp16 = slice_by_index(begin = var_8391_begin_0, end = var_8391_end_0, end_mask = var_8391_end_mask_0, x = transpose_11)[name = tensor("op_8391_cast_fp16")]; tensor var_8395_begin_0 = const()[name = tensor("op_8395_begin_0"), val = tensor([0, 0, 0, 560])]; tensor var_8395_end_0 = const()[name = tensor("op_8395_end_0"), val = tensor([2, 1024, 1, 640])]; tensor var_8395_end_mask_0 = const()[name = tensor("op_8395_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8395_cast_fp16 = slice_by_index(begin = var_8395_begin_0, end = var_8395_end_0, end_mask = var_8395_end_mask_0, x = transpose_11)[name = tensor("op_8395_cast_fp16")]; tensor var_8397_begin_0 = const()[name = tensor("op_8397_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8397_end_0 = const()[name = tensor("op_8397_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8397_end_mask_0 = const()[name = tensor("op_8397_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8397_cast_fp16 = slice_by_index(begin = var_8397_begin_0, end = var_8397_end_0, end_mask = var_8397_end_mask_0, x = v_41_cast_fp16)[name = tensor("op_8397_cast_fp16")]; tensor var_8401_begin_0 = const()[name = tensor("op_8401_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_8401_end_0 = const()[name = tensor("op_8401_end_0"), val = tensor([2, 160, 1, 1024])]; tensor var_8401_end_mask_0 = const()[name = tensor("op_8401_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8401_cast_fp16 = slice_by_index(begin = var_8401_begin_0, end = var_8401_end_0, end_mask = var_8401_end_mask_0, x = v_41_cast_fp16)[name = tensor("op_8401_cast_fp16")]; tensor var_8405_begin_0 = const()[name = tensor("op_8405_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_8405_end_0 = const()[name = tensor("op_8405_end_0"), val = tensor([2, 240, 1, 1024])]; tensor var_8405_end_mask_0 = const()[name = tensor("op_8405_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8405_cast_fp16 = slice_by_index(begin = var_8405_begin_0, end = var_8405_end_0, end_mask = var_8405_end_mask_0, x = v_41_cast_fp16)[name = tensor("op_8405_cast_fp16")]; tensor var_8409_begin_0 = const()[name = tensor("op_8409_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_8409_end_0 = const()[name = tensor("op_8409_end_0"), val = tensor([2, 320, 1, 1024])]; tensor var_8409_end_mask_0 = const()[name = tensor("op_8409_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8409_cast_fp16 = slice_by_index(begin = var_8409_begin_0, end = var_8409_end_0, end_mask = var_8409_end_mask_0, x = v_41_cast_fp16)[name = tensor("op_8409_cast_fp16")]; tensor var_8413_begin_0 = const()[name = tensor("op_8413_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_8413_end_0 = const()[name = tensor("op_8413_end_0"), val = tensor([2, 400, 1, 1024])]; tensor var_8413_end_mask_0 = const()[name = tensor("op_8413_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8413_cast_fp16 = slice_by_index(begin = var_8413_begin_0, end = var_8413_end_0, end_mask = var_8413_end_mask_0, x = v_41_cast_fp16)[name = tensor("op_8413_cast_fp16")]; tensor var_8417_begin_0 = const()[name = tensor("op_8417_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_8417_end_0 = const()[name = tensor("op_8417_end_0"), val = tensor([2, 480, 1, 1024])]; tensor var_8417_end_mask_0 = const()[name = tensor("op_8417_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8417_cast_fp16 = slice_by_index(begin = var_8417_begin_0, end = var_8417_end_0, end_mask = var_8417_end_mask_0, x = v_41_cast_fp16)[name = tensor("op_8417_cast_fp16")]; tensor var_8421_begin_0 = const()[name = tensor("op_8421_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_8421_end_0 = const()[name = tensor("op_8421_end_0"), val = tensor([2, 560, 1, 1024])]; tensor var_8421_end_mask_0 = const()[name = tensor("op_8421_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8421_cast_fp16 = slice_by_index(begin = var_8421_begin_0, end = var_8421_end_0, end_mask = var_8421_end_mask_0, x = v_41_cast_fp16)[name = tensor("op_8421_cast_fp16")]; tensor var_8425_begin_0 = const()[name = tensor("op_8425_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_8425_end_0 = const()[name = tensor("op_8425_end_0"), val = tensor([2, 640, 1, 1024])]; tensor var_8425_end_mask_0 = const()[name = tensor("op_8425_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8425_cast_fp16 = slice_by_index(begin = var_8425_begin_0, end = var_8425_end_0, end_mask = var_8425_end_mask_0, x = v_41_cast_fp16)[name = tensor("op_8425_cast_fp16")]; tensor var_8429_equation_0 = const()[name = tensor("op_8429_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8429_cast_fp16 = einsum(equation = var_8429_equation_0, values = (var_8367_cast_fp16, var_8347_cast_fp16))[name = tensor("op_8429_cast_fp16")]; tensor var_8430_to_fp16 = const()[name = tensor("op_8430_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_641_cast_fp16 = mul(x = var_8429_cast_fp16, y = var_8430_to_fp16)[name = tensor("aw_chunk_641_cast_fp16")]; tensor var_8433_equation_0 = const()[name = tensor("op_8433_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8433_cast_fp16 = einsum(equation = var_8433_equation_0, values = (var_8367_cast_fp16, var_8348_cast_fp16))[name = tensor("op_8433_cast_fp16")]; tensor var_8434_to_fp16 = const()[name = tensor("op_8434_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_643_cast_fp16 = mul(x = var_8433_cast_fp16, y = var_8434_to_fp16)[name = tensor("aw_chunk_643_cast_fp16")]; tensor var_8437_equation_0 = const()[name = tensor("op_8437_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8437_cast_fp16 = einsum(equation = var_8437_equation_0, values = (var_8371_cast_fp16, var_8349_cast_fp16))[name = tensor("op_8437_cast_fp16")]; tensor var_8438_to_fp16 = const()[name = tensor("op_8438_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_645_cast_fp16 = mul(x = var_8437_cast_fp16, y = var_8438_to_fp16)[name = tensor("aw_chunk_645_cast_fp16")]; tensor var_8441_equation_0 = const()[name = tensor("op_8441_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8441_cast_fp16 = einsum(equation = var_8441_equation_0, values = (var_8371_cast_fp16, var_8350_cast_fp16))[name = tensor("op_8441_cast_fp16")]; tensor var_8442_to_fp16 = const()[name = tensor("op_8442_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_647_cast_fp16 = mul(x = var_8441_cast_fp16, y = var_8442_to_fp16)[name = tensor("aw_chunk_647_cast_fp16")]; tensor var_8445_equation_0 = const()[name = tensor("op_8445_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8445_cast_fp16 = einsum(equation = var_8445_equation_0, values = (var_8375_cast_fp16, var_8351_cast_fp16))[name = tensor("op_8445_cast_fp16")]; tensor var_8446_to_fp16 = const()[name = tensor("op_8446_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_649_cast_fp16 = mul(x = var_8445_cast_fp16, y = var_8446_to_fp16)[name = tensor("aw_chunk_649_cast_fp16")]; tensor var_8449_equation_0 = const()[name = tensor("op_8449_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8449_cast_fp16 = einsum(equation = var_8449_equation_0, values = (var_8375_cast_fp16, var_8352_cast_fp16))[name = tensor("op_8449_cast_fp16")]; tensor var_8450_to_fp16 = const()[name = tensor("op_8450_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_651_cast_fp16 = mul(x = var_8449_cast_fp16, y = var_8450_to_fp16)[name = tensor("aw_chunk_651_cast_fp16")]; tensor var_8453_equation_0 = const()[name = tensor("op_8453_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8453_cast_fp16 = einsum(equation = var_8453_equation_0, values = (var_8379_cast_fp16, var_8353_cast_fp16))[name = tensor("op_8453_cast_fp16")]; tensor var_8454_to_fp16 = const()[name = tensor("op_8454_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_653_cast_fp16 = mul(x = var_8453_cast_fp16, y = var_8454_to_fp16)[name = tensor("aw_chunk_653_cast_fp16")]; tensor var_8457_equation_0 = const()[name = tensor("op_8457_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8457_cast_fp16 = einsum(equation = var_8457_equation_0, values = (var_8379_cast_fp16, var_8354_cast_fp16))[name = tensor("op_8457_cast_fp16")]; tensor var_8458_to_fp16 = const()[name = tensor("op_8458_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_655_cast_fp16 = mul(x = var_8457_cast_fp16, y = var_8458_to_fp16)[name = tensor("aw_chunk_655_cast_fp16")]; tensor var_8461_equation_0 = const()[name = tensor("op_8461_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8461_cast_fp16 = einsum(equation = var_8461_equation_0, values = (var_8383_cast_fp16, var_8355_cast_fp16))[name = tensor("op_8461_cast_fp16")]; tensor var_8462_to_fp16 = const()[name = tensor("op_8462_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_657_cast_fp16 = mul(x = var_8461_cast_fp16, y = var_8462_to_fp16)[name = tensor("aw_chunk_657_cast_fp16")]; tensor var_8465_equation_0 = const()[name = tensor("op_8465_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8465_cast_fp16 = einsum(equation = var_8465_equation_0, values = (var_8383_cast_fp16, var_8356_cast_fp16))[name = tensor("op_8465_cast_fp16")]; tensor var_8466_to_fp16 = const()[name = tensor("op_8466_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_659_cast_fp16 = mul(x = var_8465_cast_fp16, y = var_8466_to_fp16)[name = tensor("aw_chunk_659_cast_fp16")]; tensor var_8469_equation_0 = const()[name = tensor("op_8469_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8469_cast_fp16 = einsum(equation = var_8469_equation_0, values = (var_8387_cast_fp16, var_8357_cast_fp16))[name = tensor("op_8469_cast_fp16")]; tensor var_8470_to_fp16 = const()[name = tensor("op_8470_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_661_cast_fp16 = mul(x = var_8469_cast_fp16, y = var_8470_to_fp16)[name = tensor("aw_chunk_661_cast_fp16")]; tensor var_8473_equation_0 = const()[name = tensor("op_8473_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8473_cast_fp16 = einsum(equation = var_8473_equation_0, values = (var_8387_cast_fp16, var_8358_cast_fp16))[name = tensor("op_8473_cast_fp16")]; tensor var_8474_to_fp16 = const()[name = tensor("op_8474_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_663_cast_fp16 = mul(x = var_8473_cast_fp16, y = var_8474_to_fp16)[name = tensor("aw_chunk_663_cast_fp16")]; tensor var_8477_equation_0 = const()[name = tensor("op_8477_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8477_cast_fp16 = einsum(equation = var_8477_equation_0, values = (var_8391_cast_fp16, var_8359_cast_fp16))[name = tensor("op_8477_cast_fp16")]; tensor var_8478_to_fp16 = const()[name = tensor("op_8478_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_665_cast_fp16 = mul(x = var_8477_cast_fp16, y = var_8478_to_fp16)[name = tensor("aw_chunk_665_cast_fp16")]; tensor var_8481_equation_0 = const()[name = tensor("op_8481_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8481_cast_fp16 = einsum(equation = var_8481_equation_0, values = (var_8391_cast_fp16, var_8360_cast_fp16))[name = tensor("op_8481_cast_fp16")]; tensor var_8482_to_fp16 = const()[name = tensor("op_8482_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_667_cast_fp16 = mul(x = var_8481_cast_fp16, y = var_8482_to_fp16)[name = tensor("aw_chunk_667_cast_fp16")]; tensor var_8485_equation_0 = const()[name = tensor("op_8485_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8485_cast_fp16 = einsum(equation = var_8485_equation_0, values = (var_8395_cast_fp16, var_8361_cast_fp16))[name = tensor("op_8485_cast_fp16")]; tensor var_8486_to_fp16 = const()[name = tensor("op_8486_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_669_cast_fp16 = mul(x = var_8485_cast_fp16, y = var_8486_to_fp16)[name = tensor("aw_chunk_669_cast_fp16")]; tensor var_8489_equation_0 = const()[name = tensor("op_8489_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8489_cast_fp16 = einsum(equation = var_8489_equation_0, values = (var_8395_cast_fp16, var_8362_cast_fp16))[name = tensor("op_8489_cast_fp16")]; tensor var_8490_to_fp16 = const()[name = tensor("op_8490_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_671_cast_fp16 = mul(x = var_8489_cast_fp16, y = var_8490_to_fp16)[name = tensor("aw_chunk_671_cast_fp16")]; tensor var_8492_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_641_cast_fp16)[name = tensor("op_8492_cast_fp16")]; tensor var_8493_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_643_cast_fp16)[name = tensor("op_8493_cast_fp16")]; tensor var_8494_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_645_cast_fp16)[name = tensor("op_8494_cast_fp16")]; tensor var_8495_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_647_cast_fp16)[name = tensor("op_8495_cast_fp16")]; tensor var_8496_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_649_cast_fp16)[name = tensor("op_8496_cast_fp16")]; tensor var_8497_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_651_cast_fp16)[name = tensor("op_8497_cast_fp16")]; tensor var_8498_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_653_cast_fp16)[name = tensor("op_8498_cast_fp16")]; tensor var_8499_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_655_cast_fp16)[name = tensor("op_8499_cast_fp16")]; tensor var_8500_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_657_cast_fp16)[name = tensor("op_8500_cast_fp16")]; tensor var_8501_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_659_cast_fp16)[name = tensor("op_8501_cast_fp16")]; tensor var_8502_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_661_cast_fp16)[name = tensor("op_8502_cast_fp16")]; tensor var_8503_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_663_cast_fp16)[name = tensor("op_8503_cast_fp16")]; tensor var_8504_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_665_cast_fp16)[name = tensor("op_8504_cast_fp16")]; tensor var_8505_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_667_cast_fp16)[name = tensor("op_8505_cast_fp16")]; tensor var_8506_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_669_cast_fp16)[name = tensor("op_8506_cast_fp16")]; tensor var_8507_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_671_cast_fp16)[name = tensor("op_8507_cast_fp16")]; tensor var_8509_equation_0 = const()[name = tensor("op_8509_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8509_cast_fp16 = einsum(equation = var_8509_equation_0, values = (var_8397_cast_fp16, var_8492_cast_fp16))[name = tensor("op_8509_cast_fp16")]; tensor var_8511_equation_0 = const()[name = tensor("op_8511_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8511_cast_fp16 = einsum(equation = var_8511_equation_0, values = (var_8397_cast_fp16, var_8493_cast_fp16))[name = tensor("op_8511_cast_fp16")]; tensor var_8513_equation_0 = const()[name = tensor("op_8513_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8513_cast_fp16 = einsum(equation = var_8513_equation_0, values = (var_8401_cast_fp16, var_8494_cast_fp16))[name = tensor("op_8513_cast_fp16")]; tensor var_8515_equation_0 = const()[name = tensor("op_8515_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8515_cast_fp16 = einsum(equation = var_8515_equation_0, values = (var_8401_cast_fp16, var_8495_cast_fp16))[name = tensor("op_8515_cast_fp16")]; tensor var_8517_equation_0 = const()[name = tensor("op_8517_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8517_cast_fp16 = einsum(equation = var_8517_equation_0, values = (var_8405_cast_fp16, var_8496_cast_fp16))[name = tensor("op_8517_cast_fp16")]; tensor var_8519_equation_0 = const()[name = tensor("op_8519_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8519_cast_fp16 = einsum(equation = var_8519_equation_0, values = (var_8405_cast_fp16, var_8497_cast_fp16))[name = tensor("op_8519_cast_fp16")]; tensor var_8521_equation_0 = const()[name = tensor("op_8521_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8521_cast_fp16 = einsum(equation = var_8521_equation_0, values = (var_8409_cast_fp16, var_8498_cast_fp16))[name = tensor("op_8521_cast_fp16")]; tensor var_8523_equation_0 = const()[name = tensor("op_8523_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8523_cast_fp16 = einsum(equation = var_8523_equation_0, values = (var_8409_cast_fp16, var_8499_cast_fp16))[name = tensor("op_8523_cast_fp16")]; tensor var_8525_equation_0 = const()[name = tensor("op_8525_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8525_cast_fp16 = einsum(equation = var_8525_equation_0, values = (var_8413_cast_fp16, var_8500_cast_fp16))[name = tensor("op_8525_cast_fp16")]; tensor var_8527_equation_0 = const()[name = tensor("op_8527_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8527_cast_fp16 = einsum(equation = var_8527_equation_0, values = (var_8413_cast_fp16, var_8501_cast_fp16))[name = tensor("op_8527_cast_fp16")]; tensor var_8529_equation_0 = const()[name = tensor("op_8529_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8529_cast_fp16 = einsum(equation = var_8529_equation_0, values = (var_8417_cast_fp16, var_8502_cast_fp16))[name = tensor("op_8529_cast_fp16")]; tensor var_8531_equation_0 = const()[name = tensor("op_8531_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8531_cast_fp16 = einsum(equation = var_8531_equation_0, values = (var_8417_cast_fp16, var_8503_cast_fp16))[name = tensor("op_8531_cast_fp16")]; tensor var_8533_equation_0 = const()[name = tensor("op_8533_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8533_cast_fp16 = einsum(equation = var_8533_equation_0, values = (var_8421_cast_fp16, var_8504_cast_fp16))[name = tensor("op_8533_cast_fp16")]; tensor var_8535_equation_0 = const()[name = tensor("op_8535_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8535_cast_fp16 = einsum(equation = var_8535_equation_0, values = (var_8421_cast_fp16, var_8505_cast_fp16))[name = tensor("op_8535_cast_fp16")]; tensor var_8537_equation_0 = const()[name = tensor("op_8537_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8537_cast_fp16 = einsum(equation = var_8537_equation_0, values = (var_8425_cast_fp16, var_8506_cast_fp16))[name = tensor("op_8537_cast_fp16")]; tensor var_8539_equation_0 = const()[name = tensor("op_8539_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8539_cast_fp16 = einsum(equation = var_8539_equation_0, values = (var_8425_cast_fp16, var_8507_cast_fp16))[name = tensor("op_8539_cast_fp16")]; tensor var_8541_interleave_0 = const()[name = tensor("op_8541_interleave_0"), val = tensor(false)]; tensor var_8541_cast_fp16 = concat(axis = var_8160, interleave = var_8541_interleave_0, values = (var_8509_cast_fp16, var_8511_cast_fp16))[name = tensor("op_8541_cast_fp16")]; tensor var_8543_interleave_0 = const()[name = tensor("op_8543_interleave_0"), val = tensor(false)]; tensor var_8543_cast_fp16 = concat(axis = var_8160, interleave = var_8543_interleave_0, values = (var_8513_cast_fp16, var_8515_cast_fp16))[name = tensor("op_8543_cast_fp16")]; tensor var_8545_interleave_0 = const()[name = tensor("op_8545_interleave_0"), val = tensor(false)]; tensor var_8545_cast_fp16 = concat(axis = var_8160, interleave = var_8545_interleave_0, values = (var_8517_cast_fp16, var_8519_cast_fp16))[name = tensor("op_8545_cast_fp16")]; tensor var_8547_interleave_0 = const()[name = tensor("op_8547_interleave_0"), val = tensor(false)]; tensor var_8547_cast_fp16 = concat(axis = var_8160, interleave = var_8547_interleave_0, values = (var_8521_cast_fp16, var_8523_cast_fp16))[name = tensor("op_8547_cast_fp16")]; tensor var_8549_interleave_0 = const()[name = tensor("op_8549_interleave_0"), val = tensor(false)]; tensor var_8549_cast_fp16 = concat(axis = var_8160, interleave = var_8549_interleave_0, values = (var_8525_cast_fp16, var_8527_cast_fp16))[name = tensor("op_8549_cast_fp16")]; tensor var_8551_interleave_0 = const()[name = tensor("op_8551_interleave_0"), val = tensor(false)]; tensor var_8551_cast_fp16 = concat(axis = var_8160, interleave = var_8551_interleave_0, values = (var_8529_cast_fp16, var_8531_cast_fp16))[name = tensor("op_8551_cast_fp16")]; tensor var_8553_interleave_0 = const()[name = tensor("op_8553_interleave_0"), val = tensor(false)]; tensor var_8553_cast_fp16 = concat(axis = var_8160, interleave = var_8553_interleave_0, values = (var_8533_cast_fp16, var_8535_cast_fp16))[name = tensor("op_8553_cast_fp16")]; tensor var_8555_interleave_0 = const()[name = tensor("op_8555_interleave_0"), val = tensor(false)]; tensor var_8555_cast_fp16 = concat(axis = var_8160, interleave = var_8555_interleave_0, values = (var_8537_cast_fp16, var_8539_cast_fp16))[name = tensor("op_8555_cast_fp16")]; tensor input_383_interleave_0 = const()[name = tensor("input_383_interleave_0"), val = tensor(false)]; tensor input_383_cast_fp16 = concat(axis = var_8182, interleave = input_383_interleave_0, values = (var_8541_cast_fp16, var_8543_cast_fp16, var_8545_cast_fp16, var_8547_cast_fp16, var_8549_cast_fp16, var_8551_cast_fp16, var_8553_cast_fp16, var_8555_cast_fp16))[name = tensor("input_383_cast_fp16")]; tensor var_8561 = const()[name = tensor("op_8561"), val = tensor([1, 1])]; tensor var_8563 = const()[name = tensor("op_8563"), val = tensor([1, 1])]; tensor var_8565_pad_type_0 = const()[name = tensor("op_8565_pad_type_0"), val = tensor("custom")]; tensor var_8565_pad_0 = const()[name = tensor("op_8565_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1575514240)))]; tensor up_blocks_2_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1576333504)))]; tensor var_8565_cast_fp16 = conv(bias = up_blocks_2_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_8563, groups = var_8182, pad = var_8565_pad_0, pad_type = var_8565_pad_type_0, strides = var_8561, weight = up_blocks_2_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_383_cast_fp16)[name = tensor("op_8565_cast_fp16")]; tensor inputs_63_cast_fp16 = add(x = var_8565_cast_fp16, y = inputs_61_cast_fp16)[name = tensor("inputs_63_cast_fp16")]; tensor hidden_states_229_axes_0 = const()[name = tensor("hidden_states_229_axes_0"), val = tensor([1])]; tensor hidden_states_229_gamma_0_to_fp16 = const()[name = tensor("hidden_states_229_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1576334848)))]; tensor hidden_states_229_beta_0_to_fp16 = const()[name = tensor("hidden_states_229_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1576336192)))]; tensor var_8575_to_fp16 = const()[name = tensor("op_8575_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_229_cast_fp16 = layer_norm(axes = hidden_states_229_axes_0, beta = hidden_states_229_beta_0_to_fp16, epsilon = var_8575_to_fp16, gamma = hidden_states_229_gamma_0_to_fp16, x = inputs_63_cast_fp16)[name = tensor("hidden_states_229_cast_fp16")]; tensor var_8590 = const()[name = tensor("op_8590"), val = tensor([1, 1])]; tensor var_8592 = const()[name = tensor("op_8592"), val = tensor([1, 1])]; tensor q_43_pad_type_0 = const()[name = tensor("q_43_pad_type_0"), val = tensor("custom")]; tensor q_43_pad_0 = const()[name = tensor("q_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1576337536)))]; tensor q_43_cast_fp16 = conv(dilations = var_8592, groups = var_8182, pad = q_43_pad_0, pad_type = q_43_pad_type_0, strides = var_8590, weight = up_blocks_2_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_229_cast_fp16)[name = tensor("q_43_cast_fp16")]; tensor var_8596 = const()[name = tensor("op_8596"), val = tensor([1, 1])]; tensor var_8598 = const()[name = tensor("op_8598"), val = tensor([1, 1])]; tensor k_85_pad_type_0 = const()[name = tensor("k_85_pad_type_0"), val = tensor("custom")]; tensor k_85_pad_0 = const()[name = tensor("k_85_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1577156800)))]; tensor k_85_cast_fp16 = conv(dilations = var_8598, groups = var_8182, pad = k_85_pad_0, pad_type = k_85_pad_type_0, strides = var_8596, weight = up_blocks_2_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_85_cast_fp16")]; tensor var_8602 = const()[name = tensor("op_8602"), val = tensor([1, 1])]; tensor var_8604 = const()[name = tensor("op_8604"), val = tensor([1, 1])]; tensor v_43_pad_type_0 = const()[name = tensor("v_43_pad_type_0"), val = tensor("custom")]; tensor v_43_pad_0 = const()[name = tensor("v_43_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1578139904)))]; tensor v_43_cast_fp16 = conv(dilations = var_8604, groups = var_8182, pad = v_43_pad_0, pad_type = v_43_pad_type_0, strides = var_8602, weight = up_blocks_2_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_43_cast_fp16")]; tensor var_8608_begin_0 = const()[name = tensor("op_8608_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8608_end_0 = const()[name = tensor("op_8608_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8608_end_mask_0 = const()[name = tensor("op_8608_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8608_cast_fp16 = slice_by_index(begin = var_8608_begin_0, end = var_8608_end_0, end_mask = var_8608_end_mask_0, x = q_43_cast_fp16)[name = tensor("op_8608_cast_fp16")]; tensor var_8612_begin_0 = const()[name = tensor("op_8612_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_8612_end_0 = const()[name = tensor("op_8612_end_0"), val = tensor([2, 160, 1, 1024])]; tensor var_8612_end_mask_0 = const()[name = tensor("op_8612_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8612_cast_fp16 = slice_by_index(begin = var_8612_begin_0, end = var_8612_end_0, end_mask = var_8612_end_mask_0, x = q_43_cast_fp16)[name = tensor("op_8612_cast_fp16")]; tensor var_8616_begin_0 = const()[name = tensor("op_8616_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_8616_end_0 = const()[name = tensor("op_8616_end_0"), val = tensor([2, 240, 1, 1024])]; tensor var_8616_end_mask_0 = const()[name = tensor("op_8616_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8616_cast_fp16 = slice_by_index(begin = var_8616_begin_0, end = var_8616_end_0, end_mask = var_8616_end_mask_0, x = q_43_cast_fp16)[name = tensor("op_8616_cast_fp16")]; tensor var_8620_begin_0 = const()[name = tensor("op_8620_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_8620_end_0 = const()[name = tensor("op_8620_end_0"), val = tensor([2, 320, 1, 1024])]; tensor var_8620_end_mask_0 = const()[name = tensor("op_8620_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8620_cast_fp16 = slice_by_index(begin = var_8620_begin_0, end = var_8620_end_0, end_mask = var_8620_end_mask_0, x = q_43_cast_fp16)[name = tensor("op_8620_cast_fp16")]; tensor var_8624_begin_0 = const()[name = tensor("op_8624_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_8624_end_0 = const()[name = tensor("op_8624_end_0"), val = tensor([2, 400, 1, 1024])]; tensor var_8624_end_mask_0 = const()[name = tensor("op_8624_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8624_cast_fp16 = slice_by_index(begin = var_8624_begin_0, end = var_8624_end_0, end_mask = var_8624_end_mask_0, x = q_43_cast_fp16)[name = tensor("op_8624_cast_fp16")]; tensor var_8628_begin_0 = const()[name = tensor("op_8628_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_8628_end_0 = const()[name = tensor("op_8628_end_0"), val = tensor([2, 480, 1, 1024])]; tensor var_8628_end_mask_0 = const()[name = tensor("op_8628_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8628_cast_fp16 = slice_by_index(begin = var_8628_begin_0, end = var_8628_end_0, end_mask = var_8628_end_mask_0, x = q_43_cast_fp16)[name = tensor("op_8628_cast_fp16")]; tensor var_8632_begin_0 = const()[name = tensor("op_8632_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_8632_end_0 = const()[name = tensor("op_8632_end_0"), val = tensor([2, 560, 1, 1024])]; tensor var_8632_end_mask_0 = const()[name = tensor("op_8632_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8632_cast_fp16 = slice_by_index(begin = var_8632_begin_0, end = var_8632_end_0, end_mask = var_8632_end_mask_0, x = q_43_cast_fp16)[name = tensor("op_8632_cast_fp16")]; tensor var_8636_begin_0 = const()[name = tensor("op_8636_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_8636_end_0 = const()[name = tensor("op_8636_end_0"), val = tensor([2, 640, 1, 1024])]; tensor var_8636_end_mask_0 = const()[name = tensor("op_8636_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8636_cast_fp16 = slice_by_index(begin = var_8636_begin_0, end = var_8636_end_0, end_mask = var_8636_end_mask_0, x = q_43_cast_fp16)[name = tensor("op_8636_cast_fp16")]; tensor var_8639_begin_0 = const()[name = tensor("op_8639_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8639_end_0 = const()[name = tensor("op_8639_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8639_end_mask_0 = const()[name = tensor("op_8639_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8639_cast_fp16 = slice_by_index(begin = var_8639_begin_0, end = var_8639_end_0, end_mask = var_8639_end_mask_0, x = var_8608_cast_fp16)[name = tensor("op_8639_cast_fp16")]; tensor var_8640_begin_0 = const()[name = tensor("op_8640_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8640_end_0 = const()[name = tensor("op_8640_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8640_end_mask_0 = const()[name = tensor("op_8640_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8640_cast_fp16 = slice_by_index(begin = var_8640_begin_0, end = var_8640_end_0, end_mask = var_8640_end_mask_0, x = var_8608_cast_fp16)[name = tensor("op_8640_cast_fp16")]; tensor var_8641_begin_0 = const()[name = tensor("op_8641_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8641_end_0 = const()[name = tensor("op_8641_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8641_end_mask_0 = const()[name = tensor("op_8641_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8641_cast_fp16 = slice_by_index(begin = var_8641_begin_0, end = var_8641_end_0, end_mask = var_8641_end_mask_0, x = var_8612_cast_fp16)[name = tensor("op_8641_cast_fp16")]; tensor var_8642_begin_0 = const()[name = tensor("op_8642_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8642_end_0 = const()[name = tensor("op_8642_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8642_end_mask_0 = const()[name = tensor("op_8642_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8642_cast_fp16 = slice_by_index(begin = var_8642_begin_0, end = var_8642_end_0, end_mask = var_8642_end_mask_0, x = var_8612_cast_fp16)[name = tensor("op_8642_cast_fp16")]; tensor var_8643_begin_0 = const()[name = tensor("op_8643_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8643_end_0 = const()[name = tensor("op_8643_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8643_end_mask_0 = const()[name = tensor("op_8643_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8643_cast_fp16 = slice_by_index(begin = var_8643_begin_0, end = var_8643_end_0, end_mask = var_8643_end_mask_0, x = var_8616_cast_fp16)[name = tensor("op_8643_cast_fp16")]; tensor var_8644_begin_0 = const()[name = tensor("op_8644_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8644_end_0 = const()[name = tensor("op_8644_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8644_end_mask_0 = const()[name = tensor("op_8644_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8644_cast_fp16 = slice_by_index(begin = var_8644_begin_0, end = var_8644_end_0, end_mask = var_8644_end_mask_0, x = var_8616_cast_fp16)[name = tensor("op_8644_cast_fp16")]; tensor var_8645_begin_0 = const()[name = tensor("op_8645_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8645_end_0 = const()[name = tensor("op_8645_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8645_end_mask_0 = const()[name = tensor("op_8645_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8645_cast_fp16 = slice_by_index(begin = var_8645_begin_0, end = var_8645_end_0, end_mask = var_8645_end_mask_0, x = var_8620_cast_fp16)[name = tensor("op_8645_cast_fp16")]; tensor var_8646_begin_0 = const()[name = tensor("op_8646_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8646_end_0 = const()[name = tensor("op_8646_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8646_end_mask_0 = const()[name = tensor("op_8646_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8646_cast_fp16 = slice_by_index(begin = var_8646_begin_0, end = var_8646_end_0, end_mask = var_8646_end_mask_0, x = var_8620_cast_fp16)[name = tensor("op_8646_cast_fp16")]; tensor var_8647_begin_0 = const()[name = tensor("op_8647_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8647_end_0 = const()[name = tensor("op_8647_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8647_end_mask_0 = const()[name = tensor("op_8647_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8647_cast_fp16 = slice_by_index(begin = var_8647_begin_0, end = var_8647_end_0, end_mask = var_8647_end_mask_0, x = var_8624_cast_fp16)[name = tensor("op_8647_cast_fp16")]; tensor var_8648_begin_0 = const()[name = tensor("op_8648_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8648_end_0 = const()[name = tensor("op_8648_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8648_end_mask_0 = const()[name = tensor("op_8648_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8648_cast_fp16 = slice_by_index(begin = var_8648_begin_0, end = var_8648_end_0, end_mask = var_8648_end_mask_0, x = var_8624_cast_fp16)[name = tensor("op_8648_cast_fp16")]; tensor var_8649_begin_0 = const()[name = tensor("op_8649_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8649_end_0 = const()[name = tensor("op_8649_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8649_end_mask_0 = const()[name = tensor("op_8649_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8649_cast_fp16 = slice_by_index(begin = var_8649_begin_0, end = var_8649_end_0, end_mask = var_8649_end_mask_0, x = var_8628_cast_fp16)[name = tensor("op_8649_cast_fp16")]; tensor var_8650_begin_0 = const()[name = tensor("op_8650_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8650_end_0 = const()[name = tensor("op_8650_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8650_end_mask_0 = const()[name = tensor("op_8650_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8650_cast_fp16 = slice_by_index(begin = var_8650_begin_0, end = var_8650_end_0, end_mask = var_8650_end_mask_0, x = var_8628_cast_fp16)[name = tensor("op_8650_cast_fp16")]; tensor var_8651_begin_0 = const()[name = tensor("op_8651_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8651_end_0 = const()[name = tensor("op_8651_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8651_end_mask_0 = const()[name = tensor("op_8651_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8651_cast_fp16 = slice_by_index(begin = var_8651_begin_0, end = var_8651_end_0, end_mask = var_8651_end_mask_0, x = var_8632_cast_fp16)[name = tensor("op_8651_cast_fp16")]; tensor var_8652_begin_0 = const()[name = tensor("op_8652_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8652_end_0 = const()[name = tensor("op_8652_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8652_end_mask_0 = const()[name = tensor("op_8652_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8652_cast_fp16 = slice_by_index(begin = var_8652_begin_0, end = var_8652_end_0, end_mask = var_8652_end_mask_0, x = var_8632_cast_fp16)[name = tensor("op_8652_cast_fp16")]; tensor var_8653_begin_0 = const()[name = tensor("op_8653_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8653_end_0 = const()[name = tensor("op_8653_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_8653_end_mask_0 = const()[name = tensor("op_8653_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8653_cast_fp16 = slice_by_index(begin = var_8653_begin_0, end = var_8653_end_0, end_mask = var_8653_end_mask_0, x = var_8636_cast_fp16)[name = tensor("op_8653_cast_fp16")]; tensor var_8654_begin_0 = const()[name = tensor("op_8654_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8654_end_0 = const()[name = tensor("op_8654_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_8654_end_mask_0 = const()[name = tensor("op_8654_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8654_cast_fp16 = slice_by_index(begin = var_8654_begin_0, end = var_8654_end_0, end_mask = var_8654_end_mask_0, x = var_8636_cast_fp16)[name = tensor("op_8654_cast_fp16")]; tensor k_87_perm_0 = const()[name = tensor("k_87_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_8659_begin_0 = const()[name = tensor("op_8659_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8659_end_0 = const()[name = tensor("op_8659_end_0"), val = tensor([2, 77, 1, 80])]; tensor var_8659_end_mask_0 = const()[name = tensor("op_8659_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_10 = transpose(perm = k_87_perm_0, x = k_85_cast_fp16)[name = tensor("transpose_10")]; tensor var_8659_cast_fp16 = slice_by_index(begin = var_8659_begin_0, end = var_8659_end_0, end_mask = var_8659_end_mask_0, x = transpose_10)[name = tensor("op_8659_cast_fp16")]; tensor var_8663_begin_0 = const()[name = tensor("op_8663_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_8663_end_0 = const()[name = tensor("op_8663_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_8663_end_mask_0 = const()[name = tensor("op_8663_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8663_cast_fp16 = slice_by_index(begin = var_8663_begin_0, end = var_8663_end_0, end_mask = var_8663_end_mask_0, x = transpose_10)[name = tensor("op_8663_cast_fp16")]; tensor var_8667_begin_0 = const()[name = tensor("op_8667_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_8667_end_0 = const()[name = tensor("op_8667_end_0"), val = tensor([2, 77, 1, 240])]; tensor var_8667_end_mask_0 = const()[name = tensor("op_8667_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8667_cast_fp16 = slice_by_index(begin = var_8667_begin_0, end = var_8667_end_0, end_mask = var_8667_end_mask_0, x = transpose_10)[name = tensor("op_8667_cast_fp16")]; tensor var_8671_begin_0 = const()[name = tensor("op_8671_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_8671_end_0 = const()[name = tensor("op_8671_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_8671_end_mask_0 = const()[name = tensor("op_8671_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8671_cast_fp16 = slice_by_index(begin = var_8671_begin_0, end = var_8671_end_0, end_mask = var_8671_end_mask_0, x = transpose_10)[name = tensor("op_8671_cast_fp16")]; tensor var_8675_begin_0 = const()[name = tensor("op_8675_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_8675_end_0 = const()[name = tensor("op_8675_end_0"), val = tensor([2, 77, 1, 400])]; tensor var_8675_end_mask_0 = const()[name = tensor("op_8675_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8675_cast_fp16 = slice_by_index(begin = var_8675_begin_0, end = var_8675_end_0, end_mask = var_8675_end_mask_0, x = transpose_10)[name = tensor("op_8675_cast_fp16")]; tensor var_8679_begin_0 = const()[name = tensor("op_8679_begin_0"), val = tensor([0, 0, 0, 400])]; tensor var_8679_end_0 = const()[name = tensor("op_8679_end_0"), val = tensor([2, 77, 1, 480])]; tensor var_8679_end_mask_0 = const()[name = tensor("op_8679_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8679_cast_fp16 = slice_by_index(begin = var_8679_begin_0, end = var_8679_end_0, end_mask = var_8679_end_mask_0, x = transpose_10)[name = tensor("op_8679_cast_fp16")]; tensor var_8683_begin_0 = const()[name = tensor("op_8683_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_8683_end_0 = const()[name = tensor("op_8683_end_0"), val = tensor([2, 77, 1, 560])]; tensor var_8683_end_mask_0 = const()[name = tensor("op_8683_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8683_cast_fp16 = slice_by_index(begin = var_8683_begin_0, end = var_8683_end_0, end_mask = var_8683_end_mask_0, x = transpose_10)[name = tensor("op_8683_cast_fp16")]; tensor var_8687_begin_0 = const()[name = tensor("op_8687_begin_0"), val = tensor([0, 0, 0, 560])]; tensor var_8687_end_0 = const()[name = tensor("op_8687_end_0"), val = tensor([2, 77, 1, 640])]; tensor var_8687_end_mask_0 = const()[name = tensor("op_8687_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8687_cast_fp16 = slice_by_index(begin = var_8687_begin_0, end = var_8687_end_0, end_mask = var_8687_end_mask_0, x = transpose_10)[name = tensor("op_8687_cast_fp16")]; tensor var_8689_begin_0 = const()[name = tensor("op_8689_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8689_end_0 = const()[name = tensor("op_8689_end_0"), val = tensor([2, 80, 1, 77])]; tensor var_8689_end_mask_0 = const()[name = tensor("op_8689_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8689_cast_fp16 = slice_by_index(begin = var_8689_begin_0, end = var_8689_end_0, end_mask = var_8689_end_mask_0, x = v_43_cast_fp16)[name = tensor("op_8689_cast_fp16")]; tensor var_8693_begin_0 = const()[name = tensor("op_8693_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_8693_end_0 = const()[name = tensor("op_8693_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_8693_end_mask_0 = const()[name = tensor("op_8693_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8693_cast_fp16 = slice_by_index(begin = var_8693_begin_0, end = var_8693_end_0, end_mask = var_8693_end_mask_0, x = v_43_cast_fp16)[name = tensor("op_8693_cast_fp16")]; tensor var_8697_begin_0 = const()[name = tensor("op_8697_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_8697_end_0 = const()[name = tensor("op_8697_end_0"), val = tensor([2, 240, 1, 77])]; tensor var_8697_end_mask_0 = const()[name = tensor("op_8697_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8697_cast_fp16 = slice_by_index(begin = var_8697_begin_0, end = var_8697_end_0, end_mask = var_8697_end_mask_0, x = v_43_cast_fp16)[name = tensor("op_8697_cast_fp16")]; tensor var_8701_begin_0 = const()[name = tensor("op_8701_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_8701_end_0 = const()[name = tensor("op_8701_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_8701_end_mask_0 = const()[name = tensor("op_8701_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8701_cast_fp16 = slice_by_index(begin = var_8701_begin_0, end = var_8701_end_0, end_mask = var_8701_end_mask_0, x = v_43_cast_fp16)[name = tensor("op_8701_cast_fp16")]; tensor var_8705_begin_0 = const()[name = tensor("op_8705_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_8705_end_0 = const()[name = tensor("op_8705_end_0"), val = tensor([2, 400, 1, 77])]; tensor var_8705_end_mask_0 = const()[name = tensor("op_8705_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8705_cast_fp16 = slice_by_index(begin = var_8705_begin_0, end = var_8705_end_0, end_mask = var_8705_end_mask_0, x = v_43_cast_fp16)[name = tensor("op_8705_cast_fp16")]; tensor var_8709_begin_0 = const()[name = tensor("op_8709_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_8709_end_0 = const()[name = tensor("op_8709_end_0"), val = tensor([2, 480, 1, 77])]; tensor var_8709_end_mask_0 = const()[name = tensor("op_8709_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8709_cast_fp16 = slice_by_index(begin = var_8709_begin_0, end = var_8709_end_0, end_mask = var_8709_end_mask_0, x = v_43_cast_fp16)[name = tensor("op_8709_cast_fp16")]; tensor var_8713_begin_0 = const()[name = tensor("op_8713_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_8713_end_0 = const()[name = tensor("op_8713_end_0"), val = tensor([2, 560, 1, 77])]; tensor var_8713_end_mask_0 = const()[name = tensor("op_8713_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8713_cast_fp16 = slice_by_index(begin = var_8713_begin_0, end = var_8713_end_0, end_mask = var_8713_end_mask_0, x = v_43_cast_fp16)[name = tensor("op_8713_cast_fp16")]; tensor var_8717_begin_0 = const()[name = tensor("op_8717_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_8717_end_0 = const()[name = tensor("op_8717_end_0"), val = tensor([2, 640, 1, 77])]; tensor var_8717_end_mask_0 = const()[name = tensor("op_8717_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8717_cast_fp16 = slice_by_index(begin = var_8717_begin_0, end = var_8717_end_0, end_mask = var_8717_end_mask_0, x = v_43_cast_fp16)[name = tensor("op_8717_cast_fp16")]; tensor var_8721_equation_0 = const()[name = tensor("op_8721_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8721_cast_fp16 = einsum(equation = var_8721_equation_0, values = (var_8659_cast_fp16, var_8639_cast_fp16))[name = tensor("op_8721_cast_fp16")]; tensor var_8722_to_fp16 = const()[name = tensor("op_8722_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_673_cast_fp16 = mul(x = var_8721_cast_fp16, y = var_8722_to_fp16)[name = tensor("aw_chunk_673_cast_fp16")]; tensor var_8725_equation_0 = const()[name = tensor("op_8725_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8725_cast_fp16 = einsum(equation = var_8725_equation_0, values = (var_8659_cast_fp16, var_8640_cast_fp16))[name = tensor("op_8725_cast_fp16")]; tensor var_8726_to_fp16 = const()[name = tensor("op_8726_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_675_cast_fp16 = mul(x = var_8725_cast_fp16, y = var_8726_to_fp16)[name = tensor("aw_chunk_675_cast_fp16")]; tensor var_8729_equation_0 = const()[name = tensor("op_8729_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8729_cast_fp16 = einsum(equation = var_8729_equation_0, values = (var_8663_cast_fp16, var_8641_cast_fp16))[name = tensor("op_8729_cast_fp16")]; tensor var_8730_to_fp16 = const()[name = tensor("op_8730_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_677_cast_fp16 = mul(x = var_8729_cast_fp16, y = var_8730_to_fp16)[name = tensor("aw_chunk_677_cast_fp16")]; tensor var_8733_equation_0 = const()[name = tensor("op_8733_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8733_cast_fp16 = einsum(equation = var_8733_equation_0, values = (var_8663_cast_fp16, var_8642_cast_fp16))[name = tensor("op_8733_cast_fp16")]; tensor var_8734_to_fp16 = const()[name = tensor("op_8734_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_679_cast_fp16 = mul(x = var_8733_cast_fp16, y = var_8734_to_fp16)[name = tensor("aw_chunk_679_cast_fp16")]; tensor var_8737_equation_0 = const()[name = tensor("op_8737_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8737_cast_fp16 = einsum(equation = var_8737_equation_0, values = (var_8667_cast_fp16, var_8643_cast_fp16))[name = tensor("op_8737_cast_fp16")]; tensor var_8738_to_fp16 = const()[name = tensor("op_8738_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_681_cast_fp16 = mul(x = var_8737_cast_fp16, y = var_8738_to_fp16)[name = tensor("aw_chunk_681_cast_fp16")]; tensor var_8741_equation_0 = const()[name = tensor("op_8741_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8741_cast_fp16 = einsum(equation = var_8741_equation_0, values = (var_8667_cast_fp16, var_8644_cast_fp16))[name = tensor("op_8741_cast_fp16")]; tensor var_8742_to_fp16 = const()[name = tensor("op_8742_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_683_cast_fp16 = mul(x = var_8741_cast_fp16, y = var_8742_to_fp16)[name = tensor("aw_chunk_683_cast_fp16")]; tensor var_8745_equation_0 = const()[name = tensor("op_8745_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8745_cast_fp16 = einsum(equation = var_8745_equation_0, values = (var_8671_cast_fp16, var_8645_cast_fp16))[name = tensor("op_8745_cast_fp16")]; tensor var_8746_to_fp16 = const()[name = tensor("op_8746_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_685_cast_fp16 = mul(x = var_8745_cast_fp16, y = var_8746_to_fp16)[name = tensor("aw_chunk_685_cast_fp16")]; tensor var_8749_equation_0 = const()[name = tensor("op_8749_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8749_cast_fp16 = einsum(equation = var_8749_equation_0, values = (var_8671_cast_fp16, var_8646_cast_fp16))[name = tensor("op_8749_cast_fp16")]; tensor var_8750_to_fp16 = const()[name = tensor("op_8750_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_687_cast_fp16 = mul(x = var_8749_cast_fp16, y = var_8750_to_fp16)[name = tensor("aw_chunk_687_cast_fp16")]; tensor var_8753_equation_0 = const()[name = tensor("op_8753_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8753_cast_fp16 = einsum(equation = var_8753_equation_0, values = (var_8675_cast_fp16, var_8647_cast_fp16))[name = tensor("op_8753_cast_fp16")]; tensor var_8754_to_fp16 = const()[name = tensor("op_8754_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_689_cast_fp16 = mul(x = var_8753_cast_fp16, y = var_8754_to_fp16)[name = tensor("aw_chunk_689_cast_fp16")]; tensor var_8757_equation_0 = const()[name = tensor("op_8757_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8757_cast_fp16 = einsum(equation = var_8757_equation_0, values = (var_8675_cast_fp16, var_8648_cast_fp16))[name = tensor("op_8757_cast_fp16")]; tensor var_8758_to_fp16 = const()[name = tensor("op_8758_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_691_cast_fp16 = mul(x = var_8757_cast_fp16, y = var_8758_to_fp16)[name = tensor("aw_chunk_691_cast_fp16")]; tensor var_8761_equation_0 = const()[name = tensor("op_8761_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8761_cast_fp16 = einsum(equation = var_8761_equation_0, values = (var_8679_cast_fp16, var_8649_cast_fp16))[name = tensor("op_8761_cast_fp16")]; tensor var_8762_to_fp16 = const()[name = tensor("op_8762_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_693_cast_fp16 = mul(x = var_8761_cast_fp16, y = var_8762_to_fp16)[name = tensor("aw_chunk_693_cast_fp16")]; tensor var_8765_equation_0 = const()[name = tensor("op_8765_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8765_cast_fp16 = einsum(equation = var_8765_equation_0, values = (var_8679_cast_fp16, var_8650_cast_fp16))[name = tensor("op_8765_cast_fp16")]; tensor var_8766_to_fp16 = const()[name = tensor("op_8766_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_695_cast_fp16 = mul(x = var_8765_cast_fp16, y = var_8766_to_fp16)[name = tensor("aw_chunk_695_cast_fp16")]; tensor var_8769_equation_0 = const()[name = tensor("op_8769_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8769_cast_fp16 = einsum(equation = var_8769_equation_0, values = (var_8683_cast_fp16, var_8651_cast_fp16))[name = tensor("op_8769_cast_fp16")]; tensor var_8770_to_fp16 = const()[name = tensor("op_8770_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_697_cast_fp16 = mul(x = var_8769_cast_fp16, y = var_8770_to_fp16)[name = tensor("aw_chunk_697_cast_fp16")]; tensor var_8773_equation_0 = const()[name = tensor("op_8773_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8773_cast_fp16 = einsum(equation = var_8773_equation_0, values = (var_8683_cast_fp16, var_8652_cast_fp16))[name = tensor("op_8773_cast_fp16")]; tensor var_8774_to_fp16 = const()[name = tensor("op_8774_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_699_cast_fp16 = mul(x = var_8773_cast_fp16, y = var_8774_to_fp16)[name = tensor("aw_chunk_699_cast_fp16")]; tensor var_8777_equation_0 = const()[name = tensor("op_8777_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8777_cast_fp16 = einsum(equation = var_8777_equation_0, values = (var_8687_cast_fp16, var_8653_cast_fp16))[name = tensor("op_8777_cast_fp16")]; tensor var_8778_to_fp16 = const()[name = tensor("op_8778_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_701_cast_fp16 = mul(x = var_8777_cast_fp16, y = var_8778_to_fp16)[name = tensor("aw_chunk_701_cast_fp16")]; tensor var_8781_equation_0 = const()[name = tensor("op_8781_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_8781_cast_fp16 = einsum(equation = var_8781_equation_0, values = (var_8687_cast_fp16, var_8654_cast_fp16))[name = tensor("op_8781_cast_fp16")]; tensor var_8782_to_fp16 = const()[name = tensor("op_8782_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_703_cast_fp16 = mul(x = var_8781_cast_fp16, y = var_8782_to_fp16)[name = tensor("aw_chunk_703_cast_fp16")]; tensor var_8784_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_673_cast_fp16)[name = tensor("op_8784_cast_fp16")]; tensor var_8785_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_675_cast_fp16)[name = tensor("op_8785_cast_fp16")]; tensor var_8786_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_677_cast_fp16)[name = tensor("op_8786_cast_fp16")]; tensor var_8787_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_679_cast_fp16)[name = tensor("op_8787_cast_fp16")]; tensor var_8788_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_681_cast_fp16)[name = tensor("op_8788_cast_fp16")]; tensor var_8789_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_683_cast_fp16)[name = tensor("op_8789_cast_fp16")]; tensor var_8790_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_685_cast_fp16)[name = tensor("op_8790_cast_fp16")]; tensor var_8791_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_687_cast_fp16)[name = tensor("op_8791_cast_fp16")]; tensor var_8792_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_689_cast_fp16)[name = tensor("op_8792_cast_fp16")]; tensor var_8793_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_691_cast_fp16)[name = tensor("op_8793_cast_fp16")]; tensor var_8794_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_693_cast_fp16)[name = tensor("op_8794_cast_fp16")]; tensor var_8795_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_695_cast_fp16)[name = tensor("op_8795_cast_fp16")]; tensor var_8796_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_697_cast_fp16)[name = tensor("op_8796_cast_fp16")]; tensor var_8797_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_699_cast_fp16)[name = tensor("op_8797_cast_fp16")]; tensor var_8798_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_701_cast_fp16)[name = tensor("op_8798_cast_fp16")]; tensor var_8799_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_703_cast_fp16)[name = tensor("op_8799_cast_fp16")]; tensor var_8801_equation_0 = const()[name = tensor("op_8801_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8801_cast_fp16 = einsum(equation = var_8801_equation_0, values = (var_8689_cast_fp16, var_8784_cast_fp16))[name = tensor("op_8801_cast_fp16")]; tensor var_8803_equation_0 = const()[name = tensor("op_8803_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8803_cast_fp16 = einsum(equation = var_8803_equation_0, values = (var_8689_cast_fp16, var_8785_cast_fp16))[name = tensor("op_8803_cast_fp16")]; tensor var_8805_equation_0 = const()[name = tensor("op_8805_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8805_cast_fp16 = einsum(equation = var_8805_equation_0, values = (var_8693_cast_fp16, var_8786_cast_fp16))[name = tensor("op_8805_cast_fp16")]; tensor var_8807_equation_0 = const()[name = tensor("op_8807_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8807_cast_fp16 = einsum(equation = var_8807_equation_0, values = (var_8693_cast_fp16, var_8787_cast_fp16))[name = tensor("op_8807_cast_fp16")]; tensor var_8809_equation_0 = const()[name = tensor("op_8809_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8809_cast_fp16 = einsum(equation = var_8809_equation_0, values = (var_8697_cast_fp16, var_8788_cast_fp16))[name = tensor("op_8809_cast_fp16")]; tensor var_8811_equation_0 = const()[name = tensor("op_8811_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8811_cast_fp16 = einsum(equation = var_8811_equation_0, values = (var_8697_cast_fp16, var_8789_cast_fp16))[name = tensor("op_8811_cast_fp16")]; tensor var_8813_equation_0 = const()[name = tensor("op_8813_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8813_cast_fp16 = einsum(equation = var_8813_equation_0, values = (var_8701_cast_fp16, var_8790_cast_fp16))[name = tensor("op_8813_cast_fp16")]; tensor var_8815_equation_0 = const()[name = tensor("op_8815_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8815_cast_fp16 = einsum(equation = var_8815_equation_0, values = (var_8701_cast_fp16, var_8791_cast_fp16))[name = tensor("op_8815_cast_fp16")]; tensor var_8817_equation_0 = const()[name = tensor("op_8817_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8817_cast_fp16 = einsum(equation = var_8817_equation_0, values = (var_8705_cast_fp16, var_8792_cast_fp16))[name = tensor("op_8817_cast_fp16")]; tensor var_8819_equation_0 = const()[name = tensor("op_8819_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8819_cast_fp16 = einsum(equation = var_8819_equation_0, values = (var_8705_cast_fp16, var_8793_cast_fp16))[name = tensor("op_8819_cast_fp16")]; tensor var_8821_equation_0 = const()[name = tensor("op_8821_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8821_cast_fp16 = einsum(equation = var_8821_equation_0, values = (var_8709_cast_fp16, var_8794_cast_fp16))[name = tensor("op_8821_cast_fp16")]; tensor var_8823_equation_0 = const()[name = tensor("op_8823_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8823_cast_fp16 = einsum(equation = var_8823_equation_0, values = (var_8709_cast_fp16, var_8795_cast_fp16))[name = tensor("op_8823_cast_fp16")]; tensor var_8825_equation_0 = const()[name = tensor("op_8825_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8825_cast_fp16 = einsum(equation = var_8825_equation_0, values = (var_8713_cast_fp16, var_8796_cast_fp16))[name = tensor("op_8825_cast_fp16")]; tensor var_8827_equation_0 = const()[name = tensor("op_8827_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8827_cast_fp16 = einsum(equation = var_8827_equation_0, values = (var_8713_cast_fp16, var_8797_cast_fp16))[name = tensor("op_8827_cast_fp16")]; tensor var_8829_equation_0 = const()[name = tensor("op_8829_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8829_cast_fp16 = einsum(equation = var_8829_equation_0, values = (var_8717_cast_fp16, var_8798_cast_fp16))[name = tensor("op_8829_cast_fp16")]; tensor var_8831_equation_0 = const()[name = tensor("op_8831_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8831_cast_fp16 = einsum(equation = var_8831_equation_0, values = (var_8717_cast_fp16, var_8799_cast_fp16))[name = tensor("op_8831_cast_fp16")]; tensor var_8833_interleave_0 = const()[name = tensor("op_8833_interleave_0"), val = tensor(false)]; tensor var_8833_cast_fp16 = concat(axis = var_8160, interleave = var_8833_interleave_0, values = (var_8801_cast_fp16, var_8803_cast_fp16))[name = tensor("op_8833_cast_fp16")]; tensor var_8835_interleave_0 = const()[name = tensor("op_8835_interleave_0"), val = tensor(false)]; tensor var_8835_cast_fp16 = concat(axis = var_8160, interleave = var_8835_interleave_0, values = (var_8805_cast_fp16, var_8807_cast_fp16))[name = tensor("op_8835_cast_fp16")]; tensor var_8837_interleave_0 = const()[name = tensor("op_8837_interleave_0"), val = tensor(false)]; tensor var_8837_cast_fp16 = concat(axis = var_8160, interleave = var_8837_interleave_0, values = (var_8809_cast_fp16, var_8811_cast_fp16))[name = tensor("op_8837_cast_fp16")]; tensor var_8839_interleave_0 = const()[name = tensor("op_8839_interleave_0"), val = tensor(false)]; tensor var_8839_cast_fp16 = concat(axis = var_8160, interleave = var_8839_interleave_0, values = (var_8813_cast_fp16, var_8815_cast_fp16))[name = tensor("op_8839_cast_fp16")]; tensor var_8841_interleave_0 = const()[name = tensor("op_8841_interleave_0"), val = tensor(false)]; tensor var_8841_cast_fp16 = concat(axis = var_8160, interleave = var_8841_interleave_0, values = (var_8817_cast_fp16, var_8819_cast_fp16))[name = tensor("op_8841_cast_fp16")]; tensor var_8843_interleave_0 = const()[name = tensor("op_8843_interleave_0"), val = tensor(false)]; tensor var_8843_cast_fp16 = concat(axis = var_8160, interleave = var_8843_interleave_0, values = (var_8821_cast_fp16, var_8823_cast_fp16))[name = tensor("op_8843_cast_fp16")]; tensor var_8845_interleave_0 = const()[name = tensor("op_8845_interleave_0"), val = tensor(false)]; tensor var_8845_cast_fp16 = concat(axis = var_8160, interleave = var_8845_interleave_0, values = (var_8825_cast_fp16, var_8827_cast_fp16))[name = tensor("op_8845_cast_fp16")]; tensor var_8847_interleave_0 = const()[name = tensor("op_8847_interleave_0"), val = tensor(false)]; tensor var_8847_cast_fp16 = concat(axis = var_8160, interleave = var_8847_interleave_0, values = (var_8829_cast_fp16, var_8831_cast_fp16))[name = tensor("op_8847_cast_fp16")]; tensor input_385_interleave_0 = const()[name = tensor("input_385_interleave_0"), val = tensor(false)]; tensor input_385_cast_fp16 = concat(axis = var_8182, interleave = input_385_interleave_0, values = (var_8833_cast_fp16, var_8835_cast_fp16, var_8837_cast_fp16, var_8839_cast_fp16, var_8841_cast_fp16, var_8843_cast_fp16, var_8845_cast_fp16, var_8847_cast_fp16))[name = tensor("input_385_cast_fp16")]; tensor var_8853 = const()[name = tensor("op_8853"), val = tensor([1, 1])]; tensor var_8855 = const()[name = tensor("op_8855"), val = tensor([1, 1])]; tensor var_8857_pad_type_0 = const()[name = tensor("op_8857_pad_type_0"), val = tensor("custom")]; tensor var_8857_pad_0 = const()[name = tensor("op_8857_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1579123008)))]; tensor up_blocks_2_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1579942272)))]; tensor var_8857_cast_fp16 = conv(bias = up_blocks_2_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_8855, groups = var_8182, pad = var_8857_pad_0, pad_type = var_8857_pad_type_0, strides = var_8853, weight = up_blocks_2_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_385_cast_fp16)[name = tensor("op_8857_cast_fp16")]; tensor inputs_65_cast_fp16 = add(x = var_8857_cast_fp16, y = inputs_63_cast_fp16)[name = tensor("inputs_65_cast_fp16")]; tensor input_387_axes_0 = const()[name = tensor("input_387_axes_0"), val = tensor([1])]; tensor input_387_gamma_0_to_fp16 = const()[name = tensor("input_387_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1579943616)))]; tensor input_387_beta_0_to_fp16 = const()[name = tensor("input_387_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1579944960)))]; tensor var_8867_to_fp16 = const()[name = tensor("op_8867_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_387_cast_fp16 = layer_norm(axes = input_387_axes_0, beta = input_387_beta_0_to_fp16, epsilon = var_8867_to_fp16, gamma = input_387_gamma_0_to_fp16, x = inputs_65_cast_fp16)[name = tensor("input_387_cast_fp16")]; tensor var_8883 = const()[name = tensor("op_8883"), val = tensor([1, 1])]; tensor var_8885 = const()[name = tensor("op_8885"), val = tensor([1, 1])]; tensor var_8887_pad_type_0 = const()[name = tensor("op_8887_pad_type_0"), val = tensor("custom")]; tensor var_8887_pad_0 = const()[name = tensor("op_8887_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1579946304)))]; tensor up_blocks_2_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1586499968)))]; tensor var_8887_cast_fp16 = conv(bias = up_blocks_2_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_8885, groups = var_8182, pad = var_8887_pad_0, pad_type = var_8887_pad_type_0, strides = var_8883, weight = up_blocks_2_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_387_cast_fp16)[name = tensor("op_8887_cast_fp16")]; tensor var_8888_split_sizes_0 = const()[name = tensor("op_8888_split_sizes_0"), val = tensor([2560, 2560])]; tensor var_8888_axis_0 = const()[name = tensor("op_8888_axis_0"), val = tensor(1)]; tensor var_8888_cast_fp16_0, tensor var_8888_cast_fp16_1 = split(axis = var_8888_axis_0, split_sizes = var_8888_split_sizes_0, x = var_8887_cast_fp16)[name = tensor("op_8888_cast_fp16")]; tensor var_8890_mode_0 = const()[name = tensor("op_8890_mode_0"), val = tensor("EXACT")]; tensor var_8890_cast_fp16 = gelu(mode = var_8890_mode_0, x = var_8888_cast_fp16_1)[name = tensor("op_8890_cast_fp16")]; tensor input_389_cast_fp16 = mul(x = var_8888_cast_fp16_0, y = var_8890_cast_fp16)[name = tensor("input_389_cast_fp16")]; tensor var_8894 = const()[name = tensor("op_8894"), val = tensor([1, 1])]; tensor var_8896 = const()[name = tensor("op_8896"), val = tensor([1, 1])]; tensor var_8898_pad_type_0 = const()[name = tensor("op_8898_pad_type_0"), val = tensor("custom")]; tensor var_8898_pad_0 = const()[name = tensor("op_8898_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1586510272)))]; tensor up_blocks_2_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1589787136)))]; tensor var_8898_cast_fp16 = conv(bias = up_blocks_2_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_8896, groups = var_8182, pad = var_8898_pad_0, pad_type = var_8898_pad_type_0, strides = var_8894, weight = up_blocks_2_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_389_cast_fp16)[name = tensor("op_8898_cast_fp16")]; tensor hidden_states_233_cast_fp16 = add(x = var_8898_cast_fp16, y = inputs_65_cast_fp16)[name = tensor("hidden_states_233_cast_fp16")]; tensor var_8900 = const()[name = tensor("op_8900"), val = tensor([2, 640, 32, 32])]; tensor input_391_cast_fp16 = reshape(shape = var_8900, x = hidden_states_233_cast_fp16)[name = tensor("input_391_cast_fp16")]; tensor var_8904 = const()[name = tensor("op_8904"), val = tensor([1, 1])]; tensor var_8906 = const()[name = tensor("op_8906"), val = tensor([1, 1])]; tensor hidden_states_235_pad_type_0 = const()[name = tensor("hidden_states_235_pad_type_0"), val = tensor("custom")]; tensor hidden_states_235_pad_0 = const()[name = tensor("hidden_states_235_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_0_proj_out_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1589788480)))]; tensor up_blocks_2_attentions_0_proj_out_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_0_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1590607744)))]; tensor hidden_states_235_cast_fp16 = conv(bias = up_blocks_2_attentions_0_proj_out_bias_to_fp16, dilations = var_8906, groups = var_8182, pad = hidden_states_235_pad_0, pad_type = hidden_states_235_pad_type_0, strides = var_8904, weight = up_blocks_2_attentions_0_proj_out_weight_to_fp16, x = input_391_cast_fp16)[name = tensor("hidden_states_235_cast_fp16")]; tensor hidden_states_237_cast_fp16 = add(x = hidden_states_235_cast_fp16, y = hidden_states_223_cast_fp16)[name = tensor("hidden_states_237_cast_fp16")]; tensor input_393_interleave_0 = const()[name = tensor("input_393_interleave_0"), val = tensor(false)]; tensor input_393_cast_fp16 = concat(axis = var_8182, interleave = input_393_interleave_0, values = (hidden_states_237_cast_fp16, input_89_cast_fp16))[name = tensor("input_393_cast_fp16")]; tensor reshape_180_shape_0 = const()[name = tensor("reshape_180_shape_0"), val = tensor([2, 32, 40, 32, 32])]; tensor reshape_180_cast_fp16 = reshape(shape = reshape_180_shape_0, x = input_393_cast_fp16)[name = tensor("reshape_180_cast_fp16")]; tensor reduce_mean_135_axes_0 = const()[name = tensor("reduce_mean_135_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_135_keep_dims_0 = const()[name = tensor("reduce_mean_135_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_135_cast_fp16 = reduce_mean(axes = reduce_mean_135_axes_0, keep_dims = reduce_mean_135_keep_dims_0, x = reshape_180_cast_fp16)[name = tensor("reduce_mean_135_cast_fp16")]; tensor sub_90_cast_fp16 = sub(x = reshape_180_cast_fp16, y = reduce_mean_135_cast_fp16)[name = tensor("sub_90_cast_fp16")]; tensor square_45_cast_fp16 = square(x = sub_90_cast_fp16)[name = tensor("square_45_cast_fp16")]; tensor reduce_mean_137_axes_0 = const()[name = tensor("reduce_mean_137_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_137_keep_dims_0 = const()[name = tensor("reduce_mean_137_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_137_cast_fp16 = reduce_mean(axes = reduce_mean_137_axes_0, keep_dims = reduce_mean_137_keep_dims_0, x = square_45_cast_fp16)[name = tensor("reduce_mean_137_cast_fp16")]; tensor add_90_y_0_to_fp16 = const()[name = tensor("add_90_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_90_cast_fp16 = add(x = reduce_mean_137_cast_fp16, y = add_90_y_0_to_fp16)[name = tensor("add_90_cast_fp16")]; tensor sqrt_45_cast_fp16 = sqrt(x = add_90_cast_fp16)[name = tensor("sqrt_45_cast_fp16")]; tensor real_div_45_cast_fp16 = real_div(x = sub_90_cast_fp16, y = sqrt_45_cast_fp16)[name = tensor("real_div_45_cast_fp16")]; tensor reshape_181_shape_0 = const()[name = tensor("reshape_181_shape_0"), val = tensor([2, 1280, 32, 32])]; tensor reshape_181_cast_fp16 = reshape(shape = reshape_181_shape_0, x = real_div_45_cast_fp16)[name = tensor("reshape_181_cast_fp16")]; tensor add_91_gamma_0_to_fp16 = const()[name = tensor("add_91_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1590609088)))]; tensor add_91_beta_0_to_fp16 = const()[name = tensor("add_91_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1590611712)))]; tensor add_91_epsilon_0_to_fp16 = const()[name = tensor("add_91_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_91_cast_fp16 = batch_norm(beta = add_91_beta_0_to_fp16, epsilon = add_91_epsilon_0_to_fp16, gamma = add_91_gamma_0_to_fp16, mean = add_27_mean_0_to_fp16, variance = add_27_variance_0_to_fp16, x = reshape_181_cast_fp16)[name = tensor("add_91_cast_fp16")]; tensor input_397_cast_fp16 = silu(x = add_91_cast_fp16)[name = tensor("input_397_cast_fp16")]; tensor var_8924 = const()[name = tensor("op_8924"), val = tensor([1, 1])]; tensor var_8926 = const()[name = tensor("op_8926"), val = tensor([1, 1])]; tensor hidden_states_239_pad_type_0 = const()[name = tensor("hidden_states_239_pad_type_0"), val = tensor("custom")]; tensor hidden_states_239_pad_0 = const()[name = tensor("hidden_states_239_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_2_resnets_1_conv1_weight_to_fp16 = const()[name = tensor("up_blocks_2_resnets_1_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1590614336)))]; tensor up_blocks_2_resnets_1_conv1_bias_to_fp16 = const()[name = tensor("up_blocks_2_resnets_1_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1605360000)))]; tensor hidden_states_239_cast_fp16 = conv(bias = up_blocks_2_resnets_1_conv1_bias_to_fp16, dilations = var_8926, groups = var_8182, pad = hidden_states_239_pad_0, pad_type = hidden_states_239_pad_type_0, strides = var_8924, weight = up_blocks_2_resnets_1_conv1_weight_to_fp16, x = input_397_cast_fp16)[name = tensor("hidden_states_239_cast_fp16")]; tensor var_8932 = const()[name = tensor("op_8932"), val = tensor([1, 1])]; tensor var_8934 = const()[name = tensor("op_8934"), val = tensor([1, 1])]; tensor temb_35_pad_type_0 = const()[name = tensor("temb_35_pad_type_0"), val = tensor("custom")]; tensor temb_35_pad_0 = const()[name = tensor("temb_35_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_resnets_1_time_emb_proj_weight_to_fp16 = const()[name = tensor("up_blocks_2_resnets_1_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1605361344)))]; tensor up_blocks_2_resnets_1_time_emb_proj_bias_to_fp16 = const()[name = tensor("up_blocks_2_resnets_1_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1606999808)))]; tensor temb_35_cast_fp16 = conv(bias = up_blocks_2_resnets_1_time_emb_proj_bias_to_fp16, dilations = var_8934, groups = var_8182, pad = temb_35_pad_0, pad_type = temb_35_pad_type_0, strides = var_8932, weight = up_blocks_2_resnets_1_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_35_cast_fp16")]; tensor input_401_cast_fp16 = add(x = hidden_states_239_cast_fp16, y = temb_35_cast_fp16)[name = tensor("input_401_cast_fp16")]; tensor reshape_184_shape_0 = const()[name = tensor("reshape_184_shape_0"), val = tensor([2, 32, 20, 32, 32])]; tensor reshape_184_cast_fp16 = reshape(shape = reshape_184_shape_0, x = input_401_cast_fp16)[name = tensor("reshape_184_cast_fp16")]; tensor reduce_mean_138_axes_0 = const()[name = tensor("reduce_mean_138_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_138_keep_dims_0 = const()[name = tensor("reduce_mean_138_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_138_cast_fp16 = reduce_mean(axes = reduce_mean_138_axes_0, keep_dims = reduce_mean_138_keep_dims_0, x = reshape_184_cast_fp16)[name = tensor("reduce_mean_138_cast_fp16")]; tensor sub_92_cast_fp16 = sub(x = reshape_184_cast_fp16, y = reduce_mean_138_cast_fp16)[name = tensor("sub_92_cast_fp16")]; tensor square_46_cast_fp16 = square(x = sub_92_cast_fp16)[name = tensor("square_46_cast_fp16")]; tensor reduce_mean_140_axes_0 = const()[name = tensor("reduce_mean_140_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_140_keep_dims_0 = const()[name = tensor("reduce_mean_140_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_140_cast_fp16 = reduce_mean(axes = reduce_mean_140_axes_0, keep_dims = reduce_mean_140_keep_dims_0, x = square_46_cast_fp16)[name = tensor("reduce_mean_140_cast_fp16")]; tensor add_92_y_0_to_fp16 = const()[name = tensor("add_92_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_92_cast_fp16 = add(x = reduce_mean_140_cast_fp16, y = add_92_y_0_to_fp16)[name = tensor("add_92_cast_fp16")]; tensor sqrt_46_cast_fp16 = sqrt(x = add_92_cast_fp16)[name = tensor("sqrt_46_cast_fp16")]; tensor real_div_46_cast_fp16 = real_div(x = sub_92_cast_fp16, y = sqrt_46_cast_fp16)[name = tensor("real_div_46_cast_fp16")]; tensor reshape_185_shape_0 = const()[name = tensor("reshape_185_shape_0"), val = tensor([2, 640, 32, 32])]; tensor reshape_185_cast_fp16 = reshape(shape = reshape_185_shape_0, x = real_div_46_cast_fp16)[name = tensor("reshape_185_cast_fp16")]; tensor add_93_gamma_0_to_fp16 = const()[name = tensor("add_93_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1607001152)))]; tensor add_93_beta_0_to_fp16 = const()[name = tensor("add_93_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1607002496)))]; tensor add_93_epsilon_0_to_fp16 = const()[name = tensor("add_93_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_93_cast_fp16 = batch_norm(beta = add_93_beta_0_to_fp16, epsilon = add_93_epsilon_0_to_fp16, gamma = add_93_gamma_0_to_fp16, mean = add_15_mean_0_to_fp16, variance = add_15_variance_0_to_fp16, x = reshape_185_cast_fp16)[name = tensor("add_93_cast_fp16")]; tensor input_405_cast_fp16 = silu(x = add_93_cast_fp16)[name = tensor("input_405_cast_fp16")]; tensor var_8944 = const()[name = tensor("op_8944"), val = tensor([1, 1])]; tensor var_8946 = const()[name = tensor("op_8946"), val = tensor([1, 1])]; tensor hidden_states_241_pad_type_0 = const()[name = tensor("hidden_states_241_pad_type_0"), val = tensor("custom")]; tensor hidden_states_241_pad_0 = const()[name = tensor("hidden_states_241_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_2_resnets_1_conv2_weight_to_fp16 = const()[name = tensor("up_blocks_2_resnets_1_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1607003840)))]; tensor up_blocks_2_resnets_1_conv2_bias_to_fp16 = const()[name = tensor("up_blocks_2_resnets_1_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1614376704)))]; tensor hidden_states_241_cast_fp16 = conv(bias = up_blocks_2_resnets_1_conv2_bias_to_fp16, dilations = var_8946, groups = var_8182, pad = hidden_states_241_pad_0, pad_type = hidden_states_241_pad_type_0, strides = var_8944, weight = up_blocks_2_resnets_1_conv2_weight_to_fp16, x = input_405_cast_fp16)[name = tensor("hidden_states_241_cast_fp16")]; tensor var_8951 = const()[name = tensor("op_8951"), val = tensor([1, 1])]; tensor var_8953 = const()[name = tensor("op_8953"), val = tensor([1, 1])]; tensor x_19_pad_type_0 = const()[name = tensor("x_19_pad_type_0"), val = tensor("custom")]; tensor x_19_pad_0 = const()[name = tensor("x_19_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_resnets_1_conv_shortcut_weight_to_fp16 = const()[name = tensor("up_blocks_2_resnets_1_conv_shortcut_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1614378048)))]; tensor up_blocks_2_resnets_1_conv_shortcut_bias_to_fp16 = const()[name = tensor("up_blocks_2_resnets_1_conv_shortcut_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1616016512)))]; tensor x_19_cast_fp16 = conv(bias = up_blocks_2_resnets_1_conv_shortcut_bias_to_fp16, dilations = var_8953, groups = var_8182, pad = x_19_pad_0, pad_type = x_19_pad_type_0, strides = var_8951, weight = up_blocks_2_resnets_1_conv_shortcut_weight_to_fp16, x = input_393_cast_fp16)[name = tensor("x_19_cast_fp16")]; tensor hidden_states_243_cast_fp16 = add(x = x_19_cast_fp16, y = hidden_states_241_cast_fp16)[name = tensor("hidden_states_243_cast_fp16")]; tensor reshape_188_shape_0 = const()[name = tensor("reshape_188_shape_0"), val = tensor([2, 32, 20, 32, 32])]; tensor reshape_188_cast_fp16 = reshape(shape = reshape_188_shape_0, x = hidden_states_243_cast_fp16)[name = tensor("reshape_188_cast_fp16")]; tensor reduce_mean_141_axes_0 = const()[name = tensor("reduce_mean_141_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_141_keep_dims_0 = const()[name = tensor("reduce_mean_141_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_141_cast_fp16 = reduce_mean(axes = reduce_mean_141_axes_0, keep_dims = reduce_mean_141_keep_dims_0, x = reshape_188_cast_fp16)[name = tensor("reduce_mean_141_cast_fp16")]; tensor sub_94_cast_fp16 = sub(x = reshape_188_cast_fp16, y = reduce_mean_141_cast_fp16)[name = tensor("sub_94_cast_fp16")]; tensor square_47_cast_fp16 = square(x = sub_94_cast_fp16)[name = tensor("square_47_cast_fp16")]; tensor reduce_mean_143_axes_0 = const()[name = tensor("reduce_mean_143_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_143_keep_dims_0 = const()[name = tensor("reduce_mean_143_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_143_cast_fp16 = reduce_mean(axes = reduce_mean_143_axes_0, keep_dims = reduce_mean_143_keep_dims_0, x = square_47_cast_fp16)[name = tensor("reduce_mean_143_cast_fp16")]; tensor add_94_y_0_to_fp16 = const()[name = tensor("add_94_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_94_cast_fp16 = add(x = reduce_mean_143_cast_fp16, y = add_94_y_0_to_fp16)[name = tensor("add_94_cast_fp16")]; tensor sqrt_47_cast_fp16 = sqrt(x = add_94_cast_fp16)[name = tensor("sqrt_47_cast_fp16")]; tensor real_div_47_cast_fp16 = real_div(x = sub_94_cast_fp16, y = sqrt_47_cast_fp16)[name = tensor("real_div_47_cast_fp16")]; tensor reshape_189_shape_0 = const()[name = tensor("reshape_189_shape_0"), val = tensor([2, 640, 32, 32])]; tensor reshape_189_cast_fp16 = reshape(shape = reshape_189_shape_0, x = real_div_47_cast_fp16)[name = tensor("reshape_189_cast_fp16")]; tensor add_95_gamma_0_to_fp16 = const()[name = tensor("add_95_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1616017856)))]; tensor add_95_beta_0_to_fp16 = const()[name = tensor("add_95_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1616019200)))]; tensor add_95_epsilon_0_to_fp16 = const()[name = tensor("add_95_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_95_cast_fp16 = batch_norm(beta = add_95_beta_0_to_fp16, epsilon = add_95_epsilon_0_to_fp16, gamma = add_95_gamma_0_to_fp16, mean = add_15_mean_0_to_fp16, variance = add_15_variance_0_to_fp16, x = reshape_189_cast_fp16)[name = tensor("add_95_cast_fp16")]; tensor var_8973 = const()[name = tensor("op_8973"), val = tensor([1, 1])]; tensor var_8975 = const()[name = tensor("op_8975"), val = tensor([1, 1])]; tensor hidden_states_245_pad_type_0 = const()[name = tensor("hidden_states_245_pad_type_0"), val = tensor("custom")]; tensor hidden_states_245_pad_0 = const()[name = tensor("hidden_states_245_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_1_proj_in_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1616020544)))]; tensor up_blocks_2_attentions_1_proj_in_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1616839808)))]; tensor hidden_states_245_cast_fp16 = conv(bias = up_blocks_2_attentions_1_proj_in_bias_to_fp16, dilations = var_8975, groups = var_8182, pad = hidden_states_245_pad_0, pad_type = hidden_states_245_pad_type_0, strides = var_8973, weight = up_blocks_2_attentions_1_proj_in_weight_to_fp16, x = add_95_cast_fp16)[name = tensor("hidden_states_245_cast_fp16")]; tensor var_8980 = const()[name = tensor("op_8980"), val = tensor([2, 640, 1, 1024])]; tensor inputs_67_cast_fp16 = reshape(shape = var_8980, x = hidden_states_245_cast_fp16)[name = tensor("inputs_67_cast_fp16")]; tensor hidden_states_247_axes_0 = const()[name = tensor("hidden_states_247_axes_0"), val = tensor([1])]; tensor hidden_states_247_gamma_0_to_fp16 = const()[name = tensor("hidden_states_247_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1616841152)))]; tensor hidden_states_247_beta_0_to_fp16 = const()[name = tensor("hidden_states_247_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1616842496)))]; tensor var_8996_to_fp16 = const()[name = tensor("op_8996_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_247_cast_fp16 = layer_norm(axes = hidden_states_247_axes_0, beta = hidden_states_247_beta_0_to_fp16, epsilon = var_8996_to_fp16, gamma = hidden_states_247_gamma_0_to_fp16, x = inputs_67_cast_fp16)[name = tensor("hidden_states_247_cast_fp16")]; tensor var_9011 = const()[name = tensor("op_9011"), val = tensor([1, 1])]; tensor var_9013 = const()[name = tensor("op_9013"), val = tensor([1, 1])]; tensor q_45_pad_type_0 = const()[name = tensor("q_45_pad_type_0"), val = tensor("custom")]; tensor q_45_pad_0 = const()[name = tensor("q_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1616843840)))]; tensor q_45_cast_fp16 = conv(dilations = var_9013, groups = var_8182, pad = q_45_pad_0, pad_type = q_45_pad_type_0, strides = var_9011, weight = up_blocks_2_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_247_cast_fp16)[name = tensor("q_45_cast_fp16")]; tensor var_9017 = const()[name = tensor("op_9017"), val = tensor([1, 1])]; tensor var_9019 = const()[name = tensor("op_9019"), val = tensor([1, 1])]; tensor k_89_pad_type_0 = const()[name = tensor("k_89_pad_type_0"), val = tensor("custom")]; tensor k_89_pad_0 = const()[name = tensor("k_89_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1617663104)))]; tensor k_89_cast_fp16 = conv(dilations = var_9019, groups = var_8182, pad = k_89_pad_0, pad_type = k_89_pad_type_0, strides = var_9017, weight = up_blocks_2_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_247_cast_fp16)[name = tensor("k_89_cast_fp16")]; tensor var_9023 = const()[name = tensor("op_9023"), val = tensor([1, 1])]; tensor var_9025 = const()[name = tensor("op_9025"), val = tensor([1, 1])]; tensor v_45_pad_type_0 = const()[name = tensor("v_45_pad_type_0"), val = tensor("custom")]; tensor v_45_pad_0 = const()[name = tensor("v_45_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1618482368)))]; tensor v_45_cast_fp16 = conv(dilations = var_9025, groups = var_8182, pad = v_45_pad_0, pad_type = v_45_pad_type_0, strides = var_9023, weight = up_blocks_2_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_247_cast_fp16)[name = tensor("v_45_cast_fp16")]; tensor var_9029_begin_0 = const()[name = tensor("op_9029_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9029_end_0 = const()[name = tensor("op_9029_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9029_end_mask_0 = const()[name = tensor("op_9029_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9029_cast_fp16 = slice_by_index(begin = var_9029_begin_0, end = var_9029_end_0, end_mask = var_9029_end_mask_0, x = q_45_cast_fp16)[name = tensor("op_9029_cast_fp16")]; tensor var_9033_begin_0 = const()[name = tensor("op_9033_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_9033_end_0 = const()[name = tensor("op_9033_end_0"), val = tensor([2, 160, 1, 1024])]; tensor var_9033_end_mask_0 = const()[name = tensor("op_9033_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9033_cast_fp16 = slice_by_index(begin = var_9033_begin_0, end = var_9033_end_0, end_mask = var_9033_end_mask_0, x = q_45_cast_fp16)[name = tensor("op_9033_cast_fp16")]; tensor var_9037_begin_0 = const()[name = tensor("op_9037_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_9037_end_0 = const()[name = tensor("op_9037_end_0"), val = tensor([2, 240, 1, 1024])]; tensor var_9037_end_mask_0 = const()[name = tensor("op_9037_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9037_cast_fp16 = slice_by_index(begin = var_9037_begin_0, end = var_9037_end_0, end_mask = var_9037_end_mask_0, x = q_45_cast_fp16)[name = tensor("op_9037_cast_fp16")]; tensor var_9041_begin_0 = const()[name = tensor("op_9041_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_9041_end_0 = const()[name = tensor("op_9041_end_0"), val = tensor([2, 320, 1, 1024])]; tensor var_9041_end_mask_0 = const()[name = tensor("op_9041_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9041_cast_fp16 = slice_by_index(begin = var_9041_begin_0, end = var_9041_end_0, end_mask = var_9041_end_mask_0, x = q_45_cast_fp16)[name = tensor("op_9041_cast_fp16")]; tensor var_9045_begin_0 = const()[name = tensor("op_9045_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_9045_end_0 = const()[name = tensor("op_9045_end_0"), val = tensor([2, 400, 1, 1024])]; tensor var_9045_end_mask_0 = const()[name = tensor("op_9045_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9045_cast_fp16 = slice_by_index(begin = var_9045_begin_0, end = var_9045_end_0, end_mask = var_9045_end_mask_0, x = q_45_cast_fp16)[name = tensor("op_9045_cast_fp16")]; tensor var_9049_begin_0 = const()[name = tensor("op_9049_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_9049_end_0 = const()[name = tensor("op_9049_end_0"), val = tensor([2, 480, 1, 1024])]; tensor var_9049_end_mask_0 = const()[name = tensor("op_9049_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9049_cast_fp16 = slice_by_index(begin = var_9049_begin_0, end = var_9049_end_0, end_mask = var_9049_end_mask_0, x = q_45_cast_fp16)[name = tensor("op_9049_cast_fp16")]; tensor var_9053_begin_0 = const()[name = tensor("op_9053_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_9053_end_0 = const()[name = tensor("op_9053_end_0"), val = tensor([2, 560, 1, 1024])]; tensor var_9053_end_mask_0 = const()[name = tensor("op_9053_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9053_cast_fp16 = slice_by_index(begin = var_9053_begin_0, end = var_9053_end_0, end_mask = var_9053_end_mask_0, x = q_45_cast_fp16)[name = tensor("op_9053_cast_fp16")]; tensor var_9057_begin_0 = const()[name = tensor("op_9057_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_9057_end_0 = const()[name = tensor("op_9057_end_0"), val = tensor([2, 640, 1, 1024])]; tensor var_9057_end_mask_0 = const()[name = tensor("op_9057_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9057_cast_fp16 = slice_by_index(begin = var_9057_begin_0, end = var_9057_end_0, end_mask = var_9057_end_mask_0, x = q_45_cast_fp16)[name = tensor("op_9057_cast_fp16")]; tensor var_9060_begin_0 = const()[name = tensor("op_9060_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9060_end_0 = const()[name = tensor("op_9060_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9060_end_mask_0 = const()[name = tensor("op_9060_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9060_cast_fp16 = slice_by_index(begin = var_9060_begin_0, end = var_9060_end_0, end_mask = var_9060_end_mask_0, x = var_9029_cast_fp16)[name = tensor("op_9060_cast_fp16")]; tensor var_9061_begin_0 = const()[name = tensor("op_9061_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9061_end_0 = const()[name = tensor("op_9061_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9061_end_mask_0 = const()[name = tensor("op_9061_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9061_cast_fp16 = slice_by_index(begin = var_9061_begin_0, end = var_9061_end_0, end_mask = var_9061_end_mask_0, x = var_9029_cast_fp16)[name = tensor("op_9061_cast_fp16")]; tensor var_9062_begin_0 = const()[name = tensor("op_9062_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9062_end_0 = const()[name = tensor("op_9062_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9062_end_mask_0 = const()[name = tensor("op_9062_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9062_cast_fp16 = slice_by_index(begin = var_9062_begin_0, end = var_9062_end_0, end_mask = var_9062_end_mask_0, x = var_9033_cast_fp16)[name = tensor("op_9062_cast_fp16")]; tensor var_9063_begin_0 = const()[name = tensor("op_9063_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9063_end_0 = const()[name = tensor("op_9063_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9063_end_mask_0 = const()[name = tensor("op_9063_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9063_cast_fp16 = slice_by_index(begin = var_9063_begin_0, end = var_9063_end_0, end_mask = var_9063_end_mask_0, x = var_9033_cast_fp16)[name = tensor("op_9063_cast_fp16")]; tensor var_9064_begin_0 = const()[name = tensor("op_9064_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9064_end_0 = const()[name = tensor("op_9064_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9064_end_mask_0 = const()[name = tensor("op_9064_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9064_cast_fp16 = slice_by_index(begin = var_9064_begin_0, end = var_9064_end_0, end_mask = var_9064_end_mask_0, x = var_9037_cast_fp16)[name = tensor("op_9064_cast_fp16")]; tensor var_9065_begin_0 = const()[name = tensor("op_9065_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9065_end_0 = const()[name = tensor("op_9065_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9065_end_mask_0 = const()[name = tensor("op_9065_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9065_cast_fp16 = slice_by_index(begin = var_9065_begin_0, end = var_9065_end_0, end_mask = var_9065_end_mask_0, x = var_9037_cast_fp16)[name = tensor("op_9065_cast_fp16")]; tensor var_9066_begin_0 = const()[name = tensor("op_9066_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9066_end_0 = const()[name = tensor("op_9066_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9066_end_mask_0 = const()[name = tensor("op_9066_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9066_cast_fp16 = slice_by_index(begin = var_9066_begin_0, end = var_9066_end_0, end_mask = var_9066_end_mask_0, x = var_9041_cast_fp16)[name = tensor("op_9066_cast_fp16")]; tensor var_9067_begin_0 = const()[name = tensor("op_9067_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9067_end_0 = const()[name = tensor("op_9067_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9067_end_mask_0 = const()[name = tensor("op_9067_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9067_cast_fp16 = slice_by_index(begin = var_9067_begin_0, end = var_9067_end_0, end_mask = var_9067_end_mask_0, x = var_9041_cast_fp16)[name = tensor("op_9067_cast_fp16")]; tensor var_9068_begin_0 = const()[name = tensor("op_9068_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9068_end_0 = const()[name = tensor("op_9068_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9068_end_mask_0 = const()[name = tensor("op_9068_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9068_cast_fp16 = slice_by_index(begin = var_9068_begin_0, end = var_9068_end_0, end_mask = var_9068_end_mask_0, x = var_9045_cast_fp16)[name = tensor("op_9068_cast_fp16")]; tensor var_9069_begin_0 = const()[name = tensor("op_9069_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9069_end_0 = const()[name = tensor("op_9069_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9069_end_mask_0 = const()[name = tensor("op_9069_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9069_cast_fp16 = slice_by_index(begin = var_9069_begin_0, end = var_9069_end_0, end_mask = var_9069_end_mask_0, x = var_9045_cast_fp16)[name = tensor("op_9069_cast_fp16")]; tensor var_9070_begin_0 = const()[name = tensor("op_9070_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9070_end_0 = const()[name = tensor("op_9070_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9070_end_mask_0 = const()[name = tensor("op_9070_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9070_cast_fp16 = slice_by_index(begin = var_9070_begin_0, end = var_9070_end_0, end_mask = var_9070_end_mask_0, x = var_9049_cast_fp16)[name = tensor("op_9070_cast_fp16")]; tensor var_9071_begin_0 = const()[name = tensor("op_9071_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9071_end_0 = const()[name = tensor("op_9071_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9071_end_mask_0 = const()[name = tensor("op_9071_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9071_cast_fp16 = slice_by_index(begin = var_9071_begin_0, end = var_9071_end_0, end_mask = var_9071_end_mask_0, x = var_9049_cast_fp16)[name = tensor("op_9071_cast_fp16")]; tensor var_9072_begin_0 = const()[name = tensor("op_9072_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9072_end_0 = const()[name = tensor("op_9072_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9072_end_mask_0 = const()[name = tensor("op_9072_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9072_cast_fp16 = slice_by_index(begin = var_9072_begin_0, end = var_9072_end_0, end_mask = var_9072_end_mask_0, x = var_9053_cast_fp16)[name = tensor("op_9072_cast_fp16")]; tensor var_9073_begin_0 = const()[name = tensor("op_9073_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9073_end_0 = const()[name = tensor("op_9073_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9073_end_mask_0 = const()[name = tensor("op_9073_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9073_cast_fp16 = slice_by_index(begin = var_9073_begin_0, end = var_9073_end_0, end_mask = var_9073_end_mask_0, x = var_9053_cast_fp16)[name = tensor("op_9073_cast_fp16")]; tensor var_9074_begin_0 = const()[name = tensor("op_9074_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9074_end_0 = const()[name = tensor("op_9074_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9074_end_mask_0 = const()[name = tensor("op_9074_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9074_cast_fp16 = slice_by_index(begin = var_9074_begin_0, end = var_9074_end_0, end_mask = var_9074_end_mask_0, x = var_9057_cast_fp16)[name = tensor("op_9074_cast_fp16")]; tensor var_9075_begin_0 = const()[name = tensor("op_9075_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9075_end_0 = const()[name = tensor("op_9075_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9075_end_mask_0 = const()[name = tensor("op_9075_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9075_cast_fp16 = slice_by_index(begin = var_9075_begin_0, end = var_9075_end_0, end_mask = var_9075_end_mask_0, x = var_9057_cast_fp16)[name = tensor("op_9075_cast_fp16")]; tensor k_91_perm_0 = const()[name = tensor("k_91_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_9080_begin_0 = const()[name = tensor("op_9080_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9080_end_0 = const()[name = tensor("op_9080_end_0"), val = tensor([2, 1024, 1, 80])]; tensor var_9080_end_mask_0 = const()[name = tensor("op_9080_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_9 = transpose(perm = k_91_perm_0, x = k_89_cast_fp16)[name = tensor("transpose_9")]; tensor var_9080_cast_fp16 = slice_by_index(begin = var_9080_begin_0, end = var_9080_end_0, end_mask = var_9080_end_mask_0, x = transpose_9)[name = tensor("op_9080_cast_fp16")]; tensor var_9084_begin_0 = const()[name = tensor("op_9084_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_9084_end_0 = const()[name = tensor("op_9084_end_0"), val = tensor([2, 1024, 1, 160])]; tensor var_9084_end_mask_0 = const()[name = tensor("op_9084_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9084_cast_fp16 = slice_by_index(begin = var_9084_begin_0, end = var_9084_end_0, end_mask = var_9084_end_mask_0, x = transpose_9)[name = tensor("op_9084_cast_fp16")]; tensor var_9088_begin_0 = const()[name = tensor("op_9088_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_9088_end_0 = const()[name = tensor("op_9088_end_0"), val = tensor([2, 1024, 1, 240])]; tensor var_9088_end_mask_0 = const()[name = tensor("op_9088_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9088_cast_fp16 = slice_by_index(begin = var_9088_begin_0, end = var_9088_end_0, end_mask = var_9088_end_mask_0, x = transpose_9)[name = tensor("op_9088_cast_fp16")]; tensor var_9092_begin_0 = const()[name = tensor("op_9092_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_9092_end_0 = const()[name = tensor("op_9092_end_0"), val = tensor([2, 1024, 1, 320])]; tensor var_9092_end_mask_0 = const()[name = tensor("op_9092_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9092_cast_fp16 = slice_by_index(begin = var_9092_begin_0, end = var_9092_end_0, end_mask = var_9092_end_mask_0, x = transpose_9)[name = tensor("op_9092_cast_fp16")]; tensor var_9096_begin_0 = const()[name = tensor("op_9096_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_9096_end_0 = const()[name = tensor("op_9096_end_0"), val = tensor([2, 1024, 1, 400])]; tensor var_9096_end_mask_0 = const()[name = tensor("op_9096_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9096_cast_fp16 = slice_by_index(begin = var_9096_begin_0, end = var_9096_end_0, end_mask = var_9096_end_mask_0, x = transpose_9)[name = tensor("op_9096_cast_fp16")]; tensor var_9100_begin_0 = const()[name = tensor("op_9100_begin_0"), val = tensor([0, 0, 0, 400])]; tensor var_9100_end_0 = const()[name = tensor("op_9100_end_0"), val = tensor([2, 1024, 1, 480])]; tensor var_9100_end_mask_0 = const()[name = tensor("op_9100_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9100_cast_fp16 = slice_by_index(begin = var_9100_begin_0, end = var_9100_end_0, end_mask = var_9100_end_mask_0, x = transpose_9)[name = tensor("op_9100_cast_fp16")]; tensor var_9104_begin_0 = const()[name = tensor("op_9104_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_9104_end_0 = const()[name = tensor("op_9104_end_0"), val = tensor([2, 1024, 1, 560])]; tensor var_9104_end_mask_0 = const()[name = tensor("op_9104_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9104_cast_fp16 = slice_by_index(begin = var_9104_begin_0, end = var_9104_end_0, end_mask = var_9104_end_mask_0, x = transpose_9)[name = tensor("op_9104_cast_fp16")]; tensor var_9108_begin_0 = const()[name = tensor("op_9108_begin_0"), val = tensor([0, 0, 0, 560])]; tensor var_9108_end_0 = const()[name = tensor("op_9108_end_0"), val = tensor([2, 1024, 1, 640])]; tensor var_9108_end_mask_0 = const()[name = tensor("op_9108_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9108_cast_fp16 = slice_by_index(begin = var_9108_begin_0, end = var_9108_end_0, end_mask = var_9108_end_mask_0, x = transpose_9)[name = tensor("op_9108_cast_fp16")]; tensor var_9110_begin_0 = const()[name = tensor("op_9110_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9110_end_0 = const()[name = tensor("op_9110_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9110_end_mask_0 = const()[name = tensor("op_9110_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9110_cast_fp16 = slice_by_index(begin = var_9110_begin_0, end = var_9110_end_0, end_mask = var_9110_end_mask_0, x = v_45_cast_fp16)[name = tensor("op_9110_cast_fp16")]; tensor var_9114_begin_0 = const()[name = tensor("op_9114_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_9114_end_0 = const()[name = tensor("op_9114_end_0"), val = tensor([2, 160, 1, 1024])]; tensor var_9114_end_mask_0 = const()[name = tensor("op_9114_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9114_cast_fp16 = slice_by_index(begin = var_9114_begin_0, end = var_9114_end_0, end_mask = var_9114_end_mask_0, x = v_45_cast_fp16)[name = tensor("op_9114_cast_fp16")]; tensor var_9118_begin_0 = const()[name = tensor("op_9118_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_9118_end_0 = const()[name = tensor("op_9118_end_0"), val = tensor([2, 240, 1, 1024])]; tensor var_9118_end_mask_0 = const()[name = tensor("op_9118_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9118_cast_fp16 = slice_by_index(begin = var_9118_begin_0, end = var_9118_end_0, end_mask = var_9118_end_mask_0, x = v_45_cast_fp16)[name = tensor("op_9118_cast_fp16")]; tensor var_9122_begin_0 = const()[name = tensor("op_9122_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_9122_end_0 = const()[name = tensor("op_9122_end_0"), val = tensor([2, 320, 1, 1024])]; tensor var_9122_end_mask_0 = const()[name = tensor("op_9122_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9122_cast_fp16 = slice_by_index(begin = var_9122_begin_0, end = var_9122_end_0, end_mask = var_9122_end_mask_0, x = v_45_cast_fp16)[name = tensor("op_9122_cast_fp16")]; tensor var_9126_begin_0 = const()[name = tensor("op_9126_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_9126_end_0 = const()[name = tensor("op_9126_end_0"), val = tensor([2, 400, 1, 1024])]; tensor var_9126_end_mask_0 = const()[name = tensor("op_9126_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9126_cast_fp16 = slice_by_index(begin = var_9126_begin_0, end = var_9126_end_0, end_mask = var_9126_end_mask_0, x = v_45_cast_fp16)[name = tensor("op_9126_cast_fp16")]; tensor var_9130_begin_0 = const()[name = tensor("op_9130_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_9130_end_0 = const()[name = tensor("op_9130_end_0"), val = tensor([2, 480, 1, 1024])]; tensor var_9130_end_mask_0 = const()[name = tensor("op_9130_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9130_cast_fp16 = slice_by_index(begin = var_9130_begin_0, end = var_9130_end_0, end_mask = var_9130_end_mask_0, x = v_45_cast_fp16)[name = tensor("op_9130_cast_fp16")]; tensor var_9134_begin_0 = const()[name = tensor("op_9134_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_9134_end_0 = const()[name = tensor("op_9134_end_0"), val = tensor([2, 560, 1, 1024])]; tensor var_9134_end_mask_0 = const()[name = tensor("op_9134_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9134_cast_fp16 = slice_by_index(begin = var_9134_begin_0, end = var_9134_end_0, end_mask = var_9134_end_mask_0, x = v_45_cast_fp16)[name = tensor("op_9134_cast_fp16")]; tensor var_9138_begin_0 = const()[name = tensor("op_9138_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_9138_end_0 = const()[name = tensor("op_9138_end_0"), val = tensor([2, 640, 1, 1024])]; tensor var_9138_end_mask_0 = const()[name = tensor("op_9138_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9138_cast_fp16 = slice_by_index(begin = var_9138_begin_0, end = var_9138_end_0, end_mask = var_9138_end_mask_0, x = v_45_cast_fp16)[name = tensor("op_9138_cast_fp16")]; tensor var_9142_equation_0 = const()[name = tensor("op_9142_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9142_cast_fp16 = einsum(equation = var_9142_equation_0, values = (var_9080_cast_fp16, var_9060_cast_fp16))[name = tensor("op_9142_cast_fp16")]; tensor var_9143_to_fp16 = const()[name = tensor("op_9143_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_705_cast_fp16 = mul(x = var_9142_cast_fp16, y = var_9143_to_fp16)[name = tensor("aw_chunk_705_cast_fp16")]; tensor var_9146_equation_0 = const()[name = tensor("op_9146_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9146_cast_fp16 = einsum(equation = var_9146_equation_0, values = (var_9080_cast_fp16, var_9061_cast_fp16))[name = tensor("op_9146_cast_fp16")]; tensor var_9147_to_fp16 = const()[name = tensor("op_9147_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_707_cast_fp16 = mul(x = var_9146_cast_fp16, y = var_9147_to_fp16)[name = tensor("aw_chunk_707_cast_fp16")]; tensor var_9150_equation_0 = const()[name = tensor("op_9150_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9150_cast_fp16 = einsum(equation = var_9150_equation_0, values = (var_9084_cast_fp16, var_9062_cast_fp16))[name = tensor("op_9150_cast_fp16")]; tensor var_9151_to_fp16 = const()[name = tensor("op_9151_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_709_cast_fp16 = mul(x = var_9150_cast_fp16, y = var_9151_to_fp16)[name = tensor("aw_chunk_709_cast_fp16")]; tensor var_9154_equation_0 = const()[name = tensor("op_9154_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9154_cast_fp16 = einsum(equation = var_9154_equation_0, values = (var_9084_cast_fp16, var_9063_cast_fp16))[name = tensor("op_9154_cast_fp16")]; tensor var_9155_to_fp16 = const()[name = tensor("op_9155_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_711_cast_fp16 = mul(x = var_9154_cast_fp16, y = var_9155_to_fp16)[name = tensor("aw_chunk_711_cast_fp16")]; tensor var_9158_equation_0 = const()[name = tensor("op_9158_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9158_cast_fp16 = einsum(equation = var_9158_equation_0, values = (var_9088_cast_fp16, var_9064_cast_fp16))[name = tensor("op_9158_cast_fp16")]; tensor var_9159_to_fp16 = const()[name = tensor("op_9159_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_713_cast_fp16 = mul(x = var_9158_cast_fp16, y = var_9159_to_fp16)[name = tensor("aw_chunk_713_cast_fp16")]; tensor var_9162_equation_0 = const()[name = tensor("op_9162_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9162_cast_fp16 = einsum(equation = var_9162_equation_0, values = (var_9088_cast_fp16, var_9065_cast_fp16))[name = tensor("op_9162_cast_fp16")]; tensor var_9163_to_fp16 = const()[name = tensor("op_9163_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_715_cast_fp16 = mul(x = var_9162_cast_fp16, y = var_9163_to_fp16)[name = tensor("aw_chunk_715_cast_fp16")]; tensor var_9166_equation_0 = const()[name = tensor("op_9166_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9166_cast_fp16 = einsum(equation = var_9166_equation_0, values = (var_9092_cast_fp16, var_9066_cast_fp16))[name = tensor("op_9166_cast_fp16")]; tensor var_9167_to_fp16 = const()[name = tensor("op_9167_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_717_cast_fp16 = mul(x = var_9166_cast_fp16, y = var_9167_to_fp16)[name = tensor("aw_chunk_717_cast_fp16")]; tensor var_9170_equation_0 = const()[name = tensor("op_9170_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9170_cast_fp16 = einsum(equation = var_9170_equation_0, values = (var_9092_cast_fp16, var_9067_cast_fp16))[name = tensor("op_9170_cast_fp16")]; tensor var_9171_to_fp16 = const()[name = tensor("op_9171_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_719_cast_fp16 = mul(x = var_9170_cast_fp16, y = var_9171_to_fp16)[name = tensor("aw_chunk_719_cast_fp16")]; tensor var_9174_equation_0 = const()[name = tensor("op_9174_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9174_cast_fp16 = einsum(equation = var_9174_equation_0, values = (var_9096_cast_fp16, var_9068_cast_fp16))[name = tensor("op_9174_cast_fp16")]; tensor var_9175_to_fp16 = const()[name = tensor("op_9175_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_721_cast_fp16 = mul(x = var_9174_cast_fp16, y = var_9175_to_fp16)[name = tensor("aw_chunk_721_cast_fp16")]; tensor var_9178_equation_0 = const()[name = tensor("op_9178_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9178_cast_fp16 = einsum(equation = var_9178_equation_0, values = (var_9096_cast_fp16, var_9069_cast_fp16))[name = tensor("op_9178_cast_fp16")]; tensor var_9179_to_fp16 = const()[name = tensor("op_9179_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_723_cast_fp16 = mul(x = var_9178_cast_fp16, y = var_9179_to_fp16)[name = tensor("aw_chunk_723_cast_fp16")]; tensor var_9182_equation_0 = const()[name = tensor("op_9182_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9182_cast_fp16 = einsum(equation = var_9182_equation_0, values = (var_9100_cast_fp16, var_9070_cast_fp16))[name = tensor("op_9182_cast_fp16")]; tensor var_9183_to_fp16 = const()[name = tensor("op_9183_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_725_cast_fp16 = mul(x = var_9182_cast_fp16, y = var_9183_to_fp16)[name = tensor("aw_chunk_725_cast_fp16")]; tensor var_9186_equation_0 = const()[name = tensor("op_9186_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9186_cast_fp16 = einsum(equation = var_9186_equation_0, values = (var_9100_cast_fp16, var_9071_cast_fp16))[name = tensor("op_9186_cast_fp16")]; tensor var_9187_to_fp16 = const()[name = tensor("op_9187_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_727_cast_fp16 = mul(x = var_9186_cast_fp16, y = var_9187_to_fp16)[name = tensor("aw_chunk_727_cast_fp16")]; tensor var_9190_equation_0 = const()[name = tensor("op_9190_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9190_cast_fp16 = einsum(equation = var_9190_equation_0, values = (var_9104_cast_fp16, var_9072_cast_fp16))[name = tensor("op_9190_cast_fp16")]; tensor var_9191_to_fp16 = const()[name = tensor("op_9191_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_729_cast_fp16 = mul(x = var_9190_cast_fp16, y = var_9191_to_fp16)[name = tensor("aw_chunk_729_cast_fp16")]; tensor var_9194_equation_0 = const()[name = tensor("op_9194_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9194_cast_fp16 = einsum(equation = var_9194_equation_0, values = (var_9104_cast_fp16, var_9073_cast_fp16))[name = tensor("op_9194_cast_fp16")]; tensor var_9195_to_fp16 = const()[name = tensor("op_9195_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_731_cast_fp16 = mul(x = var_9194_cast_fp16, y = var_9195_to_fp16)[name = tensor("aw_chunk_731_cast_fp16")]; tensor var_9198_equation_0 = const()[name = tensor("op_9198_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9198_cast_fp16 = einsum(equation = var_9198_equation_0, values = (var_9108_cast_fp16, var_9074_cast_fp16))[name = tensor("op_9198_cast_fp16")]; tensor var_9199_to_fp16 = const()[name = tensor("op_9199_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_733_cast_fp16 = mul(x = var_9198_cast_fp16, y = var_9199_to_fp16)[name = tensor("aw_chunk_733_cast_fp16")]; tensor var_9202_equation_0 = const()[name = tensor("op_9202_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9202_cast_fp16 = einsum(equation = var_9202_equation_0, values = (var_9108_cast_fp16, var_9075_cast_fp16))[name = tensor("op_9202_cast_fp16")]; tensor var_9203_to_fp16 = const()[name = tensor("op_9203_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_735_cast_fp16 = mul(x = var_9202_cast_fp16, y = var_9203_to_fp16)[name = tensor("aw_chunk_735_cast_fp16")]; tensor var_9205_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_705_cast_fp16)[name = tensor("op_9205_cast_fp16")]; tensor var_9206_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_707_cast_fp16)[name = tensor("op_9206_cast_fp16")]; tensor var_9207_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_709_cast_fp16)[name = tensor("op_9207_cast_fp16")]; tensor var_9208_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_711_cast_fp16)[name = tensor("op_9208_cast_fp16")]; tensor var_9209_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_713_cast_fp16)[name = tensor("op_9209_cast_fp16")]; tensor var_9210_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_715_cast_fp16)[name = tensor("op_9210_cast_fp16")]; tensor var_9211_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_717_cast_fp16)[name = tensor("op_9211_cast_fp16")]; tensor var_9212_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_719_cast_fp16)[name = tensor("op_9212_cast_fp16")]; tensor var_9213_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_721_cast_fp16)[name = tensor("op_9213_cast_fp16")]; tensor var_9214_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_723_cast_fp16)[name = tensor("op_9214_cast_fp16")]; tensor var_9215_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_725_cast_fp16)[name = tensor("op_9215_cast_fp16")]; tensor var_9216_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_727_cast_fp16)[name = tensor("op_9216_cast_fp16")]; tensor var_9217_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_729_cast_fp16)[name = tensor("op_9217_cast_fp16")]; tensor var_9218_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_731_cast_fp16)[name = tensor("op_9218_cast_fp16")]; tensor var_9219_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_733_cast_fp16)[name = tensor("op_9219_cast_fp16")]; tensor var_9220_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_735_cast_fp16)[name = tensor("op_9220_cast_fp16")]; tensor var_9222_equation_0 = const()[name = tensor("op_9222_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9222_cast_fp16 = einsum(equation = var_9222_equation_0, values = (var_9110_cast_fp16, var_9205_cast_fp16))[name = tensor("op_9222_cast_fp16")]; tensor var_9224_equation_0 = const()[name = tensor("op_9224_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9224_cast_fp16 = einsum(equation = var_9224_equation_0, values = (var_9110_cast_fp16, var_9206_cast_fp16))[name = tensor("op_9224_cast_fp16")]; tensor var_9226_equation_0 = const()[name = tensor("op_9226_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9226_cast_fp16 = einsum(equation = var_9226_equation_0, values = (var_9114_cast_fp16, var_9207_cast_fp16))[name = tensor("op_9226_cast_fp16")]; tensor var_9228_equation_0 = const()[name = tensor("op_9228_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9228_cast_fp16 = einsum(equation = var_9228_equation_0, values = (var_9114_cast_fp16, var_9208_cast_fp16))[name = tensor("op_9228_cast_fp16")]; tensor var_9230_equation_0 = const()[name = tensor("op_9230_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9230_cast_fp16 = einsum(equation = var_9230_equation_0, values = (var_9118_cast_fp16, var_9209_cast_fp16))[name = tensor("op_9230_cast_fp16")]; tensor var_9232_equation_0 = const()[name = tensor("op_9232_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9232_cast_fp16 = einsum(equation = var_9232_equation_0, values = (var_9118_cast_fp16, var_9210_cast_fp16))[name = tensor("op_9232_cast_fp16")]; tensor var_9234_equation_0 = const()[name = tensor("op_9234_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9234_cast_fp16 = einsum(equation = var_9234_equation_0, values = (var_9122_cast_fp16, var_9211_cast_fp16))[name = tensor("op_9234_cast_fp16")]; tensor var_9236_equation_0 = const()[name = tensor("op_9236_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9236_cast_fp16 = einsum(equation = var_9236_equation_0, values = (var_9122_cast_fp16, var_9212_cast_fp16))[name = tensor("op_9236_cast_fp16")]; tensor var_9238_equation_0 = const()[name = tensor("op_9238_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9238_cast_fp16 = einsum(equation = var_9238_equation_0, values = (var_9126_cast_fp16, var_9213_cast_fp16))[name = tensor("op_9238_cast_fp16")]; tensor var_9240_equation_0 = const()[name = tensor("op_9240_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9240_cast_fp16 = einsum(equation = var_9240_equation_0, values = (var_9126_cast_fp16, var_9214_cast_fp16))[name = tensor("op_9240_cast_fp16")]; tensor var_9242_equation_0 = const()[name = tensor("op_9242_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9242_cast_fp16 = einsum(equation = var_9242_equation_0, values = (var_9130_cast_fp16, var_9215_cast_fp16))[name = tensor("op_9242_cast_fp16")]; tensor var_9244_equation_0 = const()[name = tensor("op_9244_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9244_cast_fp16 = einsum(equation = var_9244_equation_0, values = (var_9130_cast_fp16, var_9216_cast_fp16))[name = tensor("op_9244_cast_fp16")]; tensor var_9246_equation_0 = const()[name = tensor("op_9246_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9246_cast_fp16 = einsum(equation = var_9246_equation_0, values = (var_9134_cast_fp16, var_9217_cast_fp16))[name = tensor("op_9246_cast_fp16")]; tensor var_9248_equation_0 = const()[name = tensor("op_9248_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9248_cast_fp16 = einsum(equation = var_9248_equation_0, values = (var_9134_cast_fp16, var_9218_cast_fp16))[name = tensor("op_9248_cast_fp16")]; tensor var_9250_equation_0 = const()[name = tensor("op_9250_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9250_cast_fp16 = einsum(equation = var_9250_equation_0, values = (var_9138_cast_fp16, var_9219_cast_fp16))[name = tensor("op_9250_cast_fp16")]; tensor var_9252_equation_0 = const()[name = tensor("op_9252_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9252_cast_fp16 = einsum(equation = var_9252_equation_0, values = (var_9138_cast_fp16, var_9220_cast_fp16))[name = tensor("op_9252_cast_fp16")]; tensor var_9254_interleave_0 = const()[name = tensor("op_9254_interleave_0"), val = tensor(false)]; tensor var_9254_cast_fp16 = concat(axis = var_8160, interleave = var_9254_interleave_0, values = (var_9222_cast_fp16, var_9224_cast_fp16))[name = tensor("op_9254_cast_fp16")]; tensor var_9256_interleave_0 = const()[name = tensor("op_9256_interleave_0"), val = tensor(false)]; tensor var_9256_cast_fp16 = concat(axis = var_8160, interleave = var_9256_interleave_0, values = (var_9226_cast_fp16, var_9228_cast_fp16))[name = tensor("op_9256_cast_fp16")]; tensor var_9258_interleave_0 = const()[name = tensor("op_9258_interleave_0"), val = tensor(false)]; tensor var_9258_cast_fp16 = concat(axis = var_8160, interleave = var_9258_interleave_0, values = (var_9230_cast_fp16, var_9232_cast_fp16))[name = tensor("op_9258_cast_fp16")]; tensor var_9260_interleave_0 = const()[name = tensor("op_9260_interleave_0"), val = tensor(false)]; tensor var_9260_cast_fp16 = concat(axis = var_8160, interleave = var_9260_interleave_0, values = (var_9234_cast_fp16, var_9236_cast_fp16))[name = tensor("op_9260_cast_fp16")]; tensor var_9262_interleave_0 = const()[name = tensor("op_9262_interleave_0"), val = tensor(false)]; tensor var_9262_cast_fp16 = concat(axis = var_8160, interleave = var_9262_interleave_0, values = (var_9238_cast_fp16, var_9240_cast_fp16))[name = tensor("op_9262_cast_fp16")]; tensor var_9264_interleave_0 = const()[name = tensor("op_9264_interleave_0"), val = tensor(false)]; tensor var_9264_cast_fp16 = concat(axis = var_8160, interleave = var_9264_interleave_0, values = (var_9242_cast_fp16, var_9244_cast_fp16))[name = tensor("op_9264_cast_fp16")]; tensor var_9266_interleave_0 = const()[name = tensor("op_9266_interleave_0"), val = tensor(false)]; tensor var_9266_cast_fp16 = concat(axis = var_8160, interleave = var_9266_interleave_0, values = (var_9246_cast_fp16, var_9248_cast_fp16))[name = tensor("op_9266_cast_fp16")]; tensor var_9268_interleave_0 = const()[name = tensor("op_9268_interleave_0"), val = tensor(false)]; tensor var_9268_cast_fp16 = concat(axis = var_8160, interleave = var_9268_interleave_0, values = (var_9250_cast_fp16, var_9252_cast_fp16))[name = tensor("op_9268_cast_fp16")]; tensor input_409_interleave_0 = const()[name = tensor("input_409_interleave_0"), val = tensor(false)]; tensor input_409_cast_fp16 = concat(axis = var_8182, interleave = input_409_interleave_0, values = (var_9254_cast_fp16, var_9256_cast_fp16, var_9258_cast_fp16, var_9260_cast_fp16, var_9262_cast_fp16, var_9264_cast_fp16, var_9266_cast_fp16, var_9268_cast_fp16))[name = tensor("input_409_cast_fp16")]; tensor var_9274 = const()[name = tensor("op_9274"), val = tensor([1, 1])]; tensor var_9276 = const()[name = tensor("op_9276"), val = tensor([1, 1])]; tensor var_9278_pad_type_0 = const()[name = tensor("op_9278_pad_type_0"), val = tensor("custom")]; tensor var_9278_pad_0 = const()[name = tensor("op_9278_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1619301632)))]; tensor up_blocks_2_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1620120896)))]; tensor var_9278_cast_fp16 = conv(bias = up_blocks_2_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_9276, groups = var_8182, pad = var_9278_pad_0, pad_type = var_9278_pad_type_0, strides = var_9274, weight = up_blocks_2_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_409_cast_fp16)[name = tensor("op_9278_cast_fp16")]; tensor inputs_69_cast_fp16 = add(x = var_9278_cast_fp16, y = inputs_67_cast_fp16)[name = tensor("inputs_69_cast_fp16")]; tensor hidden_states_249_axes_0 = const()[name = tensor("hidden_states_249_axes_0"), val = tensor([1])]; tensor hidden_states_249_gamma_0_to_fp16 = const()[name = tensor("hidden_states_249_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1620122240)))]; tensor hidden_states_249_beta_0_to_fp16 = const()[name = tensor("hidden_states_249_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1620123584)))]; tensor var_9288_to_fp16 = const()[name = tensor("op_9288_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_249_cast_fp16 = layer_norm(axes = hidden_states_249_axes_0, beta = hidden_states_249_beta_0_to_fp16, epsilon = var_9288_to_fp16, gamma = hidden_states_249_gamma_0_to_fp16, x = inputs_69_cast_fp16)[name = tensor("hidden_states_249_cast_fp16")]; tensor var_9303 = const()[name = tensor("op_9303"), val = tensor([1, 1])]; tensor var_9305 = const()[name = tensor("op_9305"), val = tensor([1, 1])]; tensor q_47_pad_type_0 = const()[name = tensor("q_47_pad_type_0"), val = tensor("custom")]; tensor q_47_pad_0 = const()[name = tensor("q_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1620124928)))]; tensor q_47_cast_fp16 = conv(dilations = var_9305, groups = var_8182, pad = q_47_pad_0, pad_type = q_47_pad_type_0, strides = var_9303, weight = up_blocks_2_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_249_cast_fp16)[name = tensor("q_47_cast_fp16")]; tensor var_9309 = const()[name = tensor("op_9309"), val = tensor([1, 1])]; tensor var_9311 = const()[name = tensor("op_9311"), val = tensor([1, 1])]; tensor k_93_pad_type_0 = const()[name = tensor("k_93_pad_type_0"), val = tensor("custom")]; tensor k_93_pad_0 = const()[name = tensor("k_93_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1620944192)))]; tensor k_93_cast_fp16 = conv(dilations = var_9311, groups = var_8182, pad = k_93_pad_0, pad_type = k_93_pad_type_0, strides = var_9309, weight = up_blocks_2_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_93_cast_fp16")]; tensor var_9315 = const()[name = tensor("op_9315"), val = tensor([1, 1])]; tensor var_9317 = const()[name = tensor("op_9317"), val = tensor([1, 1])]; tensor v_47_pad_type_0 = const()[name = tensor("v_47_pad_type_0"), val = tensor("custom")]; tensor v_47_pad_0 = const()[name = tensor("v_47_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1621927296)))]; tensor v_47_cast_fp16 = conv(dilations = var_9317, groups = var_8182, pad = v_47_pad_0, pad_type = v_47_pad_type_0, strides = var_9315, weight = up_blocks_2_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_47_cast_fp16")]; tensor var_9321_begin_0 = const()[name = tensor("op_9321_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9321_end_0 = const()[name = tensor("op_9321_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9321_end_mask_0 = const()[name = tensor("op_9321_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9321_cast_fp16 = slice_by_index(begin = var_9321_begin_0, end = var_9321_end_0, end_mask = var_9321_end_mask_0, x = q_47_cast_fp16)[name = tensor("op_9321_cast_fp16")]; tensor var_9325_begin_0 = const()[name = tensor("op_9325_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_9325_end_0 = const()[name = tensor("op_9325_end_0"), val = tensor([2, 160, 1, 1024])]; tensor var_9325_end_mask_0 = const()[name = tensor("op_9325_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9325_cast_fp16 = slice_by_index(begin = var_9325_begin_0, end = var_9325_end_0, end_mask = var_9325_end_mask_0, x = q_47_cast_fp16)[name = tensor("op_9325_cast_fp16")]; tensor var_9329_begin_0 = const()[name = tensor("op_9329_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_9329_end_0 = const()[name = tensor("op_9329_end_0"), val = tensor([2, 240, 1, 1024])]; tensor var_9329_end_mask_0 = const()[name = tensor("op_9329_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9329_cast_fp16 = slice_by_index(begin = var_9329_begin_0, end = var_9329_end_0, end_mask = var_9329_end_mask_0, x = q_47_cast_fp16)[name = tensor("op_9329_cast_fp16")]; tensor var_9333_begin_0 = const()[name = tensor("op_9333_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_9333_end_0 = const()[name = tensor("op_9333_end_0"), val = tensor([2, 320, 1, 1024])]; tensor var_9333_end_mask_0 = const()[name = tensor("op_9333_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9333_cast_fp16 = slice_by_index(begin = var_9333_begin_0, end = var_9333_end_0, end_mask = var_9333_end_mask_0, x = q_47_cast_fp16)[name = tensor("op_9333_cast_fp16")]; tensor var_9337_begin_0 = const()[name = tensor("op_9337_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_9337_end_0 = const()[name = tensor("op_9337_end_0"), val = tensor([2, 400, 1, 1024])]; tensor var_9337_end_mask_0 = const()[name = tensor("op_9337_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9337_cast_fp16 = slice_by_index(begin = var_9337_begin_0, end = var_9337_end_0, end_mask = var_9337_end_mask_0, x = q_47_cast_fp16)[name = tensor("op_9337_cast_fp16")]; tensor var_9341_begin_0 = const()[name = tensor("op_9341_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_9341_end_0 = const()[name = tensor("op_9341_end_0"), val = tensor([2, 480, 1, 1024])]; tensor var_9341_end_mask_0 = const()[name = tensor("op_9341_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9341_cast_fp16 = slice_by_index(begin = var_9341_begin_0, end = var_9341_end_0, end_mask = var_9341_end_mask_0, x = q_47_cast_fp16)[name = tensor("op_9341_cast_fp16")]; tensor var_9345_begin_0 = const()[name = tensor("op_9345_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_9345_end_0 = const()[name = tensor("op_9345_end_0"), val = tensor([2, 560, 1, 1024])]; tensor var_9345_end_mask_0 = const()[name = tensor("op_9345_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9345_cast_fp16 = slice_by_index(begin = var_9345_begin_0, end = var_9345_end_0, end_mask = var_9345_end_mask_0, x = q_47_cast_fp16)[name = tensor("op_9345_cast_fp16")]; tensor var_9349_begin_0 = const()[name = tensor("op_9349_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_9349_end_0 = const()[name = tensor("op_9349_end_0"), val = tensor([2, 640, 1, 1024])]; tensor var_9349_end_mask_0 = const()[name = tensor("op_9349_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9349_cast_fp16 = slice_by_index(begin = var_9349_begin_0, end = var_9349_end_0, end_mask = var_9349_end_mask_0, x = q_47_cast_fp16)[name = tensor("op_9349_cast_fp16")]; tensor var_9352_begin_0 = const()[name = tensor("op_9352_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9352_end_0 = const()[name = tensor("op_9352_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9352_end_mask_0 = const()[name = tensor("op_9352_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9352_cast_fp16 = slice_by_index(begin = var_9352_begin_0, end = var_9352_end_0, end_mask = var_9352_end_mask_0, x = var_9321_cast_fp16)[name = tensor("op_9352_cast_fp16")]; tensor var_9353_begin_0 = const()[name = tensor("op_9353_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9353_end_0 = const()[name = tensor("op_9353_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9353_end_mask_0 = const()[name = tensor("op_9353_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9353_cast_fp16 = slice_by_index(begin = var_9353_begin_0, end = var_9353_end_0, end_mask = var_9353_end_mask_0, x = var_9321_cast_fp16)[name = tensor("op_9353_cast_fp16")]; tensor var_9354_begin_0 = const()[name = tensor("op_9354_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9354_end_0 = const()[name = tensor("op_9354_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9354_end_mask_0 = const()[name = tensor("op_9354_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9354_cast_fp16 = slice_by_index(begin = var_9354_begin_0, end = var_9354_end_0, end_mask = var_9354_end_mask_0, x = var_9325_cast_fp16)[name = tensor("op_9354_cast_fp16")]; tensor var_9355_begin_0 = const()[name = tensor("op_9355_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9355_end_0 = const()[name = tensor("op_9355_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9355_end_mask_0 = const()[name = tensor("op_9355_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9355_cast_fp16 = slice_by_index(begin = var_9355_begin_0, end = var_9355_end_0, end_mask = var_9355_end_mask_0, x = var_9325_cast_fp16)[name = tensor("op_9355_cast_fp16")]; tensor var_9356_begin_0 = const()[name = tensor("op_9356_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9356_end_0 = const()[name = tensor("op_9356_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9356_end_mask_0 = const()[name = tensor("op_9356_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9356_cast_fp16 = slice_by_index(begin = var_9356_begin_0, end = var_9356_end_0, end_mask = var_9356_end_mask_0, x = var_9329_cast_fp16)[name = tensor("op_9356_cast_fp16")]; tensor var_9357_begin_0 = const()[name = tensor("op_9357_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9357_end_0 = const()[name = tensor("op_9357_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9357_end_mask_0 = const()[name = tensor("op_9357_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9357_cast_fp16 = slice_by_index(begin = var_9357_begin_0, end = var_9357_end_0, end_mask = var_9357_end_mask_0, x = var_9329_cast_fp16)[name = tensor("op_9357_cast_fp16")]; tensor var_9358_begin_0 = const()[name = tensor("op_9358_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9358_end_0 = const()[name = tensor("op_9358_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9358_end_mask_0 = const()[name = tensor("op_9358_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9358_cast_fp16 = slice_by_index(begin = var_9358_begin_0, end = var_9358_end_0, end_mask = var_9358_end_mask_0, x = var_9333_cast_fp16)[name = tensor("op_9358_cast_fp16")]; tensor var_9359_begin_0 = const()[name = tensor("op_9359_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9359_end_0 = const()[name = tensor("op_9359_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9359_end_mask_0 = const()[name = tensor("op_9359_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9359_cast_fp16 = slice_by_index(begin = var_9359_begin_0, end = var_9359_end_0, end_mask = var_9359_end_mask_0, x = var_9333_cast_fp16)[name = tensor("op_9359_cast_fp16")]; tensor var_9360_begin_0 = const()[name = tensor("op_9360_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9360_end_0 = const()[name = tensor("op_9360_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9360_end_mask_0 = const()[name = tensor("op_9360_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9360_cast_fp16 = slice_by_index(begin = var_9360_begin_0, end = var_9360_end_0, end_mask = var_9360_end_mask_0, x = var_9337_cast_fp16)[name = tensor("op_9360_cast_fp16")]; tensor var_9361_begin_0 = const()[name = tensor("op_9361_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9361_end_0 = const()[name = tensor("op_9361_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9361_end_mask_0 = const()[name = tensor("op_9361_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9361_cast_fp16 = slice_by_index(begin = var_9361_begin_0, end = var_9361_end_0, end_mask = var_9361_end_mask_0, x = var_9337_cast_fp16)[name = tensor("op_9361_cast_fp16")]; tensor var_9362_begin_0 = const()[name = tensor("op_9362_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9362_end_0 = const()[name = tensor("op_9362_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9362_end_mask_0 = const()[name = tensor("op_9362_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9362_cast_fp16 = slice_by_index(begin = var_9362_begin_0, end = var_9362_end_0, end_mask = var_9362_end_mask_0, x = var_9341_cast_fp16)[name = tensor("op_9362_cast_fp16")]; tensor var_9363_begin_0 = const()[name = tensor("op_9363_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9363_end_0 = const()[name = tensor("op_9363_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9363_end_mask_0 = const()[name = tensor("op_9363_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9363_cast_fp16 = slice_by_index(begin = var_9363_begin_0, end = var_9363_end_0, end_mask = var_9363_end_mask_0, x = var_9341_cast_fp16)[name = tensor("op_9363_cast_fp16")]; tensor var_9364_begin_0 = const()[name = tensor("op_9364_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9364_end_0 = const()[name = tensor("op_9364_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9364_end_mask_0 = const()[name = tensor("op_9364_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9364_cast_fp16 = slice_by_index(begin = var_9364_begin_0, end = var_9364_end_0, end_mask = var_9364_end_mask_0, x = var_9345_cast_fp16)[name = tensor("op_9364_cast_fp16")]; tensor var_9365_begin_0 = const()[name = tensor("op_9365_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9365_end_0 = const()[name = tensor("op_9365_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9365_end_mask_0 = const()[name = tensor("op_9365_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9365_cast_fp16 = slice_by_index(begin = var_9365_begin_0, end = var_9365_end_0, end_mask = var_9365_end_mask_0, x = var_9345_cast_fp16)[name = tensor("op_9365_cast_fp16")]; tensor var_9366_begin_0 = const()[name = tensor("op_9366_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9366_end_0 = const()[name = tensor("op_9366_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9366_end_mask_0 = const()[name = tensor("op_9366_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9366_cast_fp16 = slice_by_index(begin = var_9366_begin_0, end = var_9366_end_0, end_mask = var_9366_end_mask_0, x = var_9349_cast_fp16)[name = tensor("op_9366_cast_fp16")]; tensor var_9367_begin_0 = const()[name = tensor("op_9367_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9367_end_0 = const()[name = tensor("op_9367_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9367_end_mask_0 = const()[name = tensor("op_9367_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9367_cast_fp16 = slice_by_index(begin = var_9367_begin_0, end = var_9367_end_0, end_mask = var_9367_end_mask_0, x = var_9349_cast_fp16)[name = tensor("op_9367_cast_fp16")]; tensor k_95_perm_0 = const()[name = tensor("k_95_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_9372_begin_0 = const()[name = tensor("op_9372_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9372_end_0 = const()[name = tensor("op_9372_end_0"), val = tensor([2, 77, 1, 80])]; tensor var_9372_end_mask_0 = const()[name = tensor("op_9372_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_8 = transpose(perm = k_95_perm_0, x = k_93_cast_fp16)[name = tensor("transpose_8")]; tensor var_9372_cast_fp16 = slice_by_index(begin = var_9372_begin_0, end = var_9372_end_0, end_mask = var_9372_end_mask_0, x = transpose_8)[name = tensor("op_9372_cast_fp16")]; tensor var_9376_begin_0 = const()[name = tensor("op_9376_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_9376_end_0 = const()[name = tensor("op_9376_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_9376_end_mask_0 = const()[name = tensor("op_9376_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9376_cast_fp16 = slice_by_index(begin = var_9376_begin_0, end = var_9376_end_0, end_mask = var_9376_end_mask_0, x = transpose_8)[name = tensor("op_9376_cast_fp16")]; tensor var_9380_begin_0 = const()[name = tensor("op_9380_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_9380_end_0 = const()[name = tensor("op_9380_end_0"), val = tensor([2, 77, 1, 240])]; tensor var_9380_end_mask_0 = const()[name = tensor("op_9380_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9380_cast_fp16 = slice_by_index(begin = var_9380_begin_0, end = var_9380_end_0, end_mask = var_9380_end_mask_0, x = transpose_8)[name = tensor("op_9380_cast_fp16")]; tensor var_9384_begin_0 = const()[name = tensor("op_9384_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_9384_end_0 = const()[name = tensor("op_9384_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_9384_end_mask_0 = const()[name = tensor("op_9384_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9384_cast_fp16 = slice_by_index(begin = var_9384_begin_0, end = var_9384_end_0, end_mask = var_9384_end_mask_0, x = transpose_8)[name = tensor("op_9384_cast_fp16")]; tensor var_9388_begin_0 = const()[name = tensor("op_9388_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_9388_end_0 = const()[name = tensor("op_9388_end_0"), val = tensor([2, 77, 1, 400])]; tensor var_9388_end_mask_0 = const()[name = tensor("op_9388_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9388_cast_fp16 = slice_by_index(begin = var_9388_begin_0, end = var_9388_end_0, end_mask = var_9388_end_mask_0, x = transpose_8)[name = tensor("op_9388_cast_fp16")]; tensor var_9392_begin_0 = const()[name = tensor("op_9392_begin_0"), val = tensor([0, 0, 0, 400])]; tensor var_9392_end_0 = const()[name = tensor("op_9392_end_0"), val = tensor([2, 77, 1, 480])]; tensor var_9392_end_mask_0 = const()[name = tensor("op_9392_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9392_cast_fp16 = slice_by_index(begin = var_9392_begin_0, end = var_9392_end_0, end_mask = var_9392_end_mask_0, x = transpose_8)[name = tensor("op_9392_cast_fp16")]; tensor var_9396_begin_0 = const()[name = tensor("op_9396_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_9396_end_0 = const()[name = tensor("op_9396_end_0"), val = tensor([2, 77, 1, 560])]; tensor var_9396_end_mask_0 = const()[name = tensor("op_9396_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9396_cast_fp16 = slice_by_index(begin = var_9396_begin_0, end = var_9396_end_0, end_mask = var_9396_end_mask_0, x = transpose_8)[name = tensor("op_9396_cast_fp16")]; tensor var_9400_begin_0 = const()[name = tensor("op_9400_begin_0"), val = tensor([0, 0, 0, 560])]; tensor var_9400_end_0 = const()[name = tensor("op_9400_end_0"), val = tensor([2, 77, 1, 640])]; tensor var_9400_end_mask_0 = const()[name = tensor("op_9400_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9400_cast_fp16 = slice_by_index(begin = var_9400_begin_0, end = var_9400_end_0, end_mask = var_9400_end_mask_0, x = transpose_8)[name = tensor("op_9400_cast_fp16")]; tensor var_9402_begin_0 = const()[name = tensor("op_9402_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9402_end_0 = const()[name = tensor("op_9402_end_0"), val = tensor([2, 80, 1, 77])]; tensor var_9402_end_mask_0 = const()[name = tensor("op_9402_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9402_cast_fp16 = slice_by_index(begin = var_9402_begin_0, end = var_9402_end_0, end_mask = var_9402_end_mask_0, x = v_47_cast_fp16)[name = tensor("op_9402_cast_fp16")]; tensor var_9406_begin_0 = const()[name = tensor("op_9406_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_9406_end_0 = const()[name = tensor("op_9406_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_9406_end_mask_0 = const()[name = tensor("op_9406_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9406_cast_fp16 = slice_by_index(begin = var_9406_begin_0, end = var_9406_end_0, end_mask = var_9406_end_mask_0, x = v_47_cast_fp16)[name = tensor("op_9406_cast_fp16")]; tensor var_9410_begin_0 = const()[name = tensor("op_9410_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_9410_end_0 = const()[name = tensor("op_9410_end_0"), val = tensor([2, 240, 1, 77])]; tensor var_9410_end_mask_0 = const()[name = tensor("op_9410_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9410_cast_fp16 = slice_by_index(begin = var_9410_begin_0, end = var_9410_end_0, end_mask = var_9410_end_mask_0, x = v_47_cast_fp16)[name = tensor("op_9410_cast_fp16")]; tensor var_9414_begin_0 = const()[name = tensor("op_9414_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_9414_end_0 = const()[name = tensor("op_9414_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_9414_end_mask_0 = const()[name = tensor("op_9414_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9414_cast_fp16 = slice_by_index(begin = var_9414_begin_0, end = var_9414_end_0, end_mask = var_9414_end_mask_0, x = v_47_cast_fp16)[name = tensor("op_9414_cast_fp16")]; tensor var_9418_begin_0 = const()[name = tensor("op_9418_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_9418_end_0 = const()[name = tensor("op_9418_end_0"), val = tensor([2, 400, 1, 77])]; tensor var_9418_end_mask_0 = const()[name = tensor("op_9418_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9418_cast_fp16 = slice_by_index(begin = var_9418_begin_0, end = var_9418_end_0, end_mask = var_9418_end_mask_0, x = v_47_cast_fp16)[name = tensor("op_9418_cast_fp16")]; tensor var_9422_begin_0 = const()[name = tensor("op_9422_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_9422_end_0 = const()[name = tensor("op_9422_end_0"), val = tensor([2, 480, 1, 77])]; tensor var_9422_end_mask_0 = const()[name = tensor("op_9422_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9422_cast_fp16 = slice_by_index(begin = var_9422_begin_0, end = var_9422_end_0, end_mask = var_9422_end_mask_0, x = v_47_cast_fp16)[name = tensor("op_9422_cast_fp16")]; tensor var_9426_begin_0 = const()[name = tensor("op_9426_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_9426_end_0 = const()[name = tensor("op_9426_end_0"), val = tensor([2, 560, 1, 77])]; tensor var_9426_end_mask_0 = const()[name = tensor("op_9426_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9426_cast_fp16 = slice_by_index(begin = var_9426_begin_0, end = var_9426_end_0, end_mask = var_9426_end_mask_0, x = v_47_cast_fp16)[name = tensor("op_9426_cast_fp16")]; tensor var_9430_begin_0 = const()[name = tensor("op_9430_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_9430_end_0 = const()[name = tensor("op_9430_end_0"), val = tensor([2, 640, 1, 77])]; tensor var_9430_end_mask_0 = const()[name = tensor("op_9430_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9430_cast_fp16 = slice_by_index(begin = var_9430_begin_0, end = var_9430_end_0, end_mask = var_9430_end_mask_0, x = v_47_cast_fp16)[name = tensor("op_9430_cast_fp16")]; tensor var_9434_equation_0 = const()[name = tensor("op_9434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9434_cast_fp16 = einsum(equation = var_9434_equation_0, values = (var_9372_cast_fp16, var_9352_cast_fp16))[name = tensor("op_9434_cast_fp16")]; tensor var_9435_to_fp16 = const()[name = tensor("op_9435_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_737_cast_fp16 = mul(x = var_9434_cast_fp16, y = var_9435_to_fp16)[name = tensor("aw_chunk_737_cast_fp16")]; tensor var_9438_equation_0 = const()[name = tensor("op_9438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9438_cast_fp16 = einsum(equation = var_9438_equation_0, values = (var_9372_cast_fp16, var_9353_cast_fp16))[name = tensor("op_9438_cast_fp16")]; tensor var_9439_to_fp16 = const()[name = tensor("op_9439_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_739_cast_fp16 = mul(x = var_9438_cast_fp16, y = var_9439_to_fp16)[name = tensor("aw_chunk_739_cast_fp16")]; tensor var_9442_equation_0 = const()[name = tensor("op_9442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9442_cast_fp16 = einsum(equation = var_9442_equation_0, values = (var_9376_cast_fp16, var_9354_cast_fp16))[name = tensor("op_9442_cast_fp16")]; tensor var_9443_to_fp16 = const()[name = tensor("op_9443_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_741_cast_fp16 = mul(x = var_9442_cast_fp16, y = var_9443_to_fp16)[name = tensor("aw_chunk_741_cast_fp16")]; tensor var_9446_equation_0 = const()[name = tensor("op_9446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9446_cast_fp16 = einsum(equation = var_9446_equation_0, values = (var_9376_cast_fp16, var_9355_cast_fp16))[name = tensor("op_9446_cast_fp16")]; tensor var_9447_to_fp16 = const()[name = tensor("op_9447_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_743_cast_fp16 = mul(x = var_9446_cast_fp16, y = var_9447_to_fp16)[name = tensor("aw_chunk_743_cast_fp16")]; tensor var_9450_equation_0 = const()[name = tensor("op_9450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9450_cast_fp16 = einsum(equation = var_9450_equation_0, values = (var_9380_cast_fp16, var_9356_cast_fp16))[name = tensor("op_9450_cast_fp16")]; tensor var_9451_to_fp16 = const()[name = tensor("op_9451_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_745_cast_fp16 = mul(x = var_9450_cast_fp16, y = var_9451_to_fp16)[name = tensor("aw_chunk_745_cast_fp16")]; tensor var_9454_equation_0 = const()[name = tensor("op_9454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9454_cast_fp16 = einsum(equation = var_9454_equation_0, values = (var_9380_cast_fp16, var_9357_cast_fp16))[name = tensor("op_9454_cast_fp16")]; tensor var_9455_to_fp16 = const()[name = tensor("op_9455_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_747_cast_fp16 = mul(x = var_9454_cast_fp16, y = var_9455_to_fp16)[name = tensor("aw_chunk_747_cast_fp16")]; tensor var_9458_equation_0 = const()[name = tensor("op_9458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9458_cast_fp16 = einsum(equation = var_9458_equation_0, values = (var_9384_cast_fp16, var_9358_cast_fp16))[name = tensor("op_9458_cast_fp16")]; tensor var_9459_to_fp16 = const()[name = tensor("op_9459_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_749_cast_fp16 = mul(x = var_9458_cast_fp16, y = var_9459_to_fp16)[name = tensor("aw_chunk_749_cast_fp16")]; tensor var_9462_equation_0 = const()[name = tensor("op_9462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9462_cast_fp16 = einsum(equation = var_9462_equation_0, values = (var_9384_cast_fp16, var_9359_cast_fp16))[name = tensor("op_9462_cast_fp16")]; tensor var_9463_to_fp16 = const()[name = tensor("op_9463_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_751_cast_fp16 = mul(x = var_9462_cast_fp16, y = var_9463_to_fp16)[name = tensor("aw_chunk_751_cast_fp16")]; tensor var_9466_equation_0 = const()[name = tensor("op_9466_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9466_cast_fp16 = einsum(equation = var_9466_equation_0, values = (var_9388_cast_fp16, var_9360_cast_fp16))[name = tensor("op_9466_cast_fp16")]; tensor var_9467_to_fp16 = const()[name = tensor("op_9467_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_753_cast_fp16 = mul(x = var_9466_cast_fp16, y = var_9467_to_fp16)[name = tensor("aw_chunk_753_cast_fp16")]; tensor var_9470_equation_0 = const()[name = tensor("op_9470_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9470_cast_fp16 = einsum(equation = var_9470_equation_0, values = (var_9388_cast_fp16, var_9361_cast_fp16))[name = tensor("op_9470_cast_fp16")]; tensor var_9471_to_fp16 = const()[name = tensor("op_9471_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_755_cast_fp16 = mul(x = var_9470_cast_fp16, y = var_9471_to_fp16)[name = tensor("aw_chunk_755_cast_fp16")]; tensor var_9474_equation_0 = const()[name = tensor("op_9474_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9474_cast_fp16 = einsum(equation = var_9474_equation_0, values = (var_9392_cast_fp16, var_9362_cast_fp16))[name = tensor("op_9474_cast_fp16")]; tensor var_9475_to_fp16 = const()[name = tensor("op_9475_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_757_cast_fp16 = mul(x = var_9474_cast_fp16, y = var_9475_to_fp16)[name = tensor("aw_chunk_757_cast_fp16")]; tensor var_9478_equation_0 = const()[name = tensor("op_9478_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9478_cast_fp16 = einsum(equation = var_9478_equation_0, values = (var_9392_cast_fp16, var_9363_cast_fp16))[name = tensor("op_9478_cast_fp16")]; tensor var_9479_to_fp16 = const()[name = tensor("op_9479_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_759_cast_fp16 = mul(x = var_9478_cast_fp16, y = var_9479_to_fp16)[name = tensor("aw_chunk_759_cast_fp16")]; tensor var_9482_equation_0 = const()[name = tensor("op_9482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9482_cast_fp16 = einsum(equation = var_9482_equation_0, values = (var_9396_cast_fp16, var_9364_cast_fp16))[name = tensor("op_9482_cast_fp16")]; tensor var_9483_to_fp16 = const()[name = tensor("op_9483_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_761_cast_fp16 = mul(x = var_9482_cast_fp16, y = var_9483_to_fp16)[name = tensor("aw_chunk_761_cast_fp16")]; tensor var_9486_equation_0 = const()[name = tensor("op_9486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9486_cast_fp16 = einsum(equation = var_9486_equation_0, values = (var_9396_cast_fp16, var_9365_cast_fp16))[name = tensor("op_9486_cast_fp16")]; tensor var_9487_to_fp16 = const()[name = tensor("op_9487_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_763_cast_fp16 = mul(x = var_9486_cast_fp16, y = var_9487_to_fp16)[name = tensor("aw_chunk_763_cast_fp16")]; tensor var_9490_equation_0 = const()[name = tensor("op_9490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9490_cast_fp16 = einsum(equation = var_9490_equation_0, values = (var_9400_cast_fp16, var_9366_cast_fp16))[name = tensor("op_9490_cast_fp16")]; tensor var_9491_to_fp16 = const()[name = tensor("op_9491_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_765_cast_fp16 = mul(x = var_9490_cast_fp16, y = var_9491_to_fp16)[name = tensor("aw_chunk_765_cast_fp16")]; tensor var_9494_equation_0 = const()[name = tensor("op_9494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9494_cast_fp16 = einsum(equation = var_9494_equation_0, values = (var_9400_cast_fp16, var_9367_cast_fp16))[name = tensor("op_9494_cast_fp16")]; tensor var_9495_to_fp16 = const()[name = tensor("op_9495_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_767_cast_fp16 = mul(x = var_9494_cast_fp16, y = var_9495_to_fp16)[name = tensor("aw_chunk_767_cast_fp16")]; tensor var_9497_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_737_cast_fp16)[name = tensor("op_9497_cast_fp16")]; tensor var_9498_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_739_cast_fp16)[name = tensor("op_9498_cast_fp16")]; tensor var_9499_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_741_cast_fp16)[name = tensor("op_9499_cast_fp16")]; tensor var_9500_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_743_cast_fp16)[name = tensor("op_9500_cast_fp16")]; tensor var_9501_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_745_cast_fp16)[name = tensor("op_9501_cast_fp16")]; tensor var_9502_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_747_cast_fp16)[name = tensor("op_9502_cast_fp16")]; tensor var_9503_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_749_cast_fp16)[name = tensor("op_9503_cast_fp16")]; tensor var_9504_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_751_cast_fp16)[name = tensor("op_9504_cast_fp16")]; tensor var_9505_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_753_cast_fp16)[name = tensor("op_9505_cast_fp16")]; tensor var_9506_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_755_cast_fp16)[name = tensor("op_9506_cast_fp16")]; tensor var_9507_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_757_cast_fp16)[name = tensor("op_9507_cast_fp16")]; tensor var_9508_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_759_cast_fp16)[name = tensor("op_9508_cast_fp16")]; tensor var_9509_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_761_cast_fp16)[name = tensor("op_9509_cast_fp16")]; tensor var_9510_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_763_cast_fp16)[name = tensor("op_9510_cast_fp16")]; tensor var_9511_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_765_cast_fp16)[name = tensor("op_9511_cast_fp16")]; tensor var_9512_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_767_cast_fp16)[name = tensor("op_9512_cast_fp16")]; tensor var_9514_equation_0 = const()[name = tensor("op_9514_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9514_cast_fp16 = einsum(equation = var_9514_equation_0, values = (var_9402_cast_fp16, var_9497_cast_fp16))[name = tensor("op_9514_cast_fp16")]; tensor var_9516_equation_0 = const()[name = tensor("op_9516_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9516_cast_fp16 = einsum(equation = var_9516_equation_0, values = (var_9402_cast_fp16, var_9498_cast_fp16))[name = tensor("op_9516_cast_fp16")]; tensor var_9518_equation_0 = const()[name = tensor("op_9518_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9518_cast_fp16 = einsum(equation = var_9518_equation_0, values = (var_9406_cast_fp16, var_9499_cast_fp16))[name = tensor("op_9518_cast_fp16")]; tensor var_9520_equation_0 = const()[name = tensor("op_9520_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9520_cast_fp16 = einsum(equation = var_9520_equation_0, values = (var_9406_cast_fp16, var_9500_cast_fp16))[name = tensor("op_9520_cast_fp16")]; tensor var_9522_equation_0 = const()[name = tensor("op_9522_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9522_cast_fp16 = einsum(equation = var_9522_equation_0, values = (var_9410_cast_fp16, var_9501_cast_fp16))[name = tensor("op_9522_cast_fp16")]; tensor var_9524_equation_0 = const()[name = tensor("op_9524_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9524_cast_fp16 = einsum(equation = var_9524_equation_0, values = (var_9410_cast_fp16, var_9502_cast_fp16))[name = tensor("op_9524_cast_fp16")]; tensor var_9526_equation_0 = const()[name = tensor("op_9526_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9526_cast_fp16 = einsum(equation = var_9526_equation_0, values = (var_9414_cast_fp16, var_9503_cast_fp16))[name = tensor("op_9526_cast_fp16")]; tensor var_9528_equation_0 = const()[name = tensor("op_9528_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9528_cast_fp16 = einsum(equation = var_9528_equation_0, values = (var_9414_cast_fp16, var_9504_cast_fp16))[name = tensor("op_9528_cast_fp16")]; tensor var_9530_equation_0 = const()[name = tensor("op_9530_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9530_cast_fp16 = einsum(equation = var_9530_equation_0, values = (var_9418_cast_fp16, var_9505_cast_fp16))[name = tensor("op_9530_cast_fp16")]; tensor var_9532_equation_0 = const()[name = tensor("op_9532_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9532_cast_fp16 = einsum(equation = var_9532_equation_0, values = (var_9418_cast_fp16, var_9506_cast_fp16))[name = tensor("op_9532_cast_fp16")]; tensor var_9534_equation_0 = const()[name = tensor("op_9534_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9534_cast_fp16 = einsum(equation = var_9534_equation_0, values = (var_9422_cast_fp16, var_9507_cast_fp16))[name = tensor("op_9534_cast_fp16")]; tensor var_9536_equation_0 = const()[name = tensor("op_9536_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9536_cast_fp16 = einsum(equation = var_9536_equation_0, values = (var_9422_cast_fp16, var_9508_cast_fp16))[name = tensor("op_9536_cast_fp16")]; tensor var_9538_equation_0 = const()[name = tensor("op_9538_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9538_cast_fp16 = einsum(equation = var_9538_equation_0, values = (var_9426_cast_fp16, var_9509_cast_fp16))[name = tensor("op_9538_cast_fp16")]; tensor var_9540_equation_0 = const()[name = tensor("op_9540_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9540_cast_fp16 = einsum(equation = var_9540_equation_0, values = (var_9426_cast_fp16, var_9510_cast_fp16))[name = tensor("op_9540_cast_fp16")]; tensor var_9542_equation_0 = const()[name = tensor("op_9542_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9542_cast_fp16 = einsum(equation = var_9542_equation_0, values = (var_9430_cast_fp16, var_9511_cast_fp16))[name = tensor("op_9542_cast_fp16")]; tensor var_9544_equation_0 = const()[name = tensor("op_9544_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9544_cast_fp16 = einsum(equation = var_9544_equation_0, values = (var_9430_cast_fp16, var_9512_cast_fp16))[name = tensor("op_9544_cast_fp16")]; tensor var_9546_interleave_0 = const()[name = tensor("op_9546_interleave_0"), val = tensor(false)]; tensor var_9546_cast_fp16 = concat(axis = var_8160, interleave = var_9546_interleave_0, values = (var_9514_cast_fp16, var_9516_cast_fp16))[name = tensor("op_9546_cast_fp16")]; tensor var_9548_interleave_0 = const()[name = tensor("op_9548_interleave_0"), val = tensor(false)]; tensor var_9548_cast_fp16 = concat(axis = var_8160, interleave = var_9548_interleave_0, values = (var_9518_cast_fp16, var_9520_cast_fp16))[name = tensor("op_9548_cast_fp16")]; tensor var_9550_interleave_0 = const()[name = tensor("op_9550_interleave_0"), val = tensor(false)]; tensor var_9550_cast_fp16 = concat(axis = var_8160, interleave = var_9550_interleave_0, values = (var_9522_cast_fp16, var_9524_cast_fp16))[name = tensor("op_9550_cast_fp16")]; tensor var_9552_interleave_0 = const()[name = tensor("op_9552_interleave_0"), val = tensor(false)]; tensor var_9552_cast_fp16 = concat(axis = var_8160, interleave = var_9552_interleave_0, values = (var_9526_cast_fp16, var_9528_cast_fp16))[name = tensor("op_9552_cast_fp16")]; tensor var_9554_interleave_0 = const()[name = tensor("op_9554_interleave_0"), val = tensor(false)]; tensor var_9554_cast_fp16 = concat(axis = var_8160, interleave = var_9554_interleave_0, values = (var_9530_cast_fp16, var_9532_cast_fp16))[name = tensor("op_9554_cast_fp16")]; tensor var_9556_interleave_0 = const()[name = tensor("op_9556_interleave_0"), val = tensor(false)]; tensor var_9556_cast_fp16 = concat(axis = var_8160, interleave = var_9556_interleave_0, values = (var_9534_cast_fp16, var_9536_cast_fp16))[name = tensor("op_9556_cast_fp16")]; tensor var_9558_interleave_0 = const()[name = tensor("op_9558_interleave_0"), val = tensor(false)]; tensor var_9558_cast_fp16 = concat(axis = var_8160, interleave = var_9558_interleave_0, values = (var_9538_cast_fp16, var_9540_cast_fp16))[name = tensor("op_9558_cast_fp16")]; tensor var_9560_interleave_0 = const()[name = tensor("op_9560_interleave_0"), val = tensor(false)]; tensor var_9560_cast_fp16 = concat(axis = var_8160, interleave = var_9560_interleave_0, values = (var_9542_cast_fp16, var_9544_cast_fp16))[name = tensor("op_9560_cast_fp16")]; tensor input_411_interleave_0 = const()[name = tensor("input_411_interleave_0"), val = tensor(false)]; tensor input_411_cast_fp16 = concat(axis = var_8182, interleave = input_411_interleave_0, values = (var_9546_cast_fp16, var_9548_cast_fp16, var_9550_cast_fp16, var_9552_cast_fp16, var_9554_cast_fp16, var_9556_cast_fp16, var_9558_cast_fp16, var_9560_cast_fp16))[name = tensor("input_411_cast_fp16")]; tensor var_9566 = const()[name = tensor("op_9566"), val = tensor([1, 1])]; tensor var_9568 = const()[name = tensor("op_9568"), val = tensor([1, 1])]; tensor var_9570_pad_type_0 = const()[name = tensor("op_9570_pad_type_0"), val = tensor("custom")]; tensor var_9570_pad_0 = const()[name = tensor("op_9570_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1622910400)))]; tensor up_blocks_2_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1623729664)))]; tensor var_9570_cast_fp16 = conv(bias = up_blocks_2_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_9568, groups = var_8182, pad = var_9570_pad_0, pad_type = var_9570_pad_type_0, strides = var_9566, weight = up_blocks_2_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_411_cast_fp16)[name = tensor("op_9570_cast_fp16")]; tensor inputs_71_cast_fp16 = add(x = var_9570_cast_fp16, y = inputs_69_cast_fp16)[name = tensor("inputs_71_cast_fp16")]; tensor input_413_axes_0 = const()[name = tensor("input_413_axes_0"), val = tensor([1])]; tensor input_413_gamma_0_to_fp16 = const()[name = tensor("input_413_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1623731008)))]; tensor input_413_beta_0_to_fp16 = const()[name = tensor("input_413_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1623732352)))]; tensor var_9580_to_fp16 = const()[name = tensor("op_9580_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_413_cast_fp16 = layer_norm(axes = input_413_axes_0, beta = input_413_beta_0_to_fp16, epsilon = var_9580_to_fp16, gamma = input_413_gamma_0_to_fp16, x = inputs_71_cast_fp16)[name = tensor("input_413_cast_fp16")]; tensor var_9596 = const()[name = tensor("op_9596"), val = tensor([1, 1])]; tensor var_9598 = const()[name = tensor("op_9598"), val = tensor([1, 1])]; tensor var_9600_pad_type_0 = const()[name = tensor("op_9600_pad_type_0"), val = tensor("custom")]; tensor var_9600_pad_0 = const()[name = tensor("op_9600_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1623733696)))]; tensor up_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1630287360)))]; tensor var_9600_cast_fp16 = conv(bias = up_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_9598, groups = var_8182, pad = var_9600_pad_0, pad_type = var_9600_pad_type_0, strides = var_9596, weight = up_blocks_2_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_413_cast_fp16)[name = tensor("op_9600_cast_fp16")]; tensor var_9601_split_sizes_0 = const()[name = tensor("op_9601_split_sizes_0"), val = tensor([2560, 2560])]; tensor var_9601_axis_0 = const()[name = tensor("op_9601_axis_0"), val = tensor(1)]; tensor var_9601_cast_fp16_0, tensor var_9601_cast_fp16_1 = split(axis = var_9601_axis_0, split_sizes = var_9601_split_sizes_0, x = var_9600_cast_fp16)[name = tensor("op_9601_cast_fp16")]; tensor var_9603_mode_0 = const()[name = tensor("op_9603_mode_0"), val = tensor("EXACT")]; tensor var_9603_cast_fp16 = gelu(mode = var_9603_mode_0, x = var_9601_cast_fp16_1)[name = tensor("op_9603_cast_fp16")]; tensor input_415_cast_fp16 = mul(x = var_9601_cast_fp16_0, y = var_9603_cast_fp16)[name = tensor("input_415_cast_fp16")]; tensor var_9607 = const()[name = tensor("op_9607"), val = tensor([1, 1])]; tensor var_9609 = const()[name = tensor("op_9609"), val = tensor([1, 1])]; tensor var_9611_pad_type_0 = const()[name = tensor("op_9611_pad_type_0"), val = tensor("custom")]; tensor var_9611_pad_0 = const()[name = tensor("op_9611_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1630297664)))]; tensor up_blocks_2_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1633574528)))]; tensor var_9611_cast_fp16 = conv(bias = up_blocks_2_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_9609, groups = var_8182, pad = var_9611_pad_0, pad_type = var_9611_pad_type_0, strides = var_9607, weight = up_blocks_2_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_415_cast_fp16)[name = tensor("op_9611_cast_fp16")]; tensor hidden_states_253_cast_fp16 = add(x = var_9611_cast_fp16, y = inputs_71_cast_fp16)[name = tensor("hidden_states_253_cast_fp16")]; tensor var_9613 = const()[name = tensor("op_9613"), val = tensor([2, 640, 32, 32])]; tensor input_417_cast_fp16 = reshape(shape = var_9613, x = hidden_states_253_cast_fp16)[name = tensor("input_417_cast_fp16")]; tensor var_9617 = const()[name = tensor("op_9617"), val = tensor([1, 1])]; tensor var_9619 = const()[name = tensor("op_9619"), val = tensor([1, 1])]; tensor hidden_states_255_pad_type_0 = const()[name = tensor("hidden_states_255_pad_type_0"), val = tensor("custom")]; tensor hidden_states_255_pad_0 = const()[name = tensor("hidden_states_255_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_1_proj_out_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1633575872)))]; tensor up_blocks_2_attentions_1_proj_out_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_1_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1634395136)))]; tensor hidden_states_255_cast_fp16 = conv(bias = up_blocks_2_attentions_1_proj_out_bias_to_fp16, dilations = var_9619, groups = var_8182, pad = hidden_states_255_pad_0, pad_type = hidden_states_255_pad_type_0, strides = var_9617, weight = up_blocks_2_attentions_1_proj_out_weight_to_fp16, x = input_417_cast_fp16)[name = tensor("hidden_states_255_cast_fp16")]; tensor hidden_states_257_cast_fp16 = add(x = hidden_states_255_cast_fp16, y = hidden_states_243_cast_fp16)[name = tensor("hidden_states_257_cast_fp16")]; tensor input_419_interleave_0 = const()[name = tensor("input_419_interleave_0"), val = tensor(false)]; tensor input_419_cast_fp16 = concat(axis = var_8182, interleave = input_419_interleave_0, values = (hidden_states_257_cast_fp16, input_63_cast_fp16))[name = tensor("input_419_cast_fp16")]; tensor reshape_192_shape_0 = const()[name = tensor("reshape_192_shape_0"), val = tensor([2, 32, 30, 32, 32])]; tensor reshape_192_cast_fp16 = reshape(shape = reshape_192_shape_0, x = input_419_cast_fp16)[name = tensor("reshape_192_cast_fp16")]; tensor reduce_mean_144_axes_0 = const()[name = tensor("reduce_mean_144_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_144_keep_dims_0 = const()[name = tensor("reduce_mean_144_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_144_cast_fp16 = reduce_mean(axes = reduce_mean_144_axes_0, keep_dims = reduce_mean_144_keep_dims_0, x = reshape_192_cast_fp16)[name = tensor("reduce_mean_144_cast_fp16")]; tensor sub_96_cast_fp16 = sub(x = reshape_192_cast_fp16, y = reduce_mean_144_cast_fp16)[name = tensor("sub_96_cast_fp16")]; tensor square_48_cast_fp16 = square(x = sub_96_cast_fp16)[name = tensor("square_48_cast_fp16")]; tensor reduce_mean_146_axes_0 = const()[name = tensor("reduce_mean_146_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_146_keep_dims_0 = const()[name = tensor("reduce_mean_146_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_146_cast_fp16 = reduce_mean(axes = reduce_mean_146_axes_0, keep_dims = reduce_mean_146_keep_dims_0, x = square_48_cast_fp16)[name = tensor("reduce_mean_146_cast_fp16")]; tensor add_96_y_0_to_fp16 = const()[name = tensor("add_96_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_96_cast_fp16 = add(x = reduce_mean_146_cast_fp16, y = add_96_y_0_to_fp16)[name = tensor("add_96_cast_fp16")]; tensor sqrt_48_cast_fp16 = sqrt(x = add_96_cast_fp16)[name = tensor("sqrt_48_cast_fp16")]; tensor real_div_48_cast_fp16 = real_div(x = sub_96_cast_fp16, y = sqrt_48_cast_fp16)[name = tensor("real_div_48_cast_fp16")]; tensor reshape_193_shape_0 = const()[name = tensor("reshape_193_shape_0"), val = tensor([2, 960, 32, 32])]; tensor reshape_193_cast_fp16 = reshape(shape = reshape_193_shape_0, x = real_div_48_cast_fp16)[name = tensor("reshape_193_cast_fp16")]; tensor add_97_mean_0_to_fp16 = const()[name = tensor("add_97_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1634396480)))]; tensor add_97_variance_0_to_fp16 = const()[name = tensor("add_97_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1634398464)))]; tensor add_97_gamma_0_to_fp16 = const()[name = tensor("add_97_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1634400448)))]; tensor add_97_beta_0_to_fp16 = const()[name = tensor("add_97_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1634402432)))]; tensor add_97_epsilon_0_to_fp16 = const()[name = tensor("add_97_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_97_cast_fp16 = batch_norm(beta = add_97_beta_0_to_fp16, epsilon = add_97_epsilon_0_to_fp16, gamma = add_97_gamma_0_to_fp16, mean = add_97_mean_0_to_fp16, variance = add_97_variance_0_to_fp16, x = reshape_193_cast_fp16)[name = tensor("add_97_cast_fp16")]; tensor input_423_cast_fp16 = silu(x = add_97_cast_fp16)[name = tensor("input_423_cast_fp16")]; tensor var_9637 = const()[name = tensor("op_9637"), val = tensor([1, 1])]; tensor var_9639 = const()[name = tensor("op_9639"), val = tensor([1, 1])]; tensor hidden_states_259_pad_type_0 = const()[name = tensor("hidden_states_259_pad_type_0"), val = tensor("custom")]; tensor hidden_states_259_pad_0 = const()[name = tensor("hidden_states_259_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_2_resnets_2_conv1_weight_to_fp16 = const()[name = tensor("up_blocks_2_resnets_2_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1634404416)))]; tensor up_blocks_2_resnets_2_conv1_bias_to_fp16 = const()[name = tensor("up_blocks_2_resnets_2_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1645463680)))]; tensor hidden_states_259_cast_fp16 = conv(bias = up_blocks_2_resnets_2_conv1_bias_to_fp16, dilations = var_9639, groups = var_8182, pad = hidden_states_259_pad_0, pad_type = hidden_states_259_pad_type_0, strides = var_9637, weight = up_blocks_2_resnets_2_conv1_weight_to_fp16, x = input_423_cast_fp16)[name = tensor("hidden_states_259_cast_fp16")]; tensor var_9645 = const()[name = tensor("op_9645"), val = tensor([1, 1])]; tensor var_9647 = const()[name = tensor("op_9647"), val = tensor([1, 1])]; tensor temb_37_pad_type_0 = const()[name = tensor("temb_37_pad_type_0"), val = tensor("custom")]; tensor temb_37_pad_0 = const()[name = tensor("temb_37_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_resnets_2_time_emb_proj_weight_to_fp16 = const()[name = tensor("up_blocks_2_resnets_2_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1645465024)))]; tensor up_blocks_2_resnets_2_time_emb_proj_bias_to_fp16 = const()[name = tensor("up_blocks_2_resnets_2_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1647103488)))]; tensor temb_37_cast_fp16 = conv(bias = up_blocks_2_resnets_2_time_emb_proj_bias_to_fp16, dilations = var_9647, groups = var_8182, pad = temb_37_pad_0, pad_type = temb_37_pad_type_0, strides = var_9645, weight = up_blocks_2_resnets_2_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_37_cast_fp16")]; tensor input_427_cast_fp16 = add(x = hidden_states_259_cast_fp16, y = temb_37_cast_fp16)[name = tensor("input_427_cast_fp16")]; tensor reshape_196_shape_0 = const()[name = tensor("reshape_196_shape_0"), val = tensor([2, 32, 20, 32, 32])]; tensor reshape_196_cast_fp16 = reshape(shape = reshape_196_shape_0, x = input_427_cast_fp16)[name = tensor("reshape_196_cast_fp16")]; tensor reduce_mean_147_axes_0 = const()[name = tensor("reduce_mean_147_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_147_keep_dims_0 = const()[name = tensor("reduce_mean_147_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_147_cast_fp16 = reduce_mean(axes = reduce_mean_147_axes_0, keep_dims = reduce_mean_147_keep_dims_0, x = reshape_196_cast_fp16)[name = tensor("reduce_mean_147_cast_fp16")]; tensor sub_98_cast_fp16 = sub(x = reshape_196_cast_fp16, y = reduce_mean_147_cast_fp16)[name = tensor("sub_98_cast_fp16")]; tensor square_49_cast_fp16 = square(x = sub_98_cast_fp16)[name = tensor("square_49_cast_fp16")]; tensor reduce_mean_149_axes_0 = const()[name = tensor("reduce_mean_149_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_149_keep_dims_0 = const()[name = tensor("reduce_mean_149_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_149_cast_fp16 = reduce_mean(axes = reduce_mean_149_axes_0, keep_dims = reduce_mean_149_keep_dims_0, x = square_49_cast_fp16)[name = tensor("reduce_mean_149_cast_fp16")]; tensor add_98_y_0_to_fp16 = const()[name = tensor("add_98_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_98_cast_fp16 = add(x = reduce_mean_149_cast_fp16, y = add_98_y_0_to_fp16)[name = tensor("add_98_cast_fp16")]; tensor sqrt_49_cast_fp16 = sqrt(x = add_98_cast_fp16)[name = tensor("sqrt_49_cast_fp16")]; tensor real_div_49_cast_fp16 = real_div(x = sub_98_cast_fp16, y = sqrt_49_cast_fp16)[name = tensor("real_div_49_cast_fp16")]; tensor reshape_197_shape_0 = const()[name = tensor("reshape_197_shape_0"), val = tensor([2, 640, 32, 32])]; tensor reshape_197_cast_fp16 = reshape(shape = reshape_197_shape_0, x = real_div_49_cast_fp16)[name = tensor("reshape_197_cast_fp16")]; tensor add_99_gamma_0_to_fp16 = const()[name = tensor("add_99_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1647104832)))]; tensor add_99_beta_0_to_fp16 = const()[name = tensor("add_99_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1647106176)))]; tensor add_99_epsilon_0_to_fp16 = const()[name = tensor("add_99_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_99_cast_fp16 = batch_norm(beta = add_99_beta_0_to_fp16, epsilon = add_99_epsilon_0_to_fp16, gamma = add_99_gamma_0_to_fp16, mean = add_15_mean_0_to_fp16, variance = add_15_variance_0_to_fp16, x = reshape_197_cast_fp16)[name = tensor("add_99_cast_fp16")]; tensor input_431_cast_fp16 = silu(x = add_99_cast_fp16)[name = tensor("input_431_cast_fp16")]; tensor var_9657 = const()[name = tensor("op_9657"), val = tensor([1, 1])]; tensor var_9659 = const()[name = tensor("op_9659"), val = tensor([1, 1])]; tensor hidden_states_261_pad_type_0 = const()[name = tensor("hidden_states_261_pad_type_0"), val = tensor("custom")]; tensor hidden_states_261_pad_0 = const()[name = tensor("hidden_states_261_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_2_resnets_2_conv2_weight_to_fp16 = const()[name = tensor("up_blocks_2_resnets_2_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1647107520)))]; tensor up_blocks_2_resnets_2_conv2_bias_to_fp16 = const()[name = tensor("up_blocks_2_resnets_2_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1654480384)))]; tensor hidden_states_261_cast_fp16 = conv(bias = up_blocks_2_resnets_2_conv2_bias_to_fp16, dilations = var_9659, groups = var_8182, pad = hidden_states_261_pad_0, pad_type = hidden_states_261_pad_type_0, strides = var_9657, weight = up_blocks_2_resnets_2_conv2_weight_to_fp16, x = input_431_cast_fp16)[name = tensor("hidden_states_261_cast_fp16")]; tensor var_9664 = const()[name = tensor("op_9664"), val = tensor([1, 1])]; tensor var_9666 = const()[name = tensor("op_9666"), val = tensor([1, 1])]; tensor x_21_pad_type_0 = const()[name = tensor("x_21_pad_type_0"), val = tensor("custom")]; tensor x_21_pad_0 = const()[name = tensor("x_21_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_resnets_2_conv_shortcut_weight_to_fp16 = const()[name = tensor("up_blocks_2_resnets_2_conv_shortcut_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1654481728)))]; tensor up_blocks_2_resnets_2_conv_shortcut_bias_to_fp16 = const()[name = tensor("up_blocks_2_resnets_2_conv_shortcut_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1655710592)))]; tensor x_21_cast_fp16 = conv(bias = up_blocks_2_resnets_2_conv_shortcut_bias_to_fp16, dilations = var_9666, groups = var_8182, pad = x_21_pad_0, pad_type = x_21_pad_type_0, strides = var_9664, weight = up_blocks_2_resnets_2_conv_shortcut_weight_to_fp16, x = input_419_cast_fp16)[name = tensor("x_21_cast_fp16")]; tensor hidden_states_263_cast_fp16 = add(x = x_21_cast_fp16, y = hidden_states_261_cast_fp16)[name = tensor("hidden_states_263_cast_fp16")]; tensor reshape_200_shape_0 = const()[name = tensor("reshape_200_shape_0"), val = tensor([2, 32, 20, 32, 32])]; tensor reshape_200_cast_fp16 = reshape(shape = reshape_200_shape_0, x = hidden_states_263_cast_fp16)[name = tensor("reshape_200_cast_fp16")]; tensor reduce_mean_150_axes_0 = const()[name = tensor("reduce_mean_150_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_150_keep_dims_0 = const()[name = tensor("reduce_mean_150_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_150_cast_fp16 = reduce_mean(axes = reduce_mean_150_axes_0, keep_dims = reduce_mean_150_keep_dims_0, x = reshape_200_cast_fp16)[name = tensor("reduce_mean_150_cast_fp16")]; tensor sub_100_cast_fp16 = sub(x = reshape_200_cast_fp16, y = reduce_mean_150_cast_fp16)[name = tensor("sub_100_cast_fp16")]; tensor square_50_cast_fp16 = square(x = sub_100_cast_fp16)[name = tensor("square_50_cast_fp16")]; tensor reduce_mean_152_axes_0 = const()[name = tensor("reduce_mean_152_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_152_keep_dims_0 = const()[name = tensor("reduce_mean_152_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_152_cast_fp16 = reduce_mean(axes = reduce_mean_152_axes_0, keep_dims = reduce_mean_152_keep_dims_0, x = square_50_cast_fp16)[name = tensor("reduce_mean_152_cast_fp16")]; tensor add_100_y_0_to_fp16 = const()[name = tensor("add_100_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_100_cast_fp16 = add(x = reduce_mean_152_cast_fp16, y = add_100_y_0_to_fp16)[name = tensor("add_100_cast_fp16")]; tensor sqrt_50_cast_fp16 = sqrt(x = add_100_cast_fp16)[name = tensor("sqrt_50_cast_fp16")]; tensor real_div_50_cast_fp16 = real_div(x = sub_100_cast_fp16, y = sqrt_50_cast_fp16)[name = tensor("real_div_50_cast_fp16")]; tensor reshape_201_shape_0 = const()[name = tensor("reshape_201_shape_0"), val = tensor([2, 640, 32, 32])]; tensor reshape_201_cast_fp16 = reshape(shape = reshape_201_shape_0, x = real_div_50_cast_fp16)[name = tensor("reshape_201_cast_fp16")]; tensor add_101_gamma_0_to_fp16 = const()[name = tensor("add_101_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1655711936)))]; tensor add_101_beta_0_to_fp16 = const()[name = tensor("add_101_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1655713280)))]; tensor add_101_epsilon_0_to_fp16 = const()[name = tensor("add_101_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_101_cast_fp16 = batch_norm(beta = add_101_beta_0_to_fp16, epsilon = add_101_epsilon_0_to_fp16, gamma = add_101_gamma_0_to_fp16, mean = add_15_mean_0_to_fp16, variance = add_15_variance_0_to_fp16, x = reshape_201_cast_fp16)[name = tensor("add_101_cast_fp16")]; tensor var_9686 = const()[name = tensor("op_9686"), val = tensor([1, 1])]; tensor var_9688 = const()[name = tensor("op_9688"), val = tensor([1, 1])]; tensor hidden_states_265_pad_type_0 = const()[name = tensor("hidden_states_265_pad_type_0"), val = tensor("custom")]; tensor hidden_states_265_pad_0 = const()[name = tensor("hidden_states_265_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_2_proj_in_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1655714624)))]; tensor up_blocks_2_attentions_2_proj_in_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1656533888)))]; tensor hidden_states_265_cast_fp16 = conv(bias = up_blocks_2_attentions_2_proj_in_bias_to_fp16, dilations = var_9688, groups = var_8182, pad = hidden_states_265_pad_0, pad_type = hidden_states_265_pad_type_0, strides = var_9686, weight = up_blocks_2_attentions_2_proj_in_weight_to_fp16, x = add_101_cast_fp16)[name = tensor("hidden_states_265_cast_fp16")]; tensor var_9693 = const()[name = tensor("op_9693"), val = tensor([2, 640, 1, 1024])]; tensor inputs_73_cast_fp16 = reshape(shape = var_9693, x = hidden_states_265_cast_fp16)[name = tensor("inputs_73_cast_fp16")]; tensor hidden_states_267_axes_0 = const()[name = tensor("hidden_states_267_axes_0"), val = tensor([1])]; tensor hidden_states_267_gamma_0_to_fp16 = const()[name = tensor("hidden_states_267_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1656535232)))]; tensor hidden_states_267_beta_0_to_fp16 = const()[name = tensor("hidden_states_267_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1656536576)))]; tensor var_9709_to_fp16 = const()[name = tensor("op_9709_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_267_cast_fp16 = layer_norm(axes = hidden_states_267_axes_0, beta = hidden_states_267_beta_0_to_fp16, epsilon = var_9709_to_fp16, gamma = hidden_states_267_gamma_0_to_fp16, x = inputs_73_cast_fp16)[name = tensor("hidden_states_267_cast_fp16")]; tensor var_9724 = const()[name = tensor("op_9724"), val = tensor([1, 1])]; tensor var_9726 = const()[name = tensor("op_9726"), val = tensor([1, 1])]; tensor q_49_pad_type_0 = const()[name = tensor("q_49_pad_type_0"), val = tensor("custom")]; tensor q_49_pad_0 = const()[name = tensor("q_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_2_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1656537920)))]; tensor q_49_cast_fp16 = conv(dilations = var_9726, groups = var_8182, pad = q_49_pad_0, pad_type = q_49_pad_type_0, strides = var_9724, weight = up_blocks_2_attentions_2_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_267_cast_fp16)[name = tensor("q_49_cast_fp16")]; tensor var_9730 = const()[name = tensor("op_9730"), val = tensor([1, 1])]; tensor var_9732 = const()[name = tensor("op_9732"), val = tensor([1, 1])]; tensor k_97_pad_type_0 = const()[name = tensor("k_97_pad_type_0"), val = tensor("custom")]; tensor k_97_pad_0 = const()[name = tensor("k_97_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_2_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1657357184)))]; tensor k_97_cast_fp16 = conv(dilations = var_9732, groups = var_8182, pad = k_97_pad_0, pad_type = k_97_pad_type_0, strides = var_9730, weight = up_blocks_2_attentions_2_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_267_cast_fp16)[name = tensor("k_97_cast_fp16")]; tensor var_9736 = const()[name = tensor("op_9736"), val = tensor([1, 1])]; tensor var_9738 = const()[name = tensor("op_9738"), val = tensor([1, 1])]; tensor v_49_pad_type_0 = const()[name = tensor("v_49_pad_type_0"), val = tensor("custom")]; tensor v_49_pad_0 = const()[name = tensor("v_49_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_2_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1658176448)))]; tensor v_49_cast_fp16 = conv(dilations = var_9738, groups = var_8182, pad = v_49_pad_0, pad_type = v_49_pad_type_0, strides = var_9736, weight = up_blocks_2_attentions_2_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_267_cast_fp16)[name = tensor("v_49_cast_fp16")]; tensor var_9742_begin_0 = const()[name = tensor("op_9742_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9742_end_0 = const()[name = tensor("op_9742_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9742_end_mask_0 = const()[name = tensor("op_9742_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9742_cast_fp16 = slice_by_index(begin = var_9742_begin_0, end = var_9742_end_0, end_mask = var_9742_end_mask_0, x = q_49_cast_fp16)[name = tensor("op_9742_cast_fp16")]; tensor var_9746_begin_0 = const()[name = tensor("op_9746_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_9746_end_0 = const()[name = tensor("op_9746_end_0"), val = tensor([2, 160, 1, 1024])]; tensor var_9746_end_mask_0 = const()[name = tensor("op_9746_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9746_cast_fp16 = slice_by_index(begin = var_9746_begin_0, end = var_9746_end_0, end_mask = var_9746_end_mask_0, x = q_49_cast_fp16)[name = tensor("op_9746_cast_fp16")]; tensor var_9750_begin_0 = const()[name = tensor("op_9750_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_9750_end_0 = const()[name = tensor("op_9750_end_0"), val = tensor([2, 240, 1, 1024])]; tensor var_9750_end_mask_0 = const()[name = tensor("op_9750_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9750_cast_fp16 = slice_by_index(begin = var_9750_begin_0, end = var_9750_end_0, end_mask = var_9750_end_mask_0, x = q_49_cast_fp16)[name = tensor("op_9750_cast_fp16")]; tensor var_9754_begin_0 = const()[name = tensor("op_9754_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_9754_end_0 = const()[name = tensor("op_9754_end_0"), val = tensor([2, 320, 1, 1024])]; tensor var_9754_end_mask_0 = const()[name = tensor("op_9754_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9754_cast_fp16 = slice_by_index(begin = var_9754_begin_0, end = var_9754_end_0, end_mask = var_9754_end_mask_0, x = q_49_cast_fp16)[name = tensor("op_9754_cast_fp16")]; tensor var_9758_begin_0 = const()[name = tensor("op_9758_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_9758_end_0 = const()[name = tensor("op_9758_end_0"), val = tensor([2, 400, 1, 1024])]; tensor var_9758_end_mask_0 = const()[name = tensor("op_9758_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9758_cast_fp16 = slice_by_index(begin = var_9758_begin_0, end = var_9758_end_0, end_mask = var_9758_end_mask_0, x = q_49_cast_fp16)[name = tensor("op_9758_cast_fp16")]; tensor var_9762_begin_0 = const()[name = tensor("op_9762_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_9762_end_0 = const()[name = tensor("op_9762_end_0"), val = tensor([2, 480, 1, 1024])]; tensor var_9762_end_mask_0 = const()[name = tensor("op_9762_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9762_cast_fp16 = slice_by_index(begin = var_9762_begin_0, end = var_9762_end_0, end_mask = var_9762_end_mask_0, x = q_49_cast_fp16)[name = tensor("op_9762_cast_fp16")]; tensor var_9766_begin_0 = const()[name = tensor("op_9766_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_9766_end_0 = const()[name = tensor("op_9766_end_0"), val = tensor([2, 560, 1, 1024])]; tensor var_9766_end_mask_0 = const()[name = tensor("op_9766_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9766_cast_fp16 = slice_by_index(begin = var_9766_begin_0, end = var_9766_end_0, end_mask = var_9766_end_mask_0, x = q_49_cast_fp16)[name = tensor("op_9766_cast_fp16")]; tensor var_9770_begin_0 = const()[name = tensor("op_9770_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_9770_end_0 = const()[name = tensor("op_9770_end_0"), val = tensor([2, 640, 1, 1024])]; tensor var_9770_end_mask_0 = const()[name = tensor("op_9770_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9770_cast_fp16 = slice_by_index(begin = var_9770_begin_0, end = var_9770_end_0, end_mask = var_9770_end_mask_0, x = q_49_cast_fp16)[name = tensor("op_9770_cast_fp16")]; tensor var_9773_begin_0 = const()[name = tensor("op_9773_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9773_end_0 = const()[name = tensor("op_9773_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9773_end_mask_0 = const()[name = tensor("op_9773_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9773_cast_fp16 = slice_by_index(begin = var_9773_begin_0, end = var_9773_end_0, end_mask = var_9773_end_mask_0, x = var_9742_cast_fp16)[name = tensor("op_9773_cast_fp16")]; tensor var_9774_begin_0 = const()[name = tensor("op_9774_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9774_end_0 = const()[name = tensor("op_9774_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9774_end_mask_0 = const()[name = tensor("op_9774_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9774_cast_fp16 = slice_by_index(begin = var_9774_begin_0, end = var_9774_end_0, end_mask = var_9774_end_mask_0, x = var_9742_cast_fp16)[name = tensor("op_9774_cast_fp16")]; tensor var_9775_begin_0 = const()[name = tensor("op_9775_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9775_end_0 = const()[name = tensor("op_9775_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9775_end_mask_0 = const()[name = tensor("op_9775_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9775_cast_fp16 = slice_by_index(begin = var_9775_begin_0, end = var_9775_end_0, end_mask = var_9775_end_mask_0, x = var_9746_cast_fp16)[name = tensor("op_9775_cast_fp16")]; tensor var_9776_begin_0 = const()[name = tensor("op_9776_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9776_end_0 = const()[name = tensor("op_9776_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9776_end_mask_0 = const()[name = tensor("op_9776_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9776_cast_fp16 = slice_by_index(begin = var_9776_begin_0, end = var_9776_end_0, end_mask = var_9776_end_mask_0, x = var_9746_cast_fp16)[name = tensor("op_9776_cast_fp16")]; tensor var_9777_begin_0 = const()[name = tensor("op_9777_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9777_end_0 = const()[name = tensor("op_9777_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9777_end_mask_0 = const()[name = tensor("op_9777_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9777_cast_fp16 = slice_by_index(begin = var_9777_begin_0, end = var_9777_end_0, end_mask = var_9777_end_mask_0, x = var_9750_cast_fp16)[name = tensor("op_9777_cast_fp16")]; tensor var_9778_begin_0 = const()[name = tensor("op_9778_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9778_end_0 = const()[name = tensor("op_9778_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9778_end_mask_0 = const()[name = tensor("op_9778_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9778_cast_fp16 = slice_by_index(begin = var_9778_begin_0, end = var_9778_end_0, end_mask = var_9778_end_mask_0, x = var_9750_cast_fp16)[name = tensor("op_9778_cast_fp16")]; tensor var_9779_begin_0 = const()[name = tensor("op_9779_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9779_end_0 = const()[name = tensor("op_9779_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9779_end_mask_0 = const()[name = tensor("op_9779_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9779_cast_fp16 = slice_by_index(begin = var_9779_begin_0, end = var_9779_end_0, end_mask = var_9779_end_mask_0, x = var_9754_cast_fp16)[name = tensor("op_9779_cast_fp16")]; tensor var_9780_begin_0 = const()[name = tensor("op_9780_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9780_end_0 = const()[name = tensor("op_9780_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9780_end_mask_0 = const()[name = tensor("op_9780_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9780_cast_fp16 = slice_by_index(begin = var_9780_begin_0, end = var_9780_end_0, end_mask = var_9780_end_mask_0, x = var_9754_cast_fp16)[name = tensor("op_9780_cast_fp16")]; tensor var_9781_begin_0 = const()[name = tensor("op_9781_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9781_end_0 = const()[name = tensor("op_9781_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9781_end_mask_0 = const()[name = tensor("op_9781_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9781_cast_fp16 = slice_by_index(begin = var_9781_begin_0, end = var_9781_end_0, end_mask = var_9781_end_mask_0, x = var_9758_cast_fp16)[name = tensor("op_9781_cast_fp16")]; tensor var_9782_begin_0 = const()[name = tensor("op_9782_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9782_end_0 = const()[name = tensor("op_9782_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9782_end_mask_0 = const()[name = tensor("op_9782_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9782_cast_fp16 = slice_by_index(begin = var_9782_begin_0, end = var_9782_end_0, end_mask = var_9782_end_mask_0, x = var_9758_cast_fp16)[name = tensor("op_9782_cast_fp16")]; tensor var_9783_begin_0 = const()[name = tensor("op_9783_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9783_end_0 = const()[name = tensor("op_9783_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9783_end_mask_0 = const()[name = tensor("op_9783_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9783_cast_fp16 = slice_by_index(begin = var_9783_begin_0, end = var_9783_end_0, end_mask = var_9783_end_mask_0, x = var_9762_cast_fp16)[name = tensor("op_9783_cast_fp16")]; tensor var_9784_begin_0 = const()[name = tensor("op_9784_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9784_end_0 = const()[name = tensor("op_9784_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9784_end_mask_0 = const()[name = tensor("op_9784_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9784_cast_fp16 = slice_by_index(begin = var_9784_begin_0, end = var_9784_end_0, end_mask = var_9784_end_mask_0, x = var_9762_cast_fp16)[name = tensor("op_9784_cast_fp16")]; tensor var_9785_begin_0 = const()[name = tensor("op_9785_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9785_end_0 = const()[name = tensor("op_9785_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9785_end_mask_0 = const()[name = tensor("op_9785_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9785_cast_fp16 = slice_by_index(begin = var_9785_begin_0, end = var_9785_end_0, end_mask = var_9785_end_mask_0, x = var_9766_cast_fp16)[name = tensor("op_9785_cast_fp16")]; tensor var_9786_begin_0 = const()[name = tensor("op_9786_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9786_end_0 = const()[name = tensor("op_9786_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9786_end_mask_0 = const()[name = tensor("op_9786_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9786_cast_fp16 = slice_by_index(begin = var_9786_begin_0, end = var_9786_end_0, end_mask = var_9786_end_mask_0, x = var_9766_cast_fp16)[name = tensor("op_9786_cast_fp16")]; tensor var_9787_begin_0 = const()[name = tensor("op_9787_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9787_end_0 = const()[name = tensor("op_9787_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_9787_end_mask_0 = const()[name = tensor("op_9787_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9787_cast_fp16 = slice_by_index(begin = var_9787_begin_0, end = var_9787_end_0, end_mask = var_9787_end_mask_0, x = var_9770_cast_fp16)[name = tensor("op_9787_cast_fp16")]; tensor var_9788_begin_0 = const()[name = tensor("op_9788_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_9788_end_0 = const()[name = tensor("op_9788_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9788_end_mask_0 = const()[name = tensor("op_9788_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9788_cast_fp16 = slice_by_index(begin = var_9788_begin_0, end = var_9788_end_0, end_mask = var_9788_end_mask_0, x = var_9770_cast_fp16)[name = tensor("op_9788_cast_fp16")]; tensor k_99_perm_0 = const()[name = tensor("k_99_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_9793_begin_0 = const()[name = tensor("op_9793_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9793_end_0 = const()[name = tensor("op_9793_end_0"), val = tensor([2, 1024, 1, 80])]; tensor var_9793_end_mask_0 = const()[name = tensor("op_9793_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_7 = transpose(perm = k_99_perm_0, x = k_97_cast_fp16)[name = tensor("transpose_7")]; tensor var_9793_cast_fp16 = slice_by_index(begin = var_9793_begin_0, end = var_9793_end_0, end_mask = var_9793_end_mask_0, x = transpose_7)[name = tensor("op_9793_cast_fp16")]; tensor var_9797_begin_0 = const()[name = tensor("op_9797_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_9797_end_0 = const()[name = tensor("op_9797_end_0"), val = tensor([2, 1024, 1, 160])]; tensor var_9797_end_mask_0 = const()[name = tensor("op_9797_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9797_cast_fp16 = slice_by_index(begin = var_9797_begin_0, end = var_9797_end_0, end_mask = var_9797_end_mask_0, x = transpose_7)[name = tensor("op_9797_cast_fp16")]; tensor var_9801_begin_0 = const()[name = tensor("op_9801_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_9801_end_0 = const()[name = tensor("op_9801_end_0"), val = tensor([2, 1024, 1, 240])]; tensor var_9801_end_mask_0 = const()[name = tensor("op_9801_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9801_cast_fp16 = slice_by_index(begin = var_9801_begin_0, end = var_9801_end_0, end_mask = var_9801_end_mask_0, x = transpose_7)[name = tensor("op_9801_cast_fp16")]; tensor var_9805_begin_0 = const()[name = tensor("op_9805_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_9805_end_0 = const()[name = tensor("op_9805_end_0"), val = tensor([2, 1024, 1, 320])]; tensor var_9805_end_mask_0 = const()[name = tensor("op_9805_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9805_cast_fp16 = slice_by_index(begin = var_9805_begin_0, end = var_9805_end_0, end_mask = var_9805_end_mask_0, x = transpose_7)[name = tensor("op_9805_cast_fp16")]; tensor var_9809_begin_0 = const()[name = tensor("op_9809_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_9809_end_0 = const()[name = tensor("op_9809_end_0"), val = tensor([2, 1024, 1, 400])]; tensor var_9809_end_mask_0 = const()[name = tensor("op_9809_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9809_cast_fp16 = slice_by_index(begin = var_9809_begin_0, end = var_9809_end_0, end_mask = var_9809_end_mask_0, x = transpose_7)[name = tensor("op_9809_cast_fp16")]; tensor var_9813_begin_0 = const()[name = tensor("op_9813_begin_0"), val = tensor([0, 0, 0, 400])]; tensor var_9813_end_0 = const()[name = tensor("op_9813_end_0"), val = tensor([2, 1024, 1, 480])]; tensor var_9813_end_mask_0 = const()[name = tensor("op_9813_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9813_cast_fp16 = slice_by_index(begin = var_9813_begin_0, end = var_9813_end_0, end_mask = var_9813_end_mask_0, x = transpose_7)[name = tensor("op_9813_cast_fp16")]; tensor var_9817_begin_0 = const()[name = tensor("op_9817_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_9817_end_0 = const()[name = tensor("op_9817_end_0"), val = tensor([2, 1024, 1, 560])]; tensor var_9817_end_mask_0 = const()[name = tensor("op_9817_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9817_cast_fp16 = slice_by_index(begin = var_9817_begin_0, end = var_9817_end_0, end_mask = var_9817_end_mask_0, x = transpose_7)[name = tensor("op_9817_cast_fp16")]; tensor var_9821_begin_0 = const()[name = tensor("op_9821_begin_0"), val = tensor([0, 0, 0, 560])]; tensor var_9821_end_0 = const()[name = tensor("op_9821_end_0"), val = tensor([2, 1024, 1, 640])]; tensor var_9821_end_mask_0 = const()[name = tensor("op_9821_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9821_cast_fp16 = slice_by_index(begin = var_9821_begin_0, end = var_9821_end_0, end_mask = var_9821_end_mask_0, x = transpose_7)[name = tensor("op_9821_cast_fp16")]; tensor var_9823_begin_0 = const()[name = tensor("op_9823_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9823_end_0 = const()[name = tensor("op_9823_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_9823_end_mask_0 = const()[name = tensor("op_9823_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9823_cast_fp16 = slice_by_index(begin = var_9823_begin_0, end = var_9823_end_0, end_mask = var_9823_end_mask_0, x = v_49_cast_fp16)[name = tensor("op_9823_cast_fp16")]; tensor var_9827_begin_0 = const()[name = tensor("op_9827_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_9827_end_0 = const()[name = tensor("op_9827_end_0"), val = tensor([2, 160, 1, 1024])]; tensor var_9827_end_mask_0 = const()[name = tensor("op_9827_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9827_cast_fp16 = slice_by_index(begin = var_9827_begin_0, end = var_9827_end_0, end_mask = var_9827_end_mask_0, x = v_49_cast_fp16)[name = tensor("op_9827_cast_fp16")]; tensor var_9831_begin_0 = const()[name = tensor("op_9831_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_9831_end_0 = const()[name = tensor("op_9831_end_0"), val = tensor([2, 240, 1, 1024])]; tensor var_9831_end_mask_0 = const()[name = tensor("op_9831_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9831_cast_fp16 = slice_by_index(begin = var_9831_begin_0, end = var_9831_end_0, end_mask = var_9831_end_mask_0, x = v_49_cast_fp16)[name = tensor("op_9831_cast_fp16")]; tensor var_9835_begin_0 = const()[name = tensor("op_9835_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_9835_end_0 = const()[name = tensor("op_9835_end_0"), val = tensor([2, 320, 1, 1024])]; tensor var_9835_end_mask_0 = const()[name = tensor("op_9835_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9835_cast_fp16 = slice_by_index(begin = var_9835_begin_0, end = var_9835_end_0, end_mask = var_9835_end_mask_0, x = v_49_cast_fp16)[name = tensor("op_9835_cast_fp16")]; tensor var_9839_begin_0 = const()[name = tensor("op_9839_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_9839_end_0 = const()[name = tensor("op_9839_end_0"), val = tensor([2, 400, 1, 1024])]; tensor var_9839_end_mask_0 = const()[name = tensor("op_9839_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9839_cast_fp16 = slice_by_index(begin = var_9839_begin_0, end = var_9839_end_0, end_mask = var_9839_end_mask_0, x = v_49_cast_fp16)[name = tensor("op_9839_cast_fp16")]; tensor var_9843_begin_0 = const()[name = tensor("op_9843_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_9843_end_0 = const()[name = tensor("op_9843_end_0"), val = tensor([2, 480, 1, 1024])]; tensor var_9843_end_mask_0 = const()[name = tensor("op_9843_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9843_cast_fp16 = slice_by_index(begin = var_9843_begin_0, end = var_9843_end_0, end_mask = var_9843_end_mask_0, x = v_49_cast_fp16)[name = tensor("op_9843_cast_fp16")]; tensor var_9847_begin_0 = const()[name = tensor("op_9847_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_9847_end_0 = const()[name = tensor("op_9847_end_0"), val = tensor([2, 560, 1, 1024])]; tensor var_9847_end_mask_0 = const()[name = tensor("op_9847_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9847_cast_fp16 = slice_by_index(begin = var_9847_begin_0, end = var_9847_end_0, end_mask = var_9847_end_mask_0, x = v_49_cast_fp16)[name = tensor("op_9847_cast_fp16")]; tensor var_9851_begin_0 = const()[name = tensor("op_9851_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_9851_end_0 = const()[name = tensor("op_9851_end_0"), val = tensor([2, 640, 1, 1024])]; tensor var_9851_end_mask_0 = const()[name = tensor("op_9851_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9851_cast_fp16 = slice_by_index(begin = var_9851_begin_0, end = var_9851_end_0, end_mask = var_9851_end_mask_0, x = v_49_cast_fp16)[name = tensor("op_9851_cast_fp16")]; tensor var_9855_equation_0 = const()[name = tensor("op_9855_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9855_cast_fp16 = einsum(equation = var_9855_equation_0, values = (var_9793_cast_fp16, var_9773_cast_fp16))[name = tensor("op_9855_cast_fp16")]; tensor var_9856_to_fp16 = const()[name = tensor("op_9856_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_769_cast_fp16 = mul(x = var_9855_cast_fp16, y = var_9856_to_fp16)[name = tensor("aw_chunk_769_cast_fp16")]; tensor var_9859_equation_0 = const()[name = tensor("op_9859_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9859_cast_fp16 = einsum(equation = var_9859_equation_0, values = (var_9793_cast_fp16, var_9774_cast_fp16))[name = tensor("op_9859_cast_fp16")]; tensor var_9860_to_fp16 = const()[name = tensor("op_9860_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_771_cast_fp16 = mul(x = var_9859_cast_fp16, y = var_9860_to_fp16)[name = tensor("aw_chunk_771_cast_fp16")]; tensor var_9863_equation_0 = const()[name = tensor("op_9863_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9863_cast_fp16 = einsum(equation = var_9863_equation_0, values = (var_9797_cast_fp16, var_9775_cast_fp16))[name = tensor("op_9863_cast_fp16")]; tensor var_9864_to_fp16 = const()[name = tensor("op_9864_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_773_cast_fp16 = mul(x = var_9863_cast_fp16, y = var_9864_to_fp16)[name = tensor("aw_chunk_773_cast_fp16")]; tensor var_9867_equation_0 = const()[name = tensor("op_9867_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9867_cast_fp16 = einsum(equation = var_9867_equation_0, values = (var_9797_cast_fp16, var_9776_cast_fp16))[name = tensor("op_9867_cast_fp16")]; tensor var_9868_to_fp16 = const()[name = tensor("op_9868_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_775_cast_fp16 = mul(x = var_9867_cast_fp16, y = var_9868_to_fp16)[name = tensor("aw_chunk_775_cast_fp16")]; tensor var_9871_equation_0 = const()[name = tensor("op_9871_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9871_cast_fp16 = einsum(equation = var_9871_equation_0, values = (var_9801_cast_fp16, var_9777_cast_fp16))[name = tensor("op_9871_cast_fp16")]; tensor var_9872_to_fp16 = const()[name = tensor("op_9872_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_777_cast_fp16 = mul(x = var_9871_cast_fp16, y = var_9872_to_fp16)[name = tensor("aw_chunk_777_cast_fp16")]; tensor var_9875_equation_0 = const()[name = tensor("op_9875_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9875_cast_fp16 = einsum(equation = var_9875_equation_0, values = (var_9801_cast_fp16, var_9778_cast_fp16))[name = tensor("op_9875_cast_fp16")]; tensor var_9876_to_fp16 = const()[name = tensor("op_9876_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_779_cast_fp16 = mul(x = var_9875_cast_fp16, y = var_9876_to_fp16)[name = tensor("aw_chunk_779_cast_fp16")]; tensor var_9879_equation_0 = const()[name = tensor("op_9879_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9879_cast_fp16 = einsum(equation = var_9879_equation_0, values = (var_9805_cast_fp16, var_9779_cast_fp16))[name = tensor("op_9879_cast_fp16")]; tensor var_9880_to_fp16 = const()[name = tensor("op_9880_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_781_cast_fp16 = mul(x = var_9879_cast_fp16, y = var_9880_to_fp16)[name = tensor("aw_chunk_781_cast_fp16")]; tensor var_9883_equation_0 = const()[name = tensor("op_9883_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9883_cast_fp16 = einsum(equation = var_9883_equation_0, values = (var_9805_cast_fp16, var_9780_cast_fp16))[name = tensor("op_9883_cast_fp16")]; tensor var_9884_to_fp16 = const()[name = tensor("op_9884_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_783_cast_fp16 = mul(x = var_9883_cast_fp16, y = var_9884_to_fp16)[name = tensor("aw_chunk_783_cast_fp16")]; tensor var_9887_equation_0 = const()[name = tensor("op_9887_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9887_cast_fp16 = einsum(equation = var_9887_equation_0, values = (var_9809_cast_fp16, var_9781_cast_fp16))[name = tensor("op_9887_cast_fp16")]; tensor var_9888_to_fp16 = const()[name = tensor("op_9888_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_785_cast_fp16 = mul(x = var_9887_cast_fp16, y = var_9888_to_fp16)[name = tensor("aw_chunk_785_cast_fp16")]; tensor var_9891_equation_0 = const()[name = tensor("op_9891_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9891_cast_fp16 = einsum(equation = var_9891_equation_0, values = (var_9809_cast_fp16, var_9782_cast_fp16))[name = tensor("op_9891_cast_fp16")]; tensor var_9892_to_fp16 = const()[name = tensor("op_9892_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_787_cast_fp16 = mul(x = var_9891_cast_fp16, y = var_9892_to_fp16)[name = tensor("aw_chunk_787_cast_fp16")]; tensor var_9895_equation_0 = const()[name = tensor("op_9895_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9895_cast_fp16 = einsum(equation = var_9895_equation_0, values = (var_9813_cast_fp16, var_9783_cast_fp16))[name = tensor("op_9895_cast_fp16")]; tensor var_9896_to_fp16 = const()[name = tensor("op_9896_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_789_cast_fp16 = mul(x = var_9895_cast_fp16, y = var_9896_to_fp16)[name = tensor("aw_chunk_789_cast_fp16")]; tensor var_9899_equation_0 = const()[name = tensor("op_9899_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9899_cast_fp16 = einsum(equation = var_9899_equation_0, values = (var_9813_cast_fp16, var_9784_cast_fp16))[name = tensor("op_9899_cast_fp16")]; tensor var_9900_to_fp16 = const()[name = tensor("op_9900_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_791_cast_fp16 = mul(x = var_9899_cast_fp16, y = var_9900_to_fp16)[name = tensor("aw_chunk_791_cast_fp16")]; tensor var_9903_equation_0 = const()[name = tensor("op_9903_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9903_cast_fp16 = einsum(equation = var_9903_equation_0, values = (var_9817_cast_fp16, var_9785_cast_fp16))[name = tensor("op_9903_cast_fp16")]; tensor var_9904_to_fp16 = const()[name = tensor("op_9904_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_793_cast_fp16 = mul(x = var_9903_cast_fp16, y = var_9904_to_fp16)[name = tensor("aw_chunk_793_cast_fp16")]; tensor var_9907_equation_0 = const()[name = tensor("op_9907_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9907_cast_fp16 = einsum(equation = var_9907_equation_0, values = (var_9817_cast_fp16, var_9786_cast_fp16))[name = tensor("op_9907_cast_fp16")]; tensor var_9908_to_fp16 = const()[name = tensor("op_9908_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_795_cast_fp16 = mul(x = var_9907_cast_fp16, y = var_9908_to_fp16)[name = tensor("aw_chunk_795_cast_fp16")]; tensor var_9911_equation_0 = const()[name = tensor("op_9911_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9911_cast_fp16 = einsum(equation = var_9911_equation_0, values = (var_9821_cast_fp16, var_9787_cast_fp16))[name = tensor("op_9911_cast_fp16")]; tensor var_9912_to_fp16 = const()[name = tensor("op_9912_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_797_cast_fp16 = mul(x = var_9911_cast_fp16, y = var_9912_to_fp16)[name = tensor("aw_chunk_797_cast_fp16")]; tensor var_9915_equation_0 = const()[name = tensor("op_9915_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_9915_cast_fp16 = einsum(equation = var_9915_equation_0, values = (var_9821_cast_fp16, var_9788_cast_fp16))[name = tensor("op_9915_cast_fp16")]; tensor var_9916_to_fp16 = const()[name = tensor("op_9916_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_799_cast_fp16 = mul(x = var_9915_cast_fp16, y = var_9916_to_fp16)[name = tensor("aw_chunk_799_cast_fp16")]; tensor var_9918_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_769_cast_fp16)[name = tensor("op_9918_cast_fp16")]; tensor var_9919_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_771_cast_fp16)[name = tensor("op_9919_cast_fp16")]; tensor var_9920_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_773_cast_fp16)[name = tensor("op_9920_cast_fp16")]; tensor var_9921_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_775_cast_fp16)[name = tensor("op_9921_cast_fp16")]; tensor var_9922_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_777_cast_fp16)[name = tensor("op_9922_cast_fp16")]; tensor var_9923_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_779_cast_fp16)[name = tensor("op_9923_cast_fp16")]; tensor var_9924_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_781_cast_fp16)[name = tensor("op_9924_cast_fp16")]; tensor var_9925_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_783_cast_fp16)[name = tensor("op_9925_cast_fp16")]; tensor var_9926_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_785_cast_fp16)[name = tensor("op_9926_cast_fp16")]; tensor var_9927_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_787_cast_fp16)[name = tensor("op_9927_cast_fp16")]; tensor var_9928_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_789_cast_fp16)[name = tensor("op_9928_cast_fp16")]; tensor var_9929_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_791_cast_fp16)[name = tensor("op_9929_cast_fp16")]; tensor var_9930_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_793_cast_fp16)[name = tensor("op_9930_cast_fp16")]; tensor var_9931_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_795_cast_fp16)[name = tensor("op_9931_cast_fp16")]; tensor var_9932_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_797_cast_fp16)[name = tensor("op_9932_cast_fp16")]; tensor var_9933_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_799_cast_fp16)[name = tensor("op_9933_cast_fp16")]; tensor var_9935_equation_0 = const()[name = tensor("op_9935_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9935_cast_fp16 = einsum(equation = var_9935_equation_0, values = (var_9823_cast_fp16, var_9918_cast_fp16))[name = tensor("op_9935_cast_fp16")]; tensor var_9937_equation_0 = const()[name = tensor("op_9937_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9937_cast_fp16 = einsum(equation = var_9937_equation_0, values = (var_9823_cast_fp16, var_9919_cast_fp16))[name = tensor("op_9937_cast_fp16")]; tensor var_9939_equation_0 = const()[name = tensor("op_9939_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9939_cast_fp16 = einsum(equation = var_9939_equation_0, values = (var_9827_cast_fp16, var_9920_cast_fp16))[name = tensor("op_9939_cast_fp16")]; tensor var_9941_equation_0 = const()[name = tensor("op_9941_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9941_cast_fp16 = einsum(equation = var_9941_equation_0, values = (var_9827_cast_fp16, var_9921_cast_fp16))[name = tensor("op_9941_cast_fp16")]; tensor var_9943_equation_0 = const()[name = tensor("op_9943_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9943_cast_fp16 = einsum(equation = var_9943_equation_0, values = (var_9831_cast_fp16, var_9922_cast_fp16))[name = tensor("op_9943_cast_fp16")]; tensor var_9945_equation_0 = const()[name = tensor("op_9945_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9945_cast_fp16 = einsum(equation = var_9945_equation_0, values = (var_9831_cast_fp16, var_9923_cast_fp16))[name = tensor("op_9945_cast_fp16")]; tensor var_9947_equation_0 = const()[name = tensor("op_9947_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9947_cast_fp16 = einsum(equation = var_9947_equation_0, values = (var_9835_cast_fp16, var_9924_cast_fp16))[name = tensor("op_9947_cast_fp16")]; tensor var_9949_equation_0 = const()[name = tensor("op_9949_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9949_cast_fp16 = einsum(equation = var_9949_equation_0, values = (var_9835_cast_fp16, var_9925_cast_fp16))[name = tensor("op_9949_cast_fp16")]; tensor var_9951_equation_0 = const()[name = tensor("op_9951_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9951_cast_fp16 = einsum(equation = var_9951_equation_0, values = (var_9839_cast_fp16, var_9926_cast_fp16))[name = tensor("op_9951_cast_fp16")]; tensor var_9953_equation_0 = const()[name = tensor("op_9953_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9953_cast_fp16 = einsum(equation = var_9953_equation_0, values = (var_9839_cast_fp16, var_9927_cast_fp16))[name = tensor("op_9953_cast_fp16")]; tensor var_9955_equation_0 = const()[name = tensor("op_9955_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9955_cast_fp16 = einsum(equation = var_9955_equation_0, values = (var_9843_cast_fp16, var_9928_cast_fp16))[name = tensor("op_9955_cast_fp16")]; tensor var_9957_equation_0 = const()[name = tensor("op_9957_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9957_cast_fp16 = einsum(equation = var_9957_equation_0, values = (var_9843_cast_fp16, var_9929_cast_fp16))[name = tensor("op_9957_cast_fp16")]; tensor var_9959_equation_0 = const()[name = tensor("op_9959_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9959_cast_fp16 = einsum(equation = var_9959_equation_0, values = (var_9847_cast_fp16, var_9930_cast_fp16))[name = tensor("op_9959_cast_fp16")]; tensor var_9961_equation_0 = const()[name = tensor("op_9961_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9961_cast_fp16 = einsum(equation = var_9961_equation_0, values = (var_9847_cast_fp16, var_9931_cast_fp16))[name = tensor("op_9961_cast_fp16")]; tensor var_9963_equation_0 = const()[name = tensor("op_9963_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9963_cast_fp16 = einsum(equation = var_9963_equation_0, values = (var_9851_cast_fp16, var_9932_cast_fp16))[name = tensor("op_9963_cast_fp16")]; tensor var_9965_equation_0 = const()[name = tensor("op_9965_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9965_cast_fp16 = einsum(equation = var_9965_equation_0, values = (var_9851_cast_fp16, var_9933_cast_fp16))[name = tensor("op_9965_cast_fp16")]; tensor var_9967_interleave_0 = const()[name = tensor("op_9967_interleave_0"), val = tensor(false)]; tensor var_9967_cast_fp16 = concat(axis = var_8160, interleave = var_9967_interleave_0, values = (var_9935_cast_fp16, var_9937_cast_fp16))[name = tensor("op_9967_cast_fp16")]; tensor var_9969_interleave_0 = const()[name = tensor("op_9969_interleave_0"), val = tensor(false)]; tensor var_9969_cast_fp16 = concat(axis = var_8160, interleave = var_9969_interleave_0, values = (var_9939_cast_fp16, var_9941_cast_fp16))[name = tensor("op_9969_cast_fp16")]; tensor var_9971_interleave_0 = const()[name = tensor("op_9971_interleave_0"), val = tensor(false)]; tensor var_9971_cast_fp16 = concat(axis = var_8160, interleave = var_9971_interleave_0, values = (var_9943_cast_fp16, var_9945_cast_fp16))[name = tensor("op_9971_cast_fp16")]; tensor var_9973_interleave_0 = const()[name = tensor("op_9973_interleave_0"), val = tensor(false)]; tensor var_9973_cast_fp16 = concat(axis = var_8160, interleave = var_9973_interleave_0, values = (var_9947_cast_fp16, var_9949_cast_fp16))[name = tensor("op_9973_cast_fp16")]; tensor var_9975_interleave_0 = const()[name = tensor("op_9975_interleave_0"), val = tensor(false)]; tensor var_9975_cast_fp16 = concat(axis = var_8160, interleave = var_9975_interleave_0, values = (var_9951_cast_fp16, var_9953_cast_fp16))[name = tensor("op_9975_cast_fp16")]; tensor var_9977_interleave_0 = const()[name = tensor("op_9977_interleave_0"), val = tensor(false)]; tensor var_9977_cast_fp16 = concat(axis = var_8160, interleave = var_9977_interleave_0, values = (var_9955_cast_fp16, var_9957_cast_fp16))[name = tensor("op_9977_cast_fp16")]; tensor var_9979_interleave_0 = const()[name = tensor("op_9979_interleave_0"), val = tensor(false)]; tensor var_9979_cast_fp16 = concat(axis = var_8160, interleave = var_9979_interleave_0, values = (var_9959_cast_fp16, var_9961_cast_fp16))[name = tensor("op_9979_cast_fp16")]; tensor var_9981_interleave_0 = const()[name = tensor("op_9981_interleave_0"), val = tensor(false)]; tensor var_9981_cast_fp16 = concat(axis = var_8160, interleave = var_9981_interleave_0, values = (var_9963_cast_fp16, var_9965_cast_fp16))[name = tensor("op_9981_cast_fp16")]; tensor input_435_interleave_0 = const()[name = tensor("input_435_interleave_0"), val = tensor(false)]; tensor input_435_cast_fp16 = concat(axis = var_8182, interleave = input_435_interleave_0, values = (var_9967_cast_fp16, var_9969_cast_fp16, var_9971_cast_fp16, var_9973_cast_fp16, var_9975_cast_fp16, var_9977_cast_fp16, var_9979_cast_fp16, var_9981_cast_fp16))[name = tensor("input_435_cast_fp16")]; tensor var_9987 = const()[name = tensor("op_9987"), val = tensor([1, 1])]; tensor var_9989 = const()[name = tensor("op_9989"), val = tensor([1, 1])]; tensor var_9991_pad_type_0 = const()[name = tensor("op_9991_pad_type_0"), val = tensor("custom")]; tensor var_9991_pad_0 = const()[name = tensor("op_9991_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_2_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1658995712)))]; tensor up_blocks_2_attentions_2_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1659814976)))]; tensor var_9991_cast_fp16 = conv(bias = up_blocks_2_attentions_2_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_9989, groups = var_8182, pad = var_9991_pad_0, pad_type = var_9991_pad_type_0, strides = var_9987, weight = up_blocks_2_attentions_2_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_435_cast_fp16)[name = tensor("op_9991_cast_fp16")]; tensor inputs_75_cast_fp16 = add(x = var_9991_cast_fp16, y = inputs_73_cast_fp16)[name = tensor("inputs_75_cast_fp16")]; tensor hidden_states_269_axes_0 = const()[name = tensor("hidden_states_269_axes_0"), val = tensor([1])]; tensor hidden_states_269_gamma_0_to_fp16 = const()[name = tensor("hidden_states_269_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1659816320)))]; tensor hidden_states_269_beta_0_to_fp16 = const()[name = tensor("hidden_states_269_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1659817664)))]; tensor var_10001_to_fp16 = const()[name = tensor("op_10001_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_269_cast_fp16 = layer_norm(axes = hidden_states_269_axes_0, beta = hidden_states_269_beta_0_to_fp16, epsilon = var_10001_to_fp16, gamma = hidden_states_269_gamma_0_to_fp16, x = inputs_75_cast_fp16)[name = tensor("hidden_states_269_cast_fp16")]; tensor var_10016 = const()[name = tensor("op_10016"), val = tensor([1, 1])]; tensor var_10018 = const()[name = tensor("op_10018"), val = tensor([1, 1])]; tensor q_51_pad_type_0 = const()[name = tensor("q_51_pad_type_0"), val = tensor("custom")]; tensor q_51_pad_0 = const()[name = tensor("q_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_2_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1659819008)))]; tensor q_51_cast_fp16 = conv(dilations = var_10018, groups = var_8182, pad = q_51_pad_0, pad_type = q_51_pad_type_0, strides = var_10016, weight = up_blocks_2_attentions_2_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_269_cast_fp16)[name = tensor("q_51_cast_fp16")]; tensor var_10022 = const()[name = tensor("op_10022"), val = tensor([1, 1])]; tensor var_10024 = const()[name = tensor("op_10024"), val = tensor([1, 1])]; tensor k_101_pad_type_0 = const()[name = tensor("k_101_pad_type_0"), val = tensor("custom")]; tensor k_101_pad_0 = const()[name = tensor("k_101_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_2_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1660638272)))]; tensor k_101_cast_fp16 = conv(dilations = var_10024, groups = var_8182, pad = k_101_pad_0, pad_type = k_101_pad_type_0, strides = var_10022, weight = up_blocks_2_attentions_2_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_101_cast_fp16")]; tensor var_10028 = const()[name = tensor("op_10028"), val = tensor([1, 1])]; tensor var_10030 = const()[name = tensor("op_10030"), val = tensor([1, 1])]; tensor v_51_pad_type_0 = const()[name = tensor("v_51_pad_type_0"), val = tensor("custom")]; tensor v_51_pad_0 = const()[name = tensor("v_51_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_2_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1661621376)))]; tensor v_51_cast_fp16 = conv(dilations = var_10030, groups = var_8182, pad = v_51_pad_0, pad_type = v_51_pad_type_0, strides = var_10028, weight = up_blocks_2_attentions_2_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_51_cast_fp16")]; tensor var_10034_begin_0 = const()[name = tensor("op_10034_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10034_end_0 = const()[name = tensor("op_10034_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_10034_end_mask_0 = const()[name = tensor("op_10034_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10034_cast_fp16 = slice_by_index(begin = var_10034_begin_0, end = var_10034_end_0, end_mask = var_10034_end_mask_0, x = q_51_cast_fp16)[name = tensor("op_10034_cast_fp16")]; tensor var_10038_begin_0 = const()[name = tensor("op_10038_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_10038_end_0 = const()[name = tensor("op_10038_end_0"), val = tensor([2, 160, 1, 1024])]; tensor var_10038_end_mask_0 = const()[name = tensor("op_10038_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10038_cast_fp16 = slice_by_index(begin = var_10038_begin_0, end = var_10038_end_0, end_mask = var_10038_end_mask_0, x = q_51_cast_fp16)[name = tensor("op_10038_cast_fp16")]; tensor var_10042_begin_0 = const()[name = tensor("op_10042_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_10042_end_0 = const()[name = tensor("op_10042_end_0"), val = tensor([2, 240, 1, 1024])]; tensor var_10042_end_mask_0 = const()[name = tensor("op_10042_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10042_cast_fp16 = slice_by_index(begin = var_10042_begin_0, end = var_10042_end_0, end_mask = var_10042_end_mask_0, x = q_51_cast_fp16)[name = tensor("op_10042_cast_fp16")]; tensor var_10046_begin_0 = const()[name = tensor("op_10046_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_10046_end_0 = const()[name = tensor("op_10046_end_0"), val = tensor([2, 320, 1, 1024])]; tensor var_10046_end_mask_0 = const()[name = tensor("op_10046_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10046_cast_fp16 = slice_by_index(begin = var_10046_begin_0, end = var_10046_end_0, end_mask = var_10046_end_mask_0, x = q_51_cast_fp16)[name = tensor("op_10046_cast_fp16")]; tensor var_10050_begin_0 = const()[name = tensor("op_10050_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_10050_end_0 = const()[name = tensor("op_10050_end_0"), val = tensor([2, 400, 1, 1024])]; tensor var_10050_end_mask_0 = const()[name = tensor("op_10050_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10050_cast_fp16 = slice_by_index(begin = var_10050_begin_0, end = var_10050_end_0, end_mask = var_10050_end_mask_0, x = q_51_cast_fp16)[name = tensor("op_10050_cast_fp16")]; tensor var_10054_begin_0 = const()[name = tensor("op_10054_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_10054_end_0 = const()[name = tensor("op_10054_end_0"), val = tensor([2, 480, 1, 1024])]; tensor var_10054_end_mask_0 = const()[name = tensor("op_10054_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10054_cast_fp16 = slice_by_index(begin = var_10054_begin_0, end = var_10054_end_0, end_mask = var_10054_end_mask_0, x = q_51_cast_fp16)[name = tensor("op_10054_cast_fp16")]; tensor var_10058_begin_0 = const()[name = tensor("op_10058_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_10058_end_0 = const()[name = tensor("op_10058_end_0"), val = tensor([2, 560, 1, 1024])]; tensor var_10058_end_mask_0 = const()[name = tensor("op_10058_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10058_cast_fp16 = slice_by_index(begin = var_10058_begin_0, end = var_10058_end_0, end_mask = var_10058_end_mask_0, x = q_51_cast_fp16)[name = tensor("op_10058_cast_fp16")]; tensor var_10062_begin_0 = const()[name = tensor("op_10062_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_10062_end_0 = const()[name = tensor("op_10062_end_0"), val = tensor([2, 640, 1, 1024])]; tensor var_10062_end_mask_0 = const()[name = tensor("op_10062_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10062_cast_fp16 = slice_by_index(begin = var_10062_begin_0, end = var_10062_end_0, end_mask = var_10062_end_mask_0, x = q_51_cast_fp16)[name = tensor("op_10062_cast_fp16")]; tensor var_10065_begin_0 = const()[name = tensor("op_10065_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10065_end_0 = const()[name = tensor("op_10065_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_10065_end_mask_0 = const()[name = tensor("op_10065_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10065_cast_fp16 = slice_by_index(begin = var_10065_begin_0, end = var_10065_end_0, end_mask = var_10065_end_mask_0, x = var_10034_cast_fp16)[name = tensor("op_10065_cast_fp16")]; tensor var_10066_begin_0 = const()[name = tensor("op_10066_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10066_end_0 = const()[name = tensor("op_10066_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_10066_end_mask_0 = const()[name = tensor("op_10066_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10066_cast_fp16 = slice_by_index(begin = var_10066_begin_0, end = var_10066_end_0, end_mask = var_10066_end_mask_0, x = var_10034_cast_fp16)[name = tensor("op_10066_cast_fp16")]; tensor var_10067_begin_0 = const()[name = tensor("op_10067_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10067_end_0 = const()[name = tensor("op_10067_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_10067_end_mask_0 = const()[name = tensor("op_10067_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10067_cast_fp16 = slice_by_index(begin = var_10067_begin_0, end = var_10067_end_0, end_mask = var_10067_end_mask_0, x = var_10038_cast_fp16)[name = tensor("op_10067_cast_fp16")]; tensor var_10068_begin_0 = const()[name = tensor("op_10068_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10068_end_0 = const()[name = tensor("op_10068_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_10068_end_mask_0 = const()[name = tensor("op_10068_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10068_cast_fp16 = slice_by_index(begin = var_10068_begin_0, end = var_10068_end_0, end_mask = var_10068_end_mask_0, x = var_10038_cast_fp16)[name = tensor("op_10068_cast_fp16")]; tensor var_10069_begin_0 = const()[name = tensor("op_10069_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10069_end_0 = const()[name = tensor("op_10069_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_10069_end_mask_0 = const()[name = tensor("op_10069_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10069_cast_fp16 = slice_by_index(begin = var_10069_begin_0, end = var_10069_end_0, end_mask = var_10069_end_mask_0, x = var_10042_cast_fp16)[name = tensor("op_10069_cast_fp16")]; tensor var_10070_begin_0 = const()[name = tensor("op_10070_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10070_end_0 = const()[name = tensor("op_10070_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_10070_end_mask_0 = const()[name = tensor("op_10070_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10070_cast_fp16 = slice_by_index(begin = var_10070_begin_0, end = var_10070_end_0, end_mask = var_10070_end_mask_0, x = var_10042_cast_fp16)[name = tensor("op_10070_cast_fp16")]; tensor var_10071_begin_0 = const()[name = tensor("op_10071_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10071_end_0 = const()[name = tensor("op_10071_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_10071_end_mask_0 = const()[name = tensor("op_10071_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10071_cast_fp16 = slice_by_index(begin = var_10071_begin_0, end = var_10071_end_0, end_mask = var_10071_end_mask_0, x = var_10046_cast_fp16)[name = tensor("op_10071_cast_fp16")]; tensor var_10072_begin_0 = const()[name = tensor("op_10072_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10072_end_0 = const()[name = tensor("op_10072_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_10072_end_mask_0 = const()[name = tensor("op_10072_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10072_cast_fp16 = slice_by_index(begin = var_10072_begin_0, end = var_10072_end_0, end_mask = var_10072_end_mask_0, x = var_10046_cast_fp16)[name = tensor("op_10072_cast_fp16")]; tensor var_10073_begin_0 = const()[name = tensor("op_10073_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10073_end_0 = const()[name = tensor("op_10073_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_10073_end_mask_0 = const()[name = tensor("op_10073_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10073_cast_fp16 = slice_by_index(begin = var_10073_begin_0, end = var_10073_end_0, end_mask = var_10073_end_mask_0, x = var_10050_cast_fp16)[name = tensor("op_10073_cast_fp16")]; tensor var_10074_begin_0 = const()[name = tensor("op_10074_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10074_end_0 = const()[name = tensor("op_10074_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_10074_end_mask_0 = const()[name = tensor("op_10074_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10074_cast_fp16 = slice_by_index(begin = var_10074_begin_0, end = var_10074_end_0, end_mask = var_10074_end_mask_0, x = var_10050_cast_fp16)[name = tensor("op_10074_cast_fp16")]; tensor var_10075_begin_0 = const()[name = tensor("op_10075_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10075_end_0 = const()[name = tensor("op_10075_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_10075_end_mask_0 = const()[name = tensor("op_10075_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10075_cast_fp16 = slice_by_index(begin = var_10075_begin_0, end = var_10075_end_0, end_mask = var_10075_end_mask_0, x = var_10054_cast_fp16)[name = tensor("op_10075_cast_fp16")]; tensor var_10076_begin_0 = const()[name = tensor("op_10076_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10076_end_0 = const()[name = tensor("op_10076_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_10076_end_mask_0 = const()[name = tensor("op_10076_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10076_cast_fp16 = slice_by_index(begin = var_10076_begin_0, end = var_10076_end_0, end_mask = var_10076_end_mask_0, x = var_10054_cast_fp16)[name = tensor("op_10076_cast_fp16")]; tensor var_10077_begin_0 = const()[name = tensor("op_10077_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10077_end_0 = const()[name = tensor("op_10077_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_10077_end_mask_0 = const()[name = tensor("op_10077_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10077_cast_fp16 = slice_by_index(begin = var_10077_begin_0, end = var_10077_end_0, end_mask = var_10077_end_mask_0, x = var_10058_cast_fp16)[name = tensor("op_10077_cast_fp16")]; tensor var_10078_begin_0 = const()[name = tensor("op_10078_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10078_end_0 = const()[name = tensor("op_10078_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_10078_end_mask_0 = const()[name = tensor("op_10078_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10078_cast_fp16 = slice_by_index(begin = var_10078_begin_0, end = var_10078_end_0, end_mask = var_10078_end_mask_0, x = var_10058_cast_fp16)[name = tensor("op_10078_cast_fp16")]; tensor var_10079_begin_0 = const()[name = tensor("op_10079_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10079_end_0 = const()[name = tensor("op_10079_end_0"), val = tensor([2, 80, 1, 512])]; tensor var_10079_end_mask_0 = const()[name = tensor("op_10079_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10079_cast_fp16 = slice_by_index(begin = var_10079_begin_0, end = var_10079_end_0, end_mask = var_10079_end_mask_0, x = var_10062_cast_fp16)[name = tensor("op_10079_cast_fp16")]; tensor var_10080_begin_0 = const()[name = tensor("op_10080_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10080_end_0 = const()[name = tensor("op_10080_end_0"), val = tensor([2, 80, 1, 1024])]; tensor var_10080_end_mask_0 = const()[name = tensor("op_10080_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10080_cast_fp16 = slice_by_index(begin = var_10080_begin_0, end = var_10080_end_0, end_mask = var_10080_end_mask_0, x = var_10062_cast_fp16)[name = tensor("op_10080_cast_fp16")]; tensor k_103_perm_0 = const()[name = tensor("k_103_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_10085_begin_0 = const()[name = tensor("op_10085_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10085_end_0 = const()[name = tensor("op_10085_end_0"), val = tensor([2, 77, 1, 80])]; tensor var_10085_end_mask_0 = const()[name = tensor("op_10085_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_6 = transpose(perm = k_103_perm_0, x = k_101_cast_fp16)[name = tensor("transpose_6")]; tensor var_10085_cast_fp16 = slice_by_index(begin = var_10085_begin_0, end = var_10085_end_0, end_mask = var_10085_end_mask_0, x = transpose_6)[name = tensor("op_10085_cast_fp16")]; tensor var_10089_begin_0 = const()[name = tensor("op_10089_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_10089_end_0 = const()[name = tensor("op_10089_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_10089_end_mask_0 = const()[name = tensor("op_10089_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10089_cast_fp16 = slice_by_index(begin = var_10089_begin_0, end = var_10089_end_0, end_mask = var_10089_end_mask_0, x = transpose_6)[name = tensor("op_10089_cast_fp16")]; tensor var_10093_begin_0 = const()[name = tensor("op_10093_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_10093_end_0 = const()[name = tensor("op_10093_end_0"), val = tensor([2, 77, 1, 240])]; tensor var_10093_end_mask_0 = const()[name = tensor("op_10093_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10093_cast_fp16 = slice_by_index(begin = var_10093_begin_0, end = var_10093_end_0, end_mask = var_10093_end_mask_0, x = transpose_6)[name = tensor("op_10093_cast_fp16")]; tensor var_10097_begin_0 = const()[name = tensor("op_10097_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_10097_end_0 = const()[name = tensor("op_10097_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_10097_end_mask_0 = const()[name = tensor("op_10097_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10097_cast_fp16 = slice_by_index(begin = var_10097_begin_0, end = var_10097_end_0, end_mask = var_10097_end_mask_0, x = transpose_6)[name = tensor("op_10097_cast_fp16")]; tensor var_10101_begin_0 = const()[name = tensor("op_10101_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_10101_end_0 = const()[name = tensor("op_10101_end_0"), val = tensor([2, 77, 1, 400])]; tensor var_10101_end_mask_0 = const()[name = tensor("op_10101_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10101_cast_fp16 = slice_by_index(begin = var_10101_begin_0, end = var_10101_end_0, end_mask = var_10101_end_mask_0, x = transpose_6)[name = tensor("op_10101_cast_fp16")]; tensor var_10105_begin_0 = const()[name = tensor("op_10105_begin_0"), val = tensor([0, 0, 0, 400])]; tensor var_10105_end_0 = const()[name = tensor("op_10105_end_0"), val = tensor([2, 77, 1, 480])]; tensor var_10105_end_mask_0 = const()[name = tensor("op_10105_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10105_cast_fp16 = slice_by_index(begin = var_10105_begin_0, end = var_10105_end_0, end_mask = var_10105_end_mask_0, x = transpose_6)[name = tensor("op_10105_cast_fp16")]; tensor var_10109_begin_0 = const()[name = tensor("op_10109_begin_0"), val = tensor([0, 0, 0, 480])]; tensor var_10109_end_0 = const()[name = tensor("op_10109_end_0"), val = tensor([2, 77, 1, 560])]; tensor var_10109_end_mask_0 = const()[name = tensor("op_10109_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10109_cast_fp16 = slice_by_index(begin = var_10109_begin_0, end = var_10109_end_0, end_mask = var_10109_end_mask_0, x = transpose_6)[name = tensor("op_10109_cast_fp16")]; tensor var_10113_begin_0 = const()[name = tensor("op_10113_begin_0"), val = tensor([0, 0, 0, 560])]; tensor var_10113_end_0 = const()[name = tensor("op_10113_end_0"), val = tensor([2, 77, 1, 640])]; tensor var_10113_end_mask_0 = const()[name = tensor("op_10113_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10113_cast_fp16 = slice_by_index(begin = var_10113_begin_0, end = var_10113_end_0, end_mask = var_10113_end_mask_0, x = transpose_6)[name = tensor("op_10113_cast_fp16")]; tensor var_10115_begin_0 = const()[name = tensor("op_10115_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10115_end_0 = const()[name = tensor("op_10115_end_0"), val = tensor([2, 80, 1, 77])]; tensor var_10115_end_mask_0 = const()[name = tensor("op_10115_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10115_cast_fp16 = slice_by_index(begin = var_10115_begin_0, end = var_10115_end_0, end_mask = var_10115_end_mask_0, x = v_51_cast_fp16)[name = tensor("op_10115_cast_fp16")]; tensor var_10119_begin_0 = const()[name = tensor("op_10119_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_10119_end_0 = const()[name = tensor("op_10119_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_10119_end_mask_0 = const()[name = tensor("op_10119_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10119_cast_fp16 = slice_by_index(begin = var_10119_begin_0, end = var_10119_end_0, end_mask = var_10119_end_mask_0, x = v_51_cast_fp16)[name = tensor("op_10119_cast_fp16")]; tensor var_10123_begin_0 = const()[name = tensor("op_10123_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_10123_end_0 = const()[name = tensor("op_10123_end_0"), val = tensor([2, 240, 1, 77])]; tensor var_10123_end_mask_0 = const()[name = tensor("op_10123_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10123_cast_fp16 = slice_by_index(begin = var_10123_begin_0, end = var_10123_end_0, end_mask = var_10123_end_mask_0, x = v_51_cast_fp16)[name = tensor("op_10123_cast_fp16")]; tensor var_10127_begin_0 = const()[name = tensor("op_10127_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_10127_end_0 = const()[name = tensor("op_10127_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_10127_end_mask_0 = const()[name = tensor("op_10127_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10127_cast_fp16 = slice_by_index(begin = var_10127_begin_0, end = var_10127_end_0, end_mask = var_10127_end_mask_0, x = v_51_cast_fp16)[name = tensor("op_10127_cast_fp16")]; tensor var_10131_begin_0 = const()[name = tensor("op_10131_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_10131_end_0 = const()[name = tensor("op_10131_end_0"), val = tensor([2, 400, 1, 77])]; tensor var_10131_end_mask_0 = const()[name = tensor("op_10131_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10131_cast_fp16 = slice_by_index(begin = var_10131_begin_0, end = var_10131_end_0, end_mask = var_10131_end_mask_0, x = v_51_cast_fp16)[name = tensor("op_10131_cast_fp16")]; tensor var_10135_begin_0 = const()[name = tensor("op_10135_begin_0"), val = tensor([0, 400, 0, 0])]; tensor var_10135_end_0 = const()[name = tensor("op_10135_end_0"), val = tensor([2, 480, 1, 77])]; tensor var_10135_end_mask_0 = const()[name = tensor("op_10135_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10135_cast_fp16 = slice_by_index(begin = var_10135_begin_0, end = var_10135_end_0, end_mask = var_10135_end_mask_0, x = v_51_cast_fp16)[name = tensor("op_10135_cast_fp16")]; tensor var_10139_begin_0 = const()[name = tensor("op_10139_begin_0"), val = tensor([0, 480, 0, 0])]; tensor var_10139_end_0 = const()[name = tensor("op_10139_end_0"), val = tensor([2, 560, 1, 77])]; tensor var_10139_end_mask_0 = const()[name = tensor("op_10139_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10139_cast_fp16 = slice_by_index(begin = var_10139_begin_0, end = var_10139_end_0, end_mask = var_10139_end_mask_0, x = v_51_cast_fp16)[name = tensor("op_10139_cast_fp16")]; tensor var_10143_begin_0 = const()[name = tensor("op_10143_begin_0"), val = tensor([0, 560, 0, 0])]; tensor var_10143_end_0 = const()[name = tensor("op_10143_end_0"), val = tensor([2, 640, 1, 77])]; tensor var_10143_end_mask_0 = const()[name = tensor("op_10143_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10143_cast_fp16 = slice_by_index(begin = var_10143_begin_0, end = var_10143_end_0, end_mask = var_10143_end_mask_0, x = v_51_cast_fp16)[name = tensor("op_10143_cast_fp16")]; tensor var_10147_equation_0 = const()[name = tensor("op_10147_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10147_cast_fp16 = einsum(equation = var_10147_equation_0, values = (var_10085_cast_fp16, var_10065_cast_fp16))[name = tensor("op_10147_cast_fp16")]; tensor var_10148_to_fp16 = const()[name = tensor("op_10148_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_801_cast_fp16 = mul(x = var_10147_cast_fp16, y = var_10148_to_fp16)[name = tensor("aw_chunk_801_cast_fp16")]; tensor var_10151_equation_0 = const()[name = tensor("op_10151_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10151_cast_fp16 = einsum(equation = var_10151_equation_0, values = (var_10085_cast_fp16, var_10066_cast_fp16))[name = tensor("op_10151_cast_fp16")]; tensor var_10152_to_fp16 = const()[name = tensor("op_10152_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_803_cast_fp16 = mul(x = var_10151_cast_fp16, y = var_10152_to_fp16)[name = tensor("aw_chunk_803_cast_fp16")]; tensor var_10155_equation_0 = const()[name = tensor("op_10155_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10155_cast_fp16 = einsum(equation = var_10155_equation_0, values = (var_10089_cast_fp16, var_10067_cast_fp16))[name = tensor("op_10155_cast_fp16")]; tensor var_10156_to_fp16 = const()[name = tensor("op_10156_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_805_cast_fp16 = mul(x = var_10155_cast_fp16, y = var_10156_to_fp16)[name = tensor("aw_chunk_805_cast_fp16")]; tensor var_10159_equation_0 = const()[name = tensor("op_10159_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10159_cast_fp16 = einsum(equation = var_10159_equation_0, values = (var_10089_cast_fp16, var_10068_cast_fp16))[name = tensor("op_10159_cast_fp16")]; tensor var_10160_to_fp16 = const()[name = tensor("op_10160_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_807_cast_fp16 = mul(x = var_10159_cast_fp16, y = var_10160_to_fp16)[name = tensor("aw_chunk_807_cast_fp16")]; tensor var_10163_equation_0 = const()[name = tensor("op_10163_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10163_cast_fp16 = einsum(equation = var_10163_equation_0, values = (var_10093_cast_fp16, var_10069_cast_fp16))[name = tensor("op_10163_cast_fp16")]; tensor var_10164_to_fp16 = const()[name = tensor("op_10164_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_809_cast_fp16 = mul(x = var_10163_cast_fp16, y = var_10164_to_fp16)[name = tensor("aw_chunk_809_cast_fp16")]; tensor var_10167_equation_0 = const()[name = tensor("op_10167_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10167_cast_fp16 = einsum(equation = var_10167_equation_0, values = (var_10093_cast_fp16, var_10070_cast_fp16))[name = tensor("op_10167_cast_fp16")]; tensor var_10168_to_fp16 = const()[name = tensor("op_10168_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_811_cast_fp16 = mul(x = var_10167_cast_fp16, y = var_10168_to_fp16)[name = tensor("aw_chunk_811_cast_fp16")]; tensor var_10171_equation_0 = const()[name = tensor("op_10171_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10171_cast_fp16 = einsum(equation = var_10171_equation_0, values = (var_10097_cast_fp16, var_10071_cast_fp16))[name = tensor("op_10171_cast_fp16")]; tensor var_10172_to_fp16 = const()[name = tensor("op_10172_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_813_cast_fp16 = mul(x = var_10171_cast_fp16, y = var_10172_to_fp16)[name = tensor("aw_chunk_813_cast_fp16")]; tensor var_10175_equation_0 = const()[name = tensor("op_10175_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10175_cast_fp16 = einsum(equation = var_10175_equation_0, values = (var_10097_cast_fp16, var_10072_cast_fp16))[name = tensor("op_10175_cast_fp16")]; tensor var_10176_to_fp16 = const()[name = tensor("op_10176_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_815_cast_fp16 = mul(x = var_10175_cast_fp16, y = var_10176_to_fp16)[name = tensor("aw_chunk_815_cast_fp16")]; tensor var_10179_equation_0 = const()[name = tensor("op_10179_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10179_cast_fp16 = einsum(equation = var_10179_equation_0, values = (var_10101_cast_fp16, var_10073_cast_fp16))[name = tensor("op_10179_cast_fp16")]; tensor var_10180_to_fp16 = const()[name = tensor("op_10180_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_817_cast_fp16 = mul(x = var_10179_cast_fp16, y = var_10180_to_fp16)[name = tensor("aw_chunk_817_cast_fp16")]; tensor var_10183_equation_0 = const()[name = tensor("op_10183_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10183_cast_fp16 = einsum(equation = var_10183_equation_0, values = (var_10101_cast_fp16, var_10074_cast_fp16))[name = tensor("op_10183_cast_fp16")]; tensor var_10184_to_fp16 = const()[name = tensor("op_10184_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_819_cast_fp16 = mul(x = var_10183_cast_fp16, y = var_10184_to_fp16)[name = tensor("aw_chunk_819_cast_fp16")]; tensor var_10187_equation_0 = const()[name = tensor("op_10187_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10187_cast_fp16 = einsum(equation = var_10187_equation_0, values = (var_10105_cast_fp16, var_10075_cast_fp16))[name = tensor("op_10187_cast_fp16")]; tensor var_10188_to_fp16 = const()[name = tensor("op_10188_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_821_cast_fp16 = mul(x = var_10187_cast_fp16, y = var_10188_to_fp16)[name = tensor("aw_chunk_821_cast_fp16")]; tensor var_10191_equation_0 = const()[name = tensor("op_10191_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10191_cast_fp16 = einsum(equation = var_10191_equation_0, values = (var_10105_cast_fp16, var_10076_cast_fp16))[name = tensor("op_10191_cast_fp16")]; tensor var_10192_to_fp16 = const()[name = tensor("op_10192_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_823_cast_fp16 = mul(x = var_10191_cast_fp16, y = var_10192_to_fp16)[name = tensor("aw_chunk_823_cast_fp16")]; tensor var_10195_equation_0 = const()[name = tensor("op_10195_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10195_cast_fp16 = einsum(equation = var_10195_equation_0, values = (var_10109_cast_fp16, var_10077_cast_fp16))[name = tensor("op_10195_cast_fp16")]; tensor var_10196_to_fp16 = const()[name = tensor("op_10196_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_825_cast_fp16 = mul(x = var_10195_cast_fp16, y = var_10196_to_fp16)[name = tensor("aw_chunk_825_cast_fp16")]; tensor var_10199_equation_0 = const()[name = tensor("op_10199_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10199_cast_fp16 = einsum(equation = var_10199_equation_0, values = (var_10109_cast_fp16, var_10078_cast_fp16))[name = tensor("op_10199_cast_fp16")]; tensor var_10200_to_fp16 = const()[name = tensor("op_10200_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_827_cast_fp16 = mul(x = var_10199_cast_fp16, y = var_10200_to_fp16)[name = tensor("aw_chunk_827_cast_fp16")]; tensor var_10203_equation_0 = const()[name = tensor("op_10203_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10203_cast_fp16 = einsum(equation = var_10203_equation_0, values = (var_10113_cast_fp16, var_10079_cast_fp16))[name = tensor("op_10203_cast_fp16")]; tensor var_10204_to_fp16 = const()[name = tensor("op_10204_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_829_cast_fp16 = mul(x = var_10203_cast_fp16, y = var_10204_to_fp16)[name = tensor("aw_chunk_829_cast_fp16")]; tensor var_10207_equation_0 = const()[name = tensor("op_10207_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10207_cast_fp16 = einsum(equation = var_10207_equation_0, values = (var_10113_cast_fp16, var_10080_cast_fp16))[name = tensor("op_10207_cast_fp16")]; tensor var_10208_to_fp16 = const()[name = tensor("op_10208_to_fp16"), val = tensor(0x1.cap-4)]; tensor aw_chunk_831_cast_fp16 = mul(x = var_10207_cast_fp16, y = var_10208_to_fp16)[name = tensor("aw_chunk_831_cast_fp16")]; tensor var_10210_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_801_cast_fp16)[name = tensor("op_10210_cast_fp16")]; tensor var_10211_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_803_cast_fp16)[name = tensor("op_10211_cast_fp16")]; tensor var_10212_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_805_cast_fp16)[name = tensor("op_10212_cast_fp16")]; tensor var_10213_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_807_cast_fp16)[name = tensor("op_10213_cast_fp16")]; tensor var_10214_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_809_cast_fp16)[name = tensor("op_10214_cast_fp16")]; tensor var_10215_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_811_cast_fp16)[name = tensor("op_10215_cast_fp16")]; tensor var_10216_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_813_cast_fp16)[name = tensor("op_10216_cast_fp16")]; tensor var_10217_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_815_cast_fp16)[name = tensor("op_10217_cast_fp16")]; tensor var_10218_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_817_cast_fp16)[name = tensor("op_10218_cast_fp16")]; tensor var_10219_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_819_cast_fp16)[name = tensor("op_10219_cast_fp16")]; tensor var_10220_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_821_cast_fp16)[name = tensor("op_10220_cast_fp16")]; tensor var_10221_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_823_cast_fp16)[name = tensor("op_10221_cast_fp16")]; tensor var_10222_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_825_cast_fp16)[name = tensor("op_10222_cast_fp16")]; tensor var_10223_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_827_cast_fp16)[name = tensor("op_10223_cast_fp16")]; tensor var_10224_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_829_cast_fp16)[name = tensor("op_10224_cast_fp16")]; tensor var_10225_cast_fp16 = softmax(axis = var_8182, x = aw_chunk_831_cast_fp16)[name = tensor("op_10225_cast_fp16")]; tensor var_10227_equation_0 = const()[name = tensor("op_10227_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10227_cast_fp16 = einsum(equation = var_10227_equation_0, values = (var_10115_cast_fp16, var_10210_cast_fp16))[name = tensor("op_10227_cast_fp16")]; tensor var_10229_equation_0 = const()[name = tensor("op_10229_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10229_cast_fp16 = einsum(equation = var_10229_equation_0, values = (var_10115_cast_fp16, var_10211_cast_fp16))[name = tensor("op_10229_cast_fp16")]; tensor var_10231_equation_0 = const()[name = tensor("op_10231_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10231_cast_fp16 = einsum(equation = var_10231_equation_0, values = (var_10119_cast_fp16, var_10212_cast_fp16))[name = tensor("op_10231_cast_fp16")]; tensor var_10233_equation_0 = const()[name = tensor("op_10233_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10233_cast_fp16 = einsum(equation = var_10233_equation_0, values = (var_10119_cast_fp16, var_10213_cast_fp16))[name = tensor("op_10233_cast_fp16")]; tensor var_10235_equation_0 = const()[name = tensor("op_10235_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10235_cast_fp16 = einsum(equation = var_10235_equation_0, values = (var_10123_cast_fp16, var_10214_cast_fp16))[name = tensor("op_10235_cast_fp16")]; tensor var_10237_equation_0 = const()[name = tensor("op_10237_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10237_cast_fp16 = einsum(equation = var_10237_equation_0, values = (var_10123_cast_fp16, var_10215_cast_fp16))[name = tensor("op_10237_cast_fp16")]; tensor var_10239_equation_0 = const()[name = tensor("op_10239_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10239_cast_fp16 = einsum(equation = var_10239_equation_0, values = (var_10127_cast_fp16, var_10216_cast_fp16))[name = tensor("op_10239_cast_fp16")]; tensor var_10241_equation_0 = const()[name = tensor("op_10241_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10241_cast_fp16 = einsum(equation = var_10241_equation_0, values = (var_10127_cast_fp16, var_10217_cast_fp16))[name = tensor("op_10241_cast_fp16")]; tensor var_10243_equation_0 = const()[name = tensor("op_10243_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10243_cast_fp16 = einsum(equation = var_10243_equation_0, values = (var_10131_cast_fp16, var_10218_cast_fp16))[name = tensor("op_10243_cast_fp16")]; tensor var_10245_equation_0 = const()[name = tensor("op_10245_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10245_cast_fp16 = einsum(equation = var_10245_equation_0, values = (var_10131_cast_fp16, var_10219_cast_fp16))[name = tensor("op_10245_cast_fp16")]; tensor var_10247_equation_0 = const()[name = tensor("op_10247_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10247_cast_fp16 = einsum(equation = var_10247_equation_0, values = (var_10135_cast_fp16, var_10220_cast_fp16))[name = tensor("op_10247_cast_fp16")]; tensor var_10249_equation_0 = const()[name = tensor("op_10249_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10249_cast_fp16 = einsum(equation = var_10249_equation_0, values = (var_10135_cast_fp16, var_10221_cast_fp16))[name = tensor("op_10249_cast_fp16")]; tensor var_10251_equation_0 = const()[name = tensor("op_10251_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10251_cast_fp16 = einsum(equation = var_10251_equation_0, values = (var_10139_cast_fp16, var_10222_cast_fp16))[name = tensor("op_10251_cast_fp16")]; tensor var_10253_equation_0 = const()[name = tensor("op_10253_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10253_cast_fp16 = einsum(equation = var_10253_equation_0, values = (var_10139_cast_fp16, var_10223_cast_fp16))[name = tensor("op_10253_cast_fp16")]; tensor var_10255_equation_0 = const()[name = tensor("op_10255_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10255_cast_fp16 = einsum(equation = var_10255_equation_0, values = (var_10143_cast_fp16, var_10224_cast_fp16))[name = tensor("op_10255_cast_fp16")]; tensor var_10257_equation_0 = const()[name = tensor("op_10257_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10257_cast_fp16 = einsum(equation = var_10257_equation_0, values = (var_10143_cast_fp16, var_10225_cast_fp16))[name = tensor("op_10257_cast_fp16")]; tensor var_10259_interleave_0 = const()[name = tensor("op_10259_interleave_0"), val = tensor(false)]; tensor var_10259_cast_fp16 = concat(axis = var_8160, interleave = var_10259_interleave_0, values = (var_10227_cast_fp16, var_10229_cast_fp16))[name = tensor("op_10259_cast_fp16")]; tensor var_10261_interleave_0 = const()[name = tensor("op_10261_interleave_0"), val = tensor(false)]; tensor var_10261_cast_fp16 = concat(axis = var_8160, interleave = var_10261_interleave_0, values = (var_10231_cast_fp16, var_10233_cast_fp16))[name = tensor("op_10261_cast_fp16")]; tensor var_10263_interleave_0 = const()[name = tensor("op_10263_interleave_0"), val = tensor(false)]; tensor var_10263_cast_fp16 = concat(axis = var_8160, interleave = var_10263_interleave_0, values = (var_10235_cast_fp16, var_10237_cast_fp16))[name = tensor("op_10263_cast_fp16")]; tensor var_10265_interleave_0 = const()[name = tensor("op_10265_interleave_0"), val = tensor(false)]; tensor var_10265_cast_fp16 = concat(axis = var_8160, interleave = var_10265_interleave_0, values = (var_10239_cast_fp16, var_10241_cast_fp16))[name = tensor("op_10265_cast_fp16")]; tensor var_10267_interleave_0 = const()[name = tensor("op_10267_interleave_0"), val = tensor(false)]; tensor var_10267_cast_fp16 = concat(axis = var_8160, interleave = var_10267_interleave_0, values = (var_10243_cast_fp16, var_10245_cast_fp16))[name = tensor("op_10267_cast_fp16")]; tensor var_10269_interleave_0 = const()[name = tensor("op_10269_interleave_0"), val = tensor(false)]; tensor var_10269_cast_fp16 = concat(axis = var_8160, interleave = var_10269_interleave_0, values = (var_10247_cast_fp16, var_10249_cast_fp16))[name = tensor("op_10269_cast_fp16")]; tensor var_10271_interleave_0 = const()[name = tensor("op_10271_interleave_0"), val = tensor(false)]; tensor var_10271_cast_fp16 = concat(axis = var_8160, interleave = var_10271_interleave_0, values = (var_10251_cast_fp16, var_10253_cast_fp16))[name = tensor("op_10271_cast_fp16")]; tensor var_10273_interleave_0 = const()[name = tensor("op_10273_interleave_0"), val = tensor(false)]; tensor var_10273_cast_fp16 = concat(axis = var_8160, interleave = var_10273_interleave_0, values = (var_10255_cast_fp16, var_10257_cast_fp16))[name = tensor("op_10273_cast_fp16")]; tensor input_437_interleave_0 = const()[name = tensor("input_437_interleave_0"), val = tensor(false)]; tensor input_437_cast_fp16 = concat(axis = var_8182, interleave = input_437_interleave_0, values = (var_10259_cast_fp16, var_10261_cast_fp16, var_10263_cast_fp16, var_10265_cast_fp16, var_10267_cast_fp16, var_10269_cast_fp16, var_10271_cast_fp16, var_10273_cast_fp16))[name = tensor("input_437_cast_fp16")]; tensor var_10279 = const()[name = tensor("op_10279"), val = tensor([1, 1])]; tensor var_10281 = const()[name = tensor("op_10281"), val = tensor([1, 1])]; tensor var_10283_pad_type_0 = const()[name = tensor("op_10283_pad_type_0"), val = tensor("custom")]; tensor var_10283_pad_0 = const()[name = tensor("op_10283_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_2_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1662604480)))]; tensor up_blocks_2_attentions_2_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1663423744)))]; tensor var_10283_cast_fp16 = conv(bias = up_blocks_2_attentions_2_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_10281, groups = var_8182, pad = var_10283_pad_0, pad_type = var_10283_pad_type_0, strides = var_10279, weight = up_blocks_2_attentions_2_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_437_cast_fp16)[name = tensor("op_10283_cast_fp16")]; tensor inputs_77_cast_fp16 = add(x = var_10283_cast_fp16, y = inputs_75_cast_fp16)[name = tensor("inputs_77_cast_fp16")]; tensor input_439_axes_0 = const()[name = tensor("input_439_axes_0"), val = tensor([1])]; tensor input_439_gamma_0_to_fp16 = const()[name = tensor("input_439_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1663425088)))]; tensor input_439_beta_0_to_fp16 = const()[name = tensor("input_439_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1663426432)))]; tensor var_10293_to_fp16 = const()[name = tensor("op_10293_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_439_cast_fp16 = layer_norm(axes = input_439_axes_0, beta = input_439_beta_0_to_fp16, epsilon = var_10293_to_fp16, gamma = input_439_gamma_0_to_fp16, x = inputs_77_cast_fp16)[name = tensor("input_439_cast_fp16")]; tensor var_10309 = const()[name = tensor("op_10309"), val = tensor([1, 1])]; tensor var_10311 = const()[name = tensor("op_10311"), val = tensor([1, 1])]; tensor var_10313_pad_type_0 = const()[name = tensor("op_10313_pad_type_0"), val = tensor("custom")]; tensor var_10313_pad_0 = const()[name = tensor("op_10313_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_2_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1663427776)))]; tensor up_blocks_2_attentions_2_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1669981440)))]; tensor var_10313_cast_fp16 = conv(bias = up_blocks_2_attentions_2_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_10311, groups = var_8182, pad = var_10313_pad_0, pad_type = var_10313_pad_type_0, strides = var_10309, weight = up_blocks_2_attentions_2_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_439_cast_fp16)[name = tensor("op_10313_cast_fp16")]; tensor var_10314_split_sizes_0 = const()[name = tensor("op_10314_split_sizes_0"), val = tensor([2560, 2560])]; tensor var_10314_axis_0 = const()[name = tensor("op_10314_axis_0"), val = tensor(1)]; tensor var_10314_cast_fp16_0, tensor var_10314_cast_fp16_1 = split(axis = var_10314_axis_0, split_sizes = var_10314_split_sizes_0, x = var_10313_cast_fp16)[name = tensor("op_10314_cast_fp16")]; tensor var_10316_mode_0 = const()[name = tensor("op_10316_mode_0"), val = tensor("EXACT")]; tensor var_10316_cast_fp16 = gelu(mode = var_10316_mode_0, x = var_10314_cast_fp16_1)[name = tensor("op_10316_cast_fp16")]; tensor input_441_cast_fp16 = mul(x = var_10314_cast_fp16_0, y = var_10316_cast_fp16)[name = tensor("input_441_cast_fp16")]; tensor var_10320 = const()[name = tensor("op_10320"), val = tensor([1, 1])]; tensor var_10322 = const()[name = tensor("op_10322"), val = tensor([1, 1])]; tensor var_10324_pad_type_0 = const()[name = tensor("op_10324_pad_type_0"), val = tensor("custom")]; tensor var_10324_pad_0 = const()[name = tensor("op_10324_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_2_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1669991744)))]; tensor up_blocks_2_attentions_2_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1673268608)))]; tensor var_10324_cast_fp16 = conv(bias = up_blocks_2_attentions_2_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_10322, groups = var_8182, pad = var_10324_pad_0, pad_type = var_10324_pad_type_0, strides = var_10320, weight = up_blocks_2_attentions_2_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_441_cast_fp16)[name = tensor("op_10324_cast_fp16")]; tensor hidden_states_273_cast_fp16 = add(x = var_10324_cast_fp16, y = inputs_77_cast_fp16)[name = tensor("hidden_states_273_cast_fp16")]; tensor var_10326 = const()[name = tensor("op_10326"), val = tensor([2, 640, 32, 32])]; tensor input_443_cast_fp16 = reshape(shape = var_10326, x = hidden_states_273_cast_fp16)[name = tensor("input_443_cast_fp16")]; tensor var_10330 = const()[name = tensor("op_10330"), val = tensor([1, 1])]; tensor var_10332 = const()[name = tensor("op_10332"), val = tensor([1, 1])]; tensor hidden_states_275_pad_type_0 = const()[name = tensor("hidden_states_275_pad_type_0"), val = tensor("custom")]; tensor hidden_states_275_pad_0 = const()[name = tensor("hidden_states_275_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_2_attentions_2_proj_out_weight_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1673269952)))]; tensor up_blocks_2_attentions_2_proj_out_bias_to_fp16 = const()[name = tensor("up_blocks_2_attentions_2_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1674089216)))]; tensor hidden_states_275_cast_fp16 = conv(bias = up_blocks_2_attentions_2_proj_out_bias_to_fp16, dilations = var_10332, groups = var_8182, pad = hidden_states_275_pad_0, pad_type = hidden_states_275_pad_type_0, strides = var_10330, weight = up_blocks_2_attentions_2_proj_out_weight_to_fp16, x = input_443_cast_fp16)[name = tensor("hidden_states_275_cast_fp16")]; tensor input_445_cast_fp16 = add(x = hidden_states_275_cast_fp16, y = hidden_states_263_cast_fp16)[name = tensor("input_445_cast_fp16")]; tensor input_447_scale_factor_height_0 = const()[name = tensor("input_447_scale_factor_height_0"), val = tensor(0x1p+1)]; tensor input_447_scale_factor_width_0 = const()[name = tensor("input_447_scale_factor_width_0"), val = tensor(0x1p+1)]; tensor input_447_cast_fp16 = upsample_nearest_neighbor(scale_factor_height = input_447_scale_factor_height_0, scale_factor_width = input_447_scale_factor_width_0, x = input_445_cast_fp16)[name = tensor("input_447_cast_fp16")]; tensor var_10341 = const()[name = tensor("op_10341"), val = tensor([1, 1])]; tensor var_10343 = const()[name = tensor("op_10343"), val = tensor([1, 1])]; tensor hidden_states_277_pad_type_0 = const()[name = tensor("hidden_states_277_pad_type_0"), val = tensor("custom")]; tensor hidden_states_277_pad_0 = const()[name = tensor("hidden_states_277_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_2_upsamplers_0_conv_weight_to_fp16 = const()[name = tensor("up_blocks_2_upsamplers_0_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1674090560)))]; tensor up_blocks_2_upsamplers_0_conv_bias_to_fp16 = const()[name = tensor("up_blocks_2_upsamplers_0_conv_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1681463424)))]; tensor hidden_states_277_cast_fp16 = conv(bias = up_blocks_2_upsamplers_0_conv_bias_to_fp16, dilations = var_10343, groups = var_8182, pad = hidden_states_277_pad_0, pad_type = hidden_states_277_pad_type_0, strides = var_10341, weight = up_blocks_2_upsamplers_0_conv_weight_to_fp16, x = input_447_cast_fp16)[name = tensor("hidden_states_277_cast_fp16")]; tensor var_10347 = const()[name = tensor("op_10347"), val = tensor(3)]; tensor var_10375 = const()[name = tensor("op_10375"), val = tensor(1)]; tensor input_449_interleave_0 = const()[name = tensor("input_449_interleave_0"), val = tensor(false)]; tensor input_449_cast_fp16 = concat(axis = var_10375, interleave = input_449_interleave_0, values = (hidden_states_277_cast_fp16, input_61_cast_fp16))[name = tensor("input_449_cast_fp16")]; tensor reshape_204_shape_0 = const()[name = tensor("reshape_204_shape_0"), val = tensor([2, 32, 30, 64, 64])]; tensor reshape_204_cast_fp16 = reshape(shape = reshape_204_shape_0, x = input_449_cast_fp16)[name = tensor("reshape_204_cast_fp16")]; tensor reduce_mean_153_axes_0 = const()[name = tensor("reduce_mean_153_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_153_keep_dims_0 = const()[name = tensor("reduce_mean_153_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_153_cast_fp16 = reduce_mean(axes = reduce_mean_153_axes_0, keep_dims = reduce_mean_153_keep_dims_0, x = reshape_204_cast_fp16)[name = tensor("reduce_mean_153_cast_fp16")]; tensor sub_102_cast_fp16 = sub(x = reshape_204_cast_fp16, y = reduce_mean_153_cast_fp16)[name = tensor("sub_102_cast_fp16")]; tensor square_51_cast_fp16 = square(x = sub_102_cast_fp16)[name = tensor("square_51_cast_fp16")]; tensor reduce_mean_155_axes_0 = const()[name = tensor("reduce_mean_155_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_155_keep_dims_0 = const()[name = tensor("reduce_mean_155_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_155_cast_fp16 = reduce_mean(axes = reduce_mean_155_axes_0, keep_dims = reduce_mean_155_keep_dims_0, x = square_51_cast_fp16)[name = tensor("reduce_mean_155_cast_fp16")]; tensor add_102_y_0_to_fp16 = const()[name = tensor("add_102_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_102_cast_fp16 = add(x = reduce_mean_155_cast_fp16, y = add_102_y_0_to_fp16)[name = tensor("add_102_cast_fp16")]; tensor sqrt_51_cast_fp16 = sqrt(x = add_102_cast_fp16)[name = tensor("sqrt_51_cast_fp16")]; tensor real_div_51_cast_fp16 = real_div(x = sub_102_cast_fp16, y = sqrt_51_cast_fp16)[name = tensor("real_div_51_cast_fp16")]; tensor reshape_205_shape_0 = const()[name = tensor("reshape_205_shape_0"), val = tensor([2, 960, 64, 64])]; tensor reshape_205_cast_fp16 = reshape(shape = reshape_205_shape_0, x = real_div_51_cast_fp16)[name = tensor("reshape_205_cast_fp16")]; tensor add_103_gamma_0_to_fp16 = const()[name = tensor("add_103_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1681464768)))]; tensor add_103_beta_0_to_fp16 = const()[name = tensor("add_103_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1681466752)))]; tensor add_103_epsilon_0_to_fp16 = const()[name = tensor("add_103_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_103_cast_fp16 = batch_norm(beta = add_103_beta_0_to_fp16, epsilon = add_103_epsilon_0_to_fp16, gamma = add_103_gamma_0_to_fp16, mean = add_97_mean_0_to_fp16, variance = add_97_variance_0_to_fp16, x = reshape_205_cast_fp16)[name = tensor("add_103_cast_fp16")]; tensor input_453_cast_fp16 = silu(x = add_103_cast_fp16)[name = tensor("input_453_cast_fp16")]; tensor var_10402 = const()[name = tensor("op_10402"), val = tensor([1, 1])]; tensor var_10404 = const()[name = tensor("op_10404"), val = tensor([1, 1])]; tensor hidden_states_279_pad_type_0 = const()[name = tensor("hidden_states_279_pad_type_0"), val = tensor("custom")]; tensor hidden_states_279_pad_0 = const()[name = tensor("hidden_states_279_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_3_resnets_0_conv1_weight_to_fp16 = const()[name = tensor("up_blocks_3_resnets_0_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1681468736)))]; tensor up_blocks_3_resnets_0_conv1_bias_to_fp16 = const()[name = tensor("up_blocks_3_resnets_0_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1686998400)))]; tensor hidden_states_279_cast_fp16 = conv(bias = up_blocks_3_resnets_0_conv1_bias_to_fp16, dilations = var_10404, groups = var_10375, pad = hidden_states_279_pad_0, pad_type = hidden_states_279_pad_type_0, strides = var_10402, weight = up_blocks_3_resnets_0_conv1_weight_to_fp16, x = input_453_cast_fp16)[name = tensor("hidden_states_279_cast_fp16")]; tensor var_10410 = const()[name = tensor("op_10410"), val = tensor([1, 1])]; tensor var_10412 = const()[name = tensor("op_10412"), val = tensor([1, 1])]; tensor temb_39_pad_type_0 = const()[name = tensor("temb_39_pad_type_0"), val = tensor("custom")]; tensor temb_39_pad_0 = const()[name = tensor("temb_39_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_resnets_0_time_emb_proj_weight_to_fp16 = const()[name = tensor("up_blocks_3_resnets_0_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1686999104)))]; tensor up_blocks_3_resnets_0_time_emb_proj_bias_to_fp16 = const()[name = tensor("up_blocks_3_resnets_0_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1687818368)))]; tensor temb_39_cast_fp16 = conv(bias = up_blocks_3_resnets_0_time_emb_proj_bias_to_fp16, dilations = var_10412, groups = var_10375, pad = temb_39_pad_0, pad_type = temb_39_pad_type_0, strides = var_10410, weight = up_blocks_3_resnets_0_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_39_cast_fp16")]; tensor input_457_cast_fp16 = add(x = hidden_states_279_cast_fp16, y = temb_39_cast_fp16)[name = tensor("input_457_cast_fp16")]; tensor reshape_208_shape_0 = const()[name = tensor("reshape_208_shape_0"), val = tensor([2, 32, 10, 64, 64])]; tensor reshape_208_cast_fp16 = reshape(shape = reshape_208_shape_0, x = input_457_cast_fp16)[name = tensor("reshape_208_cast_fp16")]; tensor reduce_mean_156_axes_0 = const()[name = tensor("reduce_mean_156_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_156_keep_dims_0 = const()[name = tensor("reduce_mean_156_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_156_cast_fp16 = reduce_mean(axes = reduce_mean_156_axes_0, keep_dims = reduce_mean_156_keep_dims_0, x = reshape_208_cast_fp16)[name = tensor("reduce_mean_156_cast_fp16")]; tensor sub_104_cast_fp16 = sub(x = reshape_208_cast_fp16, y = reduce_mean_156_cast_fp16)[name = tensor("sub_104_cast_fp16")]; tensor square_52_cast_fp16 = square(x = sub_104_cast_fp16)[name = tensor("square_52_cast_fp16")]; tensor reduce_mean_158_axes_0 = const()[name = tensor("reduce_mean_158_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_158_keep_dims_0 = const()[name = tensor("reduce_mean_158_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_158_cast_fp16 = reduce_mean(axes = reduce_mean_158_axes_0, keep_dims = reduce_mean_158_keep_dims_0, x = square_52_cast_fp16)[name = tensor("reduce_mean_158_cast_fp16")]; tensor add_104_y_0_to_fp16 = const()[name = tensor("add_104_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_104_cast_fp16 = add(x = reduce_mean_158_cast_fp16, y = add_104_y_0_to_fp16)[name = tensor("add_104_cast_fp16")]; tensor sqrt_52_cast_fp16 = sqrt(x = add_104_cast_fp16)[name = tensor("sqrt_52_cast_fp16")]; tensor real_div_52_cast_fp16 = real_div(x = sub_104_cast_fp16, y = sqrt_52_cast_fp16)[name = tensor("real_div_52_cast_fp16")]; tensor reshape_209_shape_0 = const()[name = tensor("reshape_209_shape_0"), val = tensor([2, 320, 64, 64])]; tensor reshape_209_cast_fp16 = reshape(shape = reshape_209_shape_0, x = real_div_52_cast_fp16)[name = tensor("reshape_209_cast_fp16")]; tensor add_105_gamma_0_to_fp16 = const()[name = tensor("add_105_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1687819072)))]; tensor add_105_beta_0_to_fp16 = const()[name = tensor("add_105_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1687819776)))]; tensor add_105_epsilon_0_to_fp16 = const()[name = tensor("add_105_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_105_cast_fp16 = batch_norm(beta = add_105_beta_0_to_fp16, epsilon = add_105_epsilon_0_to_fp16, gamma = add_105_gamma_0_to_fp16, mean = add_1_mean_0_to_fp16, variance = add_1_variance_0_to_fp16, x = reshape_209_cast_fp16)[name = tensor("add_105_cast_fp16")]; tensor input_461_cast_fp16 = silu(x = add_105_cast_fp16)[name = tensor("input_461_cast_fp16")]; tensor var_10422 = const()[name = tensor("op_10422"), val = tensor([1, 1])]; tensor var_10424 = const()[name = tensor("op_10424"), val = tensor([1, 1])]; tensor hidden_states_281_pad_type_0 = const()[name = tensor("hidden_states_281_pad_type_0"), val = tensor("custom")]; tensor hidden_states_281_pad_0 = const()[name = tensor("hidden_states_281_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_3_resnets_0_conv2_weight_to_fp16 = const()[name = tensor("up_blocks_3_resnets_0_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1687820480)))]; tensor up_blocks_3_resnets_0_conv2_bias_to_fp16 = const()[name = tensor("up_blocks_3_resnets_0_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1689663744)))]; tensor hidden_states_281_cast_fp16 = conv(bias = up_blocks_3_resnets_0_conv2_bias_to_fp16, dilations = var_10424, groups = var_10375, pad = hidden_states_281_pad_0, pad_type = hidden_states_281_pad_type_0, strides = var_10422, weight = up_blocks_3_resnets_0_conv2_weight_to_fp16, x = input_461_cast_fp16)[name = tensor("hidden_states_281_cast_fp16")]; tensor var_10429 = const()[name = tensor("op_10429"), val = tensor([1, 1])]; tensor var_10431 = const()[name = tensor("op_10431"), val = tensor([1, 1])]; tensor x_23_pad_type_0 = const()[name = tensor("x_23_pad_type_0"), val = tensor("custom")]; tensor x_23_pad_0 = const()[name = tensor("x_23_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_resnets_0_conv_shortcut_weight_to_fp16 = const()[name = tensor("up_blocks_3_resnets_0_conv_shortcut_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1689664448)))]; tensor up_blocks_3_resnets_0_conv_shortcut_bias_to_fp16 = const()[name = tensor("up_blocks_3_resnets_0_conv_shortcut_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1690278912)))]; tensor x_23_cast_fp16 = conv(bias = up_blocks_3_resnets_0_conv_shortcut_bias_to_fp16, dilations = var_10431, groups = var_10375, pad = x_23_pad_0, pad_type = x_23_pad_type_0, strides = var_10429, weight = up_blocks_3_resnets_0_conv_shortcut_weight_to_fp16, x = input_449_cast_fp16)[name = tensor("x_23_cast_fp16")]; tensor hidden_states_283_cast_fp16 = add(x = x_23_cast_fp16, y = hidden_states_281_cast_fp16)[name = tensor("hidden_states_283_cast_fp16")]; tensor reshape_212_shape_0 = const()[name = tensor("reshape_212_shape_0"), val = tensor([2, 32, 10, 64, 64])]; tensor reshape_212_cast_fp16 = reshape(shape = reshape_212_shape_0, x = hidden_states_283_cast_fp16)[name = tensor("reshape_212_cast_fp16")]; tensor reduce_mean_159_axes_0 = const()[name = tensor("reduce_mean_159_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_159_keep_dims_0 = const()[name = tensor("reduce_mean_159_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_159_cast_fp16 = reduce_mean(axes = reduce_mean_159_axes_0, keep_dims = reduce_mean_159_keep_dims_0, x = reshape_212_cast_fp16)[name = tensor("reduce_mean_159_cast_fp16")]; tensor sub_106_cast_fp16 = sub(x = reshape_212_cast_fp16, y = reduce_mean_159_cast_fp16)[name = tensor("sub_106_cast_fp16")]; tensor square_53_cast_fp16 = square(x = sub_106_cast_fp16)[name = tensor("square_53_cast_fp16")]; tensor reduce_mean_161_axes_0 = const()[name = tensor("reduce_mean_161_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_161_keep_dims_0 = const()[name = tensor("reduce_mean_161_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_161_cast_fp16 = reduce_mean(axes = reduce_mean_161_axes_0, keep_dims = reduce_mean_161_keep_dims_0, x = square_53_cast_fp16)[name = tensor("reduce_mean_161_cast_fp16")]; tensor add_106_y_0_to_fp16 = const()[name = tensor("add_106_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_106_cast_fp16 = add(x = reduce_mean_161_cast_fp16, y = add_106_y_0_to_fp16)[name = tensor("add_106_cast_fp16")]; tensor sqrt_53_cast_fp16 = sqrt(x = add_106_cast_fp16)[name = tensor("sqrt_53_cast_fp16")]; tensor real_div_53_cast_fp16 = real_div(x = sub_106_cast_fp16, y = sqrt_53_cast_fp16)[name = tensor("real_div_53_cast_fp16")]; tensor reshape_213_shape_0 = const()[name = tensor("reshape_213_shape_0"), val = tensor([2, 320, 64, 64])]; tensor reshape_213_cast_fp16 = reshape(shape = reshape_213_shape_0, x = real_div_53_cast_fp16)[name = tensor("reshape_213_cast_fp16")]; tensor add_107_gamma_0_to_fp16 = const()[name = tensor("add_107_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1690279616)))]; tensor add_107_beta_0_to_fp16 = const()[name = tensor("add_107_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1690280320)))]; tensor add_107_epsilon_0_to_fp16 = const()[name = tensor("add_107_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_107_cast_fp16 = batch_norm(beta = add_107_beta_0_to_fp16, epsilon = add_107_epsilon_0_to_fp16, gamma = add_107_gamma_0_to_fp16, mean = add_1_mean_0_to_fp16, variance = add_1_variance_0_to_fp16, x = reshape_213_cast_fp16)[name = tensor("add_107_cast_fp16")]; tensor var_10451 = const()[name = tensor("op_10451"), val = tensor([1, 1])]; tensor var_10453 = const()[name = tensor("op_10453"), val = tensor([1, 1])]; tensor hidden_states_285_pad_type_0 = const()[name = tensor("hidden_states_285_pad_type_0"), val = tensor("custom")]; tensor hidden_states_285_pad_0 = const()[name = tensor("hidden_states_285_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_0_proj_in_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1690281024)))]; tensor up_blocks_3_attentions_0_proj_in_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1690485888)))]; tensor hidden_states_285_cast_fp16 = conv(bias = up_blocks_3_attentions_0_proj_in_bias_to_fp16, dilations = var_10453, groups = var_10375, pad = hidden_states_285_pad_0, pad_type = hidden_states_285_pad_type_0, strides = var_10451, weight = up_blocks_3_attentions_0_proj_in_weight_to_fp16, x = add_107_cast_fp16)[name = tensor("hidden_states_285_cast_fp16")]; tensor var_10458 = const()[name = tensor("op_10458"), val = tensor([2, 320, 1, 4096])]; tensor inputs_79_cast_fp16 = reshape(shape = var_10458, x = hidden_states_285_cast_fp16)[name = tensor("inputs_79_cast_fp16")]; tensor hidden_states_287_axes_0 = const()[name = tensor("hidden_states_287_axes_0"), val = tensor([1])]; tensor hidden_states_287_gamma_0_to_fp16 = const()[name = tensor("hidden_states_287_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1690486592)))]; tensor hidden_states_287_beta_0_to_fp16 = const()[name = tensor("hidden_states_287_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1690487296)))]; tensor var_10474_to_fp16 = const()[name = tensor("op_10474_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_287_cast_fp16 = layer_norm(axes = hidden_states_287_axes_0, beta = hidden_states_287_beta_0_to_fp16, epsilon = var_10474_to_fp16, gamma = hidden_states_287_gamma_0_to_fp16, x = inputs_79_cast_fp16)[name = tensor("hidden_states_287_cast_fp16")]; tensor var_10489 = const()[name = tensor("op_10489"), val = tensor([1, 1])]; tensor var_10491 = const()[name = tensor("op_10491"), val = tensor([1, 1])]; tensor q_53_pad_type_0 = const()[name = tensor("q_53_pad_type_0"), val = tensor("custom")]; tensor q_53_pad_0 = const()[name = tensor("q_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1690488000)))]; tensor q_53_cast_fp16 = conv(dilations = var_10491, groups = var_10375, pad = q_53_pad_0, pad_type = q_53_pad_type_0, strides = var_10489, weight = up_blocks_3_attentions_0_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_287_cast_fp16)[name = tensor("q_53_cast_fp16")]; tensor var_10495 = const()[name = tensor("op_10495"), val = tensor([1, 1])]; tensor var_10497 = const()[name = tensor("op_10497"), val = tensor([1, 1])]; tensor k_105_pad_type_0 = const()[name = tensor("k_105_pad_type_0"), val = tensor("custom")]; tensor k_105_pad_0 = const()[name = tensor("k_105_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1690692864)))]; tensor k_105_cast_fp16 = conv(dilations = var_10497, groups = var_10375, pad = k_105_pad_0, pad_type = k_105_pad_type_0, strides = var_10495, weight = up_blocks_3_attentions_0_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_287_cast_fp16)[name = tensor("k_105_cast_fp16")]; tensor var_10501 = const()[name = tensor("op_10501"), val = tensor([1, 1])]; tensor var_10503 = const()[name = tensor("op_10503"), val = tensor([1, 1])]; tensor v_53_pad_type_0 = const()[name = tensor("v_53_pad_type_0"), val = tensor("custom")]; tensor v_53_pad_0 = const()[name = tensor("v_53_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1690897728)))]; tensor v_53_cast_fp16 = conv(dilations = var_10503, groups = var_10375, pad = v_53_pad_0, pad_type = v_53_pad_type_0, strides = var_10501, weight = up_blocks_3_attentions_0_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_287_cast_fp16)[name = tensor("v_53_cast_fp16")]; tensor var_10507_begin_0 = const()[name = tensor("op_10507_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10507_end_0 = const()[name = tensor("op_10507_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_10507_end_mask_0 = const()[name = tensor("op_10507_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10507_cast_fp16 = slice_by_index(begin = var_10507_begin_0, end = var_10507_end_0, end_mask = var_10507_end_mask_0, x = q_53_cast_fp16)[name = tensor("op_10507_cast_fp16")]; tensor var_10511_begin_0 = const()[name = tensor("op_10511_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_10511_end_0 = const()[name = tensor("op_10511_end_0"), val = tensor([2, 80, 1, 4096])]; tensor var_10511_end_mask_0 = const()[name = tensor("op_10511_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10511_cast_fp16 = slice_by_index(begin = var_10511_begin_0, end = var_10511_end_0, end_mask = var_10511_end_mask_0, x = q_53_cast_fp16)[name = tensor("op_10511_cast_fp16")]; tensor var_10515_begin_0 = const()[name = tensor("op_10515_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_10515_end_0 = const()[name = tensor("op_10515_end_0"), val = tensor([2, 120, 1, 4096])]; tensor var_10515_end_mask_0 = const()[name = tensor("op_10515_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10515_cast_fp16 = slice_by_index(begin = var_10515_begin_0, end = var_10515_end_0, end_mask = var_10515_end_mask_0, x = q_53_cast_fp16)[name = tensor("op_10515_cast_fp16")]; tensor var_10519_begin_0 = const()[name = tensor("op_10519_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_10519_end_0 = const()[name = tensor("op_10519_end_0"), val = tensor([2, 160, 1, 4096])]; tensor var_10519_end_mask_0 = const()[name = tensor("op_10519_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10519_cast_fp16 = slice_by_index(begin = var_10519_begin_0, end = var_10519_end_0, end_mask = var_10519_end_mask_0, x = q_53_cast_fp16)[name = tensor("op_10519_cast_fp16")]; tensor var_10523_begin_0 = const()[name = tensor("op_10523_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_10523_end_0 = const()[name = tensor("op_10523_end_0"), val = tensor([2, 200, 1, 4096])]; tensor var_10523_end_mask_0 = const()[name = tensor("op_10523_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10523_cast_fp16 = slice_by_index(begin = var_10523_begin_0, end = var_10523_end_0, end_mask = var_10523_end_mask_0, x = q_53_cast_fp16)[name = tensor("op_10523_cast_fp16")]; tensor var_10527_begin_0 = const()[name = tensor("op_10527_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_10527_end_0 = const()[name = tensor("op_10527_end_0"), val = tensor([2, 240, 1, 4096])]; tensor var_10527_end_mask_0 = const()[name = tensor("op_10527_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10527_cast_fp16 = slice_by_index(begin = var_10527_begin_0, end = var_10527_end_0, end_mask = var_10527_end_mask_0, x = q_53_cast_fp16)[name = tensor("op_10527_cast_fp16")]; tensor var_10531_begin_0 = const()[name = tensor("op_10531_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_10531_end_0 = const()[name = tensor("op_10531_end_0"), val = tensor([2, 280, 1, 4096])]; tensor var_10531_end_mask_0 = const()[name = tensor("op_10531_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10531_cast_fp16 = slice_by_index(begin = var_10531_begin_0, end = var_10531_end_0, end_mask = var_10531_end_mask_0, x = q_53_cast_fp16)[name = tensor("op_10531_cast_fp16")]; tensor var_10535_begin_0 = const()[name = tensor("op_10535_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_10535_end_0 = const()[name = tensor("op_10535_end_0"), val = tensor([2, 320, 1, 4096])]; tensor var_10535_end_mask_0 = const()[name = tensor("op_10535_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10535_cast_fp16 = slice_by_index(begin = var_10535_begin_0, end = var_10535_end_0, end_mask = var_10535_end_mask_0, x = q_53_cast_fp16)[name = tensor("op_10535_cast_fp16")]; tensor var_10538_begin_0 = const()[name = tensor("op_10538_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10538_end_0 = const()[name = tensor("op_10538_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_10538_end_mask_0 = const()[name = tensor("op_10538_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10538_cast_fp16 = slice_by_index(begin = var_10538_begin_0, end = var_10538_end_0, end_mask = var_10538_end_mask_0, x = var_10507_cast_fp16)[name = tensor("op_10538_cast_fp16")]; tensor var_10539_begin_0 = const()[name = tensor("op_10539_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10539_end_0 = const()[name = tensor("op_10539_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_10539_end_mask_0 = const()[name = tensor("op_10539_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10539_cast_fp16 = slice_by_index(begin = var_10539_begin_0, end = var_10539_end_0, end_mask = var_10539_end_mask_0, x = var_10507_cast_fp16)[name = tensor("op_10539_cast_fp16")]; tensor var_10540_begin_0 = const()[name = tensor("op_10540_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_10540_end_0 = const()[name = tensor("op_10540_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_10540_end_mask_0 = const()[name = tensor("op_10540_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10540_cast_fp16 = slice_by_index(begin = var_10540_begin_0, end = var_10540_end_0, end_mask = var_10540_end_mask_0, x = var_10507_cast_fp16)[name = tensor("op_10540_cast_fp16")]; tensor var_10541_begin_0 = const()[name = tensor("op_10541_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_10541_end_0 = const()[name = tensor("op_10541_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_10541_end_mask_0 = const()[name = tensor("op_10541_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10541_cast_fp16 = slice_by_index(begin = var_10541_begin_0, end = var_10541_end_0, end_mask = var_10541_end_mask_0, x = var_10507_cast_fp16)[name = tensor("op_10541_cast_fp16")]; tensor var_10542_begin_0 = const()[name = tensor("op_10542_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_10542_end_0 = const()[name = tensor("op_10542_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_10542_end_mask_0 = const()[name = tensor("op_10542_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10542_cast_fp16 = slice_by_index(begin = var_10542_begin_0, end = var_10542_end_0, end_mask = var_10542_end_mask_0, x = var_10507_cast_fp16)[name = tensor("op_10542_cast_fp16")]; tensor var_10543_begin_0 = const()[name = tensor("op_10543_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_10543_end_0 = const()[name = tensor("op_10543_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_10543_end_mask_0 = const()[name = tensor("op_10543_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10543_cast_fp16 = slice_by_index(begin = var_10543_begin_0, end = var_10543_end_0, end_mask = var_10543_end_mask_0, x = var_10507_cast_fp16)[name = tensor("op_10543_cast_fp16")]; tensor var_10544_begin_0 = const()[name = tensor("op_10544_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_10544_end_0 = const()[name = tensor("op_10544_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_10544_end_mask_0 = const()[name = tensor("op_10544_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10544_cast_fp16 = slice_by_index(begin = var_10544_begin_0, end = var_10544_end_0, end_mask = var_10544_end_mask_0, x = var_10507_cast_fp16)[name = tensor("op_10544_cast_fp16")]; tensor var_10545_begin_0 = const()[name = tensor("op_10545_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_10545_end_0 = const()[name = tensor("op_10545_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_10545_end_mask_0 = const()[name = tensor("op_10545_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10545_cast_fp16 = slice_by_index(begin = var_10545_begin_0, end = var_10545_end_0, end_mask = var_10545_end_mask_0, x = var_10507_cast_fp16)[name = tensor("op_10545_cast_fp16")]; tensor var_10546_begin_0 = const()[name = tensor("op_10546_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10546_end_0 = const()[name = tensor("op_10546_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_10546_end_mask_0 = const()[name = tensor("op_10546_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10546_cast_fp16 = slice_by_index(begin = var_10546_begin_0, end = var_10546_end_0, end_mask = var_10546_end_mask_0, x = var_10511_cast_fp16)[name = tensor("op_10546_cast_fp16")]; tensor var_10547_begin_0 = const()[name = tensor("op_10547_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10547_end_0 = const()[name = tensor("op_10547_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_10547_end_mask_0 = const()[name = tensor("op_10547_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10547_cast_fp16 = slice_by_index(begin = var_10547_begin_0, end = var_10547_end_0, end_mask = var_10547_end_mask_0, x = var_10511_cast_fp16)[name = tensor("op_10547_cast_fp16")]; tensor var_10548_begin_0 = const()[name = tensor("op_10548_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_10548_end_0 = const()[name = tensor("op_10548_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_10548_end_mask_0 = const()[name = tensor("op_10548_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10548_cast_fp16 = slice_by_index(begin = var_10548_begin_0, end = var_10548_end_0, end_mask = var_10548_end_mask_0, x = var_10511_cast_fp16)[name = tensor("op_10548_cast_fp16")]; tensor var_10549_begin_0 = const()[name = tensor("op_10549_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_10549_end_0 = const()[name = tensor("op_10549_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_10549_end_mask_0 = const()[name = tensor("op_10549_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10549_cast_fp16 = slice_by_index(begin = var_10549_begin_0, end = var_10549_end_0, end_mask = var_10549_end_mask_0, x = var_10511_cast_fp16)[name = tensor("op_10549_cast_fp16")]; tensor var_10550_begin_0 = const()[name = tensor("op_10550_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_10550_end_0 = const()[name = tensor("op_10550_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_10550_end_mask_0 = const()[name = tensor("op_10550_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10550_cast_fp16 = slice_by_index(begin = var_10550_begin_0, end = var_10550_end_0, end_mask = var_10550_end_mask_0, x = var_10511_cast_fp16)[name = tensor("op_10550_cast_fp16")]; tensor var_10551_begin_0 = const()[name = tensor("op_10551_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_10551_end_0 = const()[name = tensor("op_10551_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_10551_end_mask_0 = const()[name = tensor("op_10551_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10551_cast_fp16 = slice_by_index(begin = var_10551_begin_0, end = var_10551_end_0, end_mask = var_10551_end_mask_0, x = var_10511_cast_fp16)[name = tensor("op_10551_cast_fp16")]; tensor var_10552_begin_0 = const()[name = tensor("op_10552_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_10552_end_0 = const()[name = tensor("op_10552_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_10552_end_mask_0 = const()[name = tensor("op_10552_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10552_cast_fp16 = slice_by_index(begin = var_10552_begin_0, end = var_10552_end_0, end_mask = var_10552_end_mask_0, x = var_10511_cast_fp16)[name = tensor("op_10552_cast_fp16")]; tensor var_10553_begin_0 = const()[name = tensor("op_10553_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_10553_end_0 = const()[name = tensor("op_10553_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_10553_end_mask_0 = const()[name = tensor("op_10553_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10553_cast_fp16 = slice_by_index(begin = var_10553_begin_0, end = var_10553_end_0, end_mask = var_10553_end_mask_0, x = var_10511_cast_fp16)[name = tensor("op_10553_cast_fp16")]; tensor var_10554_begin_0 = const()[name = tensor("op_10554_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10554_end_0 = const()[name = tensor("op_10554_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_10554_end_mask_0 = const()[name = tensor("op_10554_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10554_cast_fp16 = slice_by_index(begin = var_10554_begin_0, end = var_10554_end_0, end_mask = var_10554_end_mask_0, x = var_10515_cast_fp16)[name = tensor("op_10554_cast_fp16")]; tensor var_10555_begin_0 = const()[name = tensor("op_10555_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10555_end_0 = const()[name = tensor("op_10555_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_10555_end_mask_0 = const()[name = tensor("op_10555_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10555_cast_fp16 = slice_by_index(begin = var_10555_begin_0, end = var_10555_end_0, end_mask = var_10555_end_mask_0, x = var_10515_cast_fp16)[name = tensor("op_10555_cast_fp16")]; tensor var_10556_begin_0 = const()[name = tensor("op_10556_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_10556_end_0 = const()[name = tensor("op_10556_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_10556_end_mask_0 = const()[name = tensor("op_10556_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10556_cast_fp16 = slice_by_index(begin = var_10556_begin_0, end = var_10556_end_0, end_mask = var_10556_end_mask_0, x = var_10515_cast_fp16)[name = tensor("op_10556_cast_fp16")]; tensor var_10557_begin_0 = const()[name = tensor("op_10557_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_10557_end_0 = const()[name = tensor("op_10557_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_10557_end_mask_0 = const()[name = tensor("op_10557_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10557_cast_fp16 = slice_by_index(begin = var_10557_begin_0, end = var_10557_end_0, end_mask = var_10557_end_mask_0, x = var_10515_cast_fp16)[name = tensor("op_10557_cast_fp16")]; tensor var_10558_begin_0 = const()[name = tensor("op_10558_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_10558_end_0 = const()[name = tensor("op_10558_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_10558_end_mask_0 = const()[name = tensor("op_10558_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10558_cast_fp16 = slice_by_index(begin = var_10558_begin_0, end = var_10558_end_0, end_mask = var_10558_end_mask_0, x = var_10515_cast_fp16)[name = tensor("op_10558_cast_fp16")]; tensor var_10559_begin_0 = const()[name = tensor("op_10559_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_10559_end_0 = const()[name = tensor("op_10559_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_10559_end_mask_0 = const()[name = tensor("op_10559_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10559_cast_fp16 = slice_by_index(begin = var_10559_begin_0, end = var_10559_end_0, end_mask = var_10559_end_mask_0, x = var_10515_cast_fp16)[name = tensor("op_10559_cast_fp16")]; tensor var_10560_begin_0 = const()[name = tensor("op_10560_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_10560_end_0 = const()[name = tensor("op_10560_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_10560_end_mask_0 = const()[name = tensor("op_10560_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10560_cast_fp16 = slice_by_index(begin = var_10560_begin_0, end = var_10560_end_0, end_mask = var_10560_end_mask_0, x = var_10515_cast_fp16)[name = tensor("op_10560_cast_fp16")]; tensor var_10561_begin_0 = const()[name = tensor("op_10561_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_10561_end_0 = const()[name = tensor("op_10561_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_10561_end_mask_0 = const()[name = tensor("op_10561_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10561_cast_fp16 = slice_by_index(begin = var_10561_begin_0, end = var_10561_end_0, end_mask = var_10561_end_mask_0, x = var_10515_cast_fp16)[name = tensor("op_10561_cast_fp16")]; tensor var_10562_begin_0 = const()[name = tensor("op_10562_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10562_end_0 = const()[name = tensor("op_10562_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_10562_end_mask_0 = const()[name = tensor("op_10562_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10562_cast_fp16 = slice_by_index(begin = var_10562_begin_0, end = var_10562_end_0, end_mask = var_10562_end_mask_0, x = var_10519_cast_fp16)[name = tensor("op_10562_cast_fp16")]; tensor var_10563_begin_0 = const()[name = tensor("op_10563_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10563_end_0 = const()[name = tensor("op_10563_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_10563_end_mask_0 = const()[name = tensor("op_10563_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10563_cast_fp16 = slice_by_index(begin = var_10563_begin_0, end = var_10563_end_0, end_mask = var_10563_end_mask_0, x = var_10519_cast_fp16)[name = tensor("op_10563_cast_fp16")]; tensor var_10564_begin_0 = const()[name = tensor("op_10564_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_10564_end_0 = const()[name = tensor("op_10564_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_10564_end_mask_0 = const()[name = tensor("op_10564_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10564_cast_fp16 = slice_by_index(begin = var_10564_begin_0, end = var_10564_end_0, end_mask = var_10564_end_mask_0, x = var_10519_cast_fp16)[name = tensor("op_10564_cast_fp16")]; tensor var_10565_begin_0 = const()[name = tensor("op_10565_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_10565_end_0 = const()[name = tensor("op_10565_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_10565_end_mask_0 = const()[name = tensor("op_10565_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10565_cast_fp16 = slice_by_index(begin = var_10565_begin_0, end = var_10565_end_0, end_mask = var_10565_end_mask_0, x = var_10519_cast_fp16)[name = tensor("op_10565_cast_fp16")]; tensor var_10566_begin_0 = const()[name = tensor("op_10566_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_10566_end_0 = const()[name = tensor("op_10566_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_10566_end_mask_0 = const()[name = tensor("op_10566_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10566_cast_fp16 = slice_by_index(begin = var_10566_begin_0, end = var_10566_end_0, end_mask = var_10566_end_mask_0, x = var_10519_cast_fp16)[name = tensor("op_10566_cast_fp16")]; tensor var_10567_begin_0 = const()[name = tensor("op_10567_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_10567_end_0 = const()[name = tensor("op_10567_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_10567_end_mask_0 = const()[name = tensor("op_10567_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10567_cast_fp16 = slice_by_index(begin = var_10567_begin_0, end = var_10567_end_0, end_mask = var_10567_end_mask_0, x = var_10519_cast_fp16)[name = tensor("op_10567_cast_fp16")]; tensor var_10568_begin_0 = const()[name = tensor("op_10568_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_10568_end_0 = const()[name = tensor("op_10568_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_10568_end_mask_0 = const()[name = tensor("op_10568_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10568_cast_fp16 = slice_by_index(begin = var_10568_begin_0, end = var_10568_end_0, end_mask = var_10568_end_mask_0, x = var_10519_cast_fp16)[name = tensor("op_10568_cast_fp16")]; tensor var_10569_begin_0 = const()[name = tensor("op_10569_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_10569_end_0 = const()[name = tensor("op_10569_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_10569_end_mask_0 = const()[name = tensor("op_10569_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10569_cast_fp16 = slice_by_index(begin = var_10569_begin_0, end = var_10569_end_0, end_mask = var_10569_end_mask_0, x = var_10519_cast_fp16)[name = tensor("op_10569_cast_fp16")]; tensor var_10570_begin_0 = const()[name = tensor("op_10570_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10570_end_0 = const()[name = tensor("op_10570_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_10570_end_mask_0 = const()[name = tensor("op_10570_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10570_cast_fp16 = slice_by_index(begin = var_10570_begin_0, end = var_10570_end_0, end_mask = var_10570_end_mask_0, x = var_10523_cast_fp16)[name = tensor("op_10570_cast_fp16")]; tensor var_10571_begin_0 = const()[name = tensor("op_10571_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10571_end_0 = const()[name = tensor("op_10571_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_10571_end_mask_0 = const()[name = tensor("op_10571_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10571_cast_fp16 = slice_by_index(begin = var_10571_begin_0, end = var_10571_end_0, end_mask = var_10571_end_mask_0, x = var_10523_cast_fp16)[name = tensor("op_10571_cast_fp16")]; tensor var_10572_begin_0 = const()[name = tensor("op_10572_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_10572_end_0 = const()[name = tensor("op_10572_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_10572_end_mask_0 = const()[name = tensor("op_10572_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10572_cast_fp16 = slice_by_index(begin = var_10572_begin_0, end = var_10572_end_0, end_mask = var_10572_end_mask_0, x = var_10523_cast_fp16)[name = tensor("op_10572_cast_fp16")]; tensor var_10573_begin_0 = const()[name = tensor("op_10573_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_10573_end_0 = const()[name = tensor("op_10573_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_10573_end_mask_0 = const()[name = tensor("op_10573_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10573_cast_fp16 = slice_by_index(begin = var_10573_begin_0, end = var_10573_end_0, end_mask = var_10573_end_mask_0, x = var_10523_cast_fp16)[name = tensor("op_10573_cast_fp16")]; tensor var_10574_begin_0 = const()[name = tensor("op_10574_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_10574_end_0 = const()[name = tensor("op_10574_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_10574_end_mask_0 = const()[name = tensor("op_10574_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10574_cast_fp16 = slice_by_index(begin = var_10574_begin_0, end = var_10574_end_0, end_mask = var_10574_end_mask_0, x = var_10523_cast_fp16)[name = tensor("op_10574_cast_fp16")]; tensor var_10575_begin_0 = const()[name = tensor("op_10575_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_10575_end_0 = const()[name = tensor("op_10575_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_10575_end_mask_0 = const()[name = tensor("op_10575_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10575_cast_fp16 = slice_by_index(begin = var_10575_begin_0, end = var_10575_end_0, end_mask = var_10575_end_mask_0, x = var_10523_cast_fp16)[name = tensor("op_10575_cast_fp16")]; tensor var_10576_begin_0 = const()[name = tensor("op_10576_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_10576_end_0 = const()[name = tensor("op_10576_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_10576_end_mask_0 = const()[name = tensor("op_10576_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10576_cast_fp16 = slice_by_index(begin = var_10576_begin_0, end = var_10576_end_0, end_mask = var_10576_end_mask_0, x = var_10523_cast_fp16)[name = tensor("op_10576_cast_fp16")]; tensor var_10577_begin_0 = const()[name = tensor("op_10577_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_10577_end_0 = const()[name = tensor("op_10577_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_10577_end_mask_0 = const()[name = tensor("op_10577_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10577_cast_fp16 = slice_by_index(begin = var_10577_begin_0, end = var_10577_end_0, end_mask = var_10577_end_mask_0, x = var_10523_cast_fp16)[name = tensor("op_10577_cast_fp16")]; tensor var_10578_begin_0 = const()[name = tensor("op_10578_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10578_end_0 = const()[name = tensor("op_10578_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_10578_end_mask_0 = const()[name = tensor("op_10578_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10578_cast_fp16 = slice_by_index(begin = var_10578_begin_0, end = var_10578_end_0, end_mask = var_10578_end_mask_0, x = var_10527_cast_fp16)[name = tensor("op_10578_cast_fp16")]; tensor var_10579_begin_0 = const()[name = tensor("op_10579_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10579_end_0 = const()[name = tensor("op_10579_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_10579_end_mask_0 = const()[name = tensor("op_10579_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10579_cast_fp16 = slice_by_index(begin = var_10579_begin_0, end = var_10579_end_0, end_mask = var_10579_end_mask_0, x = var_10527_cast_fp16)[name = tensor("op_10579_cast_fp16")]; tensor var_10580_begin_0 = const()[name = tensor("op_10580_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_10580_end_0 = const()[name = tensor("op_10580_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_10580_end_mask_0 = const()[name = tensor("op_10580_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10580_cast_fp16 = slice_by_index(begin = var_10580_begin_0, end = var_10580_end_0, end_mask = var_10580_end_mask_0, x = var_10527_cast_fp16)[name = tensor("op_10580_cast_fp16")]; tensor var_10581_begin_0 = const()[name = tensor("op_10581_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_10581_end_0 = const()[name = tensor("op_10581_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_10581_end_mask_0 = const()[name = tensor("op_10581_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10581_cast_fp16 = slice_by_index(begin = var_10581_begin_0, end = var_10581_end_0, end_mask = var_10581_end_mask_0, x = var_10527_cast_fp16)[name = tensor("op_10581_cast_fp16")]; tensor var_10582_begin_0 = const()[name = tensor("op_10582_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_10582_end_0 = const()[name = tensor("op_10582_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_10582_end_mask_0 = const()[name = tensor("op_10582_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10582_cast_fp16 = slice_by_index(begin = var_10582_begin_0, end = var_10582_end_0, end_mask = var_10582_end_mask_0, x = var_10527_cast_fp16)[name = tensor("op_10582_cast_fp16")]; tensor var_10583_begin_0 = const()[name = tensor("op_10583_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_10583_end_0 = const()[name = tensor("op_10583_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_10583_end_mask_0 = const()[name = tensor("op_10583_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10583_cast_fp16 = slice_by_index(begin = var_10583_begin_0, end = var_10583_end_0, end_mask = var_10583_end_mask_0, x = var_10527_cast_fp16)[name = tensor("op_10583_cast_fp16")]; tensor var_10584_begin_0 = const()[name = tensor("op_10584_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_10584_end_0 = const()[name = tensor("op_10584_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_10584_end_mask_0 = const()[name = tensor("op_10584_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10584_cast_fp16 = slice_by_index(begin = var_10584_begin_0, end = var_10584_end_0, end_mask = var_10584_end_mask_0, x = var_10527_cast_fp16)[name = tensor("op_10584_cast_fp16")]; tensor var_10585_begin_0 = const()[name = tensor("op_10585_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_10585_end_0 = const()[name = tensor("op_10585_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_10585_end_mask_0 = const()[name = tensor("op_10585_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10585_cast_fp16 = slice_by_index(begin = var_10585_begin_0, end = var_10585_end_0, end_mask = var_10585_end_mask_0, x = var_10527_cast_fp16)[name = tensor("op_10585_cast_fp16")]; tensor var_10586_begin_0 = const()[name = tensor("op_10586_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10586_end_0 = const()[name = tensor("op_10586_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_10586_end_mask_0 = const()[name = tensor("op_10586_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10586_cast_fp16 = slice_by_index(begin = var_10586_begin_0, end = var_10586_end_0, end_mask = var_10586_end_mask_0, x = var_10531_cast_fp16)[name = tensor("op_10586_cast_fp16")]; tensor var_10587_begin_0 = const()[name = tensor("op_10587_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10587_end_0 = const()[name = tensor("op_10587_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_10587_end_mask_0 = const()[name = tensor("op_10587_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10587_cast_fp16 = slice_by_index(begin = var_10587_begin_0, end = var_10587_end_0, end_mask = var_10587_end_mask_0, x = var_10531_cast_fp16)[name = tensor("op_10587_cast_fp16")]; tensor var_10588_begin_0 = const()[name = tensor("op_10588_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_10588_end_0 = const()[name = tensor("op_10588_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_10588_end_mask_0 = const()[name = tensor("op_10588_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10588_cast_fp16 = slice_by_index(begin = var_10588_begin_0, end = var_10588_end_0, end_mask = var_10588_end_mask_0, x = var_10531_cast_fp16)[name = tensor("op_10588_cast_fp16")]; tensor var_10589_begin_0 = const()[name = tensor("op_10589_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_10589_end_0 = const()[name = tensor("op_10589_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_10589_end_mask_0 = const()[name = tensor("op_10589_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10589_cast_fp16 = slice_by_index(begin = var_10589_begin_0, end = var_10589_end_0, end_mask = var_10589_end_mask_0, x = var_10531_cast_fp16)[name = tensor("op_10589_cast_fp16")]; tensor var_10590_begin_0 = const()[name = tensor("op_10590_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_10590_end_0 = const()[name = tensor("op_10590_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_10590_end_mask_0 = const()[name = tensor("op_10590_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10590_cast_fp16 = slice_by_index(begin = var_10590_begin_0, end = var_10590_end_0, end_mask = var_10590_end_mask_0, x = var_10531_cast_fp16)[name = tensor("op_10590_cast_fp16")]; tensor var_10591_begin_0 = const()[name = tensor("op_10591_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_10591_end_0 = const()[name = tensor("op_10591_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_10591_end_mask_0 = const()[name = tensor("op_10591_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10591_cast_fp16 = slice_by_index(begin = var_10591_begin_0, end = var_10591_end_0, end_mask = var_10591_end_mask_0, x = var_10531_cast_fp16)[name = tensor("op_10591_cast_fp16")]; tensor var_10592_begin_0 = const()[name = tensor("op_10592_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_10592_end_0 = const()[name = tensor("op_10592_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_10592_end_mask_0 = const()[name = tensor("op_10592_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10592_cast_fp16 = slice_by_index(begin = var_10592_begin_0, end = var_10592_end_0, end_mask = var_10592_end_mask_0, x = var_10531_cast_fp16)[name = tensor("op_10592_cast_fp16")]; tensor var_10593_begin_0 = const()[name = tensor("op_10593_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_10593_end_0 = const()[name = tensor("op_10593_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_10593_end_mask_0 = const()[name = tensor("op_10593_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10593_cast_fp16 = slice_by_index(begin = var_10593_begin_0, end = var_10593_end_0, end_mask = var_10593_end_mask_0, x = var_10531_cast_fp16)[name = tensor("op_10593_cast_fp16")]; tensor var_10594_begin_0 = const()[name = tensor("op_10594_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10594_end_0 = const()[name = tensor("op_10594_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_10594_end_mask_0 = const()[name = tensor("op_10594_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10594_cast_fp16 = slice_by_index(begin = var_10594_begin_0, end = var_10594_end_0, end_mask = var_10594_end_mask_0, x = var_10535_cast_fp16)[name = tensor("op_10594_cast_fp16")]; tensor var_10595_begin_0 = const()[name = tensor("op_10595_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10595_end_0 = const()[name = tensor("op_10595_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_10595_end_mask_0 = const()[name = tensor("op_10595_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10595_cast_fp16 = slice_by_index(begin = var_10595_begin_0, end = var_10595_end_0, end_mask = var_10595_end_mask_0, x = var_10535_cast_fp16)[name = tensor("op_10595_cast_fp16")]; tensor var_10596_begin_0 = const()[name = tensor("op_10596_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_10596_end_0 = const()[name = tensor("op_10596_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_10596_end_mask_0 = const()[name = tensor("op_10596_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10596_cast_fp16 = slice_by_index(begin = var_10596_begin_0, end = var_10596_end_0, end_mask = var_10596_end_mask_0, x = var_10535_cast_fp16)[name = tensor("op_10596_cast_fp16")]; tensor var_10597_begin_0 = const()[name = tensor("op_10597_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_10597_end_0 = const()[name = tensor("op_10597_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_10597_end_mask_0 = const()[name = tensor("op_10597_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10597_cast_fp16 = slice_by_index(begin = var_10597_begin_0, end = var_10597_end_0, end_mask = var_10597_end_mask_0, x = var_10535_cast_fp16)[name = tensor("op_10597_cast_fp16")]; tensor var_10598_begin_0 = const()[name = tensor("op_10598_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_10598_end_0 = const()[name = tensor("op_10598_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_10598_end_mask_0 = const()[name = tensor("op_10598_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10598_cast_fp16 = slice_by_index(begin = var_10598_begin_0, end = var_10598_end_0, end_mask = var_10598_end_mask_0, x = var_10535_cast_fp16)[name = tensor("op_10598_cast_fp16")]; tensor var_10599_begin_0 = const()[name = tensor("op_10599_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_10599_end_0 = const()[name = tensor("op_10599_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_10599_end_mask_0 = const()[name = tensor("op_10599_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10599_cast_fp16 = slice_by_index(begin = var_10599_begin_0, end = var_10599_end_0, end_mask = var_10599_end_mask_0, x = var_10535_cast_fp16)[name = tensor("op_10599_cast_fp16")]; tensor var_10600_begin_0 = const()[name = tensor("op_10600_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_10600_end_0 = const()[name = tensor("op_10600_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_10600_end_mask_0 = const()[name = tensor("op_10600_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10600_cast_fp16 = slice_by_index(begin = var_10600_begin_0, end = var_10600_end_0, end_mask = var_10600_end_mask_0, x = var_10535_cast_fp16)[name = tensor("op_10600_cast_fp16")]; tensor var_10601_begin_0 = const()[name = tensor("op_10601_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_10601_end_0 = const()[name = tensor("op_10601_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_10601_end_mask_0 = const()[name = tensor("op_10601_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10601_cast_fp16 = slice_by_index(begin = var_10601_begin_0, end = var_10601_end_0, end_mask = var_10601_end_mask_0, x = var_10535_cast_fp16)[name = tensor("op_10601_cast_fp16")]; tensor k_107_perm_0 = const()[name = tensor("k_107_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_10606_begin_0 = const()[name = tensor("op_10606_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10606_end_0 = const()[name = tensor("op_10606_end_0"), val = tensor([2, 4096, 1, 40])]; tensor var_10606_end_mask_0 = const()[name = tensor("op_10606_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_5 = transpose(perm = k_107_perm_0, x = k_105_cast_fp16)[name = tensor("transpose_5")]; tensor var_10606_cast_fp16 = slice_by_index(begin = var_10606_begin_0, end = var_10606_end_0, end_mask = var_10606_end_mask_0, x = transpose_5)[name = tensor("op_10606_cast_fp16")]; tensor var_10610_begin_0 = const()[name = tensor("op_10610_begin_0"), val = tensor([0, 0, 0, 40])]; tensor var_10610_end_0 = const()[name = tensor("op_10610_end_0"), val = tensor([2, 4096, 1, 80])]; tensor var_10610_end_mask_0 = const()[name = tensor("op_10610_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10610_cast_fp16 = slice_by_index(begin = var_10610_begin_0, end = var_10610_end_0, end_mask = var_10610_end_mask_0, x = transpose_5)[name = tensor("op_10610_cast_fp16")]; tensor var_10614_begin_0 = const()[name = tensor("op_10614_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_10614_end_0 = const()[name = tensor("op_10614_end_0"), val = tensor([2, 4096, 1, 120])]; tensor var_10614_end_mask_0 = const()[name = tensor("op_10614_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10614_cast_fp16 = slice_by_index(begin = var_10614_begin_0, end = var_10614_end_0, end_mask = var_10614_end_mask_0, x = transpose_5)[name = tensor("op_10614_cast_fp16")]; tensor var_10618_begin_0 = const()[name = tensor("op_10618_begin_0"), val = tensor([0, 0, 0, 120])]; tensor var_10618_end_0 = const()[name = tensor("op_10618_end_0"), val = tensor([2, 4096, 1, 160])]; tensor var_10618_end_mask_0 = const()[name = tensor("op_10618_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10618_cast_fp16 = slice_by_index(begin = var_10618_begin_0, end = var_10618_end_0, end_mask = var_10618_end_mask_0, x = transpose_5)[name = tensor("op_10618_cast_fp16")]; tensor var_10622_begin_0 = const()[name = tensor("op_10622_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_10622_end_0 = const()[name = tensor("op_10622_end_0"), val = tensor([2, 4096, 1, 200])]; tensor var_10622_end_mask_0 = const()[name = tensor("op_10622_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10622_cast_fp16 = slice_by_index(begin = var_10622_begin_0, end = var_10622_end_0, end_mask = var_10622_end_mask_0, x = transpose_5)[name = tensor("op_10622_cast_fp16")]; tensor var_10626_begin_0 = const()[name = tensor("op_10626_begin_0"), val = tensor([0, 0, 0, 200])]; tensor var_10626_end_0 = const()[name = tensor("op_10626_end_0"), val = tensor([2, 4096, 1, 240])]; tensor var_10626_end_mask_0 = const()[name = tensor("op_10626_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10626_cast_fp16 = slice_by_index(begin = var_10626_begin_0, end = var_10626_end_0, end_mask = var_10626_end_mask_0, x = transpose_5)[name = tensor("op_10626_cast_fp16")]; tensor var_10630_begin_0 = const()[name = tensor("op_10630_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_10630_end_0 = const()[name = tensor("op_10630_end_0"), val = tensor([2, 4096, 1, 280])]; tensor var_10630_end_mask_0 = const()[name = tensor("op_10630_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10630_cast_fp16 = slice_by_index(begin = var_10630_begin_0, end = var_10630_end_0, end_mask = var_10630_end_mask_0, x = transpose_5)[name = tensor("op_10630_cast_fp16")]; tensor var_10634_begin_0 = const()[name = tensor("op_10634_begin_0"), val = tensor([0, 0, 0, 280])]; tensor var_10634_end_0 = const()[name = tensor("op_10634_end_0"), val = tensor([2, 4096, 1, 320])]; tensor var_10634_end_mask_0 = const()[name = tensor("op_10634_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10634_cast_fp16 = slice_by_index(begin = var_10634_begin_0, end = var_10634_end_0, end_mask = var_10634_end_mask_0, x = transpose_5)[name = tensor("op_10634_cast_fp16")]; tensor var_10636_begin_0 = const()[name = tensor("op_10636_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10636_end_0 = const()[name = tensor("op_10636_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_10636_end_mask_0 = const()[name = tensor("op_10636_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10636_cast_fp16 = slice_by_index(begin = var_10636_begin_0, end = var_10636_end_0, end_mask = var_10636_end_mask_0, x = v_53_cast_fp16)[name = tensor("op_10636_cast_fp16")]; tensor var_10640_begin_0 = const()[name = tensor("op_10640_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_10640_end_0 = const()[name = tensor("op_10640_end_0"), val = tensor([2, 80, 1, 4096])]; tensor var_10640_end_mask_0 = const()[name = tensor("op_10640_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10640_cast_fp16 = slice_by_index(begin = var_10640_begin_0, end = var_10640_end_0, end_mask = var_10640_end_mask_0, x = v_53_cast_fp16)[name = tensor("op_10640_cast_fp16")]; tensor var_10644_begin_0 = const()[name = tensor("op_10644_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_10644_end_0 = const()[name = tensor("op_10644_end_0"), val = tensor([2, 120, 1, 4096])]; tensor var_10644_end_mask_0 = const()[name = tensor("op_10644_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10644_cast_fp16 = slice_by_index(begin = var_10644_begin_0, end = var_10644_end_0, end_mask = var_10644_end_mask_0, x = v_53_cast_fp16)[name = tensor("op_10644_cast_fp16")]; tensor var_10648_begin_0 = const()[name = tensor("op_10648_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_10648_end_0 = const()[name = tensor("op_10648_end_0"), val = tensor([2, 160, 1, 4096])]; tensor var_10648_end_mask_0 = const()[name = tensor("op_10648_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10648_cast_fp16 = slice_by_index(begin = var_10648_begin_0, end = var_10648_end_0, end_mask = var_10648_end_mask_0, x = v_53_cast_fp16)[name = tensor("op_10648_cast_fp16")]; tensor var_10652_begin_0 = const()[name = tensor("op_10652_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_10652_end_0 = const()[name = tensor("op_10652_end_0"), val = tensor([2, 200, 1, 4096])]; tensor var_10652_end_mask_0 = const()[name = tensor("op_10652_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10652_cast_fp16 = slice_by_index(begin = var_10652_begin_0, end = var_10652_end_0, end_mask = var_10652_end_mask_0, x = v_53_cast_fp16)[name = tensor("op_10652_cast_fp16")]; tensor var_10656_begin_0 = const()[name = tensor("op_10656_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_10656_end_0 = const()[name = tensor("op_10656_end_0"), val = tensor([2, 240, 1, 4096])]; tensor var_10656_end_mask_0 = const()[name = tensor("op_10656_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10656_cast_fp16 = slice_by_index(begin = var_10656_begin_0, end = var_10656_end_0, end_mask = var_10656_end_mask_0, x = v_53_cast_fp16)[name = tensor("op_10656_cast_fp16")]; tensor var_10660_begin_0 = const()[name = tensor("op_10660_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_10660_end_0 = const()[name = tensor("op_10660_end_0"), val = tensor([2, 280, 1, 4096])]; tensor var_10660_end_mask_0 = const()[name = tensor("op_10660_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10660_cast_fp16 = slice_by_index(begin = var_10660_begin_0, end = var_10660_end_0, end_mask = var_10660_end_mask_0, x = v_53_cast_fp16)[name = tensor("op_10660_cast_fp16")]; tensor var_10664_begin_0 = const()[name = tensor("op_10664_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_10664_end_0 = const()[name = tensor("op_10664_end_0"), val = tensor([2, 320, 1, 4096])]; tensor var_10664_end_mask_0 = const()[name = tensor("op_10664_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10664_cast_fp16 = slice_by_index(begin = var_10664_begin_0, end = var_10664_end_0, end_mask = var_10664_end_mask_0, x = v_53_cast_fp16)[name = tensor("op_10664_cast_fp16")]; tensor var_10668_equation_0 = const()[name = tensor("op_10668_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10668_cast_fp16 = einsum(equation = var_10668_equation_0, values = (var_10606_cast_fp16, var_10538_cast_fp16))[name = tensor("op_10668_cast_fp16")]; tensor var_10669_to_fp16 = const()[name = tensor("op_10669_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_833_cast_fp16 = mul(x = var_10668_cast_fp16, y = var_10669_to_fp16)[name = tensor("aw_chunk_833_cast_fp16")]; tensor var_10672_equation_0 = const()[name = tensor("op_10672_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10672_cast_fp16 = einsum(equation = var_10672_equation_0, values = (var_10606_cast_fp16, var_10539_cast_fp16))[name = tensor("op_10672_cast_fp16")]; tensor var_10673_to_fp16 = const()[name = tensor("op_10673_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_835_cast_fp16 = mul(x = var_10672_cast_fp16, y = var_10673_to_fp16)[name = tensor("aw_chunk_835_cast_fp16")]; tensor var_10676_equation_0 = const()[name = tensor("op_10676_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10676_cast_fp16 = einsum(equation = var_10676_equation_0, values = (var_10606_cast_fp16, var_10540_cast_fp16))[name = tensor("op_10676_cast_fp16")]; tensor var_10677_to_fp16 = const()[name = tensor("op_10677_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_837_cast_fp16 = mul(x = var_10676_cast_fp16, y = var_10677_to_fp16)[name = tensor("aw_chunk_837_cast_fp16")]; tensor var_10680_equation_0 = const()[name = tensor("op_10680_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10680_cast_fp16 = einsum(equation = var_10680_equation_0, values = (var_10606_cast_fp16, var_10541_cast_fp16))[name = tensor("op_10680_cast_fp16")]; tensor var_10681_to_fp16 = const()[name = tensor("op_10681_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_839_cast_fp16 = mul(x = var_10680_cast_fp16, y = var_10681_to_fp16)[name = tensor("aw_chunk_839_cast_fp16")]; tensor var_10684_equation_0 = const()[name = tensor("op_10684_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10684_cast_fp16 = einsum(equation = var_10684_equation_0, values = (var_10606_cast_fp16, var_10542_cast_fp16))[name = tensor("op_10684_cast_fp16")]; tensor var_10685_to_fp16 = const()[name = tensor("op_10685_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_841_cast_fp16 = mul(x = var_10684_cast_fp16, y = var_10685_to_fp16)[name = tensor("aw_chunk_841_cast_fp16")]; tensor var_10688_equation_0 = const()[name = tensor("op_10688_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10688_cast_fp16 = einsum(equation = var_10688_equation_0, values = (var_10606_cast_fp16, var_10543_cast_fp16))[name = tensor("op_10688_cast_fp16")]; tensor var_10689_to_fp16 = const()[name = tensor("op_10689_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_843_cast_fp16 = mul(x = var_10688_cast_fp16, y = var_10689_to_fp16)[name = tensor("aw_chunk_843_cast_fp16")]; tensor var_10692_equation_0 = const()[name = tensor("op_10692_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10692_cast_fp16 = einsum(equation = var_10692_equation_0, values = (var_10606_cast_fp16, var_10544_cast_fp16))[name = tensor("op_10692_cast_fp16")]; tensor var_10693_to_fp16 = const()[name = tensor("op_10693_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_845_cast_fp16 = mul(x = var_10692_cast_fp16, y = var_10693_to_fp16)[name = tensor("aw_chunk_845_cast_fp16")]; tensor var_10696_equation_0 = const()[name = tensor("op_10696_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10696_cast_fp16 = einsum(equation = var_10696_equation_0, values = (var_10606_cast_fp16, var_10545_cast_fp16))[name = tensor("op_10696_cast_fp16")]; tensor var_10697_to_fp16 = const()[name = tensor("op_10697_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_847_cast_fp16 = mul(x = var_10696_cast_fp16, y = var_10697_to_fp16)[name = tensor("aw_chunk_847_cast_fp16")]; tensor var_10700_equation_0 = const()[name = tensor("op_10700_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10700_cast_fp16 = einsum(equation = var_10700_equation_0, values = (var_10610_cast_fp16, var_10546_cast_fp16))[name = tensor("op_10700_cast_fp16")]; tensor var_10701_to_fp16 = const()[name = tensor("op_10701_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_849_cast_fp16 = mul(x = var_10700_cast_fp16, y = var_10701_to_fp16)[name = tensor("aw_chunk_849_cast_fp16")]; tensor var_10704_equation_0 = const()[name = tensor("op_10704_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10704_cast_fp16 = einsum(equation = var_10704_equation_0, values = (var_10610_cast_fp16, var_10547_cast_fp16))[name = tensor("op_10704_cast_fp16")]; tensor var_10705_to_fp16 = const()[name = tensor("op_10705_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_851_cast_fp16 = mul(x = var_10704_cast_fp16, y = var_10705_to_fp16)[name = tensor("aw_chunk_851_cast_fp16")]; tensor var_10708_equation_0 = const()[name = tensor("op_10708_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10708_cast_fp16 = einsum(equation = var_10708_equation_0, values = (var_10610_cast_fp16, var_10548_cast_fp16))[name = tensor("op_10708_cast_fp16")]; tensor var_10709_to_fp16 = const()[name = tensor("op_10709_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_853_cast_fp16 = mul(x = var_10708_cast_fp16, y = var_10709_to_fp16)[name = tensor("aw_chunk_853_cast_fp16")]; tensor var_10712_equation_0 = const()[name = tensor("op_10712_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10712_cast_fp16 = einsum(equation = var_10712_equation_0, values = (var_10610_cast_fp16, var_10549_cast_fp16))[name = tensor("op_10712_cast_fp16")]; tensor var_10713_to_fp16 = const()[name = tensor("op_10713_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_855_cast_fp16 = mul(x = var_10712_cast_fp16, y = var_10713_to_fp16)[name = tensor("aw_chunk_855_cast_fp16")]; tensor var_10716_equation_0 = const()[name = tensor("op_10716_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10716_cast_fp16 = einsum(equation = var_10716_equation_0, values = (var_10610_cast_fp16, var_10550_cast_fp16))[name = tensor("op_10716_cast_fp16")]; tensor var_10717_to_fp16 = const()[name = tensor("op_10717_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_857_cast_fp16 = mul(x = var_10716_cast_fp16, y = var_10717_to_fp16)[name = tensor("aw_chunk_857_cast_fp16")]; tensor var_10720_equation_0 = const()[name = tensor("op_10720_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10720_cast_fp16 = einsum(equation = var_10720_equation_0, values = (var_10610_cast_fp16, var_10551_cast_fp16))[name = tensor("op_10720_cast_fp16")]; tensor var_10721_to_fp16 = const()[name = tensor("op_10721_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_859_cast_fp16 = mul(x = var_10720_cast_fp16, y = var_10721_to_fp16)[name = tensor("aw_chunk_859_cast_fp16")]; tensor var_10724_equation_0 = const()[name = tensor("op_10724_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10724_cast_fp16 = einsum(equation = var_10724_equation_0, values = (var_10610_cast_fp16, var_10552_cast_fp16))[name = tensor("op_10724_cast_fp16")]; tensor var_10725_to_fp16 = const()[name = tensor("op_10725_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_861_cast_fp16 = mul(x = var_10724_cast_fp16, y = var_10725_to_fp16)[name = tensor("aw_chunk_861_cast_fp16")]; tensor var_10728_equation_0 = const()[name = tensor("op_10728_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10728_cast_fp16 = einsum(equation = var_10728_equation_0, values = (var_10610_cast_fp16, var_10553_cast_fp16))[name = tensor("op_10728_cast_fp16")]; tensor var_10729_to_fp16 = const()[name = tensor("op_10729_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_863_cast_fp16 = mul(x = var_10728_cast_fp16, y = var_10729_to_fp16)[name = tensor("aw_chunk_863_cast_fp16")]; tensor var_10732_equation_0 = const()[name = tensor("op_10732_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10732_cast_fp16 = einsum(equation = var_10732_equation_0, values = (var_10614_cast_fp16, var_10554_cast_fp16))[name = tensor("op_10732_cast_fp16")]; tensor var_10733_to_fp16 = const()[name = tensor("op_10733_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_865_cast_fp16 = mul(x = var_10732_cast_fp16, y = var_10733_to_fp16)[name = tensor("aw_chunk_865_cast_fp16")]; tensor var_10736_equation_0 = const()[name = tensor("op_10736_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10736_cast_fp16 = einsum(equation = var_10736_equation_0, values = (var_10614_cast_fp16, var_10555_cast_fp16))[name = tensor("op_10736_cast_fp16")]; tensor var_10737_to_fp16 = const()[name = tensor("op_10737_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_867_cast_fp16 = mul(x = var_10736_cast_fp16, y = var_10737_to_fp16)[name = tensor("aw_chunk_867_cast_fp16")]; tensor var_10740_equation_0 = const()[name = tensor("op_10740_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10740_cast_fp16 = einsum(equation = var_10740_equation_0, values = (var_10614_cast_fp16, var_10556_cast_fp16))[name = tensor("op_10740_cast_fp16")]; tensor var_10741_to_fp16 = const()[name = tensor("op_10741_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_869_cast_fp16 = mul(x = var_10740_cast_fp16, y = var_10741_to_fp16)[name = tensor("aw_chunk_869_cast_fp16")]; tensor var_10744_equation_0 = const()[name = tensor("op_10744_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10744_cast_fp16 = einsum(equation = var_10744_equation_0, values = (var_10614_cast_fp16, var_10557_cast_fp16))[name = tensor("op_10744_cast_fp16")]; tensor var_10745_to_fp16 = const()[name = tensor("op_10745_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_871_cast_fp16 = mul(x = var_10744_cast_fp16, y = var_10745_to_fp16)[name = tensor("aw_chunk_871_cast_fp16")]; tensor var_10748_equation_0 = const()[name = tensor("op_10748_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10748_cast_fp16 = einsum(equation = var_10748_equation_0, values = (var_10614_cast_fp16, var_10558_cast_fp16))[name = tensor("op_10748_cast_fp16")]; tensor var_10749_to_fp16 = const()[name = tensor("op_10749_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_873_cast_fp16 = mul(x = var_10748_cast_fp16, y = var_10749_to_fp16)[name = tensor("aw_chunk_873_cast_fp16")]; tensor var_10752_equation_0 = const()[name = tensor("op_10752_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10752_cast_fp16 = einsum(equation = var_10752_equation_0, values = (var_10614_cast_fp16, var_10559_cast_fp16))[name = tensor("op_10752_cast_fp16")]; tensor var_10753_to_fp16 = const()[name = tensor("op_10753_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_875_cast_fp16 = mul(x = var_10752_cast_fp16, y = var_10753_to_fp16)[name = tensor("aw_chunk_875_cast_fp16")]; tensor var_10756_equation_0 = const()[name = tensor("op_10756_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10756_cast_fp16 = einsum(equation = var_10756_equation_0, values = (var_10614_cast_fp16, var_10560_cast_fp16))[name = tensor("op_10756_cast_fp16")]; tensor var_10757_to_fp16 = const()[name = tensor("op_10757_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_877_cast_fp16 = mul(x = var_10756_cast_fp16, y = var_10757_to_fp16)[name = tensor("aw_chunk_877_cast_fp16")]; tensor var_10760_equation_0 = const()[name = tensor("op_10760_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10760_cast_fp16 = einsum(equation = var_10760_equation_0, values = (var_10614_cast_fp16, var_10561_cast_fp16))[name = tensor("op_10760_cast_fp16")]; tensor var_10761_to_fp16 = const()[name = tensor("op_10761_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_879_cast_fp16 = mul(x = var_10760_cast_fp16, y = var_10761_to_fp16)[name = tensor("aw_chunk_879_cast_fp16")]; tensor var_10764_equation_0 = const()[name = tensor("op_10764_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10764_cast_fp16 = einsum(equation = var_10764_equation_0, values = (var_10618_cast_fp16, var_10562_cast_fp16))[name = tensor("op_10764_cast_fp16")]; tensor var_10765_to_fp16 = const()[name = tensor("op_10765_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_881_cast_fp16 = mul(x = var_10764_cast_fp16, y = var_10765_to_fp16)[name = tensor("aw_chunk_881_cast_fp16")]; tensor var_10768_equation_0 = const()[name = tensor("op_10768_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10768_cast_fp16 = einsum(equation = var_10768_equation_0, values = (var_10618_cast_fp16, var_10563_cast_fp16))[name = tensor("op_10768_cast_fp16")]; tensor var_10769_to_fp16 = const()[name = tensor("op_10769_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_883_cast_fp16 = mul(x = var_10768_cast_fp16, y = var_10769_to_fp16)[name = tensor("aw_chunk_883_cast_fp16")]; tensor var_10772_equation_0 = const()[name = tensor("op_10772_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10772_cast_fp16 = einsum(equation = var_10772_equation_0, values = (var_10618_cast_fp16, var_10564_cast_fp16))[name = tensor("op_10772_cast_fp16")]; tensor var_10773_to_fp16 = const()[name = tensor("op_10773_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_885_cast_fp16 = mul(x = var_10772_cast_fp16, y = var_10773_to_fp16)[name = tensor("aw_chunk_885_cast_fp16")]; tensor var_10776_equation_0 = const()[name = tensor("op_10776_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10776_cast_fp16 = einsum(equation = var_10776_equation_0, values = (var_10618_cast_fp16, var_10565_cast_fp16))[name = tensor("op_10776_cast_fp16")]; tensor var_10777_to_fp16 = const()[name = tensor("op_10777_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_887_cast_fp16 = mul(x = var_10776_cast_fp16, y = var_10777_to_fp16)[name = tensor("aw_chunk_887_cast_fp16")]; tensor var_10780_equation_0 = const()[name = tensor("op_10780_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10780_cast_fp16 = einsum(equation = var_10780_equation_0, values = (var_10618_cast_fp16, var_10566_cast_fp16))[name = tensor("op_10780_cast_fp16")]; tensor var_10781_to_fp16 = const()[name = tensor("op_10781_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_889_cast_fp16 = mul(x = var_10780_cast_fp16, y = var_10781_to_fp16)[name = tensor("aw_chunk_889_cast_fp16")]; tensor var_10784_equation_0 = const()[name = tensor("op_10784_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10784_cast_fp16 = einsum(equation = var_10784_equation_0, values = (var_10618_cast_fp16, var_10567_cast_fp16))[name = tensor("op_10784_cast_fp16")]; tensor var_10785_to_fp16 = const()[name = tensor("op_10785_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_891_cast_fp16 = mul(x = var_10784_cast_fp16, y = var_10785_to_fp16)[name = tensor("aw_chunk_891_cast_fp16")]; tensor var_10788_equation_0 = const()[name = tensor("op_10788_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10788_cast_fp16 = einsum(equation = var_10788_equation_0, values = (var_10618_cast_fp16, var_10568_cast_fp16))[name = tensor("op_10788_cast_fp16")]; tensor var_10789_to_fp16 = const()[name = tensor("op_10789_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_893_cast_fp16 = mul(x = var_10788_cast_fp16, y = var_10789_to_fp16)[name = tensor("aw_chunk_893_cast_fp16")]; tensor var_10792_equation_0 = const()[name = tensor("op_10792_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10792_cast_fp16 = einsum(equation = var_10792_equation_0, values = (var_10618_cast_fp16, var_10569_cast_fp16))[name = tensor("op_10792_cast_fp16")]; tensor var_10793_to_fp16 = const()[name = tensor("op_10793_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_895_cast_fp16 = mul(x = var_10792_cast_fp16, y = var_10793_to_fp16)[name = tensor("aw_chunk_895_cast_fp16")]; tensor var_10796_equation_0 = const()[name = tensor("op_10796_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10796_cast_fp16 = einsum(equation = var_10796_equation_0, values = (var_10622_cast_fp16, var_10570_cast_fp16))[name = tensor("op_10796_cast_fp16")]; tensor var_10797_to_fp16 = const()[name = tensor("op_10797_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_897_cast_fp16 = mul(x = var_10796_cast_fp16, y = var_10797_to_fp16)[name = tensor("aw_chunk_897_cast_fp16")]; tensor var_10800_equation_0 = const()[name = tensor("op_10800_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10800_cast_fp16 = einsum(equation = var_10800_equation_0, values = (var_10622_cast_fp16, var_10571_cast_fp16))[name = tensor("op_10800_cast_fp16")]; tensor var_10801_to_fp16 = const()[name = tensor("op_10801_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_899_cast_fp16 = mul(x = var_10800_cast_fp16, y = var_10801_to_fp16)[name = tensor("aw_chunk_899_cast_fp16")]; tensor var_10804_equation_0 = const()[name = tensor("op_10804_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10804_cast_fp16 = einsum(equation = var_10804_equation_0, values = (var_10622_cast_fp16, var_10572_cast_fp16))[name = tensor("op_10804_cast_fp16")]; tensor var_10805_to_fp16 = const()[name = tensor("op_10805_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_901_cast_fp16 = mul(x = var_10804_cast_fp16, y = var_10805_to_fp16)[name = tensor("aw_chunk_901_cast_fp16")]; tensor var_10808_equation_0 = const()[name = tensor("op_10808_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10808_cast_fp16 = einsum(equation = var_10808_equation_0, values = (var_10622_cast_fp16, var_10573_cast_fp16))[name = tensor("op_10808_cast_fp16")]; tensor var_10809_to_fp16 = const()[name = tensor("op_10809_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_903_cast_fp16 = mul(x = var_10808_cast_fp16, y = var_10809_to_fp16)[name = tensor("aw_chunk_903_cast_fp16")]; tensor var_10812_equation_0 = const()[name = tensor("op_10812_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10812_cast_fp16 = einsum(equation = var_10812_equation_0, values = (var_10622_cast_fp16, var_10574_cast_fp16))[name = tensor("op_10812_cast_fp16")]; tensor var_10813_to_fp16 = const()[name = tensor("op_10813_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_905_cast_fp16 = mul(x = var_10812_cast_fp16, y = var_10813_to_fp16)[name = tensor("aw_chunk_905_cast_fp16")]; tensor var_10816_equation_0 = const()[name = tensor("op_10816_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10816_cast_fp16 = einsum(equation = var_10816_equation_0, values = (var_10622_cast_fp16, var_10575_cast_fp16))[name = tensor("op_10816_cast_fp16")]; tensor var_10817_to_fp16 = const()[name = tensor("op_10817_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_907_cast_fp16 = mul(x = var_10816_cast_fp16, y = var_10817_to_fp16)[name = tensor("aw_chunk_907_cast_fp16")]; tensor var_10820_equation_0 = const()[name = tensor("op_10820_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10820_cast_fp16 = einsum(equation = var_10820_equation_0, values = (var_10622_cast_fp16, var_10576_cast_fp16))[name = tensor("op_10820_cast_fp16")]; tensor var_10821_to_fp16 = const()[name = tensor("op_10821_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_909_cast_fp16 = mul(x = var_10820_cast_fp16, y = var_10821_to_fp16)[name = tensor("aw_chunk_909_cast_fp16")]; tensor var_10824_equation_0 = const()[name = tensor("op_10824_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10824_cast_fp16 = einsum(equation = var_10824_equation_0, values = (var_10622_cast_fp16, var_10577_cast_fp16))[name = tensor("op_10824_cast_fp16")]; tensor var_10825_to_fp16 = const()[name = tensor("op_10825_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_911_cast_fp16 = mul(x = var_10824_cast_fp16, y = var_10825_to_fp16)[name = tensor("aw_chunk_911_cast_fp16")]; tensor var_10828_equation_0 = const()[name = tensor("op_10828_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10828_cast_fp16 = einsum(equation = var_10828_equation_0, values = (var_10626_cast_fp16, var_10578_cast_fp16))[name = tensor("op_10828_cast_fp16")]; tensor var_10829_to_fp16 = const()[name = tensor("op_10829_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_913_cast_fp16 = mul(x = var_10828_cast_fp16, y = var_10829_to_fp16)[name = tensor("aw_chunk_913_cast_fp16")]; tensor var_10832_equation_0 = const()[name = tensor("op_10832_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10832_cast_fp16 = einsum(equation = var_10832_equation_0, values = (var_10626_cast_fp16, var_10579_cast_fp16))[name = tensor("op_10832_cast_fp16")]; tensor var_10833_to_fp16 = const()[name = tensor("op_10833_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_915_cast_fp16 = mul(x = var_10832_cast_fp16, y = var_10833_to_fp16)[name = tensor("aw_chunk_915_cast_fp16")]; tensor var_10836_equation_0 = const()[name = tensor("op_10836_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10836_cast_fp16 = einsum(equation = var_10836_equation_0, values = (var_10626_cast_fp16, var_10580_cast_fp16))[name = tensor("op_10836_cast_fp16")]; tensor var_10837_to_fp16 = const()[name = tensor("op_10837_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_917_cast_fp16 = mul(x = var_10836_cast_fp16, y = var_10837_to_fp16)[name = tensor("aw_chunk_917_cast_fp16")]; tensor var_10840_equation_0 = const()[name = tensor("op_10840_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10840_cast_fp16 = einsum(equation = var_10840_equation_0, values = (var_10626_cast_fp16, var_10581_cast_fp16))[name = tensor("op_10840_cast_fp16")]; tensor var_10841_to_fp16 = const()[name = tensor("op_10841_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_919_cast_fp16 = mul(x = var_10840_cast_fp16, y = var_10841_to_fp16)[name = tensor("aw_chunk_919_cast_fp16")]; tensor var_10844_equation_0 = const()[name = tensor("op_10844_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10844_cast_fp16 = einsum(equation = var_10844_equation_0, values = (var_10626_cast_fp16, var_10582_cast_fp16))[name = tensor("op_10844_cast_fp16")]; tensor var_10845_to_fp16 = const()[name = tensor("op_10845_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_921_cast_fp16 = mul(x = var_10844_cast_fp16, y = var_10845_to_fp16)[name = tensor("aw_chunk_921_cast_fp16")]; tensor var_10848_equation_0 = const()[name = tensor("op_10848_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10848_cast_fp16 = einsum(equation = var_10848_equation_0, values = (var_10626_cast_fp16, var_10583_cast_fp16))[name = tensor("op_10848_cast_fp16")]; tensor var_10849_to_fp16 = const()[name = tensor("op_10849_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_923_cast_fp16 = mul(x = var_10848_cast_fp16, y = var_10849_to_fp16)[name = tensor("aw_chunk_923_cast_fp16")]; tensor var_10852_equation_0 = const()[name = tensor("op_10852_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10852_cast_fp16 = einsum(equation = var_10852_equation_0, values = (var_10626_cast_fp16, var_10584_cast_fp16))[name = tensor("op_10852_cast_fp16")]; tensor var_10853_to_fp16 = const()[name = tensor("op_10853_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_925_cast_fp16 = mul(x = var_10852_cast_fp16, y = var_10853_to_fp16)[name = tensor("aw_chunk_925_cast_fp16")]; tensor var_10856_equation_0 = const()[name = tensor("op_10856_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10856_cast_fp16 = einsum(equation = var_10856_equation_0, values = (var_10626_cast_fp16, var_10585_cast_fp16))[name = tensor("op_10856_cast_fp16")]; tensor var_10857_to_fp16 = const()[name = tensor("op_10857_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_927_cast_fp16 = mul(x = var_10856_cast_fp16, y = var_10857_to_fp16)[name = tensor("aw_chunk_927_cast_fp16")]; tensor var_10860_equation_0 = const()[name = tensor("op_10860_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10860_cast_fp16 = einsum(equation = var_10860_equation_0, values = (var_10630_cast_fp16, var_10586_cast_fp16))[name = tensor("op_10860_cast_fp16")]; tensor var_10861_to_fp16 = const()[name = tensor("op_10861_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_929_cast_fp16 = mul(x = var_10860_cast_fp16, y = var_10861_to_fp16)[name = tensor("aw_chunk_929_cast_fp16")]; tensor var_10864_equation_0 = const()[name = tensor("op_10864_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10864_cast_fp16 = einsum(equation = var_10864_equation_0, values = (var_10630_cast_fp16, var_10587_cast_fp16))[name = tensor("op_10864_cast_fp16")]; tensor var_10865_to_fp16 = const()[name = tensor("op_10865_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_931_cast_fp16 = mul(x = var_10864_cast_fp16, y = var_10865_to_fp16)[name = tensor("aw_chunk_931_cast_fp16")]; tensor var_10868_equation_0 = const()[name = tensor("op_10868_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10868_cast_fp16 = einsum(equation = var_10868_equation_0, values = (var_10630_cast_fp16, var_10588_cast_fp16))[name = tensor("op_10868_cast_fp16")]; tensor var_10869_to_fp16 = const()[name = tensor("op_10869_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_933_cast_fp16 = mul(x = var_10868_cast_fp16, y = var_10869_to_fp16)[name = tensor("aw_chunk_933_cast_fp16")]; tensor var_10872_equation_0 = const()[name = tensor("op_10872_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10872_cast_fp16 = einsum(equation = var_10872_equation_0, values = (var_10630_cast_fp16, var_10589_cast_fp16))[name = tensor("op_10872_cast_fp16")]; tensor var_10873_to_fp16 = const()[name = tensor("op_10873_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_935_cast_fp16 = mul(x = var_10872_cast_fp16, y = var_10873_to_fp16)[name = tensor("aw_chunk_935_cast_fp16")]; tensor var_10876_equation_0 = const()[name = tensor("op_10876_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10876_cast_fp16 = einsum(equation = var_10876_equation_0, values = (var_10630_cast_fp16, var_10590_cast_fp16))[name = tensor("op_10876_cast_fp16")]; tensor var_10877_to_fp16 = const()[name = tensor("op_10877_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_937_cast_fp16 = mul(x = var_10876_cast_fp16, y = var_10877_to_fp16)[name = tensor("aw_chunk_937_cast_fp16")]; tensor var_10880_equation_0 = const()[name = tensor("op_10880_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10880_cast_fp16 = einsum(equation = var_10880_equation_0, values = (var_10630_cast_fp16, var_10591_cast_fp16))[name = tensor("op_10880_cast_fp16")]; tensor var_10881_to_fp16 = const()[name = tensor("op_10881_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_939_cast_fp16 = mul(x = var_10880_cast_fp16, y = var_10881_to_fp16)[name = tensor("aw_chunk_939_cast_fp16")]; tensor var_10884_equation_0 = const()[name = tensor("op_10884_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10884_cast_fp16 = einsum(equation = var_10884_equation_0, values = (var_10630_cast_fp16, var_10592_cast_fp16))[name = tensor("op_10884_cast_fp16")]; tensor var_10885_to_fp16 = const()[name = tensor("op_10885_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_941_cast_fp16 = mul(x = var_10884_cast_fp16, y = var_10885_to_fp16)[name = tensor("aw_chunk_941_cast_fp16")]; tensor var_10888_equation_0 = const()[name = tensor("op_10888_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10888_cast_fp16 = einsum(equation = var_10888_equation_0, values = (var_10630_cast_fp16, var_10593_cast_fp16))[name = tensor("op_10888_cast_fp16")]; tensor var_10889_to_fp16 = const()[name = tensor("op_10889_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_943_cast_fp16 = mul(x = var_10888_cast_fp16, y = var_10889_to_fp16)[name = tensor("aw_chunk_943_cast_fp16")]; tensor var_10892_equation_0 = const()[name = tensor("op_10892_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10892_cast_fp16 = einsum(equation = var_10892_equation_0, values = (var_10634_cast_fp16, var_10594_cast_fp16))[name = tensor("op_10892_cast_fp16")]; tensor var_10893_to_fp16 = const()[name = tensor("op_10893_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_945_cast_fp16 = mul(x = var_10892_cast_fp16, y = var_10893_to_fp16)[name = tensor("aw_chunk_945_cast_fp16")]; tensor var_10896_equation_0 = const()[name = tensor("op_10896_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10896_cast_fp16 = einsum(equation = var_10896_equation_0, values = (var_10634_cast_fp16, var_10595_cast_fp16))[name = tensor("op_10896_cast_fp16")]; tensor var_10897_to_fp16 = const()[name = tensor("op_10897_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_947_cast_fp16 = mul(x = var_10896_cast_fp16, y = var_10897_to_fp16)[name = tensor("aw_chunk_947_cast_fp16")]; tensor var_10900_equation_0 = const()[name = tensor("op_10900_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10900_cast_fp16 = einsum(equation = var_10900_equation_0, values = (var_10634_cast_fp16, var_10596_cast_fp16))[name = tensor("op_10900_cast_fp16")]; tensor var_10901_to_fp16 = const()[name = tensor("op_10901_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_949_cast_fp16 = mul(x = var_10900_cast_fp16, y = var_10901_to_fp16)[name = tensor("aw_chunk_949_cast_fp16")]; tensor var_10904_equation_0 = const()[name = tensor("op_10904_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10904_cast_fp16 = einsum(equation = var_10904_equation_0, values = (var_10634_cast_fp16, var_10597_cast_fp16))[name = tensor("op_10904_cast_fp16")]; tensor var_10905_to_fp16 = const()[name = tensor("op_10905_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_951_cast_fp16 = mul(x = var_10904_cast_fp16, y = var_10905_to_fp16)[name = tensor("aw_chunk_951_cast_fp16")]; tensor var_10908_equation_0 = const()[name = tensor("op_10908_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10908_cast_fp16 = einsum(equation = var_10908_equation_0, values = (var_10634_cast_fp16, var_10598_cast_fp16))[name = tensor("op_10908_cast_fp16")]; tensor var_10909_to_fp16 = const()[name = tensor("op_10909_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_953_cast_fp16 = mul(x = var_10908_cast_fp16, y = var_10909_to_fp16)[name = tensor("aw_chunk_953_cast_fp16")]; tensor var_10912_equation_0 = const()[name = tensor("op_10912_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10912_cast_fp16 = einsum(equation = var_10912_equation_0, values = (var_10634_cast_fp16, var_10599_cast_fp16))[name = tensor("op_10912_cast_fp16")]; tensor var_10913_to_fp16 = const()[name = tensor("op_10913_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_955_cast_fp16 = mul(x = var_10912_cast_fp16, y = var_10913_to_fp16)[name = tensor("aw_chunk_955_cast_fp16")]; tensor var_10916_equation_0 = const()[name = tensor("op_10916_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10916_cast_fp16 = einsum(equation = var_10916_equation_0, values = (var_10634_cast_fp16, var_10600_cast_fp16))[name = tensor("op_10916_cast_fp16")]; tensor var_10917_to_fp16 = const()[name = tensor("op_10917_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_957_cast_fp16 = mul(x = var_10916_cast_fp16, y = var_10917_to_fp16)[name = tensor("aw_chunk_957_cast_fp16")]; tensor var_10920_equation_0 = const()[name = tensor("op_10920_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_10920_cast_fp16 = einsum(equation = var_10920_equation_0, values = (var_10634_cast_fp16, var_10601_cast_fp16))[name = tensor("op_10920_cast_fp16")]; tensor var_10921_to_fp16 = const()[name = tensor("op_10921_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_959_cast_fp16 = mul(x = var_10920_cast_fp16, y = var_10921_to_fp16)[name = tensor("aw_chunk_959_cast_fp16")]; tensor var_10923_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_833_cast_fp16)[name = tensor("op_10923_cast_fp16")]; tensor var_10924_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_835_cast_fp16)[name = tensor("op_10924_cast_fp16")]; tensor var_10925_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_837_cast_fp16)[name = tensor("op_10925_cast_fp16")]; tensor var_10926_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_839_cast_fp16)[name = tensor("op_10926_cast_fp16")]; tensor var_10927_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_841_cast_fp16)[name = tensor("op_10927_cast_fp16")]; tensor var_10928_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_843_cast_fp16)[name = tensor("op_10928_cast_fp16")]; tensor var_10929_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_845_cast_fp16)[name = tensor("op_10929_cast_fp16")]; tensor var_10930_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_847_cast_fp16)[name = tensor("op_10930_cast_fp16")]; tensor var_10931_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_849_cast_fp16)[name = tensor("op_10931_cast_fp16")]; tensor var_10932_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_851_cast_fp16)[name = tensor("op_10932_cast_fp16")]; tensor var_10933_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_853_cast_fp16)[name = tensor("op_10933_cast_fp16")]; tensor var_10934_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_855_cast_fp16)[name = tensor("op_10934_cast_fp16")]; tensor var_10935_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_857_cast_fp16)[name = tensor("op_10935_cast_fp16")]; tensor var_10936_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_859_cast_fp16)[name = tensor("op_10936_cast_fp16")]; tensor var_10937_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_861_cast_fp16)[name = tensor("op_10937_cast_fp16")]; tensor var_10938_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_863_cast_fp16)[name = tensor("op_10938_cast_fp16")]; tensor var_10939_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_865_cast_fp16)[name = tensor("op_10939_cast_fp16")]; tensor var_10940_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_867_cast_fp16)[name = tensor("op_10940_cast_fp16")]; tensor var_10941_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_869_cast_fp16)[name = tensor("op_10941_cast_fp16")]; tensor var_10942_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_871_cast_fp16)[name = tensor("op_10942_cast_fp16")]; tensor var_10943_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_873_cast_fp16)[name = tensor("op_10943_cast_fp16")]; tensor var_10944_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_875_cast_fp16)[name = tensor("op_10944_cast_fp16")]; tensor var_10945_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_877_cast_fp16)[name = tensor("op_10945_cast_fp16")]; tensor var_10946_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_879_cast_fp16)[name = tensor("op_10946_cast_fp16")]; tensor var_10947_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_881_cast_fp16)[name = tensor("op_10947_cast_fp16")]; tensor var_10948_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_883_cast_fp16)[name = tensor("op_10948_cast_fp16")]; tensor var_10949_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_885_cast_fp16)[name = tensor("op_10949_cast_fp16")]; tensor var_10950_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_887_cast_fp16)[name = tensor("op_10950_cast_fp16")]; tensor var_10951_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_889_cast_fp16)[name = tensor("op_10951_cast_fp16")]; tensor var_10952_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_891_cast_fp16)[name = tensor("op_10952_cast_fp16")]; tensor var_10953_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_893_cast_fp16)[name = tensor("op_10953_cast_fp16")]; tensor var_10954_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_895_cast_fp16)[name = tensor("op_10954_cast_fp16")]; tensor var_10955_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_897_cast_fp16)[name = tensor("op_10955_cast_fp16")]; tensor var_10956_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_899_cast_fp16)[name = tensor("op_10956_cast_fp16")]; tensor var_10957_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_901_cast_fp16)[name = tensor("op_10957_cast_fp16")]; tensor var_10958_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_903_cast_fp16)[name = tensor("op_10958_cast_fp16")]; tensor var_10959_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_905_cast_fp16)[name = tensor("op_10959_cast_fp16")]; tensor var_10960_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_907_cast_fp16)[name = tensor("op_10960_cast_fp16")]; tensor var_10961_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_909_cast_fp16)[name = tensor("op_10961_cast_fp16")]; tensor var_10962_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_911_cast_fp16)[name = tensor("op_10962_cast_fp16")]; tensor var_10963_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_913_cast_fp16)[name = tensor("op_10963_cast_fp16")]; tensor var_10964_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_915_cast_fp16)[name = tensor("op_10964_cast_fp16")]; tensor var_10965_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_917_cast_fp16)[name = tensor("op_10965_cast_fp16")]; tensor var_10966_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_919_cast_fp16)[name = tensor("op_10966_cast_fp16")]; tensor var_10967_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_921_cast_fp16)[name = tensor("op_10967_cast_fp16")]; tensor var_10968_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_923_cast_fp16)[name = tensor("op_10968_cast_fp16")]; tensor var_10969_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_925_cast_fp16)[name = tensor("op_10969_cast_fp16")]; tensor var_10970_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_927_cast_fp16)[name = tensor("op_10970_cast_fp16")]; tensor var_10971_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_929_cast_fp16)[name = tensor("op_10971_cast_fp16")]; tensor var_10972_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_931_cast_fp16)[name = tensor("op_10972_cast_fp16")]; tensor var_10973_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_933_cast_fp16)[name = tensor("op_10973_cast_fp16")]; tensor var_10974_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_935_cast_fp16)[name = tensor("op_10974_cast_fp16")]; tensor var_10975_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_937_cast_fp16)[name = tensor("op_10975_cast_fp16")]; tensor var_10976_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_939_cast_fp16)[name = tensor("op_10976_cast_fp16")]; tensor var_10977_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_941_cast_fp16)[name = tensor("op_10977_cast_fp16")]; tensor var_10978_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_943_cast_fp16)[name = tensor("op_10978_cast_fp16")]; tensor var_10979_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_945_cast_fp16)[name = tensor("op_10979_cast_fp16")]; tensor var_10980_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_947_cast_fp16)[name = tensor("op_10980_cast_fp16")]; tensor var_10981_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_949_cast_fp16)[name = tensor("op_10981_cast_fp16")]; tensor var_10982_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_951_cast_fp16)[name = tensor("op_10982_cast_fp16")]; tensor var_10983_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_953_cast_fp16)[name = tensor("op_10983_cast_fp16")]; tensor var_10984_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_955_cast_fp16)[name = tensor("op_10984_cast_fp16")]; tensor var_10985_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_957_cast_fp16)[name = tensor("op_10985_cast_fp16")]; tensor var_10986_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_959_cast_fp16)[name = tensor("op_10986_cast_fp16")]; tensor var_10988_equation_0 = const()[name = tensor("op_10988_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10988_cast_fp16 = einsum(equation = var_10988_equation_0, values = (var_10636_cast_fp16, var_10923_cast_fp16))[name = tensor("op_10988_cast_fp16")]; tensor var_10990_equation_0 = const()[name = tensor("op_10990_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10990_cast_fp16 = einsum(equation = var_10990_equation_0, values = (var_10636_cast_fp16, var_10924_cast_fp16))[name = tensor("op_10990_cast_fp16")]; tensor var_10992_equation_0 = const()[name = tensor("op_10992_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10992_cast_fp16 = einsum(equation = var_10992_equation_0, values = (var_10636_cast_fp16, var_10925_cast_fp16))[name = tensor("op_10992_cast_fp16")]; tensor var_10994_equation_0 = const()[name = tensor("op_10994_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10994_cast_fp16 = einsum(equation = var_10994_equation_0, values = (var_10636_cast_fp16, var_10926_cast_fp16))[name = tensor("op_10994_cast_fp16")]; tensor var_10996_equation_0 = const()[name = tensor("op_10996_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10996_cast_fp16 = einsum(equation = var_10996_equation_0, values = (var_10636_cast_fp16, var_10927_cast_fp16))[name = tensor("op_10996_cast_fp16")]; tensor var_10998_equation_0 = const()[name = tensor("op_10998_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_10998_cast_fp16 = einsum(equation = var_10998_equation_0, values = (var_10636_cast_fp16, var_10928_cast_fp16))[name = tensor("op_10998_cast_fp16")]; tensor var_11000_equation_0 = const()[name = tensor("op_11000_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11000_cast_fp16 = einsum(equation = var_11000_equation_0, values = (var_10636_cast_fp16, var_10929_cast_fp16))[name = tensor("op_11000_cast_fp16")]; tensor var_11002_equation_0 = const()[name = tensor("op_11002_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11002_cast_fp16 = einsum(equation = var_11002_equation_0, values = (var_10636_cast_fp16, var_10930_cast_fp16))[name = tensor("op_11002_cast_fp16")]; tensor var_11004_equation_0 = const()[name = tensor("op_11004_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11004_cast_fp16 = einsum(equation = var_11004_equation_0, values = (var_10640_cast_fp16, var_10931_cast_fp16))[name = tensor("op_11004_cast_fp16")]; tensor var_11006_equation_0 = const()[name = tensor("op_11006_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11006_cast_fp16 = einsum(equation = var_11006_equation_0, values = (var_10640_cast_fp16, var_10932_cast_fp16))[name = tensor("op_11006_cast_fp16")]; tensor var_11008_equation_0 = const()[name = tensor("op_11008_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11008_cast_fp16 = einsum(equation = var_11008_equation_0, values = (var_10640_cast_fp16, var_10933_cast_fp16))[name = tensor("op_11008_cast_fp16")]; tensor var_11010_equation_0 = const()[name = tensor("op_11010_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11010_cast_fp16 = einsum(equation = var_11010_equation_0, values = (var_10640_cast_fp16, var_10934_cast_fp16))[name = tensor("op_11010_cast_fp16")]; tensor var_11012_equation_0 = const()[name = tensor("op_11012_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11012_cast_fp16 = einsum(equation = var_11012_equation_0, values = (var_10640_cast_fp16, var_10935_cast_fp16))[name = tensor("op_11012_cast_fp16")]; tensor var_11014_equation_0 = const()[name = tensor("op_11014_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11014_cast_fp16 = einsum(equation = var_11014_equation_0, values = (var_10640_cast_fp16, var_10936_cast_fp16))[name = tensor("op_11014_cast_fp16")]; tensor var_11016_equation_0 = const()[name = tensor("op_11016_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11016_cast_fp16 = einsum(equation = var_11016_equation_0, values = (var_10640_cast_fp16, var_10937_cast_fp16))[name = tensor("op_11016_cast_fp16")]; tensor var_11018_equation_0 = const()[name = tensor("op_11018_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11018_cast_fp16 = einsum(equation = var_11018_equation_0, values = (var_10640_cast_fp16, var_10938_cast_fp16))[name = tensor("op_11018_cast_fp16")]; tensor var_11020_equation_0 = const()[name = tensor("op_11020_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11020_cast_fp16 = einsum(equation = var_11020_equation_0, values = (var_10644_cast_fp16, var_10939_cast_fp16))[name = tensor("op_11020_cast_fp16")]; tensor var_11022_equation_0 = const()[name = tensor("op_11022_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11022_cast_fp16 = einsum(equation = var_11022_equation_0, values = (var_10644_cast_fp16, var_10940_cast_fp16))[name = tensor("op_11022_cast_fp16")]; tensor var_11024_equation_0 = const()[name = tensor("op_11024_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11024_cast_fp16 = einsum(equation = var_11024_equation_0, values = (var_10644_cast_fp16, var_10941_cast_fp16))[name = tensor("op_11024_cast_fp16")]; tensor var_11026_equation_0 = const()[name = tensor("op_11026_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11026_cast_fp16 = einsum(equation = var_11026_equation_0, values = (var_10644_cast_fp16, var_10942_cast_fp16))[name = tensor("op_11026_cast_fp16")]; tensor var_11028_equation_0 = const()[name = tensor("op_11028_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11028_cast_fp16 = einsum(equation = var_11028_equation_0, values = (var_10644_cast_fp16, var_10943_cast_fp16))[name = tensor("op_11028_cast_fp16")]; tensor var_11030_equation_0 = const()[name = tensor("op_11030_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11030_cast_fp16 = einsum(equation = var_11030_equation_0, values = (var_10644_cast_fp16, var_10944_cast_fp16))[name = tensor("op_11030_cast_fp16")]; tensor var_11032_equation_0 = const()[name = tensor("op_11032_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11032_cast_fp16 = einsum(equation = var_11032_equation_0, values = (var_10644_cast_fp16, var_10945_cast_fp16))[name = tensor("op_11032_cast_fp16")]; tensor var_11034_equation_0 = const()[name = tensor("op_11034_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11034_cast_fp16 = einsum(equation = var_11034_equation_0, values = (var_10644_cast_fp16, var_10946_cast_fp16))[name = tensor("op_11034_cast_fp16")]; tensor var_11036_equation_0 = const()[name = tensor("op_11036_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11036_cast_fp16 = einsum(equation = var_11036_equation_0, values = (var_10648_cast_fp16, var_10947_cast_fp16))[name = tensor("op_11036_cast_fp16")]; tensor var_11038_equation_0 = const()[name = tensor("op_11038_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11038_cast_fp16 = einsum(equation = var_11038_equation_0, values = (var_10648_cast_fp16, var_10948_cast_fp16))[name = tensor("op_11038_cast_fp16")]; tensor var_11040_equation_0 = const()[name = tensor("op_11040_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11040_cast_fp16 = einsum(equation = var_11040_equation_0, values = (var_10648_cast_fp16, var_10949_cast_fp16))[name = tensor("op_11040_cast_fp16")]; tensor var_11042_equation_0 = const()[name = tensor("op_11042_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11042_cast_fp16 = einsum(equation = var_11042_equation_0, values = (var_10648_cast_fp16, var_10950_cast_fp16))[name = tensor("op_11042_cast_fp16")]; tensor var_11044_equation_0 = const()[name = tensor("op_11044_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11044_cast_fp16 = einsum(equation = var_11044_equation_0, values = (var_10648_cast_fp16, var_10951_cast_fp16))[name = tensor("op_11044_cast_fp16")]; tensor var_11046_equation_0 = const()[name = tensor("op_11046_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11046_cast_fp16 = einsum(equation = var_11046_equation_0, values = (var_10648_cast_fp16, var_10952_cast_fp16))[name = tensor("op_11046_cast_fp16")]; tensor var_11048_equation_0 = const()[name = tensor("op_11048_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11048_cast_fp16 = einsum(equation = var_11048_equation_0, values = (var_10648_cast_fp16, var_10953_cast_fp16))[name = tensor("op_11048_cast_fp16")]; tensor var_11050_equation_0 = const()[name = tensor("op_11050_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11050_cast_fp16 = einsum(equation = var_11050_equation_0, values = (var_10648_cast_fp16, var_10954_cast_fp16))[name = tensor("op_11050_cast_fp16")]; tensor var_11052_equation_0 = const()[name = tensor("op_11052_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11052_cast_fp16 = einsum(equation = var_11052_equation_0, values = (var_10652_cast_fp16, var_10955_cast_fp16))[name = tensor("op_11052_cast_fp16")]; tensor var_11054_equation_0 = const()[name = tensor("op_11054_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11054_cast_fp16 = einsum(equation = var_11054_equation_0, values = (var_10652_cast_fp16, var_10956_cast_fp16))[name = tensor("op_11054_cast_fp16")]; tensor var_11056_equation_0 = const()[name = tensor("op_11056_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11056_cast_fp16 = einsum(equation = var_11056_equation_0, values = (var_10652_cast_fp16, var_10957_cast_fp16))[name = tensor("op_11056_cast_fp16")]; tensor var_11058_equation_0 = const()[name = tensor("op_11058_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11058_cast_fp16 = einsum(equation = var_11058_equation_0, values = (var_10652_cast_fp16, var_10958_cast_fp16))[name = tensor("op_11058_cast_fp16")]; tensor var_11060_equation_0 = const()[name = tensor("op_11060_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11060_cast_fp16 = einsum(equation = var_11060_equation_0, values = (var_10652_cast_fp16, var_10959_cast_fp16))[name = tensor("op_11060_cast_fp16")]; tensor var_11062_equation_0 = const()[name = tensor("op_11062_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11062_cast_fp16 = einsum(equation = var_11062_equation_0, values = (var_10652_cast_fp16, var_10960_cast_fp16))[name = tensor("op_11062_cast_fp16")]; tensor var_11064_equation_0 = const()[name = tensor("op_11064_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11064_cast_fp16 = einsum(equation = var_11064_equation_0, values = (var_10652_cast_fp16, var_10961_cast_fp16))[name = tensor("op_11064_cast_fp16")]; tensor var_11066_equation_0 = const()[name = tensor("op_11066_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11066_cast_fp16 = einsum(equation = var_11066_equation_0, values = (var_10652_cast_fp16, var_10962_cast_fp16))[name = tensor("op_11066_cast_fp16")]; tensor var_11068_equation_0 = const()[name = tensor("op_11068_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11068_cast_fp16 = einsum(equation = var_11068_equation_0, values = (var_10656_cast_fp16, var_10963_cast_fp16))[name = tensor("op_11068_cast_fp16")]; tensor var_11070_equation_0 = const()[name = tensor("op_11070_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11070_cast_fp16 = einsum(equation = var_11070_equation_0, values = (var_10656_cast_fp16, var_10964_cast_fp16))[name = tensor("op_11070_cast_fp16")]; tensor var_11072_equation_0 = const()[name = tensor("op_11072_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11072_cast_fp16 = einsum(equation = var_11072_equation_0, values = (var_10656_cast_fp16, var_10965_cast_fp16))[name = tensor("op_11072_cast_fp16")]; tensor var_11074_equation_0 = const()[name = tensor("op_11074_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11074_cast_fp16 = einsum(equation = var_11074_equation_0, values = (var_10656_cast_fp16, var_10966_cast_fp16))[name = tensor("op_11074_cast_fp16")]; tensor var_11076_equation_0 = const()[name = tensor("op_11076_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11076_cast_fp16 = einsum(equation = var_11076_equation_0, values = (var_10656_cast_fp16, var_10967_cast_fp16))[name = tensor("op_11076_cast_fp16")]; tensor var_11078_equation_0 = const()[name = tensor("op_11078_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11078_cast_fp16 = einsum(equation = var_11078_equation_0, values = (var_10656_cast_fp16, var_10968_cast_fp16))[name = tensor("op_11078_cast_fp16")]; tensor var_11080_equation_0 = const()[name = tensor("op_11080_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11080_cast_fp16 = einsum(equation = var_11080_equation_0, values = (var_10656_cast_fp16, var_10969_cast_fp16))[name = tensor("op_11080_cast_fp16")]; tensor var_11082_equation_0 = const()[name = tensor("op_11082_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11082_cast_fp16 = einsum(equation = var_11082_equation_0, values = (var_10656_cast_fp16, var_10970_cast_fp16))[name = tensor("op_11082_cast_fp16")]; tensor var_11084_equation_0 = const()[name = tensor("op_11084_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11084_cast_fp16 = einsum(equation = var_11084_equation_0, values = (var_10660_cast_fp16, var_10971_cast_fp16))[name = tensor("op_11084_cast_fp16")]; tensor var_11086_equation_0 = const()[name = tensor("op_11086_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11086_cast_fp16 = einsum(equation = var_11086_equation_0, values = (var_10660_cast_fp16, var_10972_cast_fp16))[name = tensor("op_11086_cast_fp16")]; tensor var_11088_equation_0 = const()[name = tensor("op_11088_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11088_cast_fp16 = einsum(equation = var_11088_equation_0, values = (var_10660_cast_fp16, var_10973_cast_fp16))[name = tensor("op_11088_cast_fp16")]; tensor var_11090_equation_0 = const()[name = tensor("op_11090_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11090_cast_fp16 = einsum(equation = var_11090_equation_0, values = (var_10660_cast_fp16, var_10974_cast_fp16))[name = tensor("op_11090_cast_fp16")]; tensor var_11092_equation_0 = const()[name = tensor("op_11092_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11092_cast_fp16 = einsum(equation = var_11092_equation_0, values = (var_10660_cast_fp16, var_10975_cast_fp16))[name = tensor("op_11092_cast_fp16")]; tensor var_11094_equation_0 = const()[name = tensor("op_11094_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11094_cast_fp16 = einsum(equation = var_11094_equation_0, values = (var_10660_cast_fp16, var_10976_cast_fp16))[name = tensor("op_11094_cast_fp16")]; tensor var_11096_equation_0 = const()[name = tensor("op_11096_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11096_cast_fp16 = einsum(equation = var_11096_equation_0, values = (var_10660_cast_fp16, var_10977_cast_fp16))[name = tensor("op_11096_cast_fp16")]; tensor var_11098_equation_0 = const()[name = tensor("op_11098_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11098_cast_fp16 = einsum(equation = var_11098_equation_0, values = (var_10660_cast_fp16, var_10978_cast_fp16))[name = tensor("op_11098_cast_fp16")]; tensor var_11100_equation_0 = const()[name = tensor("op_11100_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11100_cast_fp16 = einsum(equation = var_11100_equation_0, values = (var_10664_cast_fp16, var_10979_cast_fp16))[name = tensor("op_11100_cast_fp16")]; tensor var_11102_equation_0 = const()[name = tensor("op_11102_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11102_cast_fp16 = einsum(equation = var_11102_equation_0, values = (var_10664_cast_fp16, var_10980_cast_fp16))[name = tensor("op_11102_cast_fp16")]; tensor var_11104_equation_0 = const()[name = tensor("op_11104_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11104_cast_fp16 = einsum(equation = var_11104_equation_0, values = (var_10664_cast_fp16, var_10981_cast_fp16))[name = tensor("op_11104_cast_fp16")]; tensor var_11106_equation_0 = const()[name = tensor("op_11106_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11106_cast_fp16 = einsum(equation = var_11106_equation_0, values = (var_10664_cast_fp16, var_10982_cast_fp16))[name = tensor("op_11106_cast_fp16")]; tensor var_11108_equation_0 = const()[name = tensor("op_11108_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11108_cast_fp16 = einsum(equation = var_11108_equation_0, values = (var_10664_cast_fp16, var_10983_cast_fp16))[name = tensor("op_11108_cast_fp16")]; tensor var_11110_equation_0 = const()[name = tensor("op_11110_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11110_cast_fp16 = einsum(equation = var_11110_equation_0, values = (var_10664_cast_fp16, var_10984_cast_fp16))[name = tensor("op_11110_cast_fp16")]; tensor var_11112_equation_0 = const()[name = tensor("op_11112_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11112_cast_fp16 = einsum(equation = var_11112_equation_0, values = (var_10664_cast_fp16, var_10985_cast_fp16))[name = tensor("op_11112_cast_fp16")]; tensor var_11114_equation_0 = const()[name = tensor("op_11114_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11114_cast_fp16 = einsum(equation = var_11114_equation_0, values = (var_10664_cast_fp16, var_10986_cast_fp16))[name = tensor("op_11114_cast_fp16")]; tensor var_11116_interleave_0 = const()[name = tensor("op_11116_interleave_0"), val = tensor(false)]; tensor var_11116_cast_fp16 = concat(axis = var_10347, interleave = var_11116_interleave_0, values = (var_10988_cast_fp16, var_10990_cast_fp16, var_10992_cast_fp16, var_10994_cast_fp16, var_10996_cast_fp16, var_10998_cast_fp16, var_11000_cast_fp16, var_11002_cast_fp16))[name = tensor("op_11116_cast_fp16")]; tensor var_11118_interleave_0 = const()[name = tensor("op_11118_interleave_0"), val = tensor(false)]; tensor var_11118_cast_fp16 = concat(axis = var_10347, interleave = var_11118_interleave_0, values = (var_11004_cast_fp16, var_11006_cast_fp16, var_11008_cast_fp16, var_11010_cast_fp16, var_11012_cast_fp16, var_11014_cast_fp16, var_11016_cast_fp16, var_11018_cast_fp16))[name = tensor("op_11118_cast_fp16")]; tensor var_11120_interleave_0 = const()[name = tensor("op_11120_interleave_0"), val = tensor(false)]; tensor var_11120_cast_fp16 = concat(axis = var_10347, interleave = var_11120_interleave_0, values = (var_11020_cast_fp16, var_11022_cast_fp16, var_11024_cast_fp16, var_11026_cast_fp16, var_11028_cast_fp16, var_11030_cast_fp16, var_11032_cast_fp16, var_11034_cast_fp16))[name = tensor("op_11120_cast_fp16")]; tensor var_11122_interleave_0 = const()[name = tensor("op_11122_interleave_0"), val = tensor(false)]; tensor var_11122_cast_fp16 = concat(axis = var_10347, interleave = var_11122_interleave_0, values = (var_11036_cast_fp16, var_11038_cast_fp16, var_11040_cast_fp16, var_11042_cast_fp16, var_11044_cast_fp16, var_11046_cast_fp16, var_11048_cast_fp16, var_11050_cast_fp16))[name = tensor("op_11122_cast_fp16")]; tensor var_11124_interleave_0 = const()[name = tensor("op_11124_interleave_0"), val = tensor(false)]; tensor var_11124_cast_fp16 = concat(axis = var_10347, interleave = var_11124_interleave_0, values = (var_11052_cast_fp16, var_11054_cast_fp16, var_11056_cast_fp16, var_11058_cast_fp16, var_11060_cast_fp16, var_11062_cast_fp16, var_11064_cast_fp16, var_11066_cast_fp16))[name = tensor("op_11124_cast_fp16")]; tensor var_11126_interleave_0 = const()[name = tensor("op_11126_interleave_0"), val = tensor(false)]; tensor var_11126_cast_fp16 = concat(axis = var_10347, interleave = var_11126_interleave_0, values = (var_11068_cast_fp16, var_11070_cast_fp16, var_11072_cast_fp16, var_11074_cast_fp16, var_11076_cast_fp16, var_11078_cast_fp16, var_11080_cast_fp16, var_11082_cast_fp16))[name = tensor("op_11126_cast_fp16")]; tensor var_11128_interleave_0 = const()[name = tensor("op_11128_interleave_0"), val = tensor(false)]; tensor var_11128_cast_fp16 = concat(axis = var_10347, interleave = var_11128_interleave_0, values = (var_11084_cast_fp16, var_11086_cast_fp16, var_11088_cast_fp16, var_11090_cast_fp16, var_11092_cast_fp16, var_11094_cast_fp16, var_11096_cast_fp16, var_11098_cast_fp16))[name = tensor("op_11128_cast_fp16")]; tensor var_11130_interleave_0 = const()[name = tensor("op_11130_interleave_0"), val = tensor(false)]; tensor var_11130_cast_fp16 = concat(axis = var_10347, interleave = var_11130_interleave_0, values = (var_11100_cast_fp16, var_11102_cast_fp16, var_11104_cast_fp16, var_11106_cast_fp16, var_11108_cast_fp16, var_11110_cast_fp16, var_11112_cast_fp16, var_11114_cast_fp16))[name = tensor("op_11130_cast_fp16")]; tensor input_465_interleave_0 = const()[name = tensor("input_465_interleave_0"), val = tensor(false)]; tensor input_465_cast_fp16 = concat(axis = var_10375, interleave = input_465_interleave_0, values = (var_11116_cast_fp16, var_11118_cast_fp16, var_11120_cast_fp16, var_11122_cast_fp16, var_11124_cast_fp16, var_11126_cast_fp16, var_11128_cast_fp16, var_11130_cast_fp16))[name = tensor("input_465_cast_fp16")]; tensor var_11136 = const()[name = tensor("op_11136"), val = tensor([1, 1])]; tensor var_11138 = const()[name = tensor("op_11138"), val = tensor([1, 1])]; tensor var_11140_pad_type_0 = const()[name = tensor("op_11140_pad_type_0"), val = tensor("custom")]; tensor var_11140_pad_0 = const()[name = tensor("op_11140_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1691102592)))]; tensor up_blocks_3_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1691307456)))]; tensor var_11140_cast_fp16 = conv(bias = up_blocks_3_attentions_0_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_11138, groups = var_10375, pad = var_11140_pad_0, pad_type = var_11140_pad_type_0, strides = var_11136, weight = up_blocks_3_attentions_0_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_465_cast_fp16)[name = tensor("op_11140_cast_fp16")]; tensor inputs_81_cast_fp16 = add(x = var_11140_cast_fp16, y = inputs_79_cast_fp16)[name = tensor("inputs_81_cast_fp16")]; tensor hidden_states_289_axes_0 = const()[name = tensor("hidden_states_289_axes_0"), val = tensor([1])]; tensor hidden_states_289_gamma_0_to_fp16 = const()[name = tensor("hidden_states_289_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1691308160)))]; tensor hidden_states_289_beta_0_to_fp16 = const()[name = tensor("hidden_states_289_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1691308864)))]; tensor var_11150_to_fp16 = const()[name = tensor("op_11150_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_289_cast_fp16 = layer_norm(axes = hidden_states_289_axes_0, beta = hidden_states_289_beta_0_to_fp16, epsilon = var_11150_to_fp16, gamma = hidden_states_289_gamma_0_to_fp16, x = inputs_81_cast_fp16)[name = tensor("hidden_states_289_cast_fp16")]; tensor var_11165 = const()[name = tensor("op_11165"), val = tensor([1, 1])]; tensor var_11167 = const()[name = tensor("op_11167"), val = tensor([1, 1])]; tensor q_55_pad_type_0 = const()[name = tensor("q_55_pad_type_0"), val = tensor("custom")]; tensor q_55_pad_0 = const()[name = tensor("q_55_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1691309568)))]; tensor q_55_cast_fp16 = conv(dilations = var_11167, groups = var_10375, pad = q_55_pad_0, pad_type = q_55_pad_type_0, strides = var_11165, weight = up_blocks_3_attentions_0_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_289_cast_fp16)[name = tensor("q_55_cast_fp16")]; tensor var_11171 = const()[name = tensor("op_11171"), val = tensor([1, 1])]; tensor var_11173 = const()[name = tensor("op_11173"), val = tensor([1, 1])]; tensor k_109_pad_type_0 = const()[name = tensor("k_109_pad_type_0"), val = tensor("custom")]; tensor k_109_pad_0 = const()[name = tensor("k_109_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1691514432)))]; tensor k_109_cast_fp16 = conv(dilations = var_11173, groups = var_10375, pad = k_109_pad_0, pad_type = k_109_pad_type_0, strides = var_11171, weight = up_blocks_3_attentions_0_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_109_cast_fp16")]; tensor var_11177 = const()[name = tensor("op_11177"), val = tensor([1, 1])]; tensor var_11179 = const()[name = tensor("op_11179"), val = tensor([1, 1])]; tensor v_55_pad_type_0 = const()[name = tensor("v_55_pad_type_0"), val = tensor("custom")]; tensor v_55_pad_0 = const()[name = tensor("v_55_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1692006016)))]; tensor v_55_cast_fp16 = conv(dilations = var_11179, groups = var_10375, pad = v_55_pad_0, pad_type = v_55_pad_type_0, strides = var_11177, weight = up_blocks_3_attentions_0_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_55_cast_fp16")]; tensor var_11183_begin_0 = const()[name = tensor("op_11183_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11183_end_0 = const()[name = tensor("op_11183_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_11183_end_mask_0 = const()[name = tensor("op_11183_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11183_cast_fp16 = slice_by_index(begin = var_11183_begin_0, end = var_11183_end_0, end_mask = var_11183_end_mask_0, x = q_55_cast_fp16)[name = tensor("op_11183_cast_fp16")]; tensor var_11187_begin_0 = const()[name = tensor("op_11187_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_11187_end_0 = const()[name = tensor("op_11187_end_0"), val = tensor([2, 80, 1, 4096])]; tensor var_11187_end_mask_0 = const()[name = tensor("op_11187_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11187_cast_fp16 = slice_by_index(begin = var_11187_begin_0, end = var_11187_end_0, end_mask = var_11187_end_mask_0, x = q_55_cast_fp16)[name = tensor("op_11187_cast_fp16")]; tensor var_11191_begin_0 = const()[name = tensor("op_11191_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_11191_end_0 = const()[name = tensor("op_11191_end_0"), val = tensor([2, 120, 1, 4096])]; tensor var_11191_end_mask_0 = const()[name = tensor("op_11191_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11191_cast_fp16 = slice_by_index(begin = var_11191_begin_0, end = var_11191_end_0, end_mask = var_11191_end_mask_0, x = q_55_cast_fp16)[name = tensor("op_11191_cast_fp16")]; tensor var_11195_begin_0 = const()[name = tensor("op_11195_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_11195_end_0 = const()[name = tensor("op_11195_end_0"), val = tensor([2, 160, 1, 4096])]; tensor var_11195_end_mask_0 = const()[name = tensor("op_11195_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11195_cast_fp16 = slice_by_index(begin = var_11195_begin_0, end = var_11195_end_0, end_mask = var_11195_end_mask_0, x = q_55_cast_fp16)[name = tensor("op_11195_cast_fp16")]; tensor var_11199_begin_0 = const()[name = tensor("op_11199_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_11199_end_0 = const()[name = tensor("op_11199_end_0"), val = tensor([2, 200, 1, 4096])]; tensor var_11199_end_mask_0 = const()[name = tensor("op_11199_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11199_cast_fp16 = slice_by_index(begin = var_11199_begin_0, end = var_11199_end_0, end_mask = var_11199_end_mask_0, x = q_55_cast_fp16)[name = tensor("op_11199_cast_fp16")]; tensor var_11203_begin_0 = const()[name = tensor("op_11203_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_11203_end_0 = const()[name = tensor("op_11203_end_0"), val = tensor([2, 240, 1, 4096])]; tensor var_11203_end_mask_0 = const()[name = tensor("op_11203_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11203_cast_fp16 = slice_by_index(begin = var_11203_begin_0, end = var_11203_end_0, end_mask = var_11203_end_mask_0, x = q_55_cast_fp16)[name = tensor("op_11203_cast_fp16")]; tensor var_11207_begin_0 = const()[name = tensor("op_11207_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_11207_end_0 = const()[name = tensor("op_11207_end_0"), val = tensor([2, 280, 1, 4096])]; tensor var_11207_end_mask_0 = const()[name = tensor("op_11207_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11207_cast_fp16 = slice_by_index(begin = var_11207_begin_0, end = var_11207_end_0, end_mask = var_11207_end_mask_0, x = q_55_cast_fp16)[name = tensor("op_11207_cast_fp16")]; tensor var_11211_begin_0 = const()[name = tensor("op_11211_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_11211_end_0 = const()[name = tensor("op_11211_end_0"), val = tensor([2, 320, 1, 4096])]; tensor var_11211_end_mask_0 = const()[name = tensor("op_11211_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11211_cast_fp16 = slice_by_index(begin = var_11211_begin_0, end = var_11211_end_0, end_mask = var_11211_end_mask_0, x = q_55_cast_fp16)[name = tensor("op_11211_cast_fp16")]; tensor var_11214_begin_0 = const()[name = tensor("op_11214_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11214_end_0 = const()[name = tensor("op_11214_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_11214_end_mask_0 = const()[name = tensor("op_11214_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11214_cast_fp16 = slice_by_index(begin = var_11214_begin_0, end = var_11214_end_0, end_mask = var_11214_end_mask_0, x = var_11183_cast_fp16)[name = tensor("op_11214_cast_fp16")]; tensor var_11215_begin_0 = const()[name = tensor("op_11215_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_11215_end_0 = const()[name = tensor("op_11215_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_11215_end_mask_0 = const()[name = tensor("op_11215_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11215_cast_fp16 = slice_by_index(begin = var_11215_begin_0, end = var_11215_end_0, end_mask = var_11215_end_mask_0, x = var_11183_cast_fp16)[name = tensor("op_11215_cast_fp16")]; tensor var_11216_begin_0 = const()[name = tensor("op_11216_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_11216_end_0 = const()[name = tensor("op_11216_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_11216_end_mask_0 = const()[name = tensor("op_11216_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11216_cast_fp16 = slice_by_index(begin = var_11216_begin_0, end = var_11216_end_0, end_mask = var_11216_end_mask_0, x = var_11183_cast_fp16)[name = tensor("op_11216_cast_fp16")]; tensor var_11217_begin_0 = const()[name = tensor("op_11217_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_11217_end_0 = const()[name = tensor("op_11217_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_11217_end_mask_0 = const()[name = tensor("op_11217_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11217_cast_fp16 = slice_by_index(begin = var_11217_begin_0, end = var_11217_end_0, end_mask = var_11217_end_mask_0, x = var_11183_cast_fp16)[name = tensor("op_11217_cast_fp16")]; tensor var_11218_begin_0 = const()[name = tensor("op_11218_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_11218_end_0 = const()[name = tensor("op_11218_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_11218_end_mask_0 = const()[name = tensor("op_11218_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11218_cast_fp16 = slice_by_index(begin = var_11218_begin_0, end = var_11218_end_0, end_mask = var_11218_end_mask_0, x = var_11183_cast_fp16)[name = tensor("op_11218_cast_fp16")]; tensor var_11219_begin_0 = const()[name = tensor("op_11219_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_11219_end_0 = const()[name = tensor("op_11219_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_11219_end_mask_0 = const()[name = tensor("op_11219_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11219_cast_fp16 = slice_by_index(begin = var_11219_begin_0, end = var_11219_end_0, end_mask = var_11219_end_mask_0, x = var_11183_cast_fp16)[name = tensor("op_11219_cast_fp16")]; tensor var_11220_begin_0 = const()[name = tensor("op_11220_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_11220_end_0 = const()[name = tensor("op_11220_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_11220_end_mask_0 = const()[name = tensor("op_11220_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11220_cast_fp16 = slice_by_index(begin = var_11220_begin_0, end = var_11220_end_0, end_mask = var_11220_end_mask_0, x = var_11183_cast_fp16)[name = tensor("op_11220_cast_fp16")]; tensor var_11221_begin_0 = const()[name = tensor("op_11221_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_11221_end_0 = const()[name = tensor("op_11221_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_11221_end_mask_0 = const()[name = tensor("op_11221_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11221_cast_fp16 = slice_by_index(begin = var_11221_begin_0, end = var_11221_end_0, end_mask = var_11221_end_mask_0, x = var_11183_cast_fp16)[name = tensor("op_11221_cast_fp16")]; tensor var_11222_begin_0 = const()[name = tensor("op_11222_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11222_end_0 = const()[name = tensor("op_11222_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_11222_end_mask_0 = const()[name = tensor("op_11222_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11222_cast_fp16 = slice_by_index(begin = var_11222_begin_0, end = var_11222_end_0, end_mask = var_11222_end_mask_0, x = var_11187_cast_fp16)[name = tensor("op_11222_cast_fp16")]; tensor var_11223_begin_0 = const()[name = tensor("op_11223_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_11223_end_0 = const()[name = tensor("op_11223_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_11223_end_mask_0 = const()[name = tensor("op_11223_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11223_cast_fp16 = slice_by_index(begin = var_11223_begin_0, end = var_11223_end_0, end_mask = var_11223_end_mask_0, x = var_11187_cast_fp16)[name = tensor("op_11223_cast_fp16")]; tensor var_11224_begin_0 = const()[name = tensor("op_11224_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_11224_end_0 = const()[name = tensor("op_11224_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_11224_end_mask_0 = const()[name = tensor("op_11224_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11224_cast_fp16 = slice_by_index(begin = var_11224_begin_0, end = var_11224_end_0, end_mask = var_11224_end_mask_0, x = var_11187_cast_fp16)[name = tensor("op_11224_cast_fp16")]; tensor var_11225_begin_0 = const()[name = tensor("op_11225_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_11225_end_0 = const()[name = tensor("op_11225_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_11225_end_mask_0 = const()[name = tensor("op_11225_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11225_cast_fp16 = slice_by_index(begin = var_11225_begin_0, end = var_11225_end_0, end_mask = var_11225_end_mask_0, x = var_11187_cast_fp16)[name = tensor("op_11225_cast_fp16")]; tensor var_11226_begin_0 = const()[name = tensor("op_11226_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_11226_end_0 = const()[name = tensor("op_11226_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_11226_end_mask_0 = const()[name = tensor("op_11226_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11226_cast_fp16 = slice_by_index(begin = var_11226_begin_0, end = var_11226_end_0, end_mask = var_11226_end_mask_0, x = var_11187_cast_fp16)[name = tensor("op_11226_cast_fp16")]; tensor var_11227_begin_0 = const()[name = tensor("op_11227_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_11227_end_0 = const()[name = tensor("op_11227_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_11227_end_mask_0 = const()[name = tensor("op_11227_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11227_cast_fp16 = slice_by_index(begin = var_11227_begin_0, end = var_11227_end_0, end_mask = var_11227_end_mask_0, x = var_11187_cast_fp16)[name = tensor("op_11227_cast_fp16")]; tensor var_11228_begin_0 = const()[name = tensor("op_11228_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_11228_end_0 = const()[name = tensor("op_11228_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_11228_end_mask_0 = const()[name = tensor("op_11228_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11228_cast_fp16 = slice_by_index(begin = var_11228_begin_0, end = var_11228_end_0, end_mask = var_11228_end_mask_0, x = var_11187_cast_fp16)[name = tensor("op_11228_cast_fp16")]; tensor var_11229_begin_0 = const()[name = tensor("op_11229_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_11229_end_0 = const()[name = tensor("op_11229_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_11229_end_mask_0 = const()[name = tensor("op_11229_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11229_cast_fp16 = slice_by_index(begin = var_11229_begin_0, end = var_11229_end_0, end_mask = var_11229_end_mask_0, x = var_11187_cast_fp16)[name = tensor("op_11229_cast_fp16")]; tensor var_11230_begin_0 = const()[name = tensor("op_11230_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11230_end_0 = const()[name = tensor("op_11230_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_11230_end_mask_0 = const()[name = tensor("op_11230_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11230_cast_fp16 = slice_by_index(begin = var_11230_begin_0, end = var_11230_end_0, end_mask = var_11230_end_mask_0, x = var_11191_cast_fp16)[name = tensor("op_11230_cast_fp16")]; tensor var_11231_begin_0 = const()[name = tensor("op_11231_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_11231_end_0 = const()[name = tensor("op_11231_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_11231_end_mask_0 = const()[name = tensor("op_11231_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11231_cast_fp16 = slice_by_index(begin = var_11231_begin_0, end = var_11231_end_0, end_mask = var_11231_end_mask_0, x = var_11191_cast_fp16)[name = tensor("op_11231_cast_fp16")]; tensor var_11232_begin_0 = const()[name = tensor("op_11232_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_11232_end_0 = const()[name = tensor("op_11232_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_11232_end_mask_0 = const()[name = tensor("op_11232_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11232_cast_fp16 = slice_by_index(begin = var_11232_begin_0, end = var_11232_end_0, end_mask = var_11232_end_mask_0, x = var_11191_cast_fp16)[name = tensor("op_11232_cast_fp16")]; tensor var_11233_begin_0 = const()[name = tensor("op_11233_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_11233_end_0 = const()[name = tensor("op_11233_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_11233_end_mask_0 = const()[name = tensor("op_11233_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11233_cast_fp16 = slice_by_index(begin = var_11233_begin_0, end = var_11233_end_0, end_mask = var_11233_end_mask_0, x = var_11191_cast_fp16)[name = tensor("op_11233_cast_fp16")]; tensor var_11234_begin_0 = const()[name = tensor("op_11234_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_11234_end_0 = const()[name = tensor("op_11234_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_11234_end_mask_0 = const()[name = tensor("op_11234_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11234_cast_fp16 = slice_by_index(begin = var_11234_begin_0, end = var_11234_end_0, end_mask = var_11234_end_mask_0, x = var_11191_cast_fp16)[name = tensor("op_11234_cast_fp16")]; tensor var_11235_begin_0 = const()[name = tensor("op_11235_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_11235_end_0 = const()[name = tensor("op_11235_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_11235_end_mask_0 = const()[name = tensor("op_11235_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11235_cast_fp16 = slice_by_index(begin = var_11235_begin_0, end = var_11235_end_0, end_mask = var_11235_end_mask_0, x = var_11191_cast_fp16)[name = tensor("op_11235_cast_fp16")]; tensor var_11236_begin_0 = const()[name = tensor("op_11236_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_11236_end_0 = const()[name = tensor("op_11236_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_11236_end_mask_0 = const()[name = tensor("op_11236_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11236_cast_fp16 = slice_by_index(begin = var_11236_begin_0, end = var_11236_end_0, end_mask = var_11236_end_mask_0, x = var_11191_cast_fp16)[name = tensor("op_11236_cast_fp16")]; tensor var_11237_begin_0 = const()[name = tensor("op_11237_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_11237_end_0 = const()[name = tensor("op_11237_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_11237_end_mask_0 = const()[name = tensor("op_11237_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11237_cast_fp16 = slice_by_index(begin = var_11237_begin_0, end = var_11237_end_0, end_mask = var_11237_end_mask_0, x = var_11191_cast_fp16)[name = tensor("op_11237_cast_fp16")]; tensor var_11238_begin_0 = const()[name = tensor("op_11238_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11238_end_0 = const()[name = tensor("op_11238_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_11238_end_mask_0 = const()[name = tensor("op_11238_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11238_cast_fp16 = slice_by_index(begin = var_11238_begin_0, end = var_11238_end_0, end_mask = var_11238_end_mask_0, x = var_11195_cast_fp16)[name = tensor("op_11238_cast_fp16")]; tensor var_11239_begin_0 = const()[name = tensor("op_11239_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_11239_end_0 = const()[name = tensor("op_11239_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_11239_end_mask_0 = const()[name = tensor("op_11239_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11239_cast_fp16 = slice_by_index(begin = var_11239_begin_0, end = var_11239_end_0, end_mask = var_11239_end_mask_0, x = var_11195_cast_fp16)[name = tensor("op_11239_cast_fp16")]; tensor var_11240_begin_0 = const()[name = tensor("op_11240_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_11240_end_0 = const()[name = tensor("op_11240_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_11240_end_mask_0 = const()[name = tensor("op_11240_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11240_cast_fp16 = slice_by_index(begin = var_11240_begin_0, end = var_11240_end_0, end_mask = var_11240_end_mask_0, x = var_11195_cast_fp16)[name = tensor("op_11240_cast_fp16")]; tensor var_11241_begin_0 = const()[name = tensor("op_11241_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_11241_end_0 = const()[name = tensor("op_11241_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_11241_end_mask_0 = const()[name = tensor("op_11241_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11241_cast_fp16 = slice_by_index(begin = var_11241_begin_0, end = var_11241_end_0, end_mask = var_11241_end_mask_0, x = var_11195_cast_fp16)[name = tensor("op_11241_cast_fp16")]; tensor var_11242_begin_0 = const()[name = tensor("op_11242_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_11242_end_0 = const()[name = tensor("op_11242_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_11242_end_mask_0 = const()[name = tensor("op_11242_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11242_cast_fp16 = slice_by_index(begin = var_11242_begin_0, end = var_11242_end_0, end_mask = var_11242_end_mask_0, x = var_11195_cast_fp16)[name = tensor("op_11242_cast_fp16")]; tensor var_11243_begin_0 = const()[name = tensor("op_11243_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_11243_end_0 = const()[name = tensor("op_11243_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_11243_end_mask_0 = const()[name = tensor("op_11243_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11243_cast_fp16 = slice_by_index(begin = var_11243_begin_0, end = var_11243_end_0, end_mask = var_11243_end_mask_0, x = var_11195_cast_fp16)[name = tensor("op_11243_cast_fp16")]; tensor var_11244_begin_0 = const()[name = tensor("op_11244_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_11244_end_0 = const()[name = tensor("op_11244_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_11244_end_mask_0 = const()[name = tensor("op_11244_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11244_cast_fp16 = slice_by_index(begin = var_11244_begin_0, end = var_11244_end_0, end_mask = var_11244_end_mask_0, x = var_11195_cast_fp16)[name = tensor("op_11244_cast_fp16")]; tensor var_11245_begin_0 = const()[name = tensor("op_11245_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_11245_end_0 = const()[name = tensor("op_11245_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_11245_end_mask_0 = const()[name = tensor("op_11245_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11245_cast_fp16 = slice_by_index(begin = var_11245_begin_0, end = var_11245_end_0, end_mask = var_11245_end_mask_0, x = var_11195_cast_fp16)[name = tensor("op_11245_cast_fp16")]; tensor var_11246_begin_0 = const()[name = tensor("op_11246_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11246_end_0 = const()[name = tensor("op_11246_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_11246_end_mask_0 = const()[name = tensor("op_11246_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11246_cast_fp16 = slice_by_index(begin = var_11246_begin_0, end = var_11246_end_0, end_mask = var_11246_end_mask_0, x = var_11199_cast_fp16)[name = tensor("op_11246_cast_fp16")]; tensor var_11247_begin_0 = const()[name = tensor("op_11247_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_11247_end_0 = const()[name = tensor("op_11247_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_11247_end_mask_0 = const()[name = tensor("op_11247_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11247_cast_fp16 = slice_by_index(begin = var_11247_begin_0, end = var_11247_end_0, end_mask = var_11247_end_mask_0, x = var_11199_cast_fp16)[name = tensor("op_11247_cast_fp16")]; tensor var_11248_begin_0 = const()[name = tensor("op_11248_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_11248_end_0 = const()[name = tensor("op_11248_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_11248_end_mask_0 = const()[name = tensor("op_11248_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11248_cast_fp16 = slice_by_index(begin = var_11248_begin_0, end = var_11248_end_0, end_mask = var_11248_end_mask_0, x = var_11199_cast_fp16)[name = tensor("op_11248_cast_fp16")]; tensor var_11249_begin_0 = const()[name = tensor("op_11249_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_11249_end_0 = const()[name = tensor("op_11249_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_11249_end_mask_0 = const()[name = tensor("op_11249_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11249_cast_fp16 = slice_by_index(begin = var_11249_begin_0, end = var_11249_end_0, end_mask = var_11249_end_mask_0, x = var_11199_cast_fp16)[name = tensor("op_11249_cast_fp16")]; tensor var_11250_begin_0 = const()[name = tensor("op_11250_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_11250_end_0 = const()[name = tensor("op_11250_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_11250_end_mask_0 = const()[name = tensor("op_11250_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11250_cast_fp16 = slice_by_index(begin = var_11250_begin_0, end = var_11250_end_0, end_mask = var_11250_end_mask_0, x = var_11199_cast_fp16)[name = tensor("op_11250_cast_fp16")]; tensor var_11251_begin_0 = const()[name = tensor("op_11251_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_11251_end_0 = const()[name = tensor("op_11251_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_11251_end_mask_0 = const()[name = tensor("op_11251_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11251_cast_fp16 = slice_by_index(begin = var_11251_begin_0, end = var_11251_end_0, end_mask = var_11251_end_mask_0, x = var_11199_cast_fp16)[name = tensor("op_11251_cast_fp16")]; tensor var_11252_begin_0 = const()[name = tensor("op_11252_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_11252_end_0 = const()[name = tensor("op_11252_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_11252_end_mask_0 = const()[name = tensor("op_11252_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11252_cast_fp16 = slice_by_index(begin = var_11252_begin_0, end = var_11252_end_0, end_mask = var_11252_end_mask_0, x = var_11199_cast_fp16)[name = tensor("op_11252_cast_fp16")]; tensor var_11253_begin_0 = const()[name = tensor("op_11253_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_11253_end_0 = const()[name = tensor("op_11253_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_11253_end_mask_0 = const()[name = tensor("op_11253_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11253_cast_fp16 = slice_by_index(begin = var_11253_begin_0, end = var_11253_end_0, end_mask = var_11253_end_mask_0, x = var_11199_cast_fp16)[name = tensor("op_11253_cast_fp16")]; tensor var_11254_begin_0 = const()[name = tensor("op_11254_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11254_end_0 = const()[name = tensor("op_11254_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_11254_end_mask_0 = const()[name = tensor("op_11254_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11254_cast_fp16 = slice_by_index(begin = var_11254_begin_0, end = var_11254_end_0, end_mask = var_11254_end_mask_0, x = var_11203_cast_fp16)[name = tensor("op_11254_cast_fp16")]; tensor var_11255_begin_0 = const()[name = tensor("op_11255_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_11255_end_0 = const()[name = tensor("op_11255_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_11255_end_mask_0 = const()[name = tensor("op_11255_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11255_cast_fp16 = slice_by_index(begin = var_11255_begin_0, end = var_11255_end_0, end_mask = var_11255_end_mask_0, x = var_11203_cast_fp16)[name = tensor("op_11255_cast_fp16")]; tensor var_11256_begin_0 = const()[name = tensor("op_11256_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_11256_end_0 = const()[name = tensor("op_11256_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_11256_end_mask_0 = const()[name = tensor("op_11256_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11256_cast_fp16 = slice_by_index(begin = var_11256_begin_0, end = var_11256_end_0, end_mask = var_11256_end_mask_0, x = var_11203_cast_fp16)[name = tensor("op_11256_cast_fp16")]; tensor var_11257_begin_0 = const()[name = tensor("op_11257_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_11257_end_0 = const()[name = tensor("op_11257_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_11257_end_mask_0 = const()[name = tensor("op_11257_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11257_cast_fp16 = slice_by_index(begin = var_11257_begin_0, end = var_11257_end_0, end_mask = var_11257_end_mask_0, x = var_11203_cast_fp16)[name = tensor("op_11257_cast_fp16")]; tensor var_11258_begin_0 = const()[name = tensor("op_11258_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_11258_end_0 = const()[name = tensor("op_11258_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_11258_end_mask_0 = const()[name = tensor("op_11258_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11258_cast_fp16 = slice_by_index(begin = var_11258_begin_0, end = var_11258_end_0, end_mask = var_11258_end_mask_0, x = var_11203_cast_fp16)[name = tensor("op_11258_cast_fp16")]; tensor var_11259_begin_0 = const()[name = tensor("op_11259_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_11259_end_0 = const()[name = tensor("op_11259_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_11259_end_mask_0 = const()[name = tensor("op_11259_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11259_cast_fp16 = slice_by_index(begin = var_11259_begin_0, end = var_11259_end_0, end_mask = var_11259_end_mask_0, x = var_11203_cast_fp16)[name = tensor("op_11259_cast_fp16")]; tensor var_11260_begin_0 = const()[name = tensor("op_11260_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_11260_end_0 = const()[name = tensor("op_11260_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_11260_end_mask_0 = const()[name = tensor("op_11260_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11260_cast_fp16 = slice_by_index(begin = var_11260_begin_0, end = var_11260_end_0, end_mask = var_11260_end_mask_0, x = var_11203_cast_fp16)[name = tensor("op_11260_cast_fp16")]; tensor var_11261_begin_0 = const()[name = tensor("op_11261_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_11261_end_0 = const()[name = tensor("op_11261_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_11261_end_mask_0 = const()[name = tensor("op_11261_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11261_cast_fp16 = slice_by_index(begin = var_11261_begin_0, end = var_11261_end_0, end_mask = var_11261_end_mask_0, x = var_11203_cast_fp16)[name = tensor("op_11261_cast_fp16")]; tensor var_11262_begin_0 = const()[name = tensor("op_11262_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11262_end_0 = const()[name = tensor("op_11262_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_11262_end_mask_0 = const()[name = tensor("op_11262_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11262_cast_fp16 = slice_by_index(begin = var_11262_begin_0, end = var_11262_end_0, end_mask = var_11262_end_mask_0, x = var_11207_cast_fp16)[name = tensor("op_11262_cast_fp16")]; tensor var_11263_begin_0 = const()[name = tensor("op_11263_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_11263_end_0 = const()[name = tensor("op_11263_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_11263_end_mask_0 = const()[name = tensor("op_11263_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11263_cast_fp16 = slice_by_index(begin = var_11263_begin_0, end = var_11263_end_0, end_mask = var_11263_end_mask_0, x = var_11207_cast_fp16)[name = tensor("op_11263_cast_fp16")]; tensor var_11264_begin_0 = const()[name = tensor("op_11264_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_11264_end_0 = const()[name = tensor("op_11264_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_11264_end_mask_0 = const()[name = tensor("op_11264_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11264_cast_fp16 = slice_by_index(begin = var_11264_begin_0, end = var_11264_end_0, end_mask = var_11264_end_mask_0, x = var_11207_cast_fp16)[name = tensor("op_11264_cast_fp16")]; tensor var_11265_begin_0 = const()[name = tensor("op_11265_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_11265_end_0 = const()[name = tensor("op_11265_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_11265_end_mask_0 = const()[name = tensor("op_11265_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11265_cast_fp16 = slice_by_index(begin = var_11265_begin_0, end = var_11265_end_0, end_mask = var_11265_end_mask_0, x = var_11207_cast_fp16)[name = tensor("op_11265_cast_fp16")]; tensor var_11266_begin_0 = const()[name = tensor("op_11266_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_11266_end_0 = const()[name = tensor("op_11266_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_11266_end_mask_0 = const()[name = tensor("op_11266_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11266_cast_fp16 = slice_by_index(begin = var_11266_begin_0, end = var_11266_end_0, end_mask = var_11266_end_mask_0, x = var_11207_cast_fp16)[name = tensor("op_11266_cast_fp16")]; tensor var_11267_begin_0 = const()[name = tensor("op_11267_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_11267_end_0 = const()[name = tensor("op_11267_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_11267_end_mask_0 = const()[name = tensor("op_11267_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11267_cast_fp16 = slice_by_index(begin = var_11267_begin_0, end = var_11267_end_0, end_mask = var_11267_end_mask_0, x = var_11207_cast_fp16)[name = tensor("op_11267_cast_fp16")]; tensor var_11268_begin_0 = const()[name = tensor("op_11268_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_11268_end_0 = const()[name = tensor("op_11268_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_11268_end_mask_0 = const()[name = tensor("op_11268_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11268_cast_fp16 = slice_by_index(begin = var_11268_begin_0, end = var_11268_end_0, end_mask = var_11268_end_mask_0, x = var_11207_cast_fp16)[name = tensor("op_11268_cast_fp16")]; tensor var_11269_begin_0 = const()[name = tensor("op_11269_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_11269_end_0 = const()[name = tensor("op_11269_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_11269_end_mask_0 = const()[name = tensor("op_11269_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11269_cast_fp16 = slice_by_index(begin = var_11269_begin_0, end = var_11269_end_0, end_mask = var_11269_end_mask_0, x = var_11207_cast_fp16)[name = tensor("op_11269_cast_fp16")]; tensor var_11270_begin_0 = const()[name = tensor("op_11270_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11270_end_0 = const()[name = tensor("op_11270_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_11270_end_mask_0 = const()[name = tensor("op_11270_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11270_cast_fp16 = slice_by_index(begin = var_11270_begin_0, end = var_11270_end_0, end_mask = var_11270_end_mask_0, x = var_11211_cast_fp16)[name = tensor("op_11270_cast_fp16")]; tensor var_11271_begin_0 = const()[name = tensor("op_11271_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_11271_end_0 = const()[name = tensor("op_11271_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_11271_end_mask_0 = const()[name = tensor("op_11271_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11271_cast_fp16 = slice_by_index(begin = var_11271_begin_0, end = var_11271_end_0, end_mask = var_11271_end_mask_0, x = var_11211_cast_fp16)[name = tensor("op_11271_cast_fp16")]; tensor var_11272_begin_0 = const()[name = tensor("op_11272_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_11272_end_0 = const()[name = tensor("op_11272_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_11272_end_mask_0 = const()[name = tensor("op_11272_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11272_cast_fp16 = slice_by_index(begin = var_11272_begin_0, end = var_11272_end_0, end_mask = var_11272_end_mask_0, x = var_11211_cast_fp16)[name = tensor("op_11272_cast_fp16")]; tensor var_11273_begin_0 = const()[name = tensor("op_11273_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_11273_end_0 = const()[name = tensor("op_11273_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_11273_end_mask_0 = const()[name = tensor("op_11273_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11273_cast_fp16 = slice_by_index(begin = var_11273_begin_0, end = var_11273_end_0, end_mask = var_11273_end_mask_0, x = var_11211_cast_fp16)[name = tensor("op_11273_cast_fp16")]; tensor var_11274_begin_0 = const()[name = tensor("op_11274_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_11274_end_0 = const()[name = tensor("op_11274_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_11274_end_mask_0 = const()[name = tensor("op_11274_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11274_cast_fp16 = slice_by_index(begin = var_11274_begin_0, end = var_11274_end_0, end_mask = var_11274_end_mask_0, x = var_11211_cast_fp16)[name = tensor("op_11274_cast_fp16")]; tensor var_11275_begin_0 = const()[name = tensor("op_11275_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_11275_end_0 = const()[name = tensor("op_11275_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_11275_end_mask_0 = const()[name = tensor("op_11275_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11275_cast_fp16 = slice_by_index(begin = var_11275_begin_0, end = var_11275_end_0, end_mask = var_11275_end_mask_0, x = var_11211_cast_fp16)[name = tensor("op_11275_cast_fp16")]; tensor var_11276_begin_0 = const()[name = tensor("op_11276_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_11276_end_0 = const()[name = tensor("op_11276_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_11276_end_mask_0 = const()[name = tensor("op_11276_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11276_cast_fp16 = slice_by_index(begin = var_11276_begin_0, end = var_11276_end_0, end_mask = var_11276_end_mask_0, x = var_11211_cast_fp16)[name = tensor("op_11276_cast_fp16")]; tensor var_11277_begin_0 = const()[name = tensor("op_11277_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_11277_end_0 = const()[name = tensor("op_11277_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_11277_end_mask_0 = const()[name = tensor("op_11277_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11277_cast_fp16 = slice_by_index(begin = var_11277_begin_0, end = var_11277_end_0, end_mask = var_11277_end_mask_0, x = var_11211_cast_fp16)[name = tensor("op_11277_cast_fp16")]; tensor k_111_perm_0 = const()[name = tensor("k_111_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_11282_begin_0 = const()[name = tensor("op_11282_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11282_end_0 = const()[name = tensor("op_11282_end_0"), val = tensor([2, 77, 1, 40])]; tensor var_11282_end_mask_0 = const()[name = tensor("op_11282_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_4 = transpose(perm = k_111_perm_0, x = k_109_cast_fp16)[name = tensor("transpose_4")]; tensor var_11282_cast_fp16 = slice_by_index(begin = var_11282_begin_0, end = var_11282_end_0, end_mask = var_11282_end_mask_0, x = transpose_4)[name = tensor("op_11282_cast_fp16")]; tensor var_11286_begin_0 = const()[name = tensor("op_11286_begin_0"), val = tensor([0, 0, 0, 40])]; tensor var_11286_end_0 = const()[name = tensor("op_11286_end_0"), val = tensor([2, 77, 1, 80])]; tensor var_11286_end_mask_0 = const()[name = tensor("op_11286_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11286_cast_fp16 = slice_by_index(begin = var_11286_begin_0, end = var_11286_end_0, end_mask = var_11286_end_mask_0, x = transpose_4)[name = tensor("op_11286_cast_fp16")]; tensor var_11290_begin_0 = const()[name = tensor("op_11290_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_11290_end_0 = const()[name = tensor("op_11290_end_0"), val = tensor([2, 77, 1, 120])]; tensor var_11290_end_mask_0 = const()[name = tensor("op_11290_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11290_cast_fp16 = slice_by_index(begin = var_11290_begin_0, end = var_11290_end_0, end_mask = var_11290_end_mask_0, x = transpose_4)[name = tensor("op_11290_cast_fp16")]; tensor var_11294_begin_0 = const()[name = tensor("op_11294_begin_0"), val = tensor([0, 0, 0, 120])]; tensor var_11294_end_0 = const()[name = tensor("op_11294_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_11294_end_mask_0 = const()[name = tensor("op_11294_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11294_cast_fp16 = slice_by_index(begin = var_11294_begin_0, end = var_11294_end_0, end_mask = var_11294_end_mask_0, x = transpose_4)[name = tensor("op_11294_cast_fp16")]; tensor var_11298_begin_0 = const()[name = tensor("op_11298_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_11298_end_0 = const()[name = tensor("op_11298_end_0"), val = tensor([2, 77, 1, 200])]; tensor var_11298_end_mask_0 = const()[name = tensor("op_11298_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11298_cast_fp16 = slice_by_index(begin = var_11298_begin_0, end = var_11298_end_0, end_mask = var_11298_end_mask_0, x = transpose_4)[name = tensor("op_11298_cast_fp16")]; tensor var_11302_begin_0 = const()[name = tensor("op_11302_begin_0"), val = tensor([0, 0, 0, 200])]; tensor var_11302_end_0 = const()[name = tensor("op_11302_end_0"), val = tensor([2, 77, 1, 240])]; tensor var_11302_end_mask_0 = const()[name = tensor("op_11302_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11302_cast_fp16 = slice_by_index(begin = var_11302_begin_0, end = var_11302_end_0, end_mask = var_11302_end_mask_0, x = transpose_4)[name = tensor("op_11302_cast_fp16")]; tensor var_11306_begin_0 = const()[name = tensor("op_11306_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_11306_end_0 = const()[name = tensor("op_11306_end_0"), val = tensor([2, 77, 1, 280])]; tensor var_11306_end_mask_0 = const()[name = tensor("op_11306_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11306_cast_fp16 = slice_by_index(begin = var_11306_begin_0, end = var_11306_end_0, end_mask = var_11306_end_mask_0, x = transpose_4)[name = tensor("op_11306_cast_fp16")]; tensor var_11310_begin_0 = const()[name = tensor("op_11310_begin_0"), val = tensor([0, 0, 0, 280])]; tensor var_11310_end_0 = const()[name = tensor("op_11310_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_11310_end_mask_0 = const()[name = tensor("op_11310_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11310_cast_fp16 = slice_by_index(begin = var_11310_begin_0, end = var_11310_end_0, end_mask = var_11310_end_mask_0, x = transpose_4)[name = tensor("op_11310_cast_fp16")]; tensor var_11312_begin_0 = const()[name = tensor("op_11312_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11312_end_0 = const()[name = tensor("op_11312_end_0"), val = tensor([2, 40, 1, 77])]; tensor var_11312_end_mask_0 = const()[name = tensor("op_11312_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11312_cast_fp16 = slice_by_index(begin = var_11312_begin_0, end = var_11312_end_0, end_mask = var_11312_end_mask_0, x = v_55_cast_fp16)[name = tensor("op_11312_cast_fp16")]; tensor var_11316_begin_0 = const()[name = tensor("op_11316_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_11316_end_0 = const()[name = tensor("op_11316_end_0"), val = tensor([2, 80, 1, 77])]; tensor var_11316_end_mask_0 = const()[name = tensor("op_11316_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11316_cast_fp16 = slice_by_index(begin = var_11316_begin_0, end = var_11316_end_0, end_mask = var_11316_end_mask_0, x = v_55_cast_fp16)[name = tensor("op_11316_cast_fp16")]; tensor var_11320_begin_0 = const()[name = tensor("op_11320_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_11320_end_0 = const()[name = tensor("op_11320_end_0"), val = tensor([2, 120, 1, 77])]; tensor var_11320_end_mask_0 = const()[name = tensor("op_11320_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11320_cast_fp16 = slice_by_index(begin = var_11320_begin_0, end = var_11320_end_0, end_mask = var_11320_end_mask_0, x = v_55_cast_fp16)[name = tensor("op_11320_cast_fp16")]; tensor var_11324_begin_0 = const()[name = tensor("op_11324_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_11324_end_0 = const()[name = tensor("op_11324_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_11324_end_mask_0 = const()[name = tensor("op_11324_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11324_cast_fp16 = slice_by_index(begin = var_11324_begin_0, end = var_11324_end_0, end_mask = var_11324_end_mask_0, x = v_55_cast_fp16)[name = tensor("op_11324_cast_fp16")]; tensor var_11328_begin_0 = const()[name = tensor("op_11328_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_11328_end_0 = const()[name = tensor("op_11328_end_0"), val = tensor([2, 200, 1, 77])]; tensor var_11328_end_mask_0 = const()[name = tensor("op_11328_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11328_cast_fp16 = slice_by_index(begin = var_11328_begin_0, end = var_11328_end_0, end_mask = var_11328_end_mask_0, x = v_55_cast_fp16)[name = tensor("op_11328_cast_fp16")]; tensor var_11332_begin_0 = const()[name = tensor("op_11332_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_11332_end_0 = const()[name = tensor("op_11332_end_0"), val = tensor([2, 240, 1, 77])]; tensor var_11332_end_mask_0 = const()[name = tensor("op_11332_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11332_cast_fp16 = slice_by_index(begin = var_11332_begin_0, end = var_11332_end_0, end_mask = var_11332_end_mask_0, x = v_55_cast_fp16)[name = tensor("op_11332_cast_fp16")]; tensor var_11336_begin_0 = const()[name = tensor("op_11336_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_11336_end_0 = const()[name = tensor("op_11336_end_0"), val = tensor([2, 280, 1, 77])]; tensor var_11336_end_mask_0 = const()[name = tensor("op_11336_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11336_cast_fp16 = slice_by_index(begin = var_11336_begin_0, end = var_11336_end_0, end_mask = var_11336_end_mask_0, x = v_55_cast_fp16)[name = tensor("op_11336_cast_fp16")]; tensor var_11340_begin_0 = const()[name = tensor("op_11340_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_11340_end_0 = const()[name = tensor("op_11340_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_11340_end_mask_0 = const()[name = tensor("op_11340_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11340_cast_fp16 = slice_by_index(begin = var_11340_begin_0, end = var_11340_end_0, end_mask = var_11340_end_mask_0, x = v_55_cast_fp16)[name = tensor("op_11340_cast_fp16")]; tensor var_11344_equation_0 = const()[name = tensor("op_11344_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11344_cast_fp16 = einsum(equation = var_11344_equation_0, values = (var_11282_cast_fp16, var_11214_cast_fp16))[name = tensor("op_11344_cast_fp16")]; tensor var_11345_to_fp16 = const()[name = tensor("op_11345_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_961_cast_fp16 = mul(x = var_11344_cast_fp16, y = var_11345_to_fp16)[name = tensor("aw_chunk_961_cast_fp16")]; tensor var_11348_equation_0 = const()[name = tensor("op_11348_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11348_cast_fp16 = einsum(equation = var_11348_equation_0, values = (var_11282_cast_fp16, var_11215_cast_fp16))[name = tensor("op_11348_cast_fp16")]; tensor var_11349_to_fp16 = const()[name = tensor("op_11349_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_963_cast_fp16 = mul(x = var_11348_cast_fp16, y = var_11349_to_fp16)[name = tensor("aw_chunk_963_cast_fp16")]; tensor var_11352_equation_0 = const()[name = tensor("op_11352_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11352_cast_fp16 = einsum(equation = var_11352_equation_0, values = (var_11282_cast_fp16, var_11216_cast_fp16))[name = tensor("op_11352_cast_fp16")]; tensor var_11353_to_fp16 = const()[name = tensor("op_11353_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_965_cast_fp16 = mul(x = var_11352_cast_fp16, y = var_11353_to_fp16)[name = tensor("aw_chunk_965_cast_fp16")]; tensor var_11356_equation_0 = const()[name = tensor("op_11356_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11356_cast_fp16 = einsum(equation = var_11356_equation_0, values = (var_11282_cast_fp16, var_11217_cast_fp16))[name = tensor("op_11356_cast_fp16")]; tensor var_11357_to_fp16 = const()[name = tensor("op_11357_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_967_cast_fp16 = mul(x = var_11356_cast_fp16, y = var_11357_to_fp16)[name = tensor("aw_chunk_967_cast_fp16")]; tensor var_11360_equation_0 = const()[name = tensor("op_11360_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11360_cast_fp16 = einsum(equation = var_11360_equation_0, values = (var_11282_cast_fp16, var_11218_cast_fp16))[name = tensor("op_11360_cast_fp16")]; tensor var_11361_to_fp16 = const()[name = tensor("op_11361_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_969_cast_fp16 = mul(x = var_11360_cast_fp16, y = var_11361_to_fp16)[name = tensor("aw_chunk_969_cast_fp16")]; tensor var_11364_equation_0 = const()[name = tensor("op_11364_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11364_cast_fp16 = einsum(equation = var_11364_equation_0, values = (var_11282_cast_fp16, var_11219_cast_fp16))[name = tensor("op_11364_cast_fp16")]; tensor var_11365_to_fp16 = const()[name = tensor("op_11365_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_971_cast_fp16 = mul(x = var_11364_cast_fp16, y = var_11365_to_fp16)[name = tensor("aw_chunk_971_cast_fp16")]; tensor var_11368_equation_0 = const()[name = tensor("op_11368_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11368_cast_fp16 = einsum(equation = var_11368_equation_0, values = (var_11282_cast_fp16, var_11220_cast_fp16))[name = tensor("op_11368_cast_fp16")]; tensor var_11369_to_fp16 = const()[name = tensor("op_11369_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_973_cast_fp16 = mul(x = var_11368_cast_fp16, y = var_11369_to_fp16)[name = tensor("aw_chunk_973_cast_fp16")]; tensor var_11372_equation_0 = const()[name = tensor("op_11372_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11372_cast_fp16 = einsum(equation = var_11372_equation_0, values = (var_11282_cast_fp16, var_11221_cast_fp16))[name = tensor("op_11372_cast_fp16")]; tensor var_11373_to_fp16 = const()[name = tensor("op_11373_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_975_cast_fp16 = mul(x = var_11372_cast_fp16, y = var_11373_to_fp16)[name = tensor("aw_chunk_975_cast_fp16")]; tensor var_11376_equation_0 = const()[name = tensor("op_11376_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11376_cast_fp16 = einsum(equation = var_11376_equation_0, values = (var_11286_cast_fp16, var_11222_cast_fp16))[name = tensor("op_11376_cast_fp16")]; tensor var_11377_to_fp16 = const()[name = tensor("op_11377_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_977_cast_fp16 = mul(x = var_11376_cast_fp16, y = var_11377_to_fp16)[name = tensor("aw_chunk_977_cast_fp16")]; tensor var_11380_equation_0 = const()[name = tensor("op_11380_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11380_cast_fp16 = einsum(equation = var_11380_equation_0, values = (var_11286_cast_fp16, var_11223_cast_fp16))[name = tensor("op_11380_cast_fp16")]; tensor var_11381_to_fp16 = const()[name = tensor("op_11381_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_979_cast_fp16 = mul(x = var_11380_cast_fp16, y = var_11381_to_fp16)[name = tensor("aw_chunk_979_cast_fp16")]; tensor var_11384_equation_0 = const()[name = tensor("op_11384_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11384_cast_fp16 = einsum(equation = var_11384_equation_0, values = (var_11286_cast_fp16, var_11224_cast_fp16))[name = tensor("op_11384_cast_fp16")]; tensor var_11385_to_fp16 = const()[name = tensor("op_11385_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_981_cast_fp16 = mul(x = var_11384_cast_fp16, y = var_11385_to_fp16)[name = tensor("aw_chunk_981_cast_fp16")]; tensor var_11388_equation_0 = const()[name = tensor("op_11388_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11388_cast_fp16 = einsum(equation = var_11388_equation_0, values = (var_11286_cast_fp16, var_11225_cast_fp16))[name = tensor("op_11388_cast_fp16")]; tensor var_11389_to_fp16 = const()[name = tensor("op_11389_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_983_cast_fp16 = mul(x = var_11388_cast_fp16, y = var_11389_to_fp16)[name = tensor("aw_chunk_983_cast_fp16")]; tensor var_11392_equation_0 = const()[name = tensor("op_11392_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11392_cast_fp16 = einsum(equation = var_11392_equation_0, values = (var_11286_cast_fp16, var_11226_cast_fp16))[name = tensor("op_11392_cast_fp16")]; tensor var_11393_to_fp16 = const()[name = tensor("op_11393_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_985_cast_fp16 = mul(x = var_11392_cast_fp16, y = var_11393_to_fp16)[name = tensor("aw_chunk_985_cast_fp16")]; tensor var_11396_equation_0 = const()[name = tensor("op_11396_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11396_cast_fp16 = einsum(equation = var_11396_equation_0, values = (var_11286_cast_fp16, var_11227_cast_fp16))[name = tensor("op_11396_cast_fp16")]; tensor var_11397_to_fp16 = const()[name = tensor("op_11397_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_987_cast_fp16 = mul(x = var_11396_cast_fp16, y = var_11397_to_fp16)[name = tensor("aw_chunk_987_cast_fp16")]; tensor var_11400_equation_0 = const()[name = tensor("op_11400_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11400_cast_fp16 = einsum(equation = var_11400_equation_0, values = (var_11286_cast_fp16, var_11228_cast_fp16))[name = tensor("op_11400_cast_fp16")]; tensor var_11401_to_fp16 = const()[name = tensor("op_11401_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_989_cast_fp16 = mul(x = var_11400_cast_fp16, y = var_11401_to_fp16)[name = tensor("aw_chunk_989_cast_fp16")]; tensor var_11404_equation_0 = const()[name = tensor("op_11404_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11404_cast_fp16 = einsum(equation = var_11404_equation_0, values = (var_11286_cast_fp16, var_11229_cast_fp16))[name = tensor("op_11404_cast_fp16")]; tensor var_11405_to_fp16 = const()[name = tensor("op_11405_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_991_cast_fp16 = mul(x = var_11404_cast_fp16, y = var_11405_to_fp16)[name = tensor("aw_chunk_991_cast_fp16")]; tensor var_11408_equation_0 = const()[name = tensor("op_11408_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11408_cast_fp16 = einsum(equation = var_11408_equation_0, values = (var_11290_cast_fp16, var_11230_cast_fp16))[name = tensor("op_11408_cast_fp16")]; tensor var_11409_to_fp16 = const()[name = tensor("op_11409_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_993_cast_fp16 = mul(x = var_11408_cast_fp16, y = var_11409_to_fp16)[name = tensor("aw_chunk_993_cast_fp16")]; tensor var_11412_equation_0 = const()[name = tensor("op_11412_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11412_cast_fp16 = einsum(equation = var_11412_equation_0, values = (var_11290_cast_fp16, var_11231_cast_fp16))[name = tensor("op_11412_cast_fp16")]; tensor var_11413_to_fp16 = const()[name = tensor("op_11413_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_995_cast_fp16 = mul(x = var_11412_cast_fp16, y = var_11413_to_fp16)[name = tensor("aw_chunk_995_cast_fp16")]; tensor var_11416_equation_0 = const()[name = tensor("op_11416_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11416_cast_fp16 = einsum(equation = var_11416_equation_0, values = (var_11290_cast_fp16, var_11232_cast_fp16))[name = tensor("op_11416_cast_fp16")]; tensor var_11417_to_fp16 = const()[name = tensor("op_11417_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_997_cast_fp16 = mul(x = var_11416_cast_fp16, y = var_11417_to_fp16)[name = tensor("aw_chunk_997_cast_fp16")]; tensor var_11420_equation_0 = const()[name = tensor("op_11420_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11420_cast_fp16 = einsum(equation = var_11420_equation_0, values = (var_11290_cast_fp16, var_11233_cast_fp16))[name = tensor("op_11420_cast_fp16")]; tensor var_11421_to_fp16 = const()[name = tensor("op_11421_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_999_cast_fp16 = mul(x = var_11420_cast_fp16, y = var_11421_to_fp16)[name = tensor("aw_chunk_999_cast_fp16")]; tensor var_11424_equation_0 = const()[name = tensor("op_11424_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11424_cast_fp16 = einsum(equation = var_11424_equation_0, values = (var_11290_cast_fp16, var_11234_cast_fp16))[name = tensor("op_11424_cast_fp16")]; tensor var_11425_to_fp16 = const()[name = tensor("op_11425_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1001_cast_fp16 = mul(x = var_11424_cast_fp16, y = var_11425_to_fp16)[name = tensor("aw_chunk_1001_cast_fp16")]; tensor var_11428_equation_0 = const()[name = tensor("op_11428_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11428_cast_fp16 = einsum(equation = var_11428_equation_0, values = (var_11290_cast_fp16, var_11235_cast_fp16))[name = tensor("op_11428_cast_fp16")]; tensor var_11429_to_fp16 = const()[name = tensor("op_11429_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1003_cast_fp16 = mul(x = var_11428_cast_fp16, y = var_11429_to_fp16)[name = tensor("aw_chunk_1003_cast_fp16")]; tensor var_11432_equation_0 = const()[name = tensor("op_11432_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11432_cast_fp16 = einsum(equation = var_11432_equation_0, values = (var_11290_cast_fp16, var_11236_cast_fp16))[name = tensor("op_11432_cast_fp16")]; tensor var_11433_to_fp16 = const()[name = tensor("op_11433_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1005_cast_fp16 = mul(x = var_11432_cast_fp16, y = var_11433_to_fp16)[name = tensor("aw_chunk_1005_cast_fp16")]; tensor var_11436_equation_0 = const()[name = tensor("op_11436_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11436_cast_fp16 = einsum(equation = var_11436_equation_0, values = (var_11290_cast_fp16, var_11237_cast_fp16))[name = tensor("op_11436_cast_fp16")]; tensor var_11437_to_fp16 = const()[name = tensor("op_11437_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1007_cast_fp16 = mul(x = var_11436_cast_fp16, y = var_11437_to_fp16)[name = tensor("aw_chunk_1007_cast_fp16")]; tensor var_11440_equation_0 = const()[name = tensor("op_11440_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11440_cast_fp16 = einsum(equation = var_11440_equation_0, values = (var_11294_cast_fp16, var_11238_cast_fp16))[name = tensor("op_11440_cast_fp16")]; tensor var_11441_to_fp16 = const()[name = tensor("op_11441_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1009_cast_fp16 = mul(x = var_11440_cast_fp16, y = var_11441_to_fp16)[name = tensor("aw_chunk_1009_cast_fp16")]; tensor var_11444_equation_0 = const()[name = tensor("op_11444_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11444_cast_fp16 = einsum(equation = var_11444_equation_0, values = (var_11294_cast_fp16, var_11239_cast_fp16))[name = tensor("op_11444_cast_fp16")]; tensor var_11445_to_fp16 = const()[name = tensor("op_11445_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1011_cast_fp16 = mul(x = var_11444_cast_fp16, y = var_11445_to_fp16)[name = tensor("aw_chunk_1011_cast_fp16")]; tensor var_11448_equation_0 = const()[name = tensor("op_11448_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11448_cast_fp16 = einsum(equation = var_11448_equation_0, values = (var_11294_cast_fp16, var_11240_cast_fp16))[name = tensor("op_11448_cast_fp16")]; tensor var_11449_to_fp16 = const()[name = tensor("op_11449_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1013_cast_fp16 = mul(x = var_11448_cast_fp16, y = var_11449_to_fp16)[name = tensor("aw_chunk_1013_cast_fp16")]; tensor var_11452_equation_0 = const()[name = tensor("op_11452_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11452_cast_fp16 = einsum(equation = var_11452_equation_0, values = (var_11294_cast_fp16, var_11241_cast_fp16))[name = tensor("op_11452_cast_fp16")]; tensor var_11453_to_fp16 = const()[name = tensor("op_11453_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1015_cast_fp16 = mul(x = var_11452_cast_fp16, y = var_11453_to_fp16)[name = tensor("aw_chunk_1015_cast_fp16")]; tensor var_11456_equation_0 = const()[name = tensor("op_11456_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11456_cast_fp16 = einsum(equation = var_11456_equation_0, values = (var_11294_cast_fp16, var_11242_cast_fp16))[name = tensor("op_11456_cast_fp16")]; tensor var_11457_to_fp16 = const()[name = tensor("op_11457_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1017_cast_fp16 = mul(x = var_11456_cast_fp16, y = var_11457_to_fp16)[name = tensor("aw_chunk_1017_cast_fp16")]; tensor var_11460_equation_0 = const()[name = tensor("op_11460_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11460_cast_fp16 = einsum(equation = var_11460_equation_0, values = (var_11294_cast_fp16, var_11243_cast_fp16))[name = tensor("op_11460_cast_fp16")]; tensor var_11461_to_fp16 = const()[name = tensor("op_11461_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1019_cast_fp16 = mul(x = var_11460_cast_fp16, y = var_11461_to_fp16)[name = tensor("aw_chunk_1019_cast_fp16")]; tensor var_11464_equation_0 = const()[name = tensor("op_11464_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11464_cast_fp16 = einsum(equation = var_11464_equation_0, values = (var_11294_cast_fp16, var_11244_cast_fp16))[name = tensor("op_11464_cast_fp16")]; tensor var_11465_to_fp16 = const()[name = tensor("op_11465_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1021_cast_fp16 = mul(x = var_11464_cast_fp16, y = var_11465_to_fp16)[name = tensor("aw_chunk_1021_cast_fp16")]; tensor var_11468_equation_0 = const()[name = tensor("op_11468_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11468_cast_fp16 = einsum(equation = var_11468_equation_0, values = (var_11294_cast_fp16, var_11245_cast_fp16))[name = tensor("op_11468_cast_fp16")]; tensor var_11469_to_fp16 = const()[name = tensor("op_11469_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1023_cast_fp16 = mul(x = var_11468_cast_fp16, y = var_11469_to_fp16)[name = tensor("aw_chunk_1023_cast_fp16")]; tensor var_11472_equation_0 = const()[name = tensor("op_11472_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11472_cast_fp16 = einsum(equation = var_11472_equation_0, values = (var_11298_cast_fp16, var_11246_cast_fp16))[name = tensor("op_11472_cast_fp16")]; tensor var_11473_to_fp16 = const()[name = tensor("op_11473_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1025_cast_fp16 = mul(x = var_11472_cast_fp16, y = var_11473_to_fp16)[name = tensor("aw_chunk_1025_cast_fp16")]; tensor var_11476_equation_0 = const()[name = tensor("op_11476_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11476_cast_fp16 = einsum(equation = var_11476_equation_0, values = (var_11298_cast_fp16, var_11247_cast_fp16))[name = tensor("op_11476_cast_fp16")]; tensor var_11477_to_fp16 = const()[name = tensor("op_11477_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1027_cast_fp16 = mul(x = var_11476_cast_fp16, y = var_11477_to_fp16)[name = tensor("aw_chunk_1027_cast_fp16")]; tensor var_11480_equation_0 = const()[name = tensor("op_11480_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11480_cast_fp16 = einsum(equation = var_11480_equation_0, values = (var_11298_cast_fp16, var_11248_cast_fp16))[name = tensor("op_11480_cast_fp16")]; tensor var_11481_to_fp16 = const()[name = tensor("op_11481_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1029_cast_fp16 = mul(x = var_11480_cast_fp16, y = var_11481_to_fp16)[name = tensor("aw_chunk_1029_cast_fp16")]; tensor var_11484_equation_0 = const()[name = tensor("op_11484_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11484_cast_fp16 = einsum(equation = var_11484_equation_0, values = (var_11298_cast_fp16, var_11249_cast_fp16))[name = tensor("op_11484_cast_fp16")]; tensor var_11485_to_fp16 = const()[name = tensor("op_11485_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1031_cast_fp16 = mul(x = var_11484_cast_fp16, y = var_11485_to_fp16)[name = tensor("aw_chunk_1031_cast_fp16")]; tensor var_11488_equation_0 = const()[name = tensor("op_11488_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11488_cast_fp16 = einsum(equation = var_11488_equation_0, values = (var_11298_cast_fp16, var_11250_cast_fp16))[name = tensor("op_11488_cast_fp16")]; tensor var_11489_to_fp16 = const()[name = tensor("op_11489_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1033_cast_fp16 = mul(x = var_11488_cast_fp16, y = var_11489_to_fp16)[name = tensor("aw_chunk_1033_cast_fp16")]; tensor var_11492_equation_0 = const()[name = tensor("op_11492_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11492_cast_fp16 = einsum(equation = var_11492_equation_0, values = (var_11298_cast_fp16, var_11251_cast_fp16))[name = tensor("op_11492_cast_fp16")]; tensor var_11493_to_fp16 = const()[name = tensor("op_11493_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1035_cast_fp16 = mul(x = var_11492_cast_fp16, y = var_11493_to_fp16)[name = tensor("aw_chunk_1035_cast_fp16")]; tensor var_11496_equation_0 = const()[name = tensor("op_11496_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11496_cast_fp16 = einsum(equation = var_11496_equation_0, values = (var_11298_cast_fp16, var_11252_cast_fp16))[name = tensor("op_11496_cast_fp16")]; tensor var_11497_to_fp16 = const()[name = tensor("op_11497_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1037_cast_fp16 = mul(x = var_11496_cast_fp16, y = var_11497_to_fp16)[name = tensor("aw_chunk_1037_cast_fp16")]; tensor var_11500_equation_0 = const()[name = tensor("op_11500_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11500_cast_fp16 = einsum(equation = var_11500_equation_0, values = (var_11298_cast_fp16, var_11253_cast_fp16))[name = tensor("op_11500_cast_fp16")]; tensor var_11501_to_fp16 = const()[name = tensor("op_11501_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1039_cast_fp16 = mul(x = var_11500_cast_fp16, y = var_11501_to_fp16)[name = tensor("aw_chunk_1039_cast_fp16")]; tensor var_11504_equation_0 = const()[name = tensor("op_11504_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11504_cast_fp16 = einsum(equation = var_11504_equation_0, values = (var_11302_cast_fp16, var_11254_cast_fp16))[name = tensor("op_11504_cast_fp16")]; tensor var_11505_to_fp16 = const()[name = tensor("op_11505_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1041_cast_fp16 = mul(x = var_11504_cast_fp16, y = var_11505_to_fp16)[name = tensor("aw_chunk_1041_cast_fp16")]; tensor var_11508_equation_0 = const()[name = tensor("op_11508_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11508_cast_fp16 = einsum(equation = var_11508_equation_0, values = (var_11302_cast_fp16, var_11255_cast_fp16))[name = tensor("op_11508_cast_fp16")]; tensor var_11509_to_fp16 = const()[name = tensor("op_11509_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1043_cast_fp16 = mul(x = var_11508_cast_fp16, y = var_11509_to_fp16)[name = tensor("aw_chunk_1043_cast_fp16")]; tensor var_11512_equation_0 = const()[name = tensor("op_11512_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11512_cast_fp16 = einsum(equation = var_11512_equation_0, values = (var_11302_cast_fp16, var_11256_cast_fp16))[name = tensor("op_11512_cast_fp16")]; tensor var_11513_to_fp16 = const()[name = tensor("op_11513_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1045_cast_fp16 = mul(x = var_11512_cast_fp16, y = var_11513_to_fp16)[name = tensor("aw_chunk_1045_cast_fp16")]; tensor var_11516_equation_0 = const()[name = tensor("op_11516_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11516_cast_fp16 = einsum(equation = var_11516_equation_0, values = (var_11302_cast_fp16, var_11257_cast_fp16))[name = tensor("op_11516_cast_fp16")]; tensor var_11517_to_fp16 = const()[name = tensor("op_11517_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1047_cast_fp16 = mul(x = var_11516_cast_fp16, y = var_11517_to_fp16)[name = tensor("aw_chunk_1047_cast_fp16")]; tensor var_11520_equation_0 = const()[name = tensor("op_11520_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11520_cast_fp16 = einsum(equation = var_11520_equation_0, values = (var_11302_cast_fp16, var_11258_cast_fp16))[name = tensor("op_11520_cast_fp16")]; tensor var_11521_to_fp16 = const()[name = tensor("op_11521_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1049_cast_fp16 = mul(x = var_11520_cast_fp16, y = var_11521_to_fp16)[name = tensor("aw_chunk_1049_cast_fp16")]; tensor var_11524_equation_0 = const()[name = tensor("op_11524_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11524_cast_fp16 = einsum(equation = var_11524_equation_0, values = (var_11302_cast_fp16, var_11259_cast_fp16))[name = tensor("op_11524_cast_fp16")]; tensor var_11525_to_fp16 = const()[name = tensor("op_11525_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1051_cast_fp16 = mul(x = var_11524_cast_fp16, y = var_11525_to_fp16)[name = tensor("aw_chunk_1051_cast_fp16")]; tensor var_11528_equation_0 = const()[name = tensor("op_11528_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11528_cast_fp16 = einsum(equation = var_11528_equation_0, values = (var_11302_cast_fp16, var_11260_cast_fp16))[name = tensor("op_11528_cast_fp16")]; tensor var_11529_to_fp16 = const()[name = tensor("op_11529_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1053_cast_fp16 = mul(x = var_11528_cast_fp16, y = var_11529_to_fp16)[name = tensor("aw_chunk_1053_cast_fp16")]; tensor var_11532_equation_0 = const()[name = tensor("op_11532_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11532_cast_fp16 = einsum(equation = var_11532_equation_0, values = (var_11302_cast_fp16, var_11261_cast_fp16))[name = tensor("op_11532_cast_fp16")]; tensor var_11533_to_fp16 = const()[name = tensor("op_11533_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1055_cast_fp16 = mul(x = var_11532_cast_fp16, y = var_11533_to_fp16)[name = tensor("aw_chunk_1055_cast_fp16")]; tensor var_11536_equation_0 = const()[name = tensor("op_11536_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11536_cast_fp16 = einsum(equation = var_11536_equation_0, values = (var_11306_cast_fp16, var_11262_cast_fp16))[name = tensor("op_11536_cast_fp16")]; tensor var_11537_to_fp16 = const()[name = tensor("op_11537_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1057_cast_fp16 = mul(x = var_11536_cast_fp16, y = var_11537_to_fp16)[name = tensor("aw_chunk_1057_cast_fp16")]; tensor var_11540_equation_0 = const()[name = tensor("op_11540_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11540_cast_fp16 = einsum(equation = var_11540_equation_0, values = (var_11306_cast_fp16, var_11263_cast_fp16))[name = tensor("op_11540_cast_fp16")]; tensor var_11541_to_fp16 = const()[name = tensor("op_11541_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1059_cast_fp16 = mul(x = var_11540_cast_fp16, y = var_11541_to_fp16)[name = tensor("aw_chunk_1059_cast_fp16")]; tensor var_11544_equation_0 = const()[name = tensor("op_11544_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11544_cast_fp16 = einsum(equation = var_11544_equation_0, values = (var_11306_cast_fp16, var_11264_cast_fp16))[name = tensor("op_11544_cast_fp16")]; tensor var_11545_to_fp16 = const()[name = tensor("op_11545_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1061_cast_fp16 = mul(x = var_11544_cast_fp16, y = var_11545_to_fp16)[name = tensor("aw_chunk_1061_cast_fp16")]; tensor var_11548_equation_0 = const()[name = tensor("op_11548_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11548_cast_fp16 = einsum(equation = var_11548_equation_0, values = (var_11306_cast_fp16, var_11265_cast_fp16))[name = tensor("op_11548_cast_fp16")]; tensor var_11549_to_fp16 = const()[name = tensor("op_11549_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1063_cast_fp16 = mul(x = var_11548_cast_fp16, y = var_11549_to_fp16)[name = tensor("aw_chunk_1063_cast_fp16")]; tensor var_11552_equation_0 = const()[name = tensor("op_11552_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11552_cast_fp16 = einsum(equation = var_11552_equation_0, values = (var_11306_cast_fp16, var_11266_cast_fp16))[name = tensor("op_11552_cast_fp16")]; tensor var_11553_to_fp16 = const()[name = tensor("op_11553_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1065_cast_fp16 = mul(x = var_11552_cast_fp16, y = var_11553_to_fp16)[name = tensor("aw_chunk_1065_cast_fp16")]; tensor var_11556_equation_0 = const()[name = tensor("op_11556_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11556_cast_fp16 = einsum(equation = var_11556_equation_0, values = (var_11306_cast_fp16, var_11267_cast_fp16))[name = tensor("op_11556_cast_fp16")]; tensor var_11557_to_fp16 = const()[name = tensor("op_11557_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1067_cast_fp16 = mul(x = var_11556_cast_fp16, y = var_11557_to_fp16)[name = tensor("aw_chunk_1067_cast_fp16")]; tensor var_11560_equation_0 = const()[name = tensor("op_11560_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11560_cast_fp16 = einsum(equation = var_11560_equation_0, values = (var_11306_cast_fp16, var_11268_cast_fp16))[name = tensor("op_11560_cast_fp16")]; tensor var_11561_to_fp16 = const()[name = tensor("op_11561_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1069_cast_fp16 = mul(x = var_11560_cast_fp16, y = var_11561_to_fp16)[name = tensor("aw_chunk_1069_cast_fp16")]; tensor var_11564_equation_0 = const()[name = tensor("op_11564_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11564_cast_fp16 = einsum(equation = var_11564_equation_0, values = (var_11306_cast_fp16, var_11269_cast_fp16))[name = tensor("op_11564_cast_fp16")]; tensor var_11565_to_fp16 = const()[name = tensor("op_11565_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1071_cast_fp16 = mul(x = var_11564_cast_fp16, y = var_11565_to_fp16)[name = tensor("aw_chunk_1071_cast_fp16")]; tensor var_11568_equation_0 = const()[name = tensor("op_11568_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11568_cast_fp16 = einsum(equation = var_11568_equation_0, values = (var_11310_cast_fp16, var_11270_cast_fp16))[name = tensor("op_11568_cast_fp16")]; tensor var_11569_to_fp16 = const()[name = tensor("op_11569_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1073_cast_fp16 = mul(x = var_11568_cast_fp16, y = var_11569_to_fp16)[name = tensor("aw_chunk_1073_cast_fp16")]; tensor var_11572_equation_0 = const()[name = tensor("op_11572_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11572_cast_fp16 = einsum(equation = var_11572_equation_0, values = (var_11310_cast_fp16, var_11271_cast_fp16))[name = tensor("op_11572_cast_fp16")]; tensor var_11573_to_fp16 = const()[name = tensor("op_11573_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1075_cast_fp16 = mul(x = var_11572_cast_fp16, y = var_11573_to_fp16)[name = tensor("aw_chunk_1075_cast_fp16")]; tensor var_11576_equation_0 = const()[name = tensor("op_11576_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11576_cast_fp16 = einsum(equation = var_11576_equation_0, values = (var_11310_cast_fp16, var_11272_cast_fp16))[name = tensor("op_11576_cast_fp16")]; tensor var_11577_to_fp16 = const()[name = tensor("op_11577_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1077_cast_fp16 = mul(x = var_11576_cast_fp16, y = var_11577_to_fp16)[name = tensor("aw_chunk_1077_cast_fp16")]; tensor var_11580_equation_0 = const()[name = tensor("op_11580_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11580_cast_fp16 = einsum(equation = var_11580_equation_0, values = (var_11310_cast_fp16, var_11273_cast_fp16))[name = tensor("op_11580_cast_fp16")]; tensor var_11581_to_fp16 = const()[name = tensor("op_11581_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1079_cast_fp16 = mul(x = var_11580_cast_fp16, y = var_11581_to_fp16)[name = tensor("aw_chunk_1079_cast_fp16")]; tensor var_11584_equation_0 = const()[name = tensor("op_11584_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11584_cast_fp16 = einsum(equation = var_11584_equation_0, values = (var_11310_cast_fp16, var_11274_cast_fp16))[name = tensor("op_11584_cast_fp16")]; tensor var_11585_to_fp16 = const()[name = tensor("op_11585_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1081_cast_fp16 = mul(x = var_11584_cast_fp16, y = var_11585_to_fp16)[name = tensor("aw_chunk_1081_cast_fp16")]; tensor var_11588_equation_0 = const()[name = tensor("op_11588_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11588_cast_fp16 = einsum(equation = var_11588_equation_0, values = (var_11310_cast_fp16, var_11275_cast_fp16))[name = tensor("op_11588_cast_fp16")]; tensor var_11589_to_fp16 = const()[name = tensor("op_11589_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1083_cast_fp16 = mul(x = var_11588_cast_fp16, y = var_11589_to_fp16)[name = tensor("aw_chunk_1083_cast_fp16")]; tensor var_11592_equation_0 = const()[name = tensor("op_11592_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11592_cast_fp16 = einsum(equation = var_11592_equation_0, values = (var_11310_cast_fp16, var_11276_cast_fp16))[name = tensor("op_11592_cast_fp16")]; tensor var_11593_to_fp16 = const()[name = tensor("op_11593_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1085_cast_fp16 = mul(x = var_11592_cast_fp16, y = var_11593_to_fp16)[name = tensor("aw_chunk_1085_cast_fp16")]; tensor var_11596_equation_0 = const()[name = tensor("op_11596_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_11596_cast_fp16 = einsum(equation = var_11596_equation_0, values = (var_11310_cast_fp16, var_11277_cast_fp16))[name = tensor("op_11596_cast_fp16")]; tensor var_11597_to_fp16 = const()[name = tensor("op_11597_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1087_cast_fp16 = mul(x = var_11596_cast_fp16, y = var_11597_to_fp16)[name = tensor("aw_chunk_1087_cast_fp16")]; tensor var_11599_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_961_cast_fp16)[name = tensor("op_11599_cast_fp16")]; tensor var_11600_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_963_cast_fp16)[name = tensor("op_11600_cast_fp16")]; tensor var_11601_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_965_cast_fp16)[name = tensor("op_11601_cast_fp16")]; tensor var_11602_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_967_cast_fp16)[name = tensor("op_11602_cast_fp16")]; tensor var_11603_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_969_cast_fp16)[name = tensor("op_11603_cast_fp16")]; tensor var_11604_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_971_cast_fp16)[name = tensor("op_11604_cast_fp16")]; tensor var_11605_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_973_cast_fp16)[name = tensor("op_11605_cast_fp16")]; tensor var_11606_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_975_cast_fp16)[name = tensor("op_11606_cast_fp16")]; tensor var_11607_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_977_cast_fp16)[name = tensor("op_11607_cast_fp16")]; tensor var_11608_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_979_cast_fp16)[name = tensor("op_11608_cast_fp16")]; tensor var_11609_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_981_cast_fp16)[name = tensor("op_11609_cast_fp16")]; tensor var_11610_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_983_cast_fp16)[name = tensor("op_11610_cast_fp16")]; tensor var_11611_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_985_cast_fp16)[name = tensor("op_11611_cast_fp16")]; tensor var_11612_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_987_cast_fp16)[name = tensor("op_11612_cast_fp16")]; tensor var_11613_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_989_cast_fp16)[name = tensor("op_11613_cast_fp16")]; tensor var_11614_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_991_cast_fp16)[name = tensor("op_11614_cast_fp16")]; tensor var_11615_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_993_cast_fp16)[name = tensor("op_11615_cast_fp16")]; tensor var_11616_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_995_cast_fp16)[name = tensor("op_11616_cast_fp16")]; tensor var_11617_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_997_cast_fp16)[name = tensor("op_11617_cast_fp16")]; tensor var_11618_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_999_cast_fp16)[name = tensor("op_11618_cast_fp16")]; tensor var_11619_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1001_cast_fp16)[name = tensor("op_11619_cast_fp16")]; tensor var_11620_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1003_cast_fp16)[name = tensor("op_11620_cast_fp16")]; tensor var_11621_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1005_cast_fp16)[name = tensor("op_11621_cast_fp16")]; tensor var_11622_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1007_cast_fp16)[name = tensor("op_11622_cast_fp16")]; tensor var_11623_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1009_cast_fp16)[name = tensor("op_11623_cast_fp16")]; tensor var_11624_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1011_cast_fp16)[name = tensor("op_11624_cast_fp16")]; tensor var_11625_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1013_cast_fp16)[name = tensor("op_11625_cast_fp16")]; tensor var_11626_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1015_cast_fp16)[name = tensor("op_11626_cast_fp16")]; tensor var_11627_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1017_cast_fp16)[name = tensor("op_11627_cast_fp16")]; tensor var_11628_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1019_cast_fp16)[name = tensor("op_11628_cast_fp16")]; tensor var_11629_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1021_cast_fp16)[name = tensor("op_11629_cast_fp16")]; tensor var_11630_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1023_cast_fp16)[name = tensor("op_11630_cast_fp16")]; tensor var_11631_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1025_cast_fp16)[name = tensor("op_11631_cast_fp16")]; tensor var_11632_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1027_cast_fp16)[name = tensor("op_11632_cast_fp16")]; tensor var_11633_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1029_cast_fp16)[name = tensor("op_11633_cast_fp16")]; tensor var_11634_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1031_cast_fp16)[name = tensor("op_11634_cast_fp16")]; tensor var_11635_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1033_cast_fp16)[name = tensor("op_11635_cast_fp16")]; tensor var_11636_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1035_cast_fp16)[name = tensor("op_11636_cast_fp16")]; tensor var_11637_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1037_cast_fp16)[name = tensor("op_11637_cast_fp16")]; tensor var_11638_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1039_cast_fp16)[name = tensor("op_11638_cast_fp16")]; tensor var_11639_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1041_cast_fp16)[name = tensor("op_11639_cast_fp16")]; tensor var_11640_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1043_cast_fp16)[name = tensor("op_11640_cast_fp16")]; tensor var_11641_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1045_cast_fp16)[name = tensor("op_11641_cast_fp16")]; tensor var_11642_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1047_cast_fp16)[name = tensor("op_11642_cast_fp16")]; tensor var_11643_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1049_cast_fp16)[name = tensor("op_11643_cast_fp16")]; tensor var_11644_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1051_cast_fp16)[name = tensor("op_11644_cast_fp16")]; tensor var_11645_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1053_cast_fp16)[name = tensor("op_11645_cast_fp16")]; tensor var_11646_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1055_cast_fp16)[name = tensor("op_11646_cast_fp16")]; tensor var_11647_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1057_cast_fp16)[name = tensor("op_11647_cast_fp16")]; tensor var_11648_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1059_cast_fp16)[name = tensor("op_11648_cast_fp16")]; tensor var_11649_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1061_cast_fp16)[name = tensor("op_11649_cast_fp16")]; tensor var_11650_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1063_cast_fp16)[name = tensor("op_11650_cast_fp16")]; tensor var_11651_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1065_cast_fp16)[name = tensor("op_11651_cast_fp16")]; tensor var_11652_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1067_cast_fp16)[name = tensor("op_11652_cast_fp16")]; tensor var_11653_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1069_cast_fp16)[name = tensor("op_11653_cast_fp16")]; tensor var_11654_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1071_cast_fp16)[name = tensor("op_11654_cast_fp16")]; tensor var_11655_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1073_cast_fp16)[name = tensor("op_11655_cast_fp16")]; tensor var_11656_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1075_cast_fp16)[name = tensor("op_11656_cast_fp16")]; tensor var_11657_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1077_cast_fp16)[name = tensor("op_11657_cast_fp16")]; tensor var_11658_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1079_cast_fp16)[name = tensor("op_11658_cast_fp16")]; tensor var_11659_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1081_cast_fp16)[name = tensor("op_11659_cast_fp16")]; tensor var_11660_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1083_cast_fp16)[name = tensor("op_11660_cast_fp16")]; tensor var_11661_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1085_cast_fp16)[name = tensor("op_11661_cast_fp16")]; tensor var_11662_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1087_cast_fp16)[name = tensor("op_11662_cast_fp16")]; tensor var_11664_equation_0 = const()[name = tensor("op_11664_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11664_cast_fp16 = einsum(equation = var_11664_equation_0, values = (var_11312_cast_fp16, var_11599_cast_fp16))[name = tensor("op_11664_cast_fp16")]; tensor var_11666_equation_0 = const()[name = tensor("op_11666_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11666_cast_fp16 = einsum(equation = var_11666_equation_0, values = (var_11312_cast_fp16, var_11600_cast_fp16))[name = tensor("op_11666_cast_fp16")]; tensor var_11668_equation_0 = const()[name = tensor("op_11668_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11668_cast_fp16 = einsum(equation = var_11668_equation_0, values = (var_11312_cast_fp16, var_11601_cast_fp16))[name = tensor("op_11668_cast_fp16")]; tensor var_11670_equation_0 = const()[name = tensor("op_11670_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11670_cast_fp16 = einsum(equation = var_11670_equation_0, values = (var_11312_cast_fp16, var_11602_cast_fp16))[name = tensor("op_11670_cast_fp16")]; tensor var_11672_equation_0 = const()[name = tensor("op_11672_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11672_cast_fp16 = einsum(equation = var_11672_equation_0, values = (var_11312_cast_fp16, var_11603_cast_fp16))[name = tensor("op_11672_cast_fp16")]; tensor var_11674_equation_0 = const()[name = tensor("op_11674_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11674_cast_fp16 = einsum(equation = var_11674_equation_0, values = (var_11312_cast_fp16, var_11604_cast_fp16))[name = tensor("op_11674_cast_fp16")]; tensor var_11676_equation_0 = const()[name = tensor("op_11676_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11676_cast_fp16 = einsum(equation = var_11676_equation_0, values = (var_11312_cast_fp16, var_11605_cast_fp16))[name = tensor("op_11676_cast_fp16")]; tensor var_11678_equation_0 = const()[name = tensor("op_11678_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11678_cast_fp16 = einsum(equation = var_11678_equation_0, values = (var_11312_cast_fp16, var_11606_cast_fp16))[name = tensor("op_11678_cast_fp16")]; tensor var_11680_equation_0 = const()[name = tensor("op_11680_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11680_cast_fp16 = einsum(equation = var_11680_equation_0, values = (var_11316_cast_fp16, var_11607_cast_fp16))[name = tensor("op_11680_cast_fp16")]; tensor var_11682_equation_0 = const()[name = tensor("op_11682_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11682_cast_fp16 = einsum(equation = var_11682_equation_0, values = (var_11316_cast_fp16, var_11608_cast_fp16))[name = tensor("op_11682_cast_fp16")]; tensor var_11684_equation_0 = const()[name = tensor("op_11684_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11684_cast_fp16 = einsum(equation = var_11684_equation_0, values = (var_11316_cast_fp16, var_11609_cast_fp16))[name = tensor("op_11684_cast_fp16")]; tensor var_11686_equation_0 = const()[name = tensor("op_11686_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11686_cast_fp16 = einsum(equation = var_11686_equation_0, values = (var_11316_cast_fp16, var_11610_cast_fp16))[name = tensor("op_11686_cast_fp16")]; tensor var_11688_equation_0 = const()[name = tensor("op_11688_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11688_cast_fp16 = einsum(equation = var_11688_equation_0, values = (var_11316_cast_fp16, var_11611_cast_fp16))[name = tensor("op_11688_cast_fp16")]; tensor var_11690_equation_0 = const()[name = tensor("op_11690_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11690_cast_fp16 = einsum(equation = var_11690_equation_0, values = (var_11316_cast_fp16, var_11612_cast_fp16))[name = tensor("op_11690_cast_fp16")]; tensor var_11692_equation_0 = const()[name = tensor("op_11692_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11692_cast_fp16 = einsum(equation = var_11692_equation_0, values = (var_11316_cast_fp16, var_11613_cast_fp16))[name = tensor("op_11692_cast_fp16")]; tensor var_11694_equation_0 = const()[name = tensor("op_11694_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11694_cast_fp16 = einsum(equation = var_11694_equation_0, values = (var_11316_cast_fp16, var_11614_cast_fp16))[name = tensor("op_11694_cast_fp16")]; tensor var_11696_equation_0 = const()[name = tensor("op_11696_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11696_cast_fp16 = einsum(equation = var_11696_equation_0, values = (var_11320_cast_fp16, var_11615_cast_fp16))[name = tensor("op_11696_cast_fp16")]; tensor var_11698_equation_0 = const()[name = tensor("op_11698_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11698_cast_fp16 = einsum(equation = var_11698_equation_0, values = (var_11320_cast_fp16, var_11616_cast_fp16))[name = tensor("op_11698_cast_fp16")]; tensor var_11700_equation_0 = const()[name = tensor("op_11700_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11700_cast_fp16 = einsum(equation = var_11700_equation_0, values = (var_11320_cast_fp16, var_11617_cast_fp16))[name = tensor("op_11700_cast_fp16")]; tensor var_11702_equation_0 = const()[name = tensor("op_11702_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11702_cast_fp16 = einsum(equation = var_11702_equation_0, values = (var_11320_cast_fp16, var_11618_cast_fp16))[name = tensor("op_11702_cast_fp16")]; tensor var_11704_equation_0 = const()[name = tensor("op_11704_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11704_cast_fp16 = einsum(equation = var_11704_equation_0, values = (var_11320_cast_fp16, var_11619_cast_fp16))[name = tensor("op_11704_cast_fp16")]; tensor var_11706_equation_0 = const()[name = tensor("op_11706_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11706_cast_fp16 = einsum(equation = var_11706_equation_0, values = (var_11320_cast_fp16, var_11620_cast_fp16))[name = tensor("op_11706_cast_fp16")]; tensor var_11708_equation_0 = const()[name = tensor("op_11708_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11708_cast_fp16 = einsum(equation = var_11708_equation_0, values = (var_11320_cast_fp16, var_11621_cast_fp16))[name = tensor("op_11708_cast_fp16")]; tensor var_11710_equation_0 = const()[name = tensor("op_11710_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11710_cast_fp16 = einsum(equation = var_11710_equation_0, values = (var_11320_cast_fp16, var_11622_cast_fp16))[name = tensor("op_11710_cast_fp16")]; tensor var_11712_equation_0 = const()[name = tensor("op_11712_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11712_cast_fp16 = einsum(equation = var_11712_equation_0, values = (var_11324_cast_fp16, var_11623_cast_fp16))[name = tensor("op_11712_cast_fp16")]; tensor var_11714_equation_0 = const()[name = tensor("op_11714_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11714_cast_fp16 = einsum(equation = var_11714_equation_0, values = (var_11324_cast_fp16, var_11624_cast_fp16))[name = tensor("op_11714_cast_fp16")]; tensor var_11716_equation_0 = const()[name = tensor("op_11716_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11716_cast_fp16 = einsum(equation = var_11716_equation_0, values = (var_11324_cast_fp16, var_11625_cast_fp16))[name = tensor("op_11716_cast_fp16")]; tensor var_11718_equation_0 = const()[name = tensor("op_11718_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11718_cast_fp16 = einsum(equation = var_11718_equation_0, values = (var_11324_cast_fp16, var_11626_cast_fp16))[name = tensor("op_11718_cast_fp16")]; tensor var_11720_equation_0 = const()[name = tensor("op_11720_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11720_cast_fp16 = einsum(equation = var_11720_equation_0, values = (var_11324_cast_fp16, var_11627_cast_fp16))[name = tensor("op_11720_cast_fp16")]; tensor var_11722_equation_0 = const()[name = tensor("op_11722_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11722_cast_fp16 = einsum(equation = var_11722_equation_0, values = (var_11324_cast_fp16, var_11628_cast_fp16))[name = tensor("op_11722_cast_fp16")]; tensor var_11724_equation_0 = const()[name = tensor("op_11724_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11724_cast_fp16 = einsum(equation = var_11724_equation_0, values = (var_11324_cast_fp16, var_11629_cast_fp16))[name = tensor("op_11724_cast_fp16")]; tensor var_11726_equation_0 = const()[name = tensor("op_11726_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11726_cast_fp16 = einsum(equation = var_11726_equation_0, values = (var_11324_cast_fp16, var_11630_cast_fp16))[name = tensor("op_11726_cast_fp16")]; tensor var_11728_equation_0 = const()[name = tensor("op_11728_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11728_cast_fp16 = einsum(equation = var_11728_equation_0, values = (var_11328_cast_fp16, var_11631_cast_fp16))[name = tensor("op_11728_cast_fp16")]; tensor var_11730_equation_0 = const()[name = tensor("op_11730_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11730_cast_fp16 = einsum(equation = var_11730_equation_0, values = (var_11328_cast_fp16, var_11632_cast_fp16))[name = tensor("op_11730_cast_fp16")]; tensor var_11732_equation_0 = const()[name = tensor("op_11732_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11732_cast_fp16 = einsum(equation = var_11732_equation_0, values = (var_11328_cast_fp16, var_11633_cast_fp16))[name = tensor("op_11732_cast_fp16")]; tensor var_11734_equation_0 = const()[name = tensor("op_11734_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11734_cast_fp16 = einsum(equation = var_11734_equation_0, values = (var_11328_cast_fp16, var_11634_cast_fp16))[name = tensor("op_11734_cast_fp16")]; tensor var_11736_equation_0 = const()[name = tensor("op_11736_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11736_cast_fp16 = einsum(equation = var_11736_equation_0, values = (var_11328_cast_fp16, var_11635_cast_fp16))[name = tensor("op_11736_cast_fp16")]; tensor var_11738_equation_0 = const()[name = tensor("op_11738_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11738_cast_fp16 = einsum(equation = var_11738_equation_0, values = (var_11328_cast_fp16, var_11636_cast_fp16))[name = tensor("op_11738_cast_fp16")]; tensor var_11740_equation_0 = const()[name = tensor("op_11740_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11740_cast_fp16 = einsum(equation = var_11740_equation_0, values = (var_11328_cast_fp16, var_11637_cast_fp16))[name = tensor("op_11740_cast_fp16")]; tensor var_11742_equation_0 = const()[name = tensor("op_11742_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11742_cast_fp16 = einsum(equation = var_11742_equation_0, values = (var_11328_cast_fp16, var_11638_cast_fp16))[name = tensor("op_11742_cast_fp16")]; tensor var_11744_equation_0 = const()[name = tensor("op_11744_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11744_cast_fp16 = einsum(equation = var_11744_equation_0, values = (var_11332_cast_fp16, var_11639_cast_fp16))[name = tensor("op_11744_cast_fp16")]; tensor var_11746_equation_0 = const()[name = tensor("op_11746_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11746_cast_fp16 = einsum(equation = var_11746_equation_0, values = (var_11332_cast_fp16, var_11640_cast_fp16))[name = tensor("op_11746_cast_fp16")]; tensor var_11748_equation_0 = const()[name = tensor("op_11748_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11748_cast_fp16 = einsum(equation = var_11748_equation_0, values = (var_11332_cast_fp16, var_11641_cast_fp16))[name = tensor("op_11748_cast_fp16")]; tensor var_11750_equation_0 = const()[name = tensor("op_11750_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11750_cast_fp16 = einsum(equation = var_11750_equation_0, values = (var_11332_cast_fp16, var_11642_cast_fp16))[name = tensor("op_11750_cast_fp16")]; tensor var_11752_equation_0 = const()[name = tensor("op_11752_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11752_cast_fp16 = einsum(equation = var_11752_equation_0, values = (var_11332_cast_fp16, var_11643_cast_fp16))[name = tensor("op_11752_cast_fp16")]; tensor var_11754_equation_0 = const()[name = tensor("op_11754_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11754_cast_fp16 = einsum(equation = var_11754_equation_0, values = (var_11332_cast_fp16, var_11644_cast_fp16))[name = tensor("op_11754_cast_fp16")]; tensor var_11756_equation_0 = const()[name = tensor("op_11756_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11756_cast_fp16 = einsum(equation = var_11756_equation_0, values = (var_11332_cast_fp16, var_11645_cast_fp16))[name = tensor("op_11756_cast_fp16")]; tensor var_11758_equation_0 = const()[name = tensor("op_11758_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11758_cast_fp16 = einsum(equation = var_11758_equation_0, values = (var_11332_cast_fp16, var_11646_cast_fp16))[name = tensor("op_11758_cast_fp16")]; tensor var_11760_equation_0 = const()[name = tensor("op_11760_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11760_cast_fp16 = einsum(equation = var_11760_equation_0, values = (var_11336_cast_fp16, var_11647_cast_fp16))[name = tensor("op_11760_cast_fp16")]; tensor var_11762_equation_0 = const()[name = tensor("op_11762_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11762_cast_fp16 = einsum(equation = var_11762_equation_0, values = (var_11336_cast_fp16, var_11648_cast_fp16))[name = tensor("op_11762_cast_fp16")]; tensor var_11764_equation_0 = const()[name = tensor("op_11764_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11764_cast_fp16 = einsum(equation = var_11764_equation_0, values = (var_11336_cast_fp16, var_11649_cast_fp16))[name = tensor("op_11764_cast_fp16")]; tensor var_11766_equation_0 = const()[name = tensor("op_11766_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11766_cast_fp16 = einsum(equation = var_11766_equation_0, values = (var_11336_cast_fp16, var_11650_cast_fp16))[name = tensor("op_11766_cast_fp16")]; tensor var_11768_equation_0 = const()[name = tensor("op_11768_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11768_cast_fp16 = einsum(equation = var_11768_equation_0, values = (var_11336_cast_fp16, var_11651_cast_fp16))[name = tensor("op_11768_cast_fp16")]; tensor var_11770_equation_0 = const()[name = tensor("op_11770_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11770_cast_fp16 = einsum(equation = var_11770_equation_0, values = (var_11336_cast_fp16, var_11652_cast_fp16))[name = tensor("op_11770_cast_fp16")]; tensor var_11772_equation_0 = const()[name = tensor("op_11772_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11772_cast_fp16 = einsum(equation = var_11772_equation_0, values = (var_11336_cast_fp16, var_11653_cast_fp16))[name = tensor("op_11772_cast_fp16")]; tensor var_11774_equation_0 = const()[name = tensor("op_11774_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11774_cast_fp16 = einsum(equation = var_11774_equation_0, values = (var_11336_cast_fp16, var_11654_cast_fp16))[name = tensor("op_11774_cast_fp16")]; tensor var_11776_equation_0 = const()[name = tensor("op_11776_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11776_cast_fp16 = einsum(equation = var_11776_equation_0, values = (var_11340_cast_fp16, var_11655_cast_fp16))[name = tensor("op_11776_cast_fp16")]; tensor var_11778_equation_0 = const()[name = tensor("op_11778_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11778_cast_fp16 = einsum(equation = var_11778_equation_0, values = (var_11340_cast_fp16, var_11656_cast_fp16))[name = tensor("op_11778_cast_fp16")]; tensor var_11780_equation_0 = const()[name = tensor("op_11780_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11780_cast_fp16 = einsum(equation = var_11780_equation_0, values = (var_11340_cast_fp16, var_11657_cast_fp16))[name = tensor("op_11780_cast_fp16")]; tensor var_11782_equation_0 = const()[name = tensor("op_11782_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11782_cast_fp16 = einsum(equation = var_11782_equation_0, values = (var_11340_cast_fp16, var_11658_cast_fp16))[name = tensor("op_11782_cast_fp16")]; tensor var_11784_equation_0 = const()[name = tensor("op_11784_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11784_cast_fp16 = einsum(equation = var_11784_equation_0, values = (var_11340_cast_fp16, var_11659_cast_fp16))[name = tensor("op_11784_cast_fp16")]; tensor var_11786_equation_0 = const()[name = tensor("op_11786_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11786_cast_fp16 = einsum(equation = var_11786_equation_0, values = (var_11340_cast_fp16, var_11660_cast_fp16))[name = tensor("op_11786_cast_fp16")]; tensor var_11788_equation_0 = const()[name = tensor("op_11788_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11788_cast_fp16 = einsum(equation = var_11788_equation_0, values = (var_11340_cast_fp16, var_11661_cast_fp16))[name = tensor("op_11788_cast_fp16")]; tensor var_11790_equation_0 = const()[name = tensor("op_11790_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11790_cast_fp16 = einsum(equation = var_11790_equation_0, values = (var_11340_cast_fp16, var_11662_cast_fp16))[name = tensor("op_11790_cast_fp16")]; tensor var_11792_interleave_0 = const()[name = tensor("op_11792_interleave_0"), val = tensor(false)]; tensor var_11792_cast_fp16 = concat(axis = var_10347, interleave = var_11792_interleave_0, values = (var_11664_cast_fp16, var_11666_cast_fp16, var_11668_cast_fp16, var_11670_cast_fp16, var_11672_cast_fp16, var_11674_cast_fp16, var_11676_cast_fp16, var_11678_cast_fp16))[name = tensor("op_11792_cast_fp16")]; tensor var_11794_interleave_0 = const()[name = tensor("op_11794_interleave_0"), val = tensor(false)]; tensor var_11794_cast_fp16 = concat(axis = var_10347, interleave = var_11794_interleave_0, values = (var_11680_cast_fp16, var_11682_cast_fp16, var_11684_cast_fp16, var_11686_cast_fp16, var_11688_cast_fp16, var_11690_cast_fp16, var_11692_cast_fp16, var_11694_cast_fp16))[name = tensor("op_11794_cast_fp16")]; tensor var_11796_interleave_0 = const()[name = tensor("op_11796_interleave_0"), val = tensor(false)]; tensor var_11796_cast_fp16 = concat(axis = var_10347, interleave = var_11796_interleave_0, values = (var_11696_cast_fp16, var_11698_cast_fp16, var_11700_cast_fp16, var_11702_cast_fp16, var_11704_cast_fp16, var_11706_cast_fp16, var_11708_cast_fp16, var_11710_cast_fp16))[name = tensor("op_11796_cast_fp16")]; tensor var_11798_interleave_0 = const()[name = tensor("op_11798_interleave_0"), val = tensor(false)]; tensor var_11798_cast_fp16 = concat(axis = var_10347, interleave = var_11798_interleave_0, values = (var_11712_cast_fp16, var_11714_cast_fp16, var_11716_cast_fp16, var_11718_cast_fp16, var_11720_cast_fp16, var_11722_cast_fp16, var_11724_cast_fp16, var_11726_cast_fp16))[name = tensor("op_11798_cast_fp16")]; tensor var_11800_interleave_0 = const()[name = tensor("op_11800_interleave_0"), val = tensor(false)]; tensor var_11800_cast_fp16 = concat(axis = var_10347, interleave = var_11800_interleave_0, values = (var_11728_cast_fp16, var_11730_cast_fp16, var_11732_cast_fp16, var_11734_cast_fp16, var_11736_cast_fp16, var_11738_cast_fp16, var_11740_cast_fp16, var_11742_cast_fp16))[name = tensor("op_11800_cast_fp16")]; tensor var_11802_interleave_0 = const()[name = tensor("op_11802_interleave_0"), val = tensor(false)]; tensor var_11802_cast_fp16 = concat(axis = var_10347, interleave = var_11802_interleave_0, values = (var_11744_cast_fp16, var_11746_cast_fp16, var_11748_cast_fp16, var_11750_cast_fp16, var_11752_cast_fp16, var_11754_cast_fp16, var_11756_cast_fp16, var_11758_cast_fp16))[name = tensor("op_11802_cast_fp16")]; tensor var_11804_interleave_0 = const()[name = tensor("op_11804_interleave_0"), val = tensor(false)]; tensor var_11804_cast_fp16 = concat(axis = var_10347, interleave = var_11804_interleave_0, values = (var_11760_cast_fp16, var_11762_cast_fp16, var_11764_cast_fp16, var_11766_cast_fp16, var_11768_cast_fp16, var_11770_cast_fp16, var_11772_cast_fp16, var_11774_cast_fp16))[name = tensor("op_11804_cast_fp16")]; tensor var_11806_interleave_0 = const()[name = tensor("op_11806_interleave_0"), val = tensor(false)]; tensor var_11806_cast_fp16 = concat(axis = var_10347, interleave = var_11806_interleave_0, values = (var_11776_cast_fp16, var_11778_cast_fp16, var_11780_cast_fp16, var_11782_cast_fp16, var_11784_cast_fp16, var_11786_cast_fp16, var_11788_cast_fp16, var_11790_cast_fp16))[name = tensor("op_11806_cast_fp16")]; tensor input_467_interleave_0 = const()[name = tensor("input_467_interleave_0"), val = tensor(false)]; tensor input_467_cast_fp16 = concat(axis = var_10375, interleave = input_467_interleave_0, values = (var_11792_cast_fp16, var_11794_cast_fp16, var_11796_cast_fp16, var_11798_cast_fp16, var_11800_cast_fp16, var_11802_cast_fp16, var_11804_cast_fp16, var_11806_cast_fp16))[name = tensor("input_467_cast_fp16")]; tensor var_11812 = const()[name = tensor("op_11812"), val = tensor([1, 1])]; tensor var_11814 = const()[name = tensor("op_11814"), val = tensor([1, 1])]; tensor var_11816_pad_type_0 = const()[name = tensor("op_11816_pad_type_0"), val = tensor("custom")]; tensor var_11816_pad_0 = const()[name = tensor("op_11816_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1692497600)))]; tensor up_blocks_3_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1692702464)))]; tensor var_11816_cast_fp16 = conv(bias = up_blocks_3_attentions_0_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_11814, groups = var_10375, pad = var_11816_pad_0, pad_type = var_11816_pad_type_0, strides = var_11812, weight = up_blocks_3_attentions_0_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_467_cast_fp16)[name = tensor("op_11816_cast_fp16")]; tensor inputs_83_cast_fp16 = add(x = var_11816_cast_fp16, y = inputs_81_cast_fp16)[name = tensor("inputs_83_cast_fp16")]; tensor input_469_axes_0 = const()[name = tensor("input_469_axes_0"), val = tensor([1])]; tensor input_469_gamma_0_to_fp16 = const()[name = tensor("input_469_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1692703168)))]; tensor input_469_beta_0_to_fp16 = const()[name = tensor("input_469_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1692703872)))]; tensor var_11826_to_fp16 = const()[name = tensor("op_11826_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_469_cast_fp16 = layer_norm(axes = input_469_axes_0, beta = input_469_beta_0_to_fp16, epsilon = var_11826_to_fp16, gamma = input_469_gamma_0_to_fp16, x = inputs_83_cast_fp16)[name = tensor("input_469_cast_fp16")]; tensor var_11842 = const()[name = tensor("op_11842"), val = tensor([1, 1])]; tensor var_11844 = const()[name = tensor("op_11844"), val = tensor([1, 1])]; tensor var_11846_pad_type_0 = const()[name = tensor("op_11846_pad_type_0"), val = tensor("custom")]; tensor var_11846_pad_0 = const()[name = tensor("op_11846_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1692704576)))]; tensor up_blocks_3_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1694343040)))]; tensor var_11846_cast_fp16 = conv(bias = up_blocks_3_attentions_0_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_11844, groups = var_10375, pad = var_11846_pad_0, pad_type = var_11846_pad_type_0, strides = var_11842, weight = up_blocks_3_attentions_0_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_469_cast_fp16)[name = tensor("op_11846_cast_fp16")]; tensor var_11847_split_sizes_0 = const()[name = tensor("op_11847_split_sizes_0"), val = tensor([1280, 1280])]; tensor var_11847_axis_0 = const()[name = tensor("op_11847_axis_0"), val = tensor(1)]; tensor var_11847_cast_fp16_0, tensor var_11847_cast_fp16_1 = split(axis = var_11847_axis_0, split_sizes = var_11847_split_sizes_0, x = var_11846_cast_fp16)[name = tensor("op_11847_cast_fp16")]; tensor var_11849_mode_0 = const()[name = tensor("op_11849_mode_0"), val = tensor("EXACT")]; tensor var_11849_cast_fp16 = gelu(mode = var_11849_mode_0, x = var_11847_cast_fp16_1)[name = tensor("op_11849_cast_fp16")]; tensor input_471_cast_fp16 = mul(x = var_11847_cast_fp16_0, y = var_11849_cast_fp16)[name = tensor("input_471_cast_fp16")]; tensor var_11853 = const()[name = tensor("op_11853"), val = tensor([1, 1])]; tensor var_11855 = const()[name = tensor("op_11855"), val = tensor([1, 1])]; tensor var_11857_pad_type_0 = const()[name = tensor("op_11857_pad_type_0"), val = tensor("custom")]; tensor var_11857_pad_0 = const()[name = tensor("op_11857_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1694348224)))]; tensor up_blocks_3_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1695167488)))]; tensor var_11857_cast_fp16 = conv(bias = up_blocks_3_attentions_0_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_11855, groups = var_10375, pad = var_11857_pad_0, pad_type = var_11857_pad_type_0, strides = var_11853, weight = up_blocks_3_attentions_0_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_471_cast_fp16)[name = tensor("op_11857_cast_fp16")]; tensor hidden_states_293_cast_fp16 = add(x = var_11857_cast_fp16, y = inputs_83_cast_fp16)[name = tensor("hidden_states_293_cast_fp16")]; tensor var_11859 = const()[name = tensor("op_11859"), val = tensor([2, 320, 64, 64])]; tensor input_473_cast_fp16 = reshape(shape = var_11859, x = hidden_states_293_cast_fp16)[name = tensor("input_473_cast_fp16")]; tensor var_11863 = const()[name = tensor("op_11863"), val = tensor([1, 1])]; tensor var_11865 = const()[name = tensor("op_11865"), val = tensor([1, 1])]; tensor hidden_states_295_pad_type_0 = const()[name = tensor("hidden_states_295_pad_type_0"), val = tensor("custom")]; tensor hidden_states_295_pad_0 = const()[name = tensor("hidden_states_295_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_0_proj_out_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1695168192)))]; tensor up_blocks_3_attentions_0_proj_out_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_0_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1695373056)))]; tensor hidden_states_295_cast_fp16 = conv(bias = up_blocks_3_attentions_0_proj_out_bias_to_fp16, dilations = var_11865, groups = var_10375, pad = hidden_states_295_pad_0, pad_type = hidden_states_295_pad_type_0, strides = var_11863, weight = up_blocks_3_attentions_0_proj_out_weight_to_fp16, x = input_473_cast_fp16)[name = tensor("hidden_states_295_cast_fp16")]; tensor hidden_states_297_cast_fp16 = add(x = hidden_states_295_cast_fp16, y = hidden_states_283_cast_fp16)[name = tensor("hidden_states_297_cast_fp16")]; tensor input_475_interleave_0 = const()[name = tensor("input_475_interleave_0"), val = tensor(false)]; tensor input_475_cast_fp16 = concat(axis = var_10375, interleave = input_475_interleave_0, values = (hidden_states_297_cast_fp16, input_35_cast_fp16))[name = tensor("input_475_cast_fp16")]; tensor reshape_216_shape_0 = const()[name = tensor("reshape_216_shape_0"), val = tensor([2, 32, 20, 64, 64])]; tensor reshape_216_cast_fp16 = reshape(shape = reshape_216_shape_0, x = input_475_cast_fp16)[name = tensor("reshape_216_cast_fp16")]; tensor reduce_mean_162_axes_0 = const()[name = tensor("reduce_mean_162_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_162_keep_dims_0 = const()[name = tensor("reduce_mean_162_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_162_cast_fp16 = reduce_mean(axes = reduce_mean_162_axes_0, keep_dims = reduce_mean_162_keep_dims_0, x = reshape_216_cast_fp16)[name = tensor("reduce_mean_162_cast_fp16")]; tensor sub_108_cast_fp16 = sub(x = reshape_216_cast_fp16, y = reduce_mean_162_cast_fp16)[name = tensor("sub_108_cast_fp16")]; tensor square_54_cast_fp16 = square(x = sub_108_cast_fp16)[name = tensor("square_54_cast_fp16")]; tensor reduce_mean_164_axes_0 = const()[name = tensor("reduce_mean_164_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_164_keep_dims_0 = const()[name = tensor("reduce_mean_164_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_164_cast_fp16 = reduce_mean(axes = reduce_mean_164_axes_0, keep_dims = reduce_mean_164_keep_dims_0, x = square_54_cast_fp16)[name = tensor("reduce_mean_164_cast_fp16")]; tensor add_108_y_0_to_fp16 = const()[name = tensor("add_108_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_108_cast_fp16 = add(x = reduce_mean_164_cast_fp16, y = add_108_y_0_to_fp16)[name = tensor("add_108_cast_fp16")]; tensor sqrt_54_cast_fp16 = sqrt(x = add_108_cast_fp16)[name = tensor("sqrt_54_cast_fp16")]; tensor real_div_54_cast_fp16 = real_div(x = sub_108_cast_fp16, y = sqrt_54_cast_fp16)[name = tensor("real_div_54_cast_fp16")]; tensor reshape_217_shape_0 = const()[name = tensor("reshape_217_shape_0"), val = tensor([2, 640, 64, 64])]; tensor reshape_217_cast_fp16 = reshape(shape = reshape_217_shape_0, x = real_div_54_cast_fp16)[name = tensor("reshape_217_cast_fp16")]; tensor add_109_gamma_0_to_fp16 = const()[name = tensor("add_109_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1695373760)))]; tensor add_109_beta_0_to_fp16 = const()[name = tensor("add_109_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1695375104)))]; tensor add_109_epsilon_0_to_fp16 = const()[name = tensor("add_109_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_109_cast_fp16 = batch_norm(beta = add_109_beta_0_to_fp16, epsilon = add_109_epsilon_0_to_fp16, gamma = add_109_gamma_0_to_fp16, mean = add_15_mean_0_to_fp16, variance = add_15_variance_0_to_fp16, x = reshape_217_cast_fp16)[name = tensor("add_109_cast_fp16")]; tensor input_479_cast_fp16 = silu(x = add_109_cast_fp16)[name = tensor("input_479_cast_fp16")]; tensor var_11883 = const()[name = tensor("op_11883"), val = tensor([1, 1])]; tensor var_11885 = const()[name = tensor("op_11885"), val = tensor([1, 1])]; tensor hidden_states_299_pad_type_0 = const()[name = tensor("hidden_states_299_pad_type_0"), val = tensor("custom")]; tensor hidden_states_299_pad_0 = const()[name = tensor("hidden_states_299_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_3_resnets_1_conv1_weight_to_fp16 = const()[name = tensor("up_blocks_3_resnets_1_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1695376448)))]; tensor up_blocks_3_resnets_1_conv1_bias_to_fp16 = const()[name = tensor("up_blocks_3_resnets_1_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1699062912)))]; tensor hidden_states_299_cast_fp16 = conv(bias = up_blocks_3_resnets_1_conv1_bias_to_fp16, dilations = var_11885, groups = var_10375, pad = hidden_states_299_pad_0, pad_type = hidden_states_299_pad_type_0, strides = var_11883, weight = up_blocks_3_resnets_1_conv1_weight_to_fp16, x = input_479_cast_fp16)[name = tensor("hidden_states_299_cast_fp16")]; tensor var_11891 = const()[name = tensor("op_11891"), val = tensor([1, 1])]; tensor var_11893 = const()[name = tensor("op_11893"), val = tensor([1, 1])]; tensor temb_41_pad_type_0 = const()[name = tensor("temb_41_pad_type_0"), val = tensor("custom")]; tensor temb_41_pad_0 = const()[name = tensor("temb_41_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_resnets_1_time_emb_proj_weight_to_fp16 = const()[name = tensor("up_blocks_3_resnets_1_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1699063616)))]; tensor up_blocks_3_resnets_1_time_emb_proj_bias_to_fp16 = const()[name = tensor("up_blocks_3_resnets_1_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1699882880)))]; tensor temb_41_cast_fp16 = conv(bias = up_blocks_3_resnets_1_time_emb_proj_bias_to_fp16, dilations = var_11893, groups = var_10375, pad = temb_41_pad_0, pad_type = temb_41_pad_type_0, strides = var_11891, weight = up_blocks_3_resnets_1_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_41_cast_fp16")]; tensor input_483_cast_fp16 = add(x = hidden_states_299_cast_fp16, y = temb_41_cast_fp16)[name = tensor("input_483_cast_fp16")]; tensor reshape_220_shape_0 = const()[name = tensor("reshape_220_shape_0"), val = tensor([2, 32, 10, 64, 64])]; tensor reshape_220_cast_fp16 = reshape(shape = reshape_220_shape_0, x = input_483_cast_fp16)[name = tensor("reshape_220_cast_fp16")]; tensor reduce_mean_165_axes_0 = const()[name = tensor("reduce_mean_165_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_165_keep_dims_0 = const()[name = tensor("reduce_mean_165_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_165_cast_fp16 = reduce_mean(axes = reduce_mean_165_axes_0, keep_dims = reduce_mean_165_keep_dims_0, x = reshape_220_cast_fp16)[name = tensor("reduce_mean_165_cast_fp16")]; tensor sub_110_cast_fp16 = sub(x = reshape_220_cast_fp16, y = reduce_mean_165_cast_fp16)[name = tensor("sub_110_cast_fp16")]; tensor square_55_cast_fp16 = square(x = sub_110_cast_fp16)[name = tensor("square_55_cast_fp16")]; tensor reduce_mean_167_axes_0 = const()[name = tensor("reduce_mean_167_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_167_keep_dims_0 = const()[name = tensor("reduce_mean_167_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_167_cast_fp16 = reduce_mean(axes = reduce_mean_167_axes_0, keep_dims = reduce_mean_167_keep_dims_0, x = square_55_cast_fp16)[name = tensor("reduce_mean_167_cast_fp16")]; tensor add_110_y_0_to_fp16 = const()[name = tensor("add_110_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_110_cast_fp16 = add(x = reduce_mean_167_cast_fp16, y = add_110_y_0_to_fp16)[name = tensor("add_110_cast_fp16")]; tensor sqrt_55_cast_fp16 = sqrt(x = add_110_cast_fp16)[name = tensor("sqrt_55_cast_fp16")]; tensor real_div_55_cast_fp16 = real_div(x = sub_110_cast_fp16, y = sqrt_55_cast_fp16)[name = tensor("real_div_55_cast_fp16")]; tensor reshape_221_shape_0 = const()[name = tensor("reshape_221_shape_0"), val = tensor([2, 320, 64, 64])]; tensor reshape_221_cast_fp16 = reshape(shape = reshape_221_shape_0, x = real_div_55_cast_fp16)[name = tensor("reshape_221_cast_fp16")]; tensor add_111_gamma_0_to_fp16 = const()[name = tensor("add_111_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1699883584)))]; tensor add_111_beta_0_to_fp16 = const()[name = tensor("add_111_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1699884288)))]; tensor add_111_epsilon_0_to_fp16 = const()[name = tensor("add_111_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_111_cast_fp16 = batch_norm(beta = add_111_beta_0_to_fp16, epsilon = add_111_epsilon_0_to_fp16, gamma = add_111_gamma_0_to_fp16, mean = add_1_mean_0_to_fp16, variance = add_1_variance_0_to_fp16, x = reshape_221_cast_fp16)[name = tensor("add_111_cast_fp16")]; tensor input_487_cast_fp16 = silu(x = add_111_cast_fp16)[name = tensor("input_487_cast_fp16")]; tensor var_11903 = const()[name = tensor("op_11903"), val = tensor([1, 1])]; tensor var_11905 = const()[name = tensor("op_11905"), val = tensor([1, 1])]; tensor hidden_states_301_pad_type_0 = const()[name = tensor("hidden_states_301_pad_type_0"), val = tensor("custom")]; tensor hidden_states_301_pad_0 = const()[name = tensor("hidden_states_301_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_3_resnets_1_conv2_weight_to_fp16 = const()[name = tensor("up_blocks_3_resnets_1_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1699884992)))]; tensor up_blocks_3_resnets_1_conv2_bias_to_fp16 = const()[name = tensor("up_blocks_3_resnets_1_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1701728256)))]; tensor hidden_states_301_cast_fp16 = conv(bias = up_blocks_3_resnets_1_conv2_bias_to_fp16, dilations = var_11905, groups = var_10375, pad = hidden_states_301_pad_0, pad_type = hidden_states_301_pad_type_0, strides = var_11903, weight = up_blocks_3_resnets_1_conv2_weight_to_fp16, x = input_487_cast_fp16)[name = tensor("hidden_states_301_cast_fp16")]; tensor var_11910 = const()[name = tensor("op_11910"), val = tensor([1, 1])]; tensor var_11912 = const()[name = tensor("op_11912"), val = tensor([1, 1])]; tensor x_25_pad_type_0 = const()[name = tensor("x_25_pad_type_0"), val = tensor("custom")]; tensor x_25_pad_0 = const()[name = tensor("x_25_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_resnets_1_conv_shortcut_weight_to_fp16 = const()[name = tensor("up_blocks_3_resnets_1_conv_shortcut_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1701728960)))]; tensor up_blocks_3_resnets_1_conv_shortcut_bias_to_fp16 = const()[name = tensor("up_blocks_3_resnets_1_conv_shortcut_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1702138624)))]; tensor x_25_cast_fp16 = conv(bias = up_blocks_3_resnets_1_conv_shortcut_bias_to_fp16, dilations = var_11912, groups = var_10375, pad = x_25_pad_0, pad_type = x_25_pad_type_0, strides = var_11910, weight = up_blocks_3_resnets_1_conv_shortcut_weight_to_fp16, x = input_475_cast_fp16)[name = tensor("x_25_cast_fp16")]; tensor hidden_states_303_cast_fp16 = add(x = x_25_cast_fp16, y = hidden_states_301_cast_fp16)[name = tensor("hidden_states_303_cast_fp16")]; tensor reshape_224_shape_0 = const()[name = tensor("reshape_224_shape_0"), val = tensor([2, 32, 10, 64, 64])]; tensor reshape_224_cast_fp16 = reshape(shape = reshape_224_shape_0, x = hidden_states_303_cast_fp16)[name = tensor("reshape_224_cast_fp16")]; tensor reduce_mean_168_axes_0 = const()[name = tensor("reduce_mean_168_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_168_keep_dims_0 = const()[name = tensor("reduce_mean_168_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_168_cast_fp16 = reduce_mean(axes = reduce_mean_168_axes_0, keep_dims = reduce_mean_168_keep_dims_0, x = reshape_224_cast_fp16)[name = tensor("reduce_mean_168_cast_fp16")]; tensor sub_112_cast_fp16 = sub(x = reshape_224_cast_fp16, y = reduce_mean_168_cast_fp16)[name = tensor("sub_112_cast_fp16")]; tensor square_56_cast_fp16 = square(x = sub_112_cast_fp16)[name = tensor("square_56_cast_fp16")]; tensor reduce_mean_170_axes_0 = const()[name = tensor("reduce_mean_170_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_170_keep_dims_0 = const()[name = tensor("reduce_mean_170_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_170_cast_fp16 = reduce_mean(axes = reduce_mean_170_axes_0, keep_dims = reduce_mean_170_keep_dims_0, x = square_56_cast_fp16)[name = tensor("reduce_mean_170_cast_fp16")]; tensor add_112_y_0_to_fp16 = const()[name = tensor("add_112_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_112_cast_fp16 = add(x = reduce_mean_170_cast_fp16, y = add_112_y_0_to_fp16)[name = tensor("add_112_cast_fp16")]; tensor sqrt_56_cast_fp16 = sqrt(x = add_112_cast_fp16)[name = tensor("sqrt_56_cast_fp16")]; tensor real_div_56_cast_fp16 = real_div(x = sub_112_cast_fp16, y = sqrt_56_cast_fp16)[name = tensor("real_div_56_cast_fp16")]; tensor reshape_225_shape_0 = const()[name = tensor("reshape_225_shape_0"), val = tensor([2, 320, 64, 64])]; tensor reshape_225_cast_fp16 = reshape(shape = reshape_225_shape_0, x = real_div_56_cast_fp16)[name = tensor("reshape_225_cast_fp16")]; tensor add_113_gamma_0_to_fp16 = const()[name = tensor("add_113_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1702139328)))]; tensor add_113_beta_0_to_fp16 = const()[name = tensor("add_113_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1702140032)))]; tensor add_113_epsilon_0_to_fp16 = const()[name = tensor("add_113_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_113_cast_fp16 = batch_norm(beta = add_113_beta_0_to_fp16, epsilon = add_113_epsilon_0_to_fp16, gamma = add_113_gamma_0_to_fp16, mean = add_1_mean_0_to_fp16, variance = add_1_variance_0_to_fp16, x = reshape_225_cast_fp16)[name = tensor("add_113_cast_fp16")]; tensor var_11932 = const()[name = tensor("op_11932"), val = tensor([1, 1])]; tensor var_11934 = const()[name = tensor("op_11934"), val = tensor([1, 1])]; tensor hidden_states_305_pad_type_0 = const()[name = tensor("hidden_states_305_pad_type_0"), val = tensor("custom")]; tensor hidden_states_305_pad_0 = const()[name = tensor("hidden_states_305_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_1_proj_in_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1702140736)))]; tensor up_blocks_3_attentions_1_proj_in_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1702345600)))]; tensor hidden_states_305_cast_fp16 = conv(bias = up_blocks_3_attentions_1_proj_in_bias_to_fp16, dilations = var_11934, groups = var_10375, pad = hidden_states_305_pad_0, pad_type = hidden_states_305_pad_type_0, strides = var_11932, weight = up_blocks_3_attentions_1_proj_in_weight_to_fp16, x = add_113_cast_fp16)[name = tensor("hidden_states_305_cast_fp16")]; tensor var_11939 = const()[name = tensor("op_11939"), val = tensor([2, 320, 1, 4096])]; tensor inputs_85_cast_fp16 = reshape(shape = var_11939, x = hidden_states_305_cast_fp16)[name = tensor("inputs_85_cast_fp16")]; tensor hidden_states_307_axes_0 = const()[name = tensor("hidden_states_307_axes_0"), val = tensor([1])]; tensor hidden_states_307_gamma_0_to_fp16 = const()[name = tensor("hidden_states_307_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1702346304)))]; tensor hidden_states_307_beta_0_to_fp16 = const()[name = tensor("hidden_states_307_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1702347008)))]; tensor var_11955_to_fp16 = const()[name = tensor("op_11955_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_307_cast_fp16 = layer_norm(axes = hidden_states_307_axes_0, beta = hidden_states_307_beta_0_to_fp16, epsilon = var_11955_to_fp16, gamma = hidden_states_307_gamma_0_to_fp16, x = inputs_85_cast_fp16)[name = tensor("hidden_states_307_cast_fp16")]; tensor var_11970 = const()[name = tensor("op_11970"), val = tensor([1, 1])]; tensor var_11972 = const()[name = tensor("op_11972"), val = tensor([1, 1])]; tensor q_57_pad_type_0 = const()[name = tensor("q_57_pad_type_0"), val = tensor("custom")]; tensor q_57_pad_0 = const()[name = tensor("q_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1702347712)))]; tensor q_57_cast_fp16 = conv(dilations = var_11972, groups = var_10375, pad = q_57_pad_0, pad_type = q_57_pad_type_0, strides = var_11970, weight = up_blocks_3_attentions_1_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_307_cast_fp16)[name = tensor("q_57_cast_fp16")]; tensor var_11976 = const()[name = tensor("op_11976"), val = tensor([1, 1])]; tensor var_11978 = const()[name = tensor("op_11978"), val = tensor([1, 1])]; tensor k_113_pad_type_0 = const()[name = tensor("k_113_pad_type_0"), val = tensor("custom")]; tensor k_113_pad_0 = const()[name = tensor("k_113_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1702552576)))]; tensor k_113_cast_fp16 = conv(dilations = var_11978, groups = var_10375, pad = k_113_pad_0, pad_type = k_113_pad_type_0, strides = var_11976, weight = up_blocks_3_attentions_1_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_307_cast_fp16)[name = tensor("k_113_cast_fp16")]; tensor var_11982 = const()[name = tensor("op_11982"), val = tensor([1, 1])]; tensor var_11984 = const()[name = tensor("op_11984"), val = tensor([1, 1])]; tensor v_57_pad_type_0 = const()[name = tensor("v_57_pad_type_0"), val = tensor("custom")]; tensor v_57_pad_0 = const()[name = tensor("v_57_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1702757440)))]; tensor v_57_cast_fp16 = conv(dilations = var_11984, groups = var_10375, pad = v_57_pad_0, pad_type = v_57_pad_type_0, strides = var_11982, weight = up_blocks_3_attentions_1_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_307_cast_fp16)[name = tensor("v_57_cast_fp16")]; tensor var_11988_begin_0 = const()[name = tensor("op_11988_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11988_end_0 = const()[name = tensor("op_11988_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_11988_end_mask_0 = const()[name = tensor("op_11988_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11988_cast_fp16 = slice_by_index(begin = var_11988_begin_0, end = var_11988_end_0, end_mask = var_11988_end_mask_0, x = q_57_cast_fp16)[name = tensor("op_11988_cast_fp16")]; tensor var_11992_begin_0 = const()[name = tensor("op_11992_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_11992_end_0 = const()[name = tensor("op_11992_end_0"), val = tensor([2, 80, 1, 4096])]; tensor var_11992_end_mask_0 = const()[name = tensor("op_11992_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11992_cast_fp16 = slice_by_index(begin = var_11992_begin_0, end = var_11992_end_0, end_mask = var_11992_end_mask_0, x = q_57_cast_fp16)[name = tensor("op_11992_cast_fp16")]; tensor var_11996_begin_0 = const()[name = tensor("op_11996_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_11996_end_0 = const()[name = tensor("op_11996_end_0"), val = tensor([2, 120, 1, 4096])]; tensor var_11996_end_mask_0 = const()[name = tensor("op_11996_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11996_cast_fp16 = slice_by_index(begin = var_11996_begin_0, end = var_11996_end_0, end_mask = var_11996_end_mask_0, x = q_57_cast_fp16)[name = tensor("op_11996_cast_fp16")]; tensor var_12000_begin_0 = const()[name = tensor("op_12000_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_12000_end_0 = const()[name = tensor("op_12000_end_0"), val = tensor([2, 160, 1, 4096])]; tensor var_12000_end_mask_0 = const()[name = tensor("op_12000_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12000_cast_fp16 = slice_by_index(begin = var_12000_begin_0, end = var_12000_end_0, end_mask = var_12000_end_mask_0, x = q_57_cast_fp16)[name = tensor("op_12000_cast_fp16")]; tensor var_12004_begin_0 = const()[name = tensor("op_12004_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_12004_end_0 = const()[name = tensor("op_12004_end_0"), val = tensor([2, 200, 1, 4096])]; tensor var_12004_end_mask_0 = const()[name = tensor("op_12004_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12004_cast_fp16 = slice_by_index(begin = var_12004_begin_0, end = var_12004_end_0, end_mask = var_12004_end_mask_0, x = q_57_cast_fp16)[name = tensor("op_12004_cast_fp16")]; tensor var_12008_begin_0 = const()[name = tensor("op_12008_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_12008_end_0 = const()[name = tensor("op_12008_end_0"), val = tensor([2, 240, 1, 4096])]; tensor var_12008_end_mask_0 = const()[name = tensor("op_12008_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12008_cast_fp16 = slice_by_index(begin = var_12008_begin_0, end = var_12008_end_0, end_mask = var_12008_end_mask_0, x = q_57_cast_fp16)[name = tensor("op_12008_cast_fp16")]; tensor var_12012_begin_0 = const()[name = tensor("op_12012_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_12012_end_0 = const()[name = tensor("op_12012_end_0"), val = tensor([2, 280, 1, 4096])]; tensor var_12012_end_mask_0 = const()[name = tensor("op_12012_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12012_cast_fp16 = slice_by_index(begin = var_12012_begin_0, end = var_12012_end_0, end_mask = var_12012_end_mask_0, x = q_57_cast_fp16)[name = tensor("op_12012_cast_fp16")]; tensor var_12016_begin_0 = const()[name = tensor("op_12016_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_12016_end_0 = const()[name = tensor("op_12016_end_0"), val = tensor([2, 320, 1, 4096])]; tensor var_12016_end_mask_0 = const()[name = tensor("op_12016_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12016_cast_fp16 = slice_by_index(begin = var_12016_begin_0, end = var_12016_end_0, end_mask = var_12016_end_mask_0, x = q_57_cast_fp16)[name = tensor("op_12016_cast_fp16")]; tensor var_12019_begin_0 = const()[name = tensor("op_12019_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12019_end_0 = const()[name = tensor("op_12019_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12019_end_mask_0 = const()[name = tensor("op_12019_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12019_cast_fp16 = slice_by_index(begin = var_12019_begin_0, end = var_12019_end_0, end_mask = var_12019_end_mask_0, x = var_11988_cast_fp16)[name = tensor("op_12019_cast_fp16")]; tensor var_12020_begin_0 = const()[name = tensor("op_12020_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12020_end_0 = const()[name = tensor("op_12020_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12020_end_mask_0 = const()[name = tensor("op_12020_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12020_cast_fp16 = slice_by_index(begin = var_12020_begin_0, end = var_12020_end_0, end_mask = var_12020_end_mask_0, x = var_11988_cast_fp16)[name = tensor("op_12020_cast_fp16")]; tensor var_12021_begin_0 = const()[name = tensor("op_12021_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12021_end_0 = const()[name = tensor("op_12021_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12021_end_mask_0 = const()[name = tensor("op_12021_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12021_cast_fp16 = slice_by_index(begin = var_12021_begin_0, end = var_12021_end_0, end_mask = var_12021_end_mask_0, x = var_11988_cast_fp16)[name = tensor("op_12021_cast_fp16")]; tensor var_12022_begin_0 = const()[name = tensor("op_12022_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12022_end_0 = const()[name = tensor("op_12022_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12022_end_mask_0 = const()[name = tensor("op_12022_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12022_cast_fp16 = slice_by_index(begin = var_12022_begin_0, end = var_12022_end_0, end_mask = var_12022_end_mask_0, x = var_11988_cast_fp16)[name = tensor("op_12022_cast_fp16")]; tensor var_12023_begin_0 = const()[name = tensor("op_12023_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12023_end_0 = const()[name = tensor("op_12023_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12023_end_mask_0 = const()[name = tensor("op_12023_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12023_cast_fp16 = slice_by_index(begin = var_12023_begin_0, end = var_12023_end_0, end_mask = var_12023_end_mask_0, x = var_11988_cast_fp16)[name = tensor("op_12023_cast_fp16")]; tensor var_12024_begin_0 = const()[name = tensor("op_12024_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12024_end_0 = const()[name = tensor("op_12024_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12024_end_mask_0 = const()[name = tensor("op_12024_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12024_cast_fp16 = slice_by_index(begin = var_12024_begin_0, end = var_12024_end_0, end_mask = var_12024_end_mask_0, x = var_11988_cast_fp16)[name = tensor("op_12024_cast_fp16")]; tensor var_12025_begin_0 = const()[name = tensor("op_12025_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12025_end_0 = const()[name = tensor("op_12025_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12025_end_mask_0 = const()[name = tensor("op_12025_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12025_cast_fp16 = slice_by_index(begin = var_12025_begin_0, end = var_12025_end_0, end_mask = var_12025_end_mask_0, x = var_11988_cast_fp16)[name = tensor("op_12025_cast_fp16")]; tensor var_12026_begin_0 = const()[name = tensor("op_12026_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12026_end_0 = const()[name = tensor("op_12026_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12026_end_mask_0 = const()[name = tensor("op_12026_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12026_cast_fp16 = slice_by_index(begin = var_12026_begin_0, end = var_12026_end_0, end_mask = var_12026_end_mask_0, x = var_11988_cast_fp16)[name = tensor("op_12026_cast_fp16")]; tensor var_12027_begin_0 = const()[name = tensor("op_12027_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12027_end_0 = const()[name = tensor("op_12027_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12027_end_mask_0 = const()[name = tensor("op_12027_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12027_cast_fp16 = slice_by_index(begin = var_12027_begin_0, end = var_12027_end_0, end_mask = var_12027_end_mask_0, x = var_11992_cast_fp16)[name = tensor("op_12027_cast_fp16")]; tensor var_12028_begin_0 = const()[name = tensor("op_12028_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12028_end_0 = const()[name = tensor("op_12028_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12028_end_mask_0 = const()[name = tensor("op_12028_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12028_cast_fp16 = slice_by_index(begin = var_12028_begin_0, end = var_12028_end_0, end_mask = var_12028_end_mask_0, x = var_11992_cast_fp16)[name = tensor("op_12028_cast_fp16")]; tensor var_12029_begin_0 = const()[name = tensor("op_12029_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12029_end_0 = const()[name = tensor("op_12029_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12029_end_mask_0 = const()[name = tensor("op_12029_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12029_cast_fp16 = slice_by_index(begin = var_12029_begin_0, end = var_12029_end_0, end_mask = var_12029_end_mask_0, x = var_11992_cast_fp16)[name = tensor("op_12029_cast_fp16")]; tensor var_12030_begin_0 = const()[name = tensor("op_12030_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12030_end_0 = const()[name = tensor("op_12030_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12030_end_mask_0 = const()[name = tensor("op_12030_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12030_cast_fp16 = slice_by_index(begin = var_12030_begin_0, end = var_12030_end_0, end_mask = var_12030_end_mask_0, x = var_11992_cast_fp16)[name = tensor("op_12030_cast_fp16")]; tensor var_12031_begin_0 = const()[name = tensor("op_12031_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12031_end_0 = const()[name = tensor("op_12031_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12031_end_mask_0 = const()[name = tensor("op_12031_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12031_cast_fp16 = slice_by_index(begin = var_12031_begin_0, end = var_12031_end_0, end_mask = var_12031_end_mask_0, x = var_11992_cast_fp16)[name = tensor("op_12031_cast_fp16")]; tensor var_12032_begin_0 = const()[name = tensor("op_12032_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12032_end_0 = const()[name = tensor("op_12032_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12032_end_mask_0 = const()[name = tensor("op_12032_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12032_cast_fp16 = slice_by_index(begin = var_12032_begin_0, end = var_12032_end_0, end_mask = var_12032_end_mask_0, x = var_11992_cast_fp16)[name = tensor("op_12032_cast_fp16")]; tensor var_12033_begin_0 = const()[name = tensor("op_12033_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12033_end_0 = const()[name = tensor("op_12033_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12033_end_mask_0 = const()[name = tensor("op_12033_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12033_cast_fp16 = slice_by_index(begin = var_12033_begin_0, end = var_12033_end_0, end_mask = var_12033_end_mask_0, x = var_11992_cast_fp16)[name = tensor("op_12033_cast_fp16")]; tensor var_12034_begin_0 = const()[name = tensor("op_12034_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12034_end_0 = const()[name = tensor("op_12034_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12034_end_mask_0 = const()[name = tensor("op_12034_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12034_cast_fp16 = slice_by_index(begin = var_12034_begin_0, end = var_12034_end_0, end_mask = var_12034_end_mask_0, x = var_11992_cast_fp16)[name = tensor("op_12034_cast_fp16")]; tensor var_12035_begin_0 = const()[name = tensor("op_12035_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12035_end_0 = const()[name = tensor("op_12035_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12035_end_mask_0 = const()[name = tensor("op_12035_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12035_cast_fp16 = slice_by_index(begin = var_12035_begin_0, end = var_12035_end_0, end_mask = var_12035_end_mask_0, x = var_11996_cast_fp16)[name = tensor("op_12035_cast_fp16")]; tensor var_12036_begin_0 = const()[name = tensor("op_12036_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12036_end_0 = const()[name = tensor("op_12036_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12036_end_mask_0 = const()[name = tensor("op_12036_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12036_cast_fp16 = slice_by_index(begin = var_12036_begin_0, end = var_12036_end_0, end_mask = var_12036_end_mask_0, x = var_11996_cast_fp16)[name = tensor("op_12036_cast_fp16")]; tensor var_12037_begin_0 = const()[name = tensor("op_12037_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12037_end_0 = const()[name = tensor("op_12037_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12037_end_mask_0 = const()[name = tensor("op_12037_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12037_cast_fp16 = slice_by_index(begin = var_12037_begin_0, end = var_12037_end_0, end_mask = var_12037_end_mask_0, x = var_11996_cast_fp16)[name = tensor("op_12037_cast_fp16")]; tensor var_12038_begin_0 = const()[name = tensor("op_12038_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12038_end_0 = const()[name = tensor("op_12038_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12038_end_mask_0 = const()[name = tensor("op_12038_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12038_cast_fp16 = slice_by_index(begin = var_12038_begin_0, end = var_12038_end_0, end_mask = var_12038_end_mask_0, x = var_11996_cast_fp16)[name = tensor("op_12038_cast_fp16")]; tensor var_12039_begin_0 = const()[name = tensor("op_12039_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12039_end_0 = const()[name = tensor("op_12039_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12039_end_mask_0 = const()[name = tensor("op_12039_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12039_cast_fp16 = slice_by_index(begin = var_12039_begin_0, end = var_12039_end_0, end_mask = var_12039_end_mask_0, x = var_11996_cast_fp16)[name = tensor("op_12039_cast_fp16")]; tensor var_12040_begin_0 = const()[name = tensor("op_12040_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12040_end_0 = const()[name = tensor("op_12040_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12040_end_mask_0 = const()[name = tensor("op_12040_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12040_cast_fp16 = slice_by_index(begin = var_12040_begin_0, end = var_12040_end_0, end_mask = var_12040_end_mask_0, x = var_11996_cast_fp16)[name = tensor("op_12040_cast_fp16")]; tensor var_12041_begin_0 = const()[name = tensor("op_12041_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12041_end_0 = const()[name = tensor("op_12041_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12041_end_mask_0 = const()[name = tensor("op_12041_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12041_cast_fp16 = slice_by_index(begin = var_12041_begin_0, end = var_12041_end_0, end_mask = var_12041_end_mask_0, x = var_11996_cast_fp16)[name = tensor("op_12041_cast_fp16")]; tensor var_12042_begin_0 = const()[name = tensor("op_12042_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12042_end_0 = const()[name = tensor("op_12042_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12042_end_mask_0 = const()[name = tensor("op_12042_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12042_cast_fp16 = slice_by_index(begin = var_12042_begin_0, end = var_12042_end_0, end_mask = var_12042_end_mask_0, x = var_11996_cast_fp16)[name = tensor("op_12042_cast_fp16")]; tensor var_12043_begin_0 = const()[name = tensor("op_12043_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12043_end_0 = const()[name = tensor("op_12043_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12043_end_mask_0 = const()[name = tensor("op_12043_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12043_cast_fp16 = slice_by_index(begin = var_12043_begin_0, end = var_12043_end_0, end_mask = var_12043_end_mask_0, x = var_12000_cast_fp16)[name = tensor("op_12043_cast_fp16")]; tensor var_12044_begin_0 = const()[name = tensor("op_12044_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12044_end_0 = const()[name = tensor("op_12044_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12044_end_mask_0 = const()[name = tensor("op_12044_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12044_cast_fp16 = slice_by_index(begin = var_12044_begin_0, end = var_12044_end_0, end_mask = var_12044_end_mask_0, x = var_12000_cast_fp16)[name = tensor("op_12044_cast_fp16")]; tensor var_12045_begin_0 = const()[name = tensor("op_12045_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12045_end_0 = const()[name = tensor("op_12045_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12045_end_mask_0 = const()[name = tensor("op_12045_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12045_cast_fp16 = slice_by_index(begin = var_12045_begin_0, end = var_12045_end_0, end_mask = var_12045_end_mask_0, x = var_12000_cast_fp16)[name = tensor("op_12045_cast_fp16")]; tensor var_12046_begin_0 = const()[name = tensor("op_12046_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12046_end_0 = const()[name = tensor("op_12046_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12046_end_mask_0 = const()[name = tensor("op_12046_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12046_cast_fp16 = slice_by_index(begin = var_12046_begin_0, end = var_12046_end_0, end_mask = var_12046_end_mask_0, x = var_12000_cast_fp16)[name = tensor("op_12046_cast_fp16")]; tensor var_12047_begin_0 = const()[name = tensor("op_12047_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12047_end_0 = const()[name = tensor("op_12047_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12047_end_mask_0 = const()[name = tensor("op_12047_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12047_cast_fp16 = slice_by_index(begin = var_12047_begin_0, end = var_12047_end_0, end_mask = var_12047_end_mask_0, x = var_12000_cast_fp16)[name = tensor("op_12047_cast_fp16")]; tensor var_12048_begin_0 = const()[name = tensor("op_12048_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12048_end_0 = const()[name = tensor("op_12048_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12048_end_mask_0 = const()[name = tensor("op_12048_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12048_cast_fp16 = slice_by_index(begin = var_12048_begin_0, end = var_12048_end_0, end_mask = var_12048_end_mask_0, x = var_12000_cast_fp16)[name = tensor("op_12048_cast_fp16")]; tensor var_12049_begin_0 = const()[name = tensor("op_12049_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12049_end_0 = const()[name = tensor("op_12049_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12049_end_mask_0 = const()[name = tensor("op_12049_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12049_cast_fp16 = slice_by_index(begin = var_12049_begin_0, end = var_12049_end_0, end_mask = var_12049_end_mask_0, x = var_12000_cast_fp16)[name = tensor("op_12049_cast_fp16")]; tensor var_12050_begin_0 = const()[name = tensor("op_12050_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12050_end_0 = const()[name = tensor("op_12050_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12050_end_mask_0 = const()[name = tensor("op_12050_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12050_cast_fp16 = slice_by_index(begin = var_12050_begin_0, end = var_12050_end_0, end_mask = var_12050_end_mask_0, x = var_12000_cast_fp16)[name = tensor("op_12050_cast_fp16")]; tensor var_12051_begin_0 = const()[name = tensor("op_12051_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12051_end_0 = const()[name = tensor("op_12051_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12051_end_mask_0 = const()[name = tensor("op_12051_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12051_cast_fp16 = slice_by_index(begin = var_12051_begin_0, end = var_12051_end_0, end_mask = var_12051_end_mask_0, x = var_12004_cast_fp16)[name = tensor("op_12051_cast_fp16")]; tensor var_12052_begin_0 = const()[name = tensor("op_12052_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12052_end_0 = const()[name = tensor("op_12052_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12052_end_mask_0 = const()[name = tensor("op_12052_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12052_cast_fp16 = slice_by_index(begin = var_12052_begin_0, end = var_12052_end_0, end_mask = var_12052_end_mask_0, x = var_12004_cast_fp16)[name = tensor("op_12052_cast_fp16")]; tensor var_12053_begin_0 = const()[name = tensor("op_12053_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12053_end_0 = const()[name = tensor("op_12053_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12053_end_mask_0 = const()[name = tensor("op_12053_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12053_cast_fp16 = slice_by_index(begin = var_12053_begin_0, end = var_12053_end_0, end_mask = var_12053_end_mask_0, x = var_12004_cast_fp16)[name = tensor("op_12053_cast_fp16")]; tensor var_12054_begin_0 = const()[name = tensor("op_12054_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12054_end_0 = const()[name = tensor("op_12054_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12054_end_mask_0 = const()[name = tensor("op_12054_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12054_cast_fp16 = slice_by_index(begin = var_12054_begin_0, end = var_12054_end_0, end_mask = var_12054_end_mask_0, x = var_12004_cast_fp16)[name = tensor("op_12054_cast_fp16")]; tensor var_12055_begin_0 = const()[name = tensor("op_12055_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12055_end_0 = const()[name = tensor("op_12055_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12055_end_mask_0 = const()[name = tensor("op_12055_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12055_cast_fp16 = slice_by_index(begin = var_12055_begin_0, end = var_12055_end_0, end_mask = var_12055_end_mask_0, x = var_12004_cast_fp16)[name = tensor("op_12055_cast_fp16")]; tensor var_12056_begin_0 = const()[name = tensor("op_12056_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12056_end_0 = const()[name = tensor("op_12056_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12056_end_mask_0 = const()[name = tensor("op_12056_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12056_cast_fp16 = slice_by_index(begin = var_12056_begin_0, end = var_12056_end_0, end_mask = var_12056_end_mask_0, x = var_12004_cast_fp16)[name = tensor("op_12056_cast_fp16")]; tensor var_12057_begin_0 = const()[name = tensor("op_12057_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12057_end_0 = const()[name = tensor("op_12057_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12057_end_mask_0 = const()[name = tensor("op_12057_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12057_cast_fp16 = slice_by_index(begin = var_12057_begin_0, end = var_12057_end_0, end_mask = var_12057_end_mask_0, x = var_12004_cast_fp16)[name = tensor("op_12057_cast_fp16")]; tensor var_12058_begin_0 = const()[name = tensor("op_12058_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12058_end_0 = const()[name = tensor("op_12058_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12058_end_mask_0 = const()[name = tensor("op_12058_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12058_cast_fp16 = slice_by_index(begin = var_12058_begin_0, end = var_12058_end_0, end_mask = var_12058_end_mask_0, x = var_12004_cast_fp16)[name = tensor("op_12058_cast_fp16")]; tensor var_12059_begin_0 = const()[name = tensor("op_12059_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12059_end_0 = const()[name = tensor("op_12059_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12059_end_mask_0 = const()[name = tensor("op_12059_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12059_cast_fp16 = slice_by_index(begin = var_12059_begin_0, end = var_12059_end_0, end_mask = var_12059_end_mask_0, x = var_12008_cast_fp16)[name = tensor("op_12059_cast_fp16")]; tensor var_12060_begin_0 = const()[name = tensor("op_12060_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12060_end_0 = const()[name = tensor("op_12060_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12060_end_mask_0 = const()[name = tensor("op_12060_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12060_cast_fp16 = slice_by_index(begin = var_12060_begin_0, end = var_12060_end_0, end_mask = var_12060_end_mask_0, x = var_12008_cast_fp16)[name = tensor("op_12060_cast_fp16")]; tensor var_12061_begin_0 = const()[name = tensor("op_12061_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12061_end_0 = const()[name = tensor("op_12061_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12061_end_mask_0 = const()[name = tensor("op_12061_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12061_cast_fp16 = slice_by_index(begin = var_12061_begin_0, end = var_12061_end_0, end_mask = var_12061_end_mask_0, x = var_12008_cast_fp16)[name = tensor("op_12061_cast_fp16")]; tensor var_12062_begin_0 = const()[name = tensor("op_12062_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12062_end_0 = const()[name = tensor("op_12062_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12062_end_mask_0 = const()[name = tensor("op_12062_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12062_cast_fp16 = slice_by_index(begin = var_12062_begin_0, end = var_12062_end_0, end_mask = var_12062_end_mask_0, x = var_12008_cast_fp16)[name = tensor("op_12062_cast_fp16")]; tensor var_12063_begin_0 = const()[name = tensor("op_12063_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12063_end_0 = const()[name = tensor("op_12063_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12063_end_mask_0 = const()[name = tensor("op_12063_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12063_cast_fp16 = slice_by_index(begin = var_12063_begin_0, end = var_12063_end_0, end_mask = var_12063_end_mask_0, x = var_12008_cast_fp16)[name = tensor("op_12063_cast_fp16")]; tensor var_12064_begin_0 = const()[name = tensor("op_12064_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12064_end_0 = const()[name = tensor("op_12064_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12064_end_mask_0 = const()[name = tensor("op_12064_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12064_cast_fp16 = slice_by_index(begin = var_12064_begin_0, end = var_12064_end_0, end_mask = var_12064_end_mask_0, x = var_12008_cast_fp16)[name = tensor("op_12064_cast_fp16")]; tensor var_12065_begin_0 = const()[name = tensor("op_12065_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12065_end_0 = const()[name = tensor("op_12065_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12065_end_mask_0 = const()[name = tensor("op_12065_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12065_cast_fp16 = slice_by_index(begin = var_12065_begin_0, end = var_12065_end_0, end_mask = var_12065_end_mask_0, x = var_12008_cast_fp16)[name = tensor("op_12065_cast_fp16")]; tensor var_12066_begin_0 = const()[name = tensor("op_12066_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12066_end_0 = const()[name = tensor("op_12066_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12066_end_mask_0 = const()[name = tensor("op_12066_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12066_cast_fp16 = slice_by_index(begin = var_12066_begin_0, end = var_12066_end_0, end_mask = var_12066_end_mask_0, x = var_12008_cast_fp16)[name = tensor("op_12066_cast_fp16")]; tensor var_12067_begin_0 = const()[name = tensor("op_12067_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12067_end_0 = const()[name = tensor("op_12067_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12067_end_mask_0 = const()[name = tensor("op_12067_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12067_cast_fp16 = slice_by_index(begin = var_12067_begin_0, end = var_12067_end_0, end_mask = var_12067_end_mask_0, x = var_12012_cast_fp16)[name = tensor("op_12067_cast_fp16")]; tensor var_12068_begin_0 = const()[name = tensor("op_12068_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12068_end_0 = const()[name = tensor("op_12068_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12068_end_mask_0 = const()[name = tensor("op_12068_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12068_cast_fp16 = slice_by_index(begin = var_12068_begin_0, end = var_12068_end_0, end_mask = var_12068_end_mask_0, x = var_12012_cast_fp16)[name = tensor("op_12068_cast_fp16")]; tensor var_12069_begin_0 = const()[name = tensor("op_12069_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12069_end_0 = const()[name = tensor("op_12069_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12069_end_mask_0 = const()[name = tensor("op_12069_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12069_cast_fp16 = slice_by_index(begin = var_12069_begin_0, end = var_12069_end_0, end_mask = var_12069_end_mask_0, x = var_12012_cast_fp16)[name = tensor("op_12069_cast_fp16")]; tensor var_12070_begin_0 = const()[name = tensor("op_12070_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12070_end_0 = const()[name = tensor("op_12070_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12070_end_mask_0 = const()[name = tensor("op_12070_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12070_cast_fp16 = slice_by_index(begin = var_12070_begin_0, end = var_12070_end_0, end_mask = var_12070_end_mask_0, x = var_12012_cast_fp16)[name = tensor("op_12070_cast_fp16")]; tensor var_12071_begin_0 = const()[name = tensor("op_12071_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12071_end_0 = const()[name = tensor("op_12071_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12071_end_mask_0 = const()[name = tensor("op_12071_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12071_cast_fp16 = slice_by_index(begin = var_12071_begin_0, end = var_12071_end_0, end_mask = var_12071_end_mask_0, x = var_12012_cast_fp16)[name = tensor("op_12071_cast_fp16")]; tensor var_12072_begin_0 = const()[name = tensor("op_12072_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12072_end_0 = const()[name = tensor("op_12072_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12072_end_mask_0 = const()[name = tensor("op_12072_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12072_cast_fp16 = slice_by_index(begin = var_12072_begin_0, end = var_12072_end_0, end_mask = var_12072_end_mask_0, x = var_12012_cast_fp16)[name = tensor("op_12072_cast_fp16")]; tensor var_12073_begin_0 = const()[name = tensor("op_12073_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12073_end_0 = const()[name = tensor("op_12073_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12073_end_mask_0 = const()[name = tensor("op_12073_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12073_cast_fp16 = slice_by_index(begin = var_12073_begin_0, end = var_12073_end_0, end_mask = var_12073_end_mask_0, x = var_12012_cast_fp16)[name = tensor("op_12073_cast_fp16")]; tensor var_12074_begin_0 = const()[name = tensor("op_12074_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12074_end_0 = const()[name = tensor("op_12074_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12074_end_mask_0 = const()[name = tensor("op_12074_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12074_cast_fp16 = slice_by_index(begin = var_12074_begin_0, end = var_12074_end_0, end_mask = var_12074_end_mask_0, x = var_12012_cast_fp16)[name = tensor("op_12074_cast_fp16")]; tensor var_12075_begin_0 = const()[name = tensor("op_12075_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12075_end_0 = const()[name = tensor("op_12075_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12075_end_mask_0 = const()[name = tensor("op_12075_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12075_cast_fp16 = slice_by_index(begin = var_12075_begin_0, end = var_12075_end_0, end_mask = var_12075_end_mask_0, x = var_12016_cast_fp16)[name = tensor("op_12075_cast_fp16")]; tensor var_12076_begin_0 = const()[name = tensor("op_12076_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12076_end_0 = const()[name = tensor("op_12076_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12076_end_mask_0 = const()[name = tensor("op_12076_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12076_cast_fp16 = slice_by_index(begin = var_12076_begin_0, end = var_12076_end_0, end_mask = var_12076_end_mask_0, x = var_12016_cast_fp16)[name = tensor("op_12076_cast_fp16")]; tensor var_12077_begin_0 = const()[name = tensor("op_12077_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12077_end_0 = const()[name = tensor("op_12077_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12077_end_mask_0 = const()[name = tensor("op_12077_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12077_cast_fp16 = slice_by_index(begin = var_12077_begin_0, end = var_12077_end_0, end_mask = var_12077_end_mask_0, x = var_12016_cast_fp16)[name = tensor("op_12077_cast_fp16")]; tensor var_12078_begin_0 = const()[name = tensor("op_12078_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12078_end_0 = const()[name = tensor("op_12078_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12078_end_mask_0 = const()[name = tensor("op_12078_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12078_cast_fp16 = slice_by_index(begin = var_12078_begin_0, end = var_12078_end_0, end_mask = var_12078_end_mask_0, x = var_12016_cast_fp16)[name = tensor("op_12078_cast_fp16")]; tensor var_12079_begin_0 = const()[name = tensor("op_12079_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12079_end_0 = const()[name = tensor("op_12079_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12079_end_mask_0 = const()[name = tensor("op_12079_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12079_cast_fp16 = slice_by_index(begin = var_12079_begin_0, end = var_12079_end_0, end_mask = var_12079_end_mask_0, x = var_12016_cast_fp16)[name = tensor("op_12079_cast_fp16")]; tensor var_12080_begin_0 = const()[name = tensor("op_12080_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12080_end_0 = const()[name = tensor("op_12080_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12080_end_mask_0 = const()[name = tensor("op_12080_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12080_cast_fp16 = slice_by_index(begin = var_12080_begin_0, end = var_12080_end_0, end_mask = var_12080_end_mask_0, x = var_12016_cast_fp16)[name = tensor("op_12080_cast_fp16")]; tensor var_12081_begin_0 = const()[name = tensor("op_12081_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12081_end_0 = const()[name = tensor("op_12081_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12081_end_mask_0 = const()[name = tensor("op_12081_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12081_cast_fp16 = slice_by_index(begin = var_12081_begin_0, end = var_12081_end_0, end_mask = var_12081_end_mask_0, x = var_12016_cast_fp16)[name = tensor("op_12081_cast_fp16")]; tensor var_12082_begin_0 = const()[name = tensor("op_12082_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12082_end_0 = const()[name = tensor("op_12082_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12082_end_mask_0 = const()[name = tensor("op_12082_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12082_cast_fp16 = slice_by_index(begin = var_12082_begin_0, end = var_12082_end_0, end_mask = var_12082_end_mask_0, x = var_12016_cast_fp16)[name = tensor("op_12082_cast_fp16")]; tensor k_115_perm_0 = const()[name = tensor("k_115_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_12087_begin_0 = const()[name = tensor("op_12087_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12087_end_0 = const()[name = tensor("op_12087_end_0"), val = tensor([2, 4096, 1, 40])]; tensor var_12087_end_mask_0 = const()[name = tensor("op_12087_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_3 = transpose(perm = k_115_perm_0, x = k_113_cast_fp16)[name = tensor("transpose_3")]; tensor var_12087_cast_fp16 = slice_by_index(begin = var_12087_begin_0, end = var_12087_end_0, end_mask = var_12087_end_mask_0, x = transpose_3)[name = tensor("op_12087_cast_fp16")]; tensor var_12091_begin_0 = const()[name = tensor("op_12091_begin_0"), val = tensor([0, 0, 0, 40])]; tensor var_12091_end_0 = const()[name = tensor("op_12091_end_0"), val = tensor([2, 4096, 1, 80])]; tensor var_12091_end_mask_0 = const()[name = tensor("op_12091_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12091_cast_fp16 = slice_by_index(begin = var_12091_begin_0, end = var_12091_end_0, end_mask = var_12091_end_mask_0, x = transpose_3)[name = tensor("op_12091_cast_fp16")]; tensor var_12095_begin_0 = const()[name = tensor("op_12095_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_12095_end_0 = const()[name = tensor("op_12095_end_0"), val = tensor([2, 4096, 1, 120])]; tensor var_12095_end_mask_0 = const()[name = tensor("op_12095_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12095_cast_fp16 = slice_by_index(begin = var_12095_begin_0, end = var_12095_end_0, end_mask = var_12095_end_mask_0, x = transpose_3)[name = tensor("op_12095_cast_fp16")]; tensor var_12099_begin_0 = const()[name = tensor("op_12099_begin_0"), val = tensor([0, 0, 0, 120])]; tensor var_12099_end_0 = const()[name = tensor("op_12099_end_0"), val = tensor([2, 4096, 1, 160])]; tensor var_12099_end_mask_0 = const()[name = tensor("op_12099_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12099_cast_fp16 = slice_by_index(begin = var_12099_begin_0, end = var_12099_end_0, end_mask = var_12099_end_mask_0, x = transpose_3)[name = tensor("op_12099_cast_fp16")]; tensor var_12103_begin_0 = const()[name = tensor("op_12103_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_12103_end_0 = const()[name = tensor("op_12103_end_0"), val = tensor([2, 4096, 1, 200])]; tensor var_12103_end_mask_0 = const()[name = tensor("op_12103_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12103_cast_fp16 = slice_by_index(begin = var_12103_begin_0, end = var_12103_end_0, end_mask = var_12103_end_mask_0, x = transpose_3)[name = tensor("op_12103_cast_fp16")]; tensor var_12107_begin_0 = const()[name = tensor("op_12107_begin_0"), val = tensor([0, 0, 0, 200])]; tensor var_12107_end_0 = const()[name = tensor("op_12107_end_0"), val = tensor([2, 4096, 1, 240])]; tensor var_12107_end_mask_0 = const()[name = tensor("op_12107_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12107_cast_fp16 = slice_by_index(begin = var_12107_begin_0, end = var_12107_end_0, end_mask = var_12107_end_mask_0, x = transpose_3)[name = tensor("op_12107_cast_fp16")]; tensor var_12111_begin_0 = const()[name = tensor("op_12111_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_12111_end_0 = const()[name = tensor("op_12111_end_0"), val = tensor([2, 4096, 1, 280])]; tensor var_12111_end_mask_0 = const()[name = tensor("op_12111_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12111_cast_fp16 = slice_by_index(begin = var_12111_begin_0, end = var_12111_end_0, end_mask = var_12111_end_mask_0, x = transpose_3)[name = tensor("op_12111_cast_fp16")]; tensor var_12115_begin_0 = const()[name = tensor("op_12115_begin_0"), val = tensor([0, 0, 0, 280])]; tensor var_12115_end_0 = const()[name = tensor("op_12115_end_0"), val = tensor([2, 4096, 1, 320])]; tensor var_12115_end_mask_0 = const()[name = tensor("op_12115_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12115_cast_fp16 = slice_by_index(begin = var_12115_begin_0, end = var_12115_end_0, end_mask = var_12115_end_mask_0, x = transpose_3)[name = tensor("op_12115_cast_fp16")]; tensor var_12117_begin_0 = const()[name = tensor("op_12117_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12117_end_0 = const()[name = tensor("op_12117_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12117_end_mask_0 = const()[name = tensor("op_12117_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12117_cast_fp16 = slice_by_index(begin = var_12117_begin_0, end = var_12117_end_0, end_mask = var_12117_end_mask_0, x = v_57_cast_fp16)[name = tensor("op_12117_cast_fp16")]; tensor var_12121_begin_0 = const()[name = tensor("op_12121_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_12121_end_0 = const()[name = tensor("op_12121_end_0"), val = tensor([2, 80, 1, 4096])]; tensor var_12121_end_mask_0 = const()[name = tensor("op_12121_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12121_cast_fp16 = slice_by_index(begin = var_12121_begin_0, end = var_12121_end_0, end_mask = var_12121_end_mask_0, x = v_57_cast_fp16)[name = tensor("op_12121_cast_fp16")]; tensor var_12125_begin_0 = const()[name = tensor("op_12125_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_12125_end_0 = const()[name = tensor("op_12125_end_0"), val = tensor([2, 120, 1, 4096])]; tensor var_12125_end_mask_0 = const()[name = tensor("op_12125_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12125_cast_fp16 = slice_by_index(begin = var_12125_begin_0, end = var_12125_end_0, end_mask = var_12125_end_mask_0, x = v_57_cast_fp16)[name = tensor("op_12125_cast_fp16")]; tensor var_12129_begin_0 = const()[name = tensor("op_12129_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_12129_end_0 = const()[name = tensor("op_12129_end_0"), val = tensor([2, 160, 1, 4096])]; tensor var_12129_end_mask_0 = const()[name = tensor("op_12129_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12129_cast_fp16 = slice_by_index(begin = var_12129_begin_0, end = var_12129_end_0, end_mask = var_12129_end_mask_0, x = v_57_cast_fp16)[name = tensor("op_12129_cast_fp16")]; tensor var_12133_begin_0 = const()[name = tensor("op_12133_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_12133_end_0 = const()[name = tensor("op_12133_end_0"), val = tensor([2, 200, 1, 4096])]; tensor var_12133_end_mask_0 = const()[name = tensor("op_12133_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12133_cast_fp16 = slice_by_index(begin = var_12133_begin_0, end = var_12133_end_0, end_mask = var_12133_end_mask_0, x = v_57_cast_fp16)[name = tensor("op_12133_cast_fp16")]; tensor var_12137_begin_0 = const()[name = tensor("op_12137_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_12137_end_0 = const()[name = tensor("op_12137_end_0"), val = tensor([2, 240, 1, 4096])]; tensor var_12137_end_mask_0 = const()[name = tensor("op_12137_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12137_cast_fp16 = slice_by_index(begin = var_12137_begin_0, end = var_12137_end_0, end_mask = var_12137_end_mask_0, x = v_57_cast_fp16)[name = tensor("op_12137_cast_fp16")]; tensor var_12141_begin_0 = const()[name = tensor("op_12141_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_12141_end_0 = const()[name = tensor("op_12141_end_0"), val = tensor([2, 280, 1, 4096])]; tensor var_12141_end_mask_0 = const()[name = tensor("op_12141_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12141_cast_fp16 = slice_by_index(begin = var_12141_begin_0, end = var_12141_end_0, end_mask = var_12141_end_mask_0, x = v_57_cast_fp16)[name = tensor("op_12141_cast_fp16")]; tensor var_12145_begin_0 = const()[name = tensor("op_12145_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_12145_end_0 = const()[name = tensor("op_12145_end_0"), val = tensor([2, 320, 1, 4096])]; tensor var_12145_end_mask_0 = const()[name = tensor("op_12145_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12145_cast_fp16 = slice_by_index(begin = var_12145_begin_0, end = var_12145_end_0, end_mask = var_12145_end_mask_0, x = v_57_cast_fp16)[name = tensor("op_12145_cast_fp16")]; tensor var_12149_equation_0 = const()[name = tensor("op_12149_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12149_cast_fp16 = einsum(equation = var_12149_equation_0, values = (var_12087_cast_fp16, var_12019_cast_fp16))[name = tensor("op_12149_cast_fp16")]; tensor var_12150_to_fp16 = const()[name = tensor("op_12150_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1089_cast_fp16 = mul(x = var_12149_cast_fp16, y = var_12150_to_fp16)[name = tensor("aw_chunk_1089_cast_fp16")]; tensor var_12153_equation_0 = const()[name = tensor("op_12153_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12153_cast_fp16 = einsum(equation = var_12153_equation_0, values = (var_12087_cast_fp16, var_12020_cast_fp16))[name = tensor("op_12153_cast_fp16")]; tensor var_12154_to_fp16 = const()[name = tensor("op_12154_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1091_cast_fp16 = mul(x = var_12153_cast_fp16, y = var_12154_to_fp16)[name = tensor("aw_chunk_1091_cast_fp16")]; tensor var_12157_equation_0 = const()[name = tensor("op_12157_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12157_cast_fp16 = einsum(equation = var_12157_equation_0, values = (var_12087_cast_fp16, var_12021_cast_fp16))[name = tensor("op_12157_cast_fp16")]; tensor var_12158_to_fp16 = const()[name = tensor("op_12158_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1093_cast_fp16 = mul(x = var_12157_cast_fp16, y = var_12158_to_fp16)[name = tensor("aw_chunk_1093_cast_fp16")]; tensor var_12161_equation_0 = const()[name = tensor("op_12161_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12161_cast_fp16 = einsum(equation = var_12161_equation_0, values = (var_12087_cast_fp16, var_12022_cast_fp16))[name = tensor("op_12161_cast_fp16")]; tensor var_12162_to_fp16 = const()[name = tensor("op_12162_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1095_cast_fp16 = mul(x = var_12161_cast_fp16, y = var_12162_to_fp16)[name = tensor("aw_chunk_1095_cast_fp16")]; tensor var_12165_equation_0 = const()[name = tensor("op_12165_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12165_cast_fp16 = einsum(equation = var_12165_equation_0, values = (var_12087_cast_fp16, var_12023_cast_fp16))[name = tensor("op_12165_cast_fp16")]; tensor var_12166_to_fp16 = const()[name = tensor("op_12166_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1097_cast_fp16 = mul(x = var_12165_cast_fp16, y = var_12166_to_fp16)[name = tensor("aw_chunk_1097_cast_fp16")]; tensor var_12169_equation_0 = const()[name = tensor("op_12169_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12169_cast_fp16 = einsum(equation = var_12169_equation_0, values = (var_12087_cast_fp16, var_12024_cast_fp16))[name = tensor("op_12169_cast_fp16")]; tensor var_12170_to_fp16 = const()[name = tensor("op_12170_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1099_cast_fp16 = mul(x = var_12169_cast_fp16, y = var_12170_to_fp16)[name = tensor("aw_chunk_1099_cast_fp16")]; tensor var_12173_equation_0 = const()[name = tensor("op_12173_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12173_cast_fp16 = einsum(equation = var_12173_equation_0, values = (var_12087_cast_fp16, var_12025_cast_fp16))[name = tensor("op_12173_cast_fp16")]; tensor var_12174_to_fp16 = const()[name = tensor("op_12174_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1101_cast_fp16 = mul(x = var_12173_cast_fp16, y = var_12174_to_fp16)[name = tensor("aw_chunk_1101_cast_fp16")]; tensor var_12177_equation_0 = const()[name = tensor("op_12177_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12177_cast_fp16 = einsum(equation = var_12177_equation_0, values = (var_12087_cast_fp16, var_12026_cast_fp16))[name = tensor("op_12177_cast_fp16")]; tensor var_12178_to_fp16 = const()[name = tensor("op_12178_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1103_cast_fp16 = mul(x = var_12177_cast_fp16, y = var_12178_to_fp16)[name = tensor("aw_chunk_1103_cast_fp16")]; tensor var_12181_equation_0 = const()[name = tensor("op_12181_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12181_cast_fp16 = einsum(equation = var_12181_equation_0, values = (var_12091_cast_fp16, var_12027_cast_fp16))[name = tensor("op_12181_cast_fp16")]; tensor var_12182_to_fp16 = const()[name = tensor("op_12182_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1105_cast_fp16 = mul(x = var_12181_cast_fp16, y = var_12182_to_fp16)[name = tensor("aw_chunk_1105_cast_fp16")]; tensor var_12185_equation_0 = const()[name = tensor("op_12185_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12185_cast_fp16 = einsum(equation = var_12185_equation_0, values = (var_12091_cast_fp16, var_12028_cast_fp16))[name = tensor("op_12185_cast_fp16")]; tensor var_12186_to_fp16 = const()[name = tensor("op_12186_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1107_cast_fp16 = mul(x = var_12185_cast_fp16, y = var_12186_to_fp16)[name = tensor("aw_chunk_1107_cast_fp16")]; tensor var_12189_equation_0 = const()[name = tensor("op_12189_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12189_cast_fp16 = einsum(equation = var_12189_equation_0, values = (var_12091_cast_fp16, var_12029_cast_fp16))[name = tensor("op_12189_cast_fp16")]; tensor var_12190_to_fp16 = const()[name = tensor("op_12190_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1109_cast_fp16 = mul(x = var_12189_cast_fp16, y = var_12190_to_fp16)[name = tensor("aw_chunk_1109_cast_fp16")]; tensor var_12193_equation_0 = const()[name = tensor("op_12193_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12193_cast_fp16 = einsum(equation = var_12193_equation_0, values = (var_12091_cast_fp16, var_12030_cast_fp16))[name = tensor("op_12193_cast_fp16")]; tensor var_12194_to_fp16 = const()[name = tensor("op_12194_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1111_cast_fp16 = mul(x = var_12193_cast_fp16, y = var_12194_to_fp16)[name = tensor("aw_chunk_1111_cast_fp16")]; tensor var_12197_equation_0 = const()[name = tensor("op_12197_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12197_cast_fp16 = einsum(equation = var_12197_equation_0, values = (var_12091_cast_fp16, var_12031_cast_fp16))[name = tensor("op_12197_cast_fp16")]; tensor var_12198_to_fp16 = const()[name = tensor("op_12198_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1113_cast_fp16 = mul(x = var_12197_cast_fp16, y = var_12198_to_fp16)[name = tensor("aw_chunk_1113_cast_fp16")]; tensor var_12201_equation_0 = const()[name = tensor("op_12201_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12201_cast_fp16 = einsum(equation = var_12201_equation_0, values = (var_12091_cast_fp16, var_12032_cast_fp16))[name = tensor("op_12201_cast_fp16")]; tensor var_12202_to_fp16 = const()[name = tensor("op_12202_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1115_cast_fp16 = mul(x = var_12201_cast_fp16, y = var_12202_to_fp16)[name = tensor("aw_chunk_1115_cast_fp16")]; tensor var_12205_equation_0 = const()[name = tensor("op_12205_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12205_cast_fp16 = einsum(equation = var_12205_equation_0, values = (var_12091_cast_fp16, var_12033_cast_fp16))[name = tensor("op_12205_cast_fp16")]; tensor var_12206_to_fp16 = const()[name = tensor("op_12206_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1117_cast_fp16 = mul(x = var_12205_cast_fp16, y = var_12206_to_fp16)[name = tensor("aw_chunk_1117_cast_fp16")]; tensor var_12209_equation_0 = const()[name = tensor("op_12209_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12209_cast_fp16 = einsum(equation = var_12209_equation_0, values = (var_12091_cast_fp16, var_12034_cast_fp16))[name = tensor("op_12209_cast_fp16")]; tensor var_12210_to_fp16 = const()[name = tensor("op_12210_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1119_cast_fp16 = mul(x = var_12209_cast_fp16, y = var_12210_to_fp16)[name = tensor("aw_chunk_1119_cast_fp16")]; tensor var_12213_equation_0 = const()[name = tensor("op_12213_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12213_cast_fp16 = einsum(equation = var_12213_equation_0, values = (var_12095_cast_fp16, var_12035_cast_fp16))[name = tensor("op_12213_cast_fp16")]; tensor var_12214_to_fp16 = const()[name = tensor("op_12214_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1121_cast_fp16 = mul(x = var_12213_cast_fp16, y = var_12214_to_fp16)[name = tensor("aw_chunk_1121_cast_fp16")]; tensor var_12217_equation_0 = const()[name = tensor("op_12217_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12217_cast_fp16 = einsum(equation = var_12217_equation_0, values = (var_12095_cast_fp16, var_12036_cast_fp16))[name = tensor("op_12217_cast_fp16")]; tensor var_12218_to_fp16 = const()[name = tensor("op_12218_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1123_cast_fp16 = mul(x = var_12217_cast_fp16, y = var_12218_to_fp16)[name = tensor("aw_chunk_1123_cast_fp16")]; tensor var_12221_equation_0 = const()[name = tensor("op_12221_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12221_cast_fp16 = einsum(equation = var_12221_equation_0, values = (var_12095_cast_fp16, var_12037_cast_fp16))[name = tensor("op_12221_cast_fp16")]; tensor var_12222_to_fp16 = const()[name = tensor("op_12222_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1125_cast_fp16 = mul(x = var_12221_cast_fp16, y = var_12222_to_fp16)[name = tensor("aw_chunk_1125_cast_fp16")]; tensor var_12225_equation_0 = const()[name = tensor("op_12225_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12225_cast_fp16 = einsum(equation = var_12225_equation_0, values = (var_12095_cast_fp16, var_12038_cast_fp16))[name = tensor("op_12225_cast_fp16")]; tensor var_12226_to_fp16 = const()[name = tensor("op_12226_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1127_cast_fp16 = mul(x = var_12225_cast_fp16, y = var_12226_to_fp16)[name = tensor("aw_chunk_1127_cast_fp16")]; tensor var_12229_equation_0 = const()[name = tensor("op_12229_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12229_cast_fp16 = einsum(equation = var_12229_equation_0, values = (var_12095_cast_fp16, var_12039_cast_fp16))[name = tensor("op_12229_cast_fp16")]; tensor var_12230_to_fp16 = const()[name = tensor("op_12230_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1129_cast_fp16 = mul(x = var_12229_cast_fp16, y = var_12230_to_fp16)[name = tensor("aw_chunk_1129_cast_fp16")]; tensor var_12233_equation_0 = const()[name = tensor("op_12233_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12233_cast_fp16 = einsum(equation = var_12233_equation_0, values = (var_12095_cast_fp16, var_12040_cast_fp16))[name = tensor("op_12233_cast_fp16")]; tensor var_12234_to_fp16 = const()[name = tensor("op_12234_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1131_cast_fp16 = mul(x = var_12233_cast_fp16, y = var_12234_to_fp16)[name = tensor("aw_chunk_1131_cast_fp16")]; tensor var_12237_equation_0 = const()[name = tensor("op_12237_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12237_cast_fp16 = einsum(equation = var_12237_equation_0, values = (var_12095_cast_fp16, var_12041_cast_fp16))[name = tensor("op_12237_cast_fp16")]; tensor var_12238_to_fp16 = const()[name = tensor("op_12238_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1133_cast_fp16 = mul(x = var_12237_cast_fp16, y = var_12238_to_fp16)[name = tensor("aw_chunk_1133_cast_fp16")]; tensor var_12241_equation_0 = const()[name = tensor("op_12241_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12241_cast_fp16 = einsum(equation = var_12241_equation_0, values = (var_12095_cast_fp16, var_12042_cast_fp16))[name = tensor("op_12241_cast_fp16")]; tensor var_12242_to_fp16 = const()[name = tensor("op_12242_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1135_cast_fp16 = mul(x = var_12241_cast_fp16, y = var_12242_to_fp16)[name = tensor("aw_chunk_1135_cast_fp16")]; tensor var_12245_equation_0 = const()[name = tensor("op_12245_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12245_cast_fp16 = einsum(equation = var_12245_equation_0, values = (var_12099_cast_fp16, var_12043_cast_fp16))[name = tensor("op_12245_cast_fp16")]; tensor var_12246_to_fp16 = const()[name = tensor("op_12246_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1137_cast_fp16 = mul(x = var_12245_cast_fp16, y = var_12246_to_fp16)[name = tensor("aw_chunk_1137_cast_fp16")]; tensor var_12249_equation_0 = const()[name = tensor("op_12249_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12249_cast_fp16 = einsum(equation = var_12249_equation_0, values = (var_12099_cast_fp16, var_12044_cast_fp16))[name = tensor("op_12249_cast_fp16")]; tensor var_12250_to_fp16 = const()[name = tensor("op_12250_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1139_cast_fp16 = mul(x = var_12249_cast_fp16, y = var_12250_to_fp16)[name = tensor("aw_chunk_1139_cast_fp16")]; tensor var_12253_equation_0 = const()[name = tensor("op_12253_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12253_cast_fp16 = einsum(equation = var_12253_equation_0, values = (var_12099_cast_fp16, var_12045_cast_fp16))[name = tensor("op_12253_cast_fp16")]; tensor var_12254_to_fp16 = const()[name = tensor("op_12254_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1141_cast_fp16 = mul(x = var_12253_cast_fp16, y = var_12254_to_fp16)[name = tensor("aw_chunk_1141_cast_fp16")]; tensor var_12257_equation_0 = const()[name = tensor("op_12257_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12257_cast_fp16 = einsum(equation = var_12257_equation_0, values = (var_12099_cast_fp16, var_12046_cast_fp16))[name = tensor("op_12257_cast_fp16")]; tensor var_12258_to_fp16 = const()[name = tensor("op_12258_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1143_cast_fp16 = mul(x = var_12257_cast_fp16, y = var_12258_to_fp16)[name = tensor("aw_chunk_1143_cast_fp16")]; tensor var_12261_equation_0 = const()[name = tensor("op_12261_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12261_cast_fp16 = einsum(equation = var_12261_equation_0, values = (var_12099_cast_fp16, var_12047_cast_fp16))[name = tensor("op_12261_cast_fp16")]; tensor var_12262_to_fp16 = const()[name = tensor("op_12262_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1145_cast_fp16 = mul(x = var_12261_cast_fp16, y = var_12262_to_fp16)[name = tensor("aw_chunk_1145_cast_fp16")]; tensor var_12265_equation_0 = const()[name = tensor("op_12265_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12265_cast_fp16 = einsum(equation = var_12265_equation_0, values = (var_12099_cast_fp16, var_12048_cast_fp16))[name = tensor("op_12265_cast_fp16")]; tensor var_12266_to_fp16 = const()[name = tensor("op_12266_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1147_cast_fp16 = mul(x = var_12265_cast_fp16, y = var_12266_to_fp16)[name = tensor("aw_chunk_1147_cast_fp16")]; tensor var_12269_equation_0 = const()[name = tensor("op_12269_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12269_cast_fp16 = einsum(equation = var_12269_equation_0, values = (var_12099_cast_fp16, var_12049_cast_fp16))[name = tensor("op_12269_cast_fp16")]; tensor var_12270_to_fp16 = const()[name = tensor("op_12270_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1149_cast_fp16 = mul(x = var_12269_cast_fp16, y = var_12270_to_fp16)[name = tensor("aw_chunk_1149_cast_fp16")]; tensor var_12273_equation_0 = const()[name = tensor("op_12273_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12273_cast_fp16 = einsum(equation = var_12273_equation_0, values = (var_12099_cast_fp16, var_12050_cast_fp16))[name = tensor("op_12273_cast_fp16")]; tensor var_12274_to_fp16 = const()[name = tensor("op_12274_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1151_cast_fp16 = mul(x = var_12273_cast_fp16, y = var_12274_to_fp16)[name = tensor("aw_chunk_1151_cast_fp16")]; tensor var_12277_equation_0 = const()[name = tensor("op_12277_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12277_cast_fp16 = einsum(equation = var_12277_equation_0, values = (var_12103_cast_fp16, var_12051_cast_fp16))[name = tensor("op_12277_cast_fp16")]; tensor var_12278_to_fp16 = const()[name = tensor("op_12278_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1153_cast_fp16 = mul(x = var_12277_cast_fp16, y = var_12278_to_fp16)[name = tensor("aw_chunk_1153_cast_fp16")]; tensor var_12281_equation_0 = const()[name = tensor("op_12281_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12281_cast_fp16 = einsum(equation = var_12281_equation_0, values = (var_12103_cast_fp16, var_12052_cast_fp16))[name = tensor("op_12281_cast_fp16")]; tensor var_12282_to_fp16 = const()[name = tensor("op_12282_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1155_cast_fp16 = mul(x = var_12281_cast_fp16, y = var_12282_to_fp16)[name = tensor("aw_chunk_1155_cast_fp16")]; tensor var_12285_equation_0 = const()[name = tensor("op_12285_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12285_cast_fp16 = einsum(equation = var_12285_equation_0, values = (var_12103_cast_fp16, var_12053_cast_fp16))[name = tensor("op_12285_cast_fp16")]; tensor var_12286_to_fp16 = const()[name = tensor("op_12286_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1157_cast_fp16 = mul(x = var_12285_cast_fp16, y = var_12286_to_fp16)[name = tensor("aw_chunk_1157_cast_fp16")]; tensor var_12289_equation_0 = const()[name = tensor("op_12289_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12289_cast_fp16 = einsum(equation = var_12289_equation_0, values = (var_12103_cast_fp16, var_12054_cast_fp16))[name = tensor("op_12289_cast_fp16")]; tensor var_12290_to_fp16 = const()[name = tensor("op_12290_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1159_cast_fp16 = mul(x = var_12289_cast_fp16, y = var_12290_to_fp16)[name = tensor("aw_chunk_1159_cast_fp16")]; tensor var_12293_equation_0 = const()[name = tensor("op_12293_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12293_cast_fp16 = einsum(equation = var_12293_equation_0, values = (var_12103_cast_fp16, var_12055_cast_fp16))[name = tensor("op_12293_cast_fp16")]; tensor var_12294_to_fp16 = const()[name = tensor("op_12294_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1161_cast_fp16 = mul(x = var_12293_cast_fp16, y = var_12294_to_fp16)[name = tensor("aw_chunk_1161_cast_fp16")]; tensor var_12297_equation_0 = const()[name = tensor("op_12297_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12297_cast_fp16 = einsum(equation = var_12297_equation_0, values = (var_12103_cast_fp16, var_12056_cast_fp16))[name = tensor("op_12297_cast_fp16")]; tensor var_12298_to_fp16 = const()[name = tensor("op_12298_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1163_cast_fp16 = mul(x = var_12297_cast_fp16, y = var_12298_to_fp16)[name = tensor("aw_chunk_1163_cast_fp16")]; tensor var_12301_equation_0 = const()[name = tensor("op_12301_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12301_cast_fp16 = einsum(equation = var_12301_equation_0, values = (var_12103_cast_fp16, var_12057_cast_fp16))[name = tensor("op_12301_cast_fp16")]; tensor var_12302_to_fp16 = const()[name = tensor("op_12302_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1165_cast_fp16 = mul(x = var_12301_cast_fp16, y = var_12302_to_fp16)[name = tensor("aw_chunk_1165_cast_fp16")]; tensor var_12305_equation_0 = const()[name = tensor("op_12305_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12305_cast_fp16 = einsum(equation = var_12305_equation_0, values = (var_12103_cast_fp16, var_12058_cast_fp16))[name = tensor("op_12305_cast_fp16")]; tensor var_12306_to_fp16 = const()[name = tensor("op_12306_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1167_cast_fp16 = mul(x = var_12305_cast_fp16, y = var_12306_to_fp16)[name = tensor("aw_chunk_1167_cast_fp16")]; tensor var_12309_equation_0 = const()[name = tensor("op_12309_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12309_cast_fp16 = einsum(equation = var_12309_equation_0, values = (var_12107_cast_fp16, var_12059_cast_fp16))[name = tensor("op_12309_cast_fp16")]; tensor var_12310_to_fp16 = const()[name = tensor("op_12310_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1169_cast_fp16 = mul(x = var_12309_cast_fp16, y = var_12310_to_fp16)[name = tensor("aw_chunk_1169_cast_fp16")]; tensor var_12313_equation_0 = const()[name = tensor("op_12313_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12313_cast_fp16 = einsum(equation = var_12313_equation_0, values = (var_12107_cast_fp16, var_12060_cast_fp16))[name = tensor("op_12313_cast_fp16")]; tensor var_12314_to_fp16 = const()[name = tensor("op_12314_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1171_cast_fp16 = mul(x = var_12313_cast_fp16, y = var_12314_to_fp16)[name = tensor("aw_chunk_1171_cast_fp16")]; tensor var_12317_equation_0 = const()[name = tensor("op_12317_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12317_cast_fp16 = einsum(equation = var_12317_equation_0, values = (var_12107_cast_fp16, var_12061_cast_fp16))[name = tensor("op_12317_cast_fp16")]; tensor var_12318_to_fp16 = const()[name = tensor("op_12318_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1173_cast_fp16 = mul(x = var_12317_cast_fp16, y = var_12318_to_fp16)[name = tensor("aw_chunk_1173_cast_fp16")]; tensor var_12321_equation_0 = const()[name = tensor("op_12321_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12321_cast_fp16 = einsum(equation = var_12321_equation_0, values = (var_12107_cast_fp16, var_12062_cast_fp16))[name = tensor("op_12321_cast_fp16")]; tensor var_12322_to_fp16 = const()[name = tensor("op_12322_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1175_cast_fp16 = mul(x = var_12321_cast_fp16, y = var_12322_to_fp16)[name = tensor("aw_chunk_1175_cast_fp16")]; tensor var_12325_equation_0 = const()[name = tensor("op_12325_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12325_cast_fp16 = einsum(equation = var_12325_equation_0, values = (var_12107_cast_fp16, var_12063_cast_fp16))[name = tensor("op_12325_cast_fp16")]; tensor var_12326_to_fp16 = const()[name = tensor("op_12326_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1177_cast_fp16 = mul(x = var_12325_cast_fp16, y = var_12326_to_fp16)[name = tensor("aw_chunk_1177_cast_fp16")]; tensor var_12329_equation_0 = const()[name = tensor("op_12329_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12329_cast_fp16 = einsum(equation = var_12329_equation_0, values = (var_12107_cast_fp16, var_12064_cast_fp16))[name = tensor("op_12329_cast_fp16")]; tensor var_12330_to_fp16 = const()[name = tensor("op_12330_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1179_cast_fp16 = mul(x = var_12329_cast_fp16, y = var_12330_to_fp16)[name = tensor("aw_chunk_1179_cast_fp16")]; tensor var_12333_equation_0 = const()[name = tensor("op_12333_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12333_cast_fp16 = einsum(equation = var_12333_equation_0, values = (var_12107_cast_fp16, var_12065_cast_fp16))[name = tensor("op_12333_cast_fp16")]; tensor var_12334_to_fp16 = const()[name = tensor("op_12334_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1181_cast_fp16 = mul(x = var_12333_cast_fp16, y = var_12334_to_fp16)[name = tensor("aw_chunk_1181_cast_fp16")]; tensor var_12337_equation_0 = const()[name = tensor("op_12337_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12337_cast_fp16 = einsum(equation = var_12337_equation_0, values = (var_12107_cast_fp16, var_12066_cast_fp16))[name = tensor("op_12337_cast_fp16")]; tensor var_12338_to_fp16 = const()[name = tensor("op_12338_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1183_cast_fp16 = mul(x = var_12337_cast_fp16, y = var_12338_to_fp16)[name = tensor("aw_chunk_1183_cast_fp16")]; tensor var_12341_equation_0 = const()[name = tensor("op_12341_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12341_cast_fp16 = einsum(equation = var_12341_equation_0, values = (var_12111_cast_fp16, var_12067_cast_fp16))[name = tensor("op_12341_cast_fp16")]; tensor var_12342_to_fp16 = const()[name = tensor("op_12342_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1185_cast_fp16 = mul(x = var_12341_cast_fp16, y = var_12342_to_fp16)[name = tensor("aw_chunk_1185_cast_fp16")]; tensor var_12345_equation_0 = const()[name = tensor("op_12345_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12345_cast_fp16 = einsum(equation = var_12345_equation_0, values = (var_12111_cast_fp16, var_12068_cast_fp16))[name = tensor("op_12345_cast_fp16")]; tensor var_12346_to_fp16 = const()[name = tensor("op_12346_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1187_cast_fp16 = mul(x = var_12345_cast_fp16, y = var_12346_to_fp16)[name = tensor("aw_chunk_1187_cast_fp16")]; tensor var_12349_equation_0 = const()[name = tensor("op_12349_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12349_cast_fp16 = einsum(equation = var_12349_equation_0, values = (var_12111_cast_fp16, var_12069_cast_fp16))[name = tensor("op_12349_cast_fp16")]; tensor var_12350_to_fp16 = const()[name = tensor("op_12350_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1189_cast_fp16 = mul(x = var_12349_cast_fp16, y = var_12350_to_fp16)[name = tensor("aw_chunk_1189_cast_fp16")]; tensor var_12353_equation_0 = const()[name = tensor("op_12353_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12353_cast_fp16 = einsum(equation = var_12353_equation_0, values = (var_12111_cast_fp16, var_12070_cast_fp16))[name = tensor("op_12353_cast_fp16")]; tensor var_12354_to_fp16 = const()[name = tensor("op_12354_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1191_cast_fp16 = mul(x = var_12353_cast_fp16, y = var_12354_to_fp16)[name = tensor("aw_chunk_1191_cast_fp16")]; tensor var_12357_equation_0 = const()[name = tensor("op_12357_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12357_cast_fp16 = einsum(equation = var_12357_equation_0, values = (var_12111_cast_fp16, var_12071_cast_fp16))[name = tensor("op_12357_cast_fp16")]; tensor var_12358_to_fp16 = const()[name = tensor("op_12358_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1193_cast_fp16 = mul(x = var_12357_cast_fp16, y = var_12358_to_fp16)[name = tensor("aw_chunk_1193_cast_fp16")]; tensor var_12361_equation_0 = const()[name = tensor("op_12361_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12361_cast_fp16 = einsum(equation = var_12361_equation_0, values = (var_12111_cast_fp16, var_12072_cast_fp16))[name = tensor("op_12361_cast_fp16")]; tensor var_12362_to_fp16 = const()[name = tensor("op_12362_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1195_cast_fp16 = mul(x = var_12361_cast_fp16, y = var_12362_to_fp16)[name = tensor("aw_chunk_1195_cast_fp16")]; tensor var_12365_equation_0 = const()[name = tensor("op_12365_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12365_cast_fp16 = einsum(equation = var_12365_equation_0, values = (var_12111_cast_fp16, var_12073_cast_fp16))[name = tensor("op_12365_cast_fp16")]; tensor var_12366_to_fp16 = const()[name = tensor("op_12366_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1197_cast_fp16 = mul(x = var_12365_cast_fp16, y = var_12366_to_fp16)[name = tensor("aw_chunk_1197_cast_fp16")]; tensor var_12369_equation_0 = const()[name = tensor("op_12369_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12369_cast_fp16 = einsum(equation = var_12369_equation_0, values = (var_12111_cast_fp16, var_12074_cast_fp16))[name = tensor("op_12369_cast_fp16")]; tensor var_12370_to_fp16 = const()[name = tensor("op_12370_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1199_cast_fp16 = mul(x = var_12369_cast_fp16, y = var_12370_to_fp16)[name = tensor("aw_chunk_1199_cast_fp16")]; tensor var_12373_equation_0 = const()[name = tensor("op_12373_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12373_cast_fp16 = einsum(equation = var_12373_equation_0, values = (var_12115_cast_fp16, var_12075_cast_fp16))[name = tensor("op_12373_cast_fp16")]; tensor var_12374_to_fp16 = const()[name = tensor("op_12374_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1201_cast_fp16 = mul(x = var_12373_cast_fp16, y = var_12374_to_fp16)[name = tensor("aw_chunk_1201_cast_fp16")]; tensor var_12377_equation_0 = const()[name = tensor("op_12377_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12377_cast_fp16 = einsum(equation = var_12377_equation_0, values = (var_12115_cast_fp16, var_12076_cast_fp16))[name = tensor("op_12377_cast_fp16")]; tensor var_12378_to_fp16 = const()[name = tensor("op_12378_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1203_cast_fp16 = mul(x = var_12377_cast_fp16, y = var_12378_to_fp16)[name = tensor("aw_chunk_1203_cast_fp16")]; tensor var_12381_equation_0 = const()[name = tensor("op_12381_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12381_cast_fp16 = einsum(equation = var_12381_equation_0, values = (var_12115_cast_fp16, var_12077_cast_fp16))[name = tensor("op_12381_cast_fp16")]; tensor var_12382_to_fp16 = const()[name = tensor("op_12382_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1205_cast_fp16 = mul(x = var_12381_cast_fp16, y = var_12382_to_fp16)[name = tensor("aw_chunk_1205_cast_fp16")]; tensor var_12385_equation_0 = const()[name = tensor("op_12385_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12385_cast_fp16 = einsum(equation = var_12385_equation_0, values = (var_12115_cast_fp16, var_12078_cast_fp16))[name = tensor("op_12385_cast_fp16")]; tensor var_12386_to_fp16 = const()[name = tensor("op_12386_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1207_cast_fp16 = mul(x = var_12385_cast_fp16, y = var_12386_to_fp16)[name = tensor("aw_chunk_1207_cast_fp16")]; tensor var_12389_equation_0 = const()[name = tensor("op_12389_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12389_cast_fp16 = einsum(equation = var_12389_equation_0, values = (var_12115_cast_fp16, var_12079_cast_fp16))[name = tensor("op_12389_cast_fp16")]; tensor var_12390_to_fp16 = const()[name = tensor("op_12390_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1209_cast_fp16 = mul(x = var_12389_cast_fp16, y = var_12390_to_fp16)[name = tensor("aw_chunk_1209_cast_fp16")]; tensor var_12393_equation_0 = const()[name = tensor("op_12393_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12393_cast_fp16 = einsum(equation = var_12393_equation_0, values = (var_12115_cast_fp16, var_12080_cast_fp16))[name = tensor("op_12393_cast_fp16")]; tensor var_12394_to_fp16 = const()[name = tensor("op_12394_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1211_cast_fp16 = mul(x = var_12393_cast_fp16, y = var_12394_to_fp16)[name = tensor("aw_chunk_1211_cast_fp16")]; tensor var_12397_equation_0 = const()[name = tensor("op_12397_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12397_cast_fp16 = einsum(equation = var_12397_equation_0, values = (var_12115_cast_fp16, var_12081_cast_fp16))[name = tensor("op_12397_cast_fp16")]; tensor var_12398_to_fp16 = const()[name = tensor("op_12398_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1213_cast_fp16 = mul(x = var_12397_cast_fp16, y = var_12398_to_fp16)[name = tensor("aw_chunk_1213_cast_fp16")]; tensor var_12401_equation_0 = const()[name = tensor("op_12401_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12401_cast_fp16 = einsum(equation = var_12401_equation_0, values = (var_12115_cast_fp16, var_12082_cast_fp16))[name = tensor("op_12401_cast_fp16")]; tensor var_12402_to_fp16 = const()[name = tensor("op_12402_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1215_cast_fp16 = mul(x = var_12401_cast_fp16, y = var_12402_to_fp16)[name = tensor("aw_chunk_1215_cast_fp16")]; tensor var_12404_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1089_cast_fp16)[name = tensor("op_12404_cast_fp16")]; tensor var_12405_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1091_cast_fp16)[name = tensor("op_12405_cast_fp16")]; tensor var_12406_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1093_cast_fp16)[name = tensor("op_12406_cast_fp16")]; tensor var_12407_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1095_cast_fp16)[name = tensor("op_12407_cast_fp16")]; tensor var_12408_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1097_cast_fp16)[name = tensor("op_12408_cast_fp16")]; tensor var_12409_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1099_cast_fp16)[name = tensor("op_12409_cast_fp16")]; tensor var_12410_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1101_cast_fp16)[name = tensor("op_12410_cast_fp16")]; tensor var_12411_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1103_cast_fp16)[name = tensor("op_12411_cast_fp16")]; tensor var_12412_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1105_cast_fp16)[name = tensor("op_12412_cast_fp16")]; tensor var_12413_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1107_cast_fp16)[name = tensor("op_12413_cast_fp16")]; tensor var_12414_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1109_cast_fp16)[name = tensor("op_12414_cast_fp16")]; tensor var_12415_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1111_cast_fp16)[name = tensor("op_12415_cast_fp16")]; tensor var_12416_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1113_cast_fp16)[name = tensor("op_12416_cast_fp16")]; tensor var_12417_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1115_cast_fp16)[name = tensor("op_12417_cast_fp16")]; tensor var_12418_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1117_cast_fp16)[name = tensor("op_12418_cast_fp16")]; tensor var_12419_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1119_cast_fp16)[name = tensor("op_12419_cast_fp16")]; tensor var_12420_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1121_cast_fp16)[name = tensor("op_12420_cast_fp16")]; tensor var_12421_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1123_cast_fp16)[name = tensor("op_12421_cast_fp16")]; tensor var_12422_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1125_cast_fp16)[name = tensor("op_12422_cast_fp16")]; tensor var_12423_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1127_cast_fp16)[name = tensor("op_12423_cast_fp16")]; tensor var_12424_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1129_cast_fp16)[name = tensor("op_12424_cast_fp16")]; tensor var_12425_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1131_cast_fp16)[name = tensor("op_12425_cast_fp16")]; tensor var_12426_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1133_cast_fp16)[name = tensor("op_12426_cast_fp16")]; tensor var_12427_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1135_cast_fp16)[name = tensor("op_12427_cast_fp16")]; tensor var_12428_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1137_cast_fp16)[name = tensor("op_12428_cast_fp16")]; tensor var_12429_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1139_cast_fp16)[name = tensor("op_12429_cast_fp16")]; tensor var_12430_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1141_cast_fp16)[name = tensor("op_12430_cast_fp16")]; tensor var_12431_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1143_cast_fp16)[name = tensor("op_12431_cast_fp16")]; tensor var_12432_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1145_cast_fp16)[name = tensor("op_12432_cast_fp16")]; tensor var_12433_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1147_cast_fp16)[name = tensor("op_12433_cast_fp16")]; tensor var_12434_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1149_cast_fp16)[name = tensor("op_12434_cast_fp16")]; tensor var_12435_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1151_cast_fp16)[name = tensor("op_12435_cast_fp16")]; tensor var_12436_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1153_cast_fp16)[name = tensor("op_12436_cast_fp16")]; tensor var_12437_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1155_cast_fp16)[name = tensor("op_12437_cast_fp16")]; tensor var_12438_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1157_cast_fp16)[name = tensor("op_12438_cast_fp16")]; tensor var_12439_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1159_cast_fp16)[name = tensor("op_12439_cast_fp16")]; tensor var_12440_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1161_cast_fp16)[name = tensor("op_12440_cast_fp16")]; tensor var_12441_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1163_cast_fp16)[name = tensor("op_12441_cast_fp16")]; tensor var_12442_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1165_cast_fp16)[name = tensor("op_12442_cast_fp16")]; tensor var_12443_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1167_cast_fp16)[name = tensor("op_12443_cast_fp16")]; tensor var_12444_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1169_cast_fp16)[name = tensor("op_12444_cast_fp16")]; tensor var_12445_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1171_cast_fp16)[name = tensor("op_12445_cast_fp16")]; tensor var_12446_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1173_cast_fp16)[name = tensor("op_12446_cast_fp16")]; tensor var_12447_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1175_cast_fp16)[name = tensor("op_12447_cast_fp16")]; tensor var_12448_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1177_cast_fp16)[name = tensor("op_12448_cast_fp16")]; tensor var_12449_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1179_cast_fp16)[name = tensor("op_12449_cast_fp16")]; tensor var_12450_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1181_cast_fp16)[name = tensor("op_12450_cast_fp16")]; tensor var_12451_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1183_cast_fp16)[name = tensor("op_12451_cast_fp16")]; tensor var_12452_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1185_cast_fp16)[name = tensor("op_12452_cast_fp16")]; tensor var_12453_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1187_cast_fp16)[name = tensor("op_12453_cast_fp16")]; tensor var_12454_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1189_cast_fp16)[name = tensor("op_12454_cast_fp16")]; tensor var_12455_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1191_cast_fp16)[name = tensor("op_12455_cast_fp16")]; tensor var_12456_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1193_cast_fp16)[name = tensor("op_12456_cast_fp16")]; tensor var_12457_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1195_cast_fp16)[name = tensor("op_12457_cast_fp16")]; tensor var_12458_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1197_cast_fp16)[name = tensor("op_12458_cast_fp16")]; tensor var_12459_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1199_cast_fp16)[name = tensor("op_12459_cast_fp16")]; tensor var_12460_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1201_cast_fp16)[name = tensor("op_12460_cast_fp16")]; tensor var_12461_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1203_cast_fp16)[name = tensor("op_12461_cast_fp16")]; tensor var_12462_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1205_cast_fp16)[name = tensor("op_12462_cast_fp16")]; tensor var_12463_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1207_cast_fp16)[name = tensor("op_12463_cast_fp16")]; tensor var_12464_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1209_cast_fp16)[name = tensor("op_12464_cast_fp16")]; tensor var_12465_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1211_cast_fp16)[name = tensor("op_12465_cast_fp16")]; tensor var_12466_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1213_cast_fp16)[name = tensor("op_12466_cast_fp16")]; tensor var_12467_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1215_cast_fp16)[name = tensor("op_12467_cast_fp16")]; tensor var_12469_equation_0 = const()[name = tensor("op_12469_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12469_cast_fp16 = einsum(equation = var_12469_equation_0, values = (var_12117_cast_fp16, var_12404_cast_fp16))[name = tensor("op_12469_cast_fp16")]; tensor var_12471_equation_0 = const()[name = tensor("op_12471_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12471_cast_fp16 = einsum(equation = var_12471_equation_0, values = (var_12117_cast_fp16, var_12405_cast_fp16))[name = tensor("op_12471_cast_fp16")]; tensor var_12473_equation_0 = const()[name = tensor("op_12473_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12473_cast_fp16 = einsum(equation = var_12473_equation_0, values = (var_12117_cast_fp16, var_12406_cast_fp16))[name = tensor("op_12473_cast_fp16")]; tensor var_12475_equation_0 = const()[name = tensor("op_12475_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12475_cast_fp16 = einsum(equation = var_12475_equation_0, values = (var_12117_cast_fp16, var_12407_cast_fp16))[name = tensor("op_12475_cast_fp16")]; tensor var_12477_equation_0 = const()[name = tensor("op_12477_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12477_cast_fp16 = einsum(equation = var_12477_equation_0, values = (var_12117_cast_fp16, var_12408_cast_fp16))[name = tensor("op_12477_cast_fp16")]; tensor var_12479_equation_0 = const()[name = tensor("op_12479_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12479_cast_fp16 = einsum(equation = var_12479_equation_0, values = (var_12117_cast_fp16, var_12409_cast_fp16))[name = tensor("op_12479_cast_fp16")]; tensor var_12481_equation_0 = const()[name = tensor("op_12481_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12481_cast_fp16 = einsum(equation = var_12481_equation_0, values = (var_12117_cast_fp16, var_12410_cast_fp16))[name = tensor("op_12481_cast_fp16")]; tensor var_12483_equation_0 = const()[name = tensor("op_12483_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12483_cast_fp16 = einsum(equation = var_12483_equation_0, values = (var_12117_cast_fp16, var_12411_cast_fp16))[name = tensor("op_12483_cast_fp16")]; tensor var_12485_equation_0 = const()[name = tensor("op_12485_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12485_cast_fp16 = einsum(equation = var_12485_equation_0, values = (var_12121_cast_fp16, var_12412_cast_fp16))[name = tensor("op_12485_cast_fp16")]; tensor var_12487_equation_0 = const()[name = tensor("op_12487_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12487_cast_fp16 = einsum(equation = var_12487_equation_0, values = (var_12121_cast_fp16, var_12413_cast_fp16))[name = tensor("op_12487_cast_fp16")]; tensor var_12489_equation_0 = const()[name = tensor("op_12489_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12489_cast_fp16 = einsum(equation = var_12489_equation_0, values = (var_12121_cast_fp16, var_12414_cast_fp16))[name = tensor("op_12489_cast_fp16")]; tensor var_12491_equation_0 = const()[name = tensor("op_12491_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12491_cast_fp16 = einsum(equation = var_12491_equation_0, values = (var_12121_cast_fp16, var_12415_cast_fp16))[name = tensor("op_12491_cast_fp16")]; tensor var_12493_equation_0 = const()[name = tensor("op_12493_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12493_cast_fp16 = einsum(equation = var_12493_equation_0, values = (var_12121_cast_fp16, var_12416_cast_fp16))[name = tensor("op_12493_cast_fp16")]; tensor var_12495_equation_0 = const()[name = tensor("op_12495_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12495_cast_fp16 = einsum(equation = var_12495_equation_0, values = (var_12121_cast_fp16, var_12417_cast_fp16))[name = tensor("op_12495_cast_fp16")]; tensor var_12497_equation_0 = const()[name = tensor("op_12497_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12497_cast_fp16 = einsum(equation = var_12497_equation_0, values = (var_12121_cast_fp16, var_12418_cast_fp16))[name = tensor("op_12497_cast_fp16")]; tensor var_12499_equation_0 = const()[name = tensor("op_12499_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12499_cast_fp16 = einsum(equation = var_12499_equation_0, values = (var_12121_cast_fp16, var_12419_cast_fp16))[name = tensor("op_12499_cast_fp16")]; tensor var_12501_equation_0 = const()[name = tensor("op_12501_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12501_cast_fp16 = einsum(equation = var_12501_equation_0, values = (var_12125_cast_fp16, var_12420_cast_fp16))[name = tensor("op_12501_cast_fp16")]; tensor var_12503_equation_0 = const()[name = tensor("op_12503_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12503_cast_fp16 = einsum(equation = var_12503_equation_0, values = (var_12125_cast_fp16, var_12421_cast_fp16))[name = tensor("op_12503_cast_fp16")]; tensor var_12505_equation_0 = const()[name = tensor("op_12505_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12505_cast_fp16 = einsum(equation = var_12505_equation_0, values = (var_12125_cast_fp16, var_12422_cast_fp16))[name = tensor("op_12505_cast_fp16")]; tensor var_12507_equation_0 = const()[name = tensor("op_12507_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12507_cast_fp16 = einsum(equation = var_12507_equation_0, values = (var_12125_cast_fp16, var_12423_cast_fp16))[name = tensor("op_12507_cast_fp16")]; tensor var_12509_equation_0 = const()[name = tensor("op_12509_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12509_cast_fp16 = einsum(equation = var_12509_equation_0, values = (var_12125_cast_fp16, var_12424_cast_fp16))[name = tensor("op_12509_cast_fp16")]; tensor var_12511_equation_0 = const()[name = tensor("op_12511_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12511_cast_fp16 = einsum(equation = var_12511_equation_0, values = (var_12125_cast_fp16, var_12425_cast_fp16))[name = tensor("op_12511_cast_fp16")]; tensor var_12513_equation_0 = const()[name = tensor("op_12513_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12513_cast_fp16 = einsum(equation = var_12513_equation_0, values = (var_12125_cast_fp16, var_12426_cast_fp16))[name = tensor("op_12513_cast_fp16")]; tensor var_12515_equation_0 = const()[name = tensor("op_12515_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12515_cast_fp16 = einsum(equation = var_12515_equation_0, values = (var_12125_cast_fp16, var_12427_cast_fp16))[name = tensor("op_12515_cast_fp16")]; tensor var_12517_equation_0 = const()[name = tensor("op_12517_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12517_cast_fp16 = einsum(equation = var_12517_equation_0, values = (var_12129_cast_fp16, var_12428_cast_fp16))[name = tensor("op_12517_cast_fp16")]; tensor var_12519_equation_0 = const()[name = tensor("op_12519_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12519_cast_fp16 = einsum(equation = var_12519_equation_0, values = (var_12129_cast_fp16, var_12429_cast_fp16))[name = tensor("op_12519_cast_fp16")]; tensor var_12521_equation_0 = const()[name = tensor("op_12521_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12521_cast_fp16 = einsum(equation = var_12521_equation_0, values = (var_12129_cast_fp16, var_12430_cast_fp16))[name = tensor("op_12521_cast_fp16")]; tensor var_12523_equation_0 = const()[name = tensor("op_12523_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12523_cast_fp16 = einsum(equation = var_12523_equation_0, values = (var_12129_cast_fp16, var_12431_cast_fp16))[name = tensor("op_12523_cast_fp16")]; tensor var_12525_equation_0 = const()[name = tensor("op_12525_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12525_cast_fp16 = einsum(equation = var_12525_equation_0, values = (var_12129_cast_fp16, var_12432_cast_fp16))[name = tensor("op_12525_cast_fp16")]; tensor var_12527_equation_0 = const()[name = tensor("op_12527_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12527_cast_fp16 = einsum(equation = var_12527_equation_0, values = (var_12129_cast_fp16, var_12433_cast_fp16))[name = tensor("op_12527_cast_fp16")]; tensor var_12529_equation_0 = const()[name = tensor("op_12529_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12529_cast_fp16 = einsum(equation = var_12529_equation_0, values = (var_12129_cast_fp16, var_12434_cast_fp16))[name = tensor("op_12529_cast_fp16")]; tensor var_12531_equation_0 = const()[name = tensor("op_12531_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12531_cast_fp16 = einsum(equation = var_12531_equation_0, values = (var_12129_cast_fp16, var_12435_cast_fp16))[name = tensor("op_12531_cast_fp16")]; tensor var_12533_equation_0 = const()[name = tensor("op_12533_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12533_cast_fp16 = einsum(equation = var_12533_equation_0, values = (var_12133_cast_fp16, var_12436_cast_fp16))[name = tensor("op_12533_cast_fp16")]; tensor var_12535_equation_0 = const()[name = tensor("op_12535_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12535_cast_fp16 = einsum(equation = var_12535_equation_0, values = (var_12133_cast_fp16, var_12437_cast_fp16))[name = tensor("op_12535_cast_fp16")]; tensor var_12537_equation_0 = const()[name = tensor("op_12537_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12537_cast_fp16 = einsum(equation = var_12537_equation_0, values = (var_12133_cast_fp16, var_12438_cast_fp16))[name = tensor("op_12537_cast_fp16")]; tensor var_12539_equation_0 = const()[name = tensor("op_12539_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12539_cast_fp16 = einsum(equation = var_12539_equation_0, values = (var_12133_cast_fp16, var_12439_cast_fp16))[name = tensor("op_12539_cast_fp16")]; tensor var_12541_equation_0 = const()[name = tensor("op_12541_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12541_cast_fp16 = einsum(equation = var_12541_equation_0, values = (var_12133_cast_fp16, var_12440_cast_fp16))[name = tensor("op_12541_cast_fp16")]; tensor var_12543_equation_0 = const()[name = tensor("op_12543_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12543_cast_fp16 = einsum(equation = var_12543_equation_0, values = (var_12133_cast_fp16, var_12441_cast_fp16))[name = tensor("op_12543_cast_fp16")]; tensor var_12545_equation_0 = const()[name = tensor("op_12545_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12545_cast_fp16 = einsum(equation = var_12545_equation_0, values = (var_12133_cast_fp16, var_12442_cast_fp16))[name = tensor("op_12545_cast_fp16")]; tensor var_12547_equation_0 = const()[name = tensor("op_12547_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12547_cast_fp16 = einsum(equation = var_12547_equation_0, values = (var_12133_cast_fp16, var_12443_cast_fp16))[name = tensor("op_12547_cast_fp16")]; tensor var_12549_equation_0 = const()[name = tensor("op_12549_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12549_cast_fp16 = einsum(equation = var_12549_equation_0, values = (var_12137_cast_fp16, var_12444_cast_fp16))[name = tensor("op_12549_cast_fp16")]; tensor var_12551_equation_0 = const()[name = tensor("op_12551_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12551_cast_fp16 = einsum(equation = var_12551_equation_0, values = (var_12137_cast_fp16, var_12445_cast_fp16))[name = tensor("op_12551_cast_fp16")]; tensor var_12553_equation_0 = const()[name = tensor("op_12553_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12553_cast_fp16 = einsum(equation = var_12553_equation_0, values = (var_12137_cast_fp16, var_12446_cast_fp16))[name = tensor("op_12553_cast_fp16")]; tensor var_12555_equation_0 = const()[name = tensor("op_12555_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12555_cast_fp16 = einsum(equation = var_12555_equation_0, values = (var_12137_cast_fp16, var_12447_cast_fp16))[name = tensor("op_12555_cast_fp16")]; tensor var_12557_equation_0 = const()[name = tensor("op_12557_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12557_cast_fp16 = einsum(equation = var_12557_equation_0, values = (var_12137_cast_fp16, var_12448_cast_fp16))[name = tensor("op_12557_cast_fp16")]; tensor var_12559_equation_0 = const()[name = tensor("op_12559_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12559_cast_fp16 = einsum(equation = var_12559_equation_0, values = (var_12137_cast_fp16, var_12449_cast_fp16))[name = tensor("op_12559_cast_fp16")]; tensor var_12561_equation_0 = const()[name = tensor("op_12561_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12561_cast_fp16 = einsum(equation = var_12561_equation_0, values = (var_12137_cast_fp16, var_12450_cast_fp16))[name = tensor("op_12561_cast_fp16")]; tensor var_12563_equation_0 = const()[name = tensor("op_12563_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12563_cast_fp16 = einsum(equation = var_12563_equation_0, values = (var_12137_cast_fp16, var_12451_cast_fp16))[name = tensor("op_12563_cast_fp16")]; tensor var_12565_equation_0 = const()[name = tensor("op_12565_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12565_cast_fp16 = einsum(equation = var_12565_equation_0, values = (var_12141_cast_fp16, var_12452_cast_fp16))[name = tensor("op_12565_cast_fp16")]; tensor var_12567_equation_0 = const()[name = tensor("op_12567_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12567_cast_fp16 = einsum(equation = var_12567_equation_0, values = (var_12141_cast_fp16, var_12453_cast_fp16))[name = tensor("op_12567_cast_fp16")]; tensor var_12569_equation_0 = const()[name = tensor("op_12569_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12569_cast_fp16 = einsum(equation = var_12569_equation_0, values = (var_12141_cast_fp16, var_12454_cast_fp16))[name = tensor("op_12569_cast_fp16")]; tensor var_12571_equation_0 = const()[name = tensor("op_12571_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12571_cast_fp16 = einsum(equation = var_12571_equation_0, values = (var_12141_cast_fp16, var_12455_cast_fp16))[name = tensor("op_12571_cast_fp16")]; tensor var_12573_equation_0 = const()[name = tensor("op_12573_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12573_cast_fp16 = einsum(equation = var_12573_equation_0, values = (var_12141_cast_fp16, var_12456_cast_fp16))[name = tensor("op_12573_cast_fp16")]; tensor var_12575_equation_0 = const()[name = tensor("op_12575_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12575_cast_fp16 = einsum(equation = var_12575_equation_0, values = (var_12141_cast_fp16, var_12457_cast_fp16))[name = tensor("op_12575_cast_fp16")]; tensor var_12577_equation_0 = const()[name = tensor("op_12577_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12577_cast_fp16 = einsum(equation = var_12577_equation_0, values = (var_12141_cast_fp16, var_12458_cast_fp16))[name = tensor("op_12577_cast_fp16")]; tensor var_12579_equation_0 = const()[name = tensor("op_12579_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12579_cast_fp16 = einsum(equation = var_12579_equation_0, values = (var_12141_cast_fp16, var_12459_cast_fp16))[name = tensor("op_12579_cast_fp16")]; tensor var_12581_equation_0 = const()[name = tensor("op_12581_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12581_cast_fp16 = einsum(equation = var_12581_equation_0, values = (var_12145_cast_fp16, var_12460_cast_fp16))[name = tensor("op_12581_cast_fp16")]; tensor var_12583_equation_0 = const()[name = tensor("op_12583_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12583_cast_fp16 = einsum(equation = var_12583_equation_0, values = (var_12145_cast_fp16, var_12461_cast_fp16))[name = tensor("op_12583_cast_fp16")]; tensor var_12585_equation_0 = const()[name = tensor("op_12585_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12585_cast_fp16 = einsum(equation = var_12585_equation_0, values = (var_12145_cast_fp16, var_12462_cast_fp16))[name = tensor("op_12585_cast_fp16")]; tensor var_12587_equation_0 = const()[name = tensor("op_12587_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12587_cast_fp16 = einsum(equation = var_12587_equation_0, values = (var_12145_cast_fp16, var_12463_cast_fp16))[name = tensor("op_12587_cast_fp16")]; tensor var_12589_equation_0 = const()[name = tensor("op_12589_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12589_cast_fp16 = einsum(equation = var_12589_equation_0, values = (var_12145_cast_fp16, var_12464_cast_fp16))[name = tensor("op_12589_cast_fp16")]; tensor var_12591_equation_0 = const()[name = tensor("op_12591_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12591_cast_fp16 = einsum(equation = var_12591_equation_0, values = (var_12145_cast_fp16, var_12465_cast_fp16))[name = tensor("op_12591_cast_fp16")]; tensor var_12593_equation_0 = const()[name = tensor("op_12593_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12593_cast_fp16 = einsum(equation = var_12593_equation_0, values = (var_12145_cast_fp16, var_12466_cast_fp16))[name = tensor("op_12593_cast_fp16")]; tensor var_12595_equation_0 = const()[name = tensor("op_12595_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12595_cast_fp16 = einsum(equation = var_12595_equation_0, values = (var_12145_cast_fp16, var_12467_cast_fp16))[name = tensor("op_12595_cast_fp16")]; tensor var_12597_interleave_0 = const()[name = tensor("op_12597_interleave_0"), val = tensor(false)]; tensor var_12597_cast_fp16 = concat(axis = var_10347, interleave = var_12597_interleave_0, values = (var_12469_cast_fp16, var_12471_cast_fp16, var_12473_cast_fp16, var_12475_cast_fp16, var_12477_cast_fp16, var_12479_cast_fp16, var_12481_cast_fp16, var_12483_cast_fp16))[name = tensor("op_12597_cast_fp16")]; tensor var_12599_interleave_0 = const()[name = tensor("op_12599_interleave_0"), val = tensor(false)]; tensor var_12599_cast_fp16 = concat(axis = var_10347, interleave = var_12599_interleave_0, values = (var_12485_cast_fp16, var_12487_cast_fp16, var_12489_cast_fp16, var_12491_cast_fp16, var_12493_cast_fp16, var_12495_cast_fp16, var_12497_cast_fp16, var_12499_cast_fp16))[name = tensor("op_12599_cast_fp16")]; tensor var_12601_interleave_0 = const()[name = tensor("op_12601_interleave_0"), val = tensor(false)]; tensor var_12601_cast_fp16 = concat(axis = var_10347, interleave = var_12601_interleave_0, values = (var_12501_cast_fp16, var_12503_cast_fp16, var_12505_cast_fp16, var_12507_cast_fp16, var_12509_cast_fp16, var_12511_cast_fp16, var_12513_cast_fp16, var_12515_cast_fp16))[name = tensor("op_12601_cast_fp16")]; tensor var_12603_interleave_0 = const()[name = tensor("op_12603_interleave_0"), val = tensor(false)]; tensor var_12603_cast_fp16 = concat(axis = var_10347, interleave = var_12603_interleave_0, values = (var_12517_cast_fp16, var_12519_cast_fp16, var_12521_cast_fp16, var_12523_cast_fp16, var_12525_cast_fp16, var_12527_cast_fp16, var_12529_cast_fp16, var_12531_cast_fp16))[name = tensor("op_12603_cast_fp16")]; tensor var_12605_interleave_0 = const()[name = tensor("op_12605_interleave_0"), val = tensor(false)]; tensor var_12605_cast_fp16 = concat(axis = var_10347, interleave = var_12605_interleave_0, values = (var_12533_cast_fp16, var_12535_cast_fp16, var_12537_cast_fp16, var_12539_cast_fp16, var_12541_cast_fp16, var_12543_cast_fp16, var_12545_cast_fp16, var_12547_cast_fp16))[name = tensor("op_12605_cast_fp16")]; tensor var_12607_interleave_0 = const()[name = tensor("op_12607_interleave_0"), val = tensor(false)]; tensor var_12607_cast_fp16 = concat(axis = var_10347, interleave = var_12607_interleave_0, values = (var_12549_cast_fp16, var_12551_cast_fp16, var_12553_cast_fp16, var_12555_cast_fp16, var_12557_cast_fp16, var_12559_cast_fp16, var_12561_cast_fp16, var_12563_cast_fp16))[name = tensor("op_12607_cast_fp16")]; tensor var_12609_interleave_0 = const()[name = tensor("op_12609_interleave_0"), val = tensor(false)]; tensor var_12609_cast_fp16 = concat(axis = var_10347, interleave = var_12609_interleave_0, values = (var_12565_cast_fp16, var_12567_cast_fp16, var_12569_cast_fp16, var_12571_cast_fp16, var_12573_cast_fp16, var_12575_cast_fp16, var_12577_cast_fp16, var_12579_cast_fp16))[name = tensor("op_12609_cast_fp16")]; tensor var_12611_interleave_0 = const()[name = tensor("op_12611_interleave_0"), val = tensor(false)]; tensor var_12611_cast_fp16 = concat(axis = var_10347, interleave = var_12611_interleave_0, values = (var_12581_cast_fp16, var_12583_cast_fp16, var_12585_cast_fp16, var_12587_cast_fp16, var_12589_cast_fp16, var_12591_cast_fp16, var_12593_cast_fp16, var_12595_cast_fp16))[name = tensor("op_12611_cast_fp16")]; tensor input_491_interleave_0 = const()[name = tensor("input_491_interleave_0"), val = tensor(false)]; tensor input_491_cast_fp16 = concat(axis = var_10375, interleave = input_491_interleave_0, values = (var_12597_cast_fp16, var_12599_cast_fp16, var_12601_cast_fp16, var_12603_cast_fp16, var_12605_cast_fp16, var_12607_cast_fp16, var_12609_cast_fp16, var_12611_cast_fp16))[name = tensor("input_491_cast_fp16")]; tensor var_12617 = const()[name = tensor("op_12617"), val = tensor([1, 1])]; tensor var_12619 = const()[name = tensor("op_12619"), val = tensor([1, 1])]; tensor var_12621_pad_type_0 = const()[name = tensor("op_12621_pad_type_0"), val = tensor("custom")]; tensor var_12621_pad_0 = const()[name = tensor("op_12621_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1702962304)))]; tensor up_blocks_3_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1703167168)))]; tensor var_12621_cast_fp16 = conv(bias = up_blocks_3_attentions_1_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_12619, groups = var_10375, pad = var_12621_pad_0, pad_type = var_12621_pad_type_0, strides = var_12617, weight = up_blocks_3_attentions_1_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_491_cast_fp16)[name = tensor("op_12621_cast_fp16")]; tensor inputs_87_cast_fp16 = add(x = var_12621_cast_fp16, y = inputs_85_cast_fp16)[name = tensor("inputs_87_cast_fp16")]; tensor hidden_states_309_axes_0 = const()[name = tensor("hidden_states_309_axes_0"), val = tensor([1])]; tensor hidden_states_309_gamma_0_to_fp16 = const()[name = tensor("hidden_states_309_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1703167872)))]; tensor hidden_states_309_beta_0_to_fp16 = const()[name = tensor("hidden_states_309_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1703168576)))]; tensor var_12631_to_fp16 = const()[name = tensor("op_12631_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_309_cast_fp16 = layer_norm(axes = hidden_states_309_axes_0, beta = hidden_states_309_beta_0_to_fp16, epsilon = var_12631_to_fp16, gamma = hidden_states_309_gamma_0_to_fp16, x = inputs_87_cast_fp16)[name = tensor("hidden_states_309_cast_fp16")]; tensor var_12646 = const()[name = tensor("op_12646"), val = tensor([1, 1])]; tensor var_12648 = const()[name = tensor("op_12648"), val = tensor([1, 1])]; tensor q_59_pad_type_0 = const()[name = tensor("q_59_pad_type_0"), val = tensor("custom")]; tensor q_59_pad_0 = const()[name = tensor("q_59_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1703169280)))]; tensor q_59_cast_fp16 = conv(dilations = var_12648, groups = var_10375, pad = q_59_pad_0, pad_type = q_59_pad_type_0, strides = var_12646, weight = up_blocks_3_attentions_1_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_309_cast_fp16)[name = tensor("q_59_cast_fp16")]; tensor var_12652 = const()[name = tensor("op_12652"), val = tensor([1, 1])]; tensor var_12654 = const()[name = tensor("op_12654"), val = tensor([1, 1])]; tensor k_117_pad_type_0 = const()[name = tensor("k_117_pad_type_0"), val = tensor("custom")]; tensor k_117_pad_0 = const()[name = tensor("k_117_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1703374144)))]; tensor k_117_cast_fp16 = conv(dilations = var_12654, groups = var_10375, pad = k_117_pad_0, pad_type = k_117_pad_type_0, strides = var_12652, weight = up_blocks_3_attentions_1_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_117_cast_fp16")]; tensor var_12658 = const()[name = tensor("op_12658"), val = tensor([1, 1])]; tensor var_12660 = const()[name = tensor("op_12660"), val = tensor([1, 1])]; tensor v_59_pad_type_0 = const()[name = tensor("v_59_pad_type_0"), val = tensor("custom")]; tensor v_59_pad_0 = const()[name = tensor("v_59_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1703865728)))]; tensor v_59_cast_fp16 = conv(dilations = var_12660, groups = var_10375, pad = v_59_pad_0, pad_type = v_59_pad_type_0, strides = var_12658, weight = up_blocks_3_attentions_1_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_59_cast_fp16")]; tensor var_12664_begin_0 = const()[name = tensor("op_12664_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12664_end_0 = const()[name = tensor("op_12664_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12664_end_mask_0 = const()[name = tensor("op_12664_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12664_cast_fp16 = slice_by_index(begin = var_12664_begin_0, end = var_12664_end_0, end_mask = var_12664_end_mask_0, x = q_59_cast_fp16)[name = tensor("op_12664_cast_fp16")]; tensor var_12668_begin_0 = const()[name = tensor("op_12668_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_12668_end_0 = const()[name = tensor("op_12668_end_0"), val = tensor([2, 80, 1, 4096])]; tensor var_12668_end_mask_0 = const()[name = tensor("op_12668_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12668_cast_fp16 = slice_by_index(begin = var_12668_begin_0, end = var_12668_end_0, end_mask = var_12668_end_mask_0, x = q_59_cast_fp16)[name = tensor("op_12668_cast_fp16")]; tensor var_12672_begin_0 = const()[name = tensor("op_12672_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_12672_end_0 = const()[name = tensor("op_12672_end_0"), val = tensor([2, 120, 1, 4096])]; tensor var_12672_end_mask_0 = const()[name = tensor("op_12672_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12672_cast_fp16 = slice_by_index(begin = var_12672_begin_0, end = var_12672_end_0, end_mask = var_12672_end_mask_0, x = q_59_cast_fp16)[name = tensor("op_12672_cast_fp16")]; tensor var_12676_begin_0 = const()[name = tensor("op_12676_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_12676_end_0 = const()[name = tensor("op_12676_end_0"), val = tensor([2, 160, 1, 4096])]; tensor var_12676_end_mask_0 = const()[name = tensor("op_12676_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12676_cast_fp16 = slice_by_index(begin = var_12676_begin_0, end = var_12676_end_0, end_mask = var_12676_end_mask_0, x = q_59_cast_fp16)[name = tensor("op_12676_cast_fp16")]; tensor var_12680_begin_0 = const()[name = tensor("op_12680_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_12680_end_0 = const()[name = tensor("op_12680_end_0"), val = tensor([2, 200, 1, 4096])]; tensor var_12680_end_mask_0 = const()[name = tensor("op_12680_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12680_cast_fp16 = slice_by_index(begin = var_12680_begin_0, end = var_12680_end_0, end_mask = var_12680_end_mask_0, x = q_59_cast_fp16)[name = tensor("op_12680_cast_fp16")]; tensor var_12684_begin_0 = const()[name = tensor("op_12684_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_12684_end_0 = const()[name = tensor("op_12684_end_0"), val = tensor([2, 240, 1, 4096])]; tensor var_12684_end_mask_0 = const()[name = tensor("op_12684_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12684_cast_fp16 = slice_by_index(begin = var_12684_begin_0, end = var_12684_end_0, end_mask = var_12684_end_mask_0, x = q_59_cast_fp16)[name = tensor("op_12684_cast_fp16")]; tensor var_12688_begin_0 = const()[name = tensor("op_12688_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_12688_end_0 = const()[name = tensor("op_12688_end_0"), val = tensor([2, 280, 1, 4096])]; tensor var_12688_end_mask_0 = const()[name = tensor("op_12688_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12688_cast_fp16 = slice_by_index(begin = var_12688_begin_0, end = var_12688_end_0, end_mask = var_12688_end_mask_0, x = q_59_cast_fp16)[name = tensor("op_12688_cast_fp16")]; tensor var_12692_begin_0 = const()[name = tensor("op_12692_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_12692_end_0 = const()[name = tensor("op_12692_end_0"), val = tensor([2, 320, 1, 4096])]; tensor var_12692_end_mask_0 = const()[name = tensor("op_12692_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12692_cast_fp16 = slice_by_index(begin = var_12692_begin_0, end = var_12692_end_0, end_mask = var_12692_end_mask_0, x = q_59_cast_fp16)[name = tensor("op_12692_cast_fp16")]; tensor var_12695_begin_0 = const()[name = tensor("op_12695_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12695_end_0 = const()[name = tensor("op_12695_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12695_end_mask_0 = const()[name = tensor("op_12695_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12695_cast_fp16 = slice_by_index(begin = var_12695_begin_0, end = var_12695_end_0, end_mask = var_12695_end_mask_0, x = var_12664_cast_fp16)[name = tensor("op_12695_cast_fp16")]; tensor var_12696_begin_0 = const()[name = tensor("op_12696_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12696_end_0 = const()[name = tensor("op_12696_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12696_end_mask_0 = const()[name = tensor("op_12696_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12696_cast_fp16 = slice_by_index(begin = var_12696_begin_0, end = var_12696_end_0, end_mask = var_12696_end_mask_0, x = var_12664_cast_fp16)[name = tensor("op_12696_cast_fp16")]; tensor var_12697_begin_0 = const()[name = tensor("op_12697_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12697_end_0 = const()[name = tensor("op_12697_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12697_end_mask_0 = const()[name = tensor("op_12697_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12697_cast_fp16 = slice_by_index(begin = var_12697_begin_0, end = var_12697_end_0, end_mask = var_12697_end_mask_0, x = var_12664_cast_fp16)[name = tensor("op_12697_cast_fp16")]; tensor var_12698_begin_0 = const()[name = tensor("op_12698_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12698_end_0 = const()[name = tensor("op_12698_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12698_end_mask_0 = const()[name = tensor("op_12698_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12698_cast_fp16 = slice_by_index(begin = var_12698_begin_0, end = var_12698_end_0, end_mask = var_12698_end_mask_0, x = var_12664_cast_fp16)[name = tensor("op_12698_cast_fp16")]; tensor var_12699_begin_0 = const()[name = tensor("op_12699_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12699_end_0 = const()[name = tensor("op_12699_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12699_end_mask_0 = const()[name = tensor("op_12699_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12699_cast_fp16 = slice_by_index(begin = var_12699_begin_0, end = var_12699_end_0, end_mask = var_12699_end_mask_0, x = var_12664_cast_fp16)[name = tensor("op_12699_cast_fp16")]; tensor var_12700_begin_0 = const()[name = tensor("op_12700_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12700_end_0 = const()[name = tensor("op_12700_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12700_end_mask_0 = const()[name = tensor("op_12700_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12700_cast_fp16 = slice_by_index(begin = var_12700_begin_0, end = var_12700_end_0, end_mask = var_12700_end_mask_0, x = var_12664_cast_fp16)[name = tensor("op_12700_cast_fp16")]; tensor var_12701_begin_0 = const()[name = tensor("op_12701_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12701_end_0 = const()[name = tensor("op_12701_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12701_end_mask_0 = const()[name = tensor("op_12701_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12701_cast_fp16 = slice_by_index(begin = var_12701_begin_0, end = var_12701_end_0, end_mask = var_12701_end_mask_0, x = var_12664_cast_fp16)[name = tensor("op_12701_cast_fp16")]; tensor var_12702_begin_0 = const()[name = tensor("op_12702_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12702_end_0 = const()[name = tensor("op_12702_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12702_end_mask_0 = const()[name = tensor("op_12702_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12702_cast_fp16 = slice_by_index(begin = var_12702_begin_0, end = var_12702_end_0, end_mask = var_12702_end_mask_0, x = var_12664_cast_fp16)[name = tensor("op_12702_cast_fp16")]; tensor var_12703_begin_0 = const()[name = tensor("op_12703_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12703_end_0 = const()[name = tensor("op_12703_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12703_end_mask_0 = const()[name = tensor("op_12703_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12703_cast_fp16 = slice_by_index(begin = var_12703_begin_0, end = var_12703_end_0, end_mask = var_12703_end_mask_0, x = var_12668_cast_fp16)[name = tensor("op_12703_cast_fp16")]; tensor var_12704_begin_0 = const()[name = tensor("op_12704_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12704_end_0 = const()[name = tensor("op_12704_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12704_end_mask_0 = const()[name = tensor("op_12704_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12704_cast_fp16 = slice_by_index(begin = var_12704_begin_0, end = var_12704_end_0, end_mask = var_12704_end_mask_0, x = var_12668_cast_fp16)[name = tensor("op_12704_cast_fp16")]; tensor var_12705_begin_0 = const()[name = tensor("op_12705_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12705_end_0 = const()[name = tensor("op_12705_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12705_end_mask_0 = const()[name = tensor("op_12705_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12705_cast_fp16 = slice_by_index(begin = var_12705_begin_0, end = var_12705_end_0, end_mask = var_12705_end_mask_0, x = var_12668_cast_fp16)[name = tensor("op_12705_cast_fp16")]; tensor var_12706_begin_0 = const()[name = tensor("op_12706_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12706_end_0 = const()[name = tensor("op_12706_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12706_end_mask_0 = const()[name = tensor("op_12706_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12706_cast_fp16 = slice_by_index(begin = var_12706_begin_0, end = var_12706_end_0, end_mask = var_12706_end_mask_0, x = var_12668_cast_fp16)[name = tensor("op_12706_cast_fp16")]; tensor var_12707_begin_0 = const()[name = tensor("op_12707_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12707_end_0 = const()[name = tensor("op_12707_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12707_end_mask_0 = const()[name = tensor("op_12707_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12707_cast_fp16 = slice_by_index(begin = var_12707_begin_0, end = var_12707_end_0, end_mask = var_12707_end_mask_0, x = var_12668_cast_fp16)[name = tensor("op_12707_cast_fp16")]; tensor var_12708_begin_0 = const()[name = tensor("op_12708_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12708_end_0 = const()[name = tensor("op_12708_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12708_end_mask_0 = const()[name = tensor("op_12708_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12708_cast_fp16 = slice_by_index(begin = var_12708_begin_0, end = var_12708_end_0, end_mask = var_12708_end_mask_0, x = var_12668_cast_fp16)[name = tensor("op_12708_cast_fp16")]; tensor var_12709_begin_0 = const()[name = tensor("op_12709_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12709_end_0 = const()[name = tensor("op_12709_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12709_end_mask_0 = const()[name = tensor("op_12709_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12709_cast_fp16 = slice_by_index(begin = var_12709_begin_0, end = var_12709_end_0, end_mask = var_12709_end_mask_0, x = var_12668_cast_fp16)[name = tensor("op_12709_cast_fp16")]; tensor var_12710_begin_0 = const()[name = tensor("op_12710_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12710_end_0 = const()[name = tensor("op_12710_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12710_end_mask_0 = const()[name = tensor("op_12710_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12710_cast_fp16 = slice_by_index(begin = var_12710_begin_0, end = var_12710_end_0, end_mask = var_12710_end_mask_0, x = var_12668_cast_fp16)[name = tensor("op_12710_cast_fp16")]; tensor var_12711_begin_0 = const()[name = tensor("op_12711_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12711_end_0 = const()[name = tensor("op_12711_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12711_end_mask_0 = const()[name = tensor("op_12711_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12711_cast_fp16 = slice_by_index(begin = var_12711_begin_0, end = var_12711_end_0, end_mask = var_12711_end_mask_0, x = var_12672_cast_fp16)[name = tensor("op_12711_cast_fp16")]; tensor var_12712_begin_0 = const()[name = tensor("op_12712_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12712_end_0 = const()[name = tensor("op_12712_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12712_end_mask_0 = const()[name = tensor("op_12712_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12712_cast_fp16 = slice_by_index(begin = var_12712_begin_0, end = var_12712_end_0, end_mask = var_12712_end_mask_0, x = var_12672_cast_fp16)[name = tensor("op_12712_cast_fp16")]; tensor var_12713_begin_0 = const()[name = tensor("op_12713_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12713_end_0 = const()[name = tensor("op_12713_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12713_end_mask_0 = const()[name = tensor("op_12713_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12713_cast_fp16 = slice_by_index(begin = var_12713_begin_0, end = var_12713_end_0, end_mask = var_12713_end_mask_0, x = var_12672_cast_fp16)[name = tensor("op_12713_cast_fp16")]; tensor var_12714_begin_0 = const()[name = tensor("op_12714_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12714_end_0 = const()[name = tensor("op_12714_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12714_end_mask_0 = const()[name = tensor("op_12714_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12714_cast_fp16 = slice_by_index(begin = var_12714_begin_0, end = var_12714_end_0, end_mask = var_12714_end_mask_0, x = var_12672_cast_fp16)[name = tensor("op_12714_cast_fp16")]; tensor var_12715_begin_0 = const()[name = tensor("op_12715_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12715_end_0 = const()[name = tensor("op_12715_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12715_end_mask_0 = const()[name = tensor("op_12715_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12715_cast_fp16 = slice_by_index(begin = var_12715_begin_0, end = var_12715_end_0, end_mask = var_12715_end_mask_0, x = var_12672_cast_fp16)[name = tensor("op_12715_cast_fp16")]; tensor var_12716_begin_0 = const()[name = tensor("op_12716_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12716_end_0 = const()[name = tensor("op_12716_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12716_end_mask_0 = const()[name = tensor("op_12716_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12716_cast_fp16 = slice_by_index(begin = var_12716_begin_0, end = var_12716_end_0, end_mask = var_12716_end_mask_0, x = var_12672_cast_fp16)[name = tensor("op_12716_cast_fp16")]; tensor var_12717_begin_0 = const()[name = tensor("op_12717_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12717_end_0 = const()[name = tensor("op_12717_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12717_end_mask_0 = const()[name = tensor("op_12717_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12717_cast_fp16 = slice_by_index(begin = var_12717_begin_0, end = var_12717_end_0, end_mask = var_12717_end_mask_0, x = var_12672_cast_fp16)[name = tensor("op_12717_cast_fp16")]; tensor var_12718_begin_0 = const()[name = tensor("op_12718_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12718_end_0 = const()[name = tensor("op_12718_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12718_end_mask_0 = const()[name = tensor("op_12718_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12718_cast_fp16 = slice_by_index(begin = var_12718_begin_0, end = var_12718_end_0, end_mask = var_12718_end_mask_0, x = var_12672_cast_fp16)[name = tensor("op_12718_cast_fp16")]; tensor var_12719_begin_0 = const()[name = tensor("op_12719_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12719_end_0 = const()[name = tensor("op_12719_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12719_end_mask_0 = const()[name = tensor("op_12719_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12719_cast_fp16 = slice_by_index(begin = var_12719_begin_0, end = var_12719_end_0, end_mask = var_12719_end_mask_0, x = var_12676_cast_fp16)[name = tensor("op_12719_cast_fp16")]; tensor var_12720_begin_0 = const()[name = tensor("op_12720_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12720_end_0 = const()[name = tensor("op_12720_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12720_end_mask_0 = const()[name = tensor("op_12720_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12720_cast_fp16 = slice_by_index(begin = var_12720_begin_0, end = var_12720_end_0, end_mask = var_12720_end_mask_0, x = var_12676_cast_fp16)[name = tensor("op_12720_cast_fp16")]; tensor var_12721_begin_0 = const()[name = tensor("op_12721_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12721_end_0 = const()[name = tensor("op_12721_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12721_end_mask_0 = const()[name = tensor("op_12721_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12721_cast_fp16 = slice_by_index(begin = var_12721_begin_0, end = var_12721_end_0, end_mask = var_12721_end_mask_0, x = var_12676_cast_fp16)[name = tensor("op_12721_cast_fp16")]; tensor var_12722_begin_0 = const()[name = tensor("op_12722_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12722_end_0 = const()[name = tensor("op_12722_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12722_end_mask_0 = const()[name = tensor("op_12722_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12722_cast_fp16 = slice_by_index(begin = var_12722_begin_0, end = var_12722_end_0, end_mask = var_12722_end_mask_0, x = var_12676_cast_fp16)[name = tensor("op_12722_cast_fp16")]; tensor var_12723_begin_0 = const()[name = tensor("op_12723_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12723_end_0 = const()[name = tensor("op_12723_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12723_end_mask_0 = const()[name = tensor("op_12723_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12723_cast_fp16 = slice_by_index(begin = var_12723_begin_0, end = var_12723_end_0, end_mask = var_12723_end_mask_0, x = var_12676_cast_fp16)[name = tensor("op_12723_cast_fp16")]; tensor var_12724_begin_0 = const()[name = tensor("op_12724_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12724_end_0 = const()[name = tensor("op_12724_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12724_end_mask_0 = const()[name = tensor("op_12724_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12724_cast_fp16 = slice_by_index(begin = var_12724_begin_0, end = var_12724_end_0, end_mask = var_12724_end_mask_0, x = var_12676_cast_fp16)[name = tensor("op_12724_cast_fp16")]; tensor var_12725_begin_0 = const()[name = tensor("op_12725_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12725_end_0 = const()[name = tensor("op_12725_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12725_end_mask_0 = const()[name = tensor("op_12725_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12725_cast_fp16 = slice_by_index(begin = var_12725_begin_0, end = var_12725_end_0, end_mask = var_12725_end_mask_0, x = var_12676_cast_fp16)[name = tensor("op_12725_cast_fp16")]; tensor var_12726_begin_0 = const()[name = tensor("op_12726_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12726_end_0 = const()[name = tensor("op_12726_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12726_end_mask_0 = const()[name = tensor("op_12726_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12726_cast_fp16 = slice_by_index(begin = var_12726_begin_0, end = var_12726_end_0, end_mask = var_12726_end_mask_0, x = var_12676_cast_fp16)[name = tensor("op_12726_cast_fp16")]; tensor var_12727_begin_0 = const()[name = tensor("op_12727_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12727_end_0 = const()[name = tensor("op_12727_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12727_end_mask_0 = const()[name = tensor("op_12727_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12727_cast_fp16 = slice_by_index(begin = var_12727_begin_0, end = var_12727_end_0, end_mask = var_12727_end_mask_0, x = var_12680_cast_fp16)[name = tensor("op_12727_cast_fp16")]; tensor var_12728_begin_0 = const()[name = tensor("op_12728_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12728_end_0 = const()[name = tensor("op_12728_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12728_end_mask_0 = const()[name = tensor("op_12728_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12728_cast_fp16 = slice_by_index(begin = var_12728_begin_0, end = var_12728_end_0, end_mask = var_12728_end_mask_0, x = var_12680_cast_fp16)[name = tensor("op_12728_cast_fp16")]; tensor var_12729_begin_0 = const()[name = tensor("op_12729_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12729_end_0 = const()[name = tensor("op_12729_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12729_end_mask_0 = const()[name = tensor("op_12729_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12729_cast_fp16 = slice_by_index(begin = var_12729_begin_0, end = var_12729_end_0, end_mask = var_12729_end_mask_0, x = var_12680_cast_fp16)[name = tensor("op_12729_cast_fp16")]; tensor var_12730_begin_0 = const()[name = tensor("op_12730_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12730_end_0 = const()[name = tensor("op_12730_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12730_end_mask_0 = const()[name = tensor("op_12730_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12730_cast_fp16 = slice_by_index(begin = var_12730_begin_0, end = var_12730_end_0, end_mask = var_12730_end_mask_0, x = var_12680_cast_fp16)[name = tensor("op_12730_cast_fp16")]; tensor var_12731_begin_0 = const()[name = tensor("op_12731_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12731_end_0 = const()[name = tensor("op_12731_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12731_end_mask_0 = const()[name = tensor("op_12731_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12731_cast_fp16 = slice_by_index(begin = var_12731_begin_0, end = var_12731_end_0, end_mask = var_12731_end_mask_0, x = var_12680_cast_fp16)[name = tensor("op_12731_cast_fp16")]; tensor var_12732_begin_0 = const()[name = tensor("op_12732_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12732_end_0 = const()[name = tensor("op_12732_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12732_end_mask_0 = const()[name = tensor("op_12732_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12732_cast_fp16 = slice_by_index(begin = var_12732_begin_0, end = var_12732_end_0, end_mask = var_12732_end_mask_0, x = var_12680_cast_fp16)[name = tensor("op_12732_cast_fp16")]; tensor var_12733_begin_0 = const()[name = tensor("op_12733_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12733_end_0 = const()[name = tensor("op_12733_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12733_end_mask_0 = const()[name = tensor("op_12733_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12733_cast_fp16 = slice_by_index(begin = var_12733_begin_0, end = var_12733_end_0, end_mask = var_12733_end_mask_0, x = var_12680_cast_fp16)[name = tensor("op_12733_cast_fp16")]; tensor var_12734_begin_0 = const()[name = tensor("op_12734_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12734_end_0 = const()[name = tensor("op_12734_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12734_end_mask_0 = const()[name = tensor("op_12734_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12734_cast_fp16 = slice_by_index(begin = var_12734_begin_0, end = var_12734_end_0, end_mask = var_12734_end_mask_0, x = var_12680_cast_fp16)[name = tensor("op_12734_cast_fp16")]; tensor var_12735_begin_0 = const()[name = tensor("op_12735_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12735_end_0 = const()[name = tensor("op_12735_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12735_end_mask_0 = const()[name = tensor("op_12735_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12735_cast_fp16 = slice_by_index(begin = var_12735_begin_0, end = var_12735_end_0, end_mask = var_12735_end_mask_0, x = var_12684_cast_fp16)[name = tensor("op_12735_cast_fp16")]; tensor var_12736_begin_0 = const()[name = tensor("op_12736_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12736_end_0 = const()[name = tensor("op_12736_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12736_end_mask_0 = const()[name = tensor("op_12736_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12736_cast_fp16 = slice_by_index(begin = var_12736_begin_0, end = var_12736_end_0, end_mask = var_12736_end_mask_0, x = var_12684_cast_fp16)[name = tensor("op_12736_cast_fp16")]; tensor var_12737_begin_0 = const()[name = tensor("op_12737_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12737_end_0 = const()[name = tensor("op_12737_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12737_end_mask_0 = const()[name = tensor("op_12737_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12737_cast_fp16 = slice_by_index(begin = var_12737_begin_0, end = var_12737_end_0, end_mask = var_12737_end_mask_0, x = var_12684_cast_fp16)[name = tensor("op_12737_cast_fp16")]; tensor var_12738_begin_0 = const()[name = tensor("op_12738_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12738_end_0 = const()[name = tensor("op_12738_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12738_end_mask_0 = const()[name = tensor("op_12738_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12738_cast_fp16 = slice_by_index(begin = var_12738_begin_0, end = var_12738_end_0, end_mask = var_12738_end_mask_0, x = var_12684_cast_fp16)[name = tensor("op_12738_cast_fp16")]; tensor var_12739_begin_0 = const()[name = tensor("op_12739_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12739_end_0 = const()[name = tensor("op_12739_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12739_end_mask_0 = const()[name = tensor("op_12739_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12739_cast_fp16 = slice_by_index(begin = var_12739_begin_0, end = var_12739_end_0, end_mask = var_12739_end_mask_0, x = var_12684_cast_fp16)[name = tensor("op_12739_cast_fp16")]; tensor var_12740_begin_0 = const()[name = tensor("op_12740_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12740_end_0 = const()[name = tensor("op_12740_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12740_end_mask_0 = const()[name = tensor("op_12740_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12740_cast_fp16 = slice_by_index(begin = var_12740_begin_0, end = var_12740_end_0, end_mask = var_12740_end_mask_0, x = var_12684_cast_fp16)[name = tensor("op_12740_cast_fp16")]; tensor var_12741_begin_0 = const()[name = tensor("op_12741_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12741_end_0 = const()[name = tensor("op_12741_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12741_end_mask_0 = const()[name = tensor("op_12741_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12741_cast_fp16 = slice_by_index(begin = var_12741_begin_0, end = var_12741_end_0, end_mask = var_12741_end_mask_0, x = var_12684_cast_fp16)[name = tensor("op_12741_cast_fp16")]; tensor var_12742_begin_0 = const()[name = tensor("op_12742_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12742_end_0 = const()[name = tensor("op_12742_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12742_end_mask_0 = const()[name = tensor("op_12742_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12742_cast_fp16 = slice_by_index(begin = var_12742_begin_0, end = var_12742_end_0, end_mask = var_12742_end_mask_0, x = var_12684_cast_fp16)[name = tensor("op_12742_cast_fp16")]; tensor var_12743_begin_0 = const()[name = tensor("op_12743_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12743_end_0 = const()[name = tensor("op_12743_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12743_end_mask_0 = const()[name = tensor("op_12743_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12743_cast_fp16 = slice_by_index(begin = var_12743_begin_0, end = var_12743_end_0, end_mask = var_12743_end_mask_0, x = var_12688_cast_fp16)[name = tensor("op_12743_cast_fp16")]; tensor var_12744_begin_0 = const()[name = tensor("op_12744_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12744_end_0 = const()[name = tensor("op_12744_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12744_end_mask_0 = const()[name = tensor("op_12744_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12744_cast_fp16 = slice_by_index(begin = var_12744_begin_0, end = var_12744_end_0, end_mask = var_12744_end_mask_0, x = var_12688_cast_fp16)[name = tensor("op_12744_cast_fp16")]; tensor var_12745_begin_0 = const()[name = tensor("op_12745_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12745_end_0 = const()[name = tensor("op_12745_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12745_end_mask_0 = const()[name = tensor("op_12745_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12745_cast_fp16 = slice_by_index(begin = var_12745_begin_0, end = var_12745_end_0, end_mask = var_12745_end_mask_0, x = var_12688_cast_fp16)[name = tensor("op_12745_cast_fp16")]; tensor var_12746_begin_0 = const()[name = tensor("op_12746_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12746_end_0 = const()[name = tensor("op_12746_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12746_end_mask_0 = const()[name = tensor("op_12746_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12746_cast_fp16 = slice_by_index(begin = var_12746_begin_0, end = var_12746_end_0, end_mask = var_12746_end_mask_0, x = var_12688_cast_fp16)[name = tensor("op_12746_cast_fp16")]; tensor var_12747_begin_0 = const()[name = tensor("op_12747_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12747_end_0 = const()[name = tensor("op_12747_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12747_end_mask_0 = const()[name = tensor("op_12747_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12747_cast_fp16 = slice_by_index(begin = var_12747_begin_0, end = var_12747_end_0, end_mask = var_12747_end_mask_0, x = var_12688_cast_fp16)[name = tensor("op_12747_cast_fp16")]; tensor var_12748_begin_0 = const()[name = tensor("op_12748_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12748_end_0 = const()[name = tensor("op_12748_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12748_end_mask_0 = const()[name = tensor("op_12748_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12748_cast_fp16 = slice_by_index(begin = var_12748_begin_0, end = var_12748_end_0, end_mask = var_12748_end_mask_0, x = var_12688_cast_fp16)[name = tensor("op_12748_cast_fp16")]; tensor var_12749_begin_0 = const()[name = tensor("op_12749_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12749_end_0 = const()[name = tensor("op_12749_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12749_end_mask_0 = const()[name = tensor("op_12749_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12749_cast_fp16 = slice_by_index(begin = var_12749_begin_0, end = var_12749_end_0, end_mask = var_12749_end_mask_0, x = var_12688_cast_fp16)[name = tensor("op_12749_cast_fp16")]; tensor var_12750_begin_0 = const()[name = tensor("op_12750_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12750_end_0 = const()[name = tensor("op_12750_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12750_end_mask_0 = const()[name = tensor("op_12750_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12750_cast_fp16 = slice_by_index(begin = var_12750_begin_0, end = var_12750_end_0, end_mask = var_12750_end_mask_0, x = var_12688_cast_fp16)[name = tensor("op_12750_cast_fp16")]; tensor var_12751_begin_0 = const()[name = tensor("op_12751_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12751_end_0 = const()[name = tensor("op_12751_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_12751_end_mask_0 = const()[name = tensor("op_12751_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12751_cast_fp16 = slice_by_index(begin = var_12751_begin_0, end = var_12751_end_0, end_mask = var_12751_end_mask_0, x = var_12692_cast_fp16)[name = tensor("op_12751_cast_fp16")]; tensor var_12752_begin_0 = const()[name = tensor("op_12752_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12752_end_0 = const()[name = tensor("op_12752_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_12752_end_mask_0 = const()[name = tensor("op_12752_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12752_cast_fp16 = slice_by_index(begin = var_12752_begin_0, end = var_12752_end_0, end_mask = var_12752_end_mask_0, x = var_12692_cast_fp16)[name = tensor("op_12752_cast_fp16")]; tensor var_12753_begin_0 = const()[name = tensor("op_12753_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12753_end_0 = const()[name = tensor("op_12753_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_12753_end_mask_0 = const()[name = tensor("op_12753_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12753_cast_fp16 = slice_by_index(begin = var_12753_begin_0, end = var_12753_end_0, end_mask = var_12753_end_mask_0, x = var_12692_cast_fp16)[name = tensor("op_12753_cast_fp16")]; tensor var_12754_begin_0 = const()[name = tensor("op_12754_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_12754_end_0 = const()[name = tensor("op_12754_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_12754_end_mask_0 = const()[name = tensor("op_12754_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12754_cast_fp16 = slice_by_index(begin = var_12754_begin_0, end = var_12754_end_0, end_mask = var_12754_end_mask_0, x = var_12692_cast_fp16)[name = tensor("op_12754_cast_fp16")]; tensor var_12755_begin_0 = const()[name = tensor("op_12755_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_12755_end_0 = const()[name = tensor("op_12755_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_12755_end_mask_0 = const()[name = tensor("op_12755_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12755_cast_fp16 = slice_by_index(begin = var_12755_begin_0, end = var_12755_end_0, end_mask = var_12755_end_mask_0, x = var_12692_cast_fp16)[name = tensor("op_12755_cast_fp16")]; tensor var_12756_begin_0 = const()[name = tensor("op_12756_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_12756_end_0 = const()[name = tensor("op_12756_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_12756_end_mask_0 = const()[name = tensor("op_12756_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12756_cast_fp16 = slice_by_index(begin = var_12756_begin_0, end = var_12756_end_0, end_mask = var_12756_end_mask_0, x = var_12692_cast_fp16)[name = tensor("op_12756_cast_fp16")]; tensor var_12757_begin_0 = const()[name = tensor("op_12757_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_12757_end_0 = const()[name = tensor("op_12757_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_12757_end_mask_0 = const()[name = tensor("op_12757_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12757_cast_fp16 = slice_by_index(begin = var_12757_begin_0, end = var_12757_end_0, end_mask = var_12757_end_mask_0, x = var_12692_cast_fp16)[name = tensor("op_12757_cast_fp16")]; tensor var_12758_begin_0 = const()[name = tensor("op_12758_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_12758_end_0 = const()[name = tensor("op_12758_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_12758_end_mask_0 = const()[name = tensor("op_12758_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12758_cast_fp16 = slice_by_index(begin = var_12758_begin_0, end = var_12758_end_0, end_mask = var_12758_end_mask_0, x = var_12692_cast_fp16)[name = tensor("op_12758_cast_fp16")]; tensor k_119_perm_0 = const()[name = tensor("k_119_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_12763_begin_0 = const()[name = tensor("op_12763_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12763_end_0 = const()[name = tensor("op_12763_end_0"), val = tensor([2, 77, 1, 40])]; tensor var_12763_end_mask_0 = const()[name = tensor("op_12763_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_2 = transpose(perm = k_119_perm_0, x = k_117_cast_fp16)[name = tensor("transpose_2")]; tensor var_12763_cast_fp16 = slice_by_index(begin = var_12763_begin_0, end = var_12763_end_0, end_mask = var_12763_end_mask_0, x = transpose_2)[name = tensor("op_12763_cast_fp16")]; tensor var_12767_begin_0 = const()[name = tensor("op_12767_begin_0"), val = tensor([0, 0, 0, 40])]; tensor var_12767_end_0 = const()[name = tensor("op_12767_end_0"), val = tensor([2, 77, 1, 80])]; tensor var_12767_end_mask_0 = const()[name = tensor("op_12767_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12767_cast_fp16 = slice_by_index(begin = var_12767_begin_0, end = var_12767_end_0, end_mask = var_12767_end_mask_0, x = transpose_2)[name = tensor("op_12767_cast_fp16")]; tensor var_12771_begin_0 = const()[name = tensor("op_12771_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_12771_end_0 = const()[name = tensor("op_12771_end_0"), val = tensor([2, 77, 1, 120])]; tensor var_12771_end_mask_0 = const()[name = tensor("op_12771_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12771_cast_fp16 = slice_by_index(begin = var_12771_begin_0, end = var_12771_end_0, end_mask = var_12771_end_mask_0, x = transpose_2)[name = tensor("op_12771_cast_fp16")]; tensor var_12775_begin_0 = const()[name = tensor("op_12775_begin_0"), val = tensor([0, 0, 0, 120])]; tensor var_12775_end_0 = const()[name = tensor("op_12775_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_12775_end_mask_0 = const()[name = tensor("op_12775_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12775_cast_fp16 = slice_by_index(begin = var_12775_begin_0, end = var_12775_end_0, end_mask = var_12775_end_mask_0, x = transpose_2)[name = tensor("op_12775_cast_fp16")]; tensor var_12779_begin_0 = const()[name = tensor("op_12779_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_12779_end_0 = const()[name = tensor("op_12779_end_0"), val = tensor([2, 77, 1, 200])]; tensor var_12779_end_mask_0 = const()[name = tensor("op_12779_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12779_cast_fp16 = slice_by_index(begin = var_12779_begin_0, end = var_12779_end_0, end_mask = var_12779_end_mask_0, x = transpose_2)[name = tensor("op_12779_cast_fp16")]; tensor var_12783_begin_0 = const()[name = tensor("op_12783_begin_0"), val = tensor([0, 0, 0, 200])]; tensor var_12783_end_0 = const()[name = tensor("op_12783_end_0"), val = tensor([2, 77, 1, 240])]; tensor var_12783_end_mask_0 = const()[name = tensor("op_12783_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12783_cast_fp16 = slice_by_index(begin = var_12783_begin_0, end = var_12783_end_0, end_mask = var_12783_end_mask_0, x = transpose_2)[name = tensor("op_12783_cast_fp16")]; tensor var_12787_begin_0 = const()[name = tensor("op_12787_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_12787_end_0 = const()[name = tensor("op_12787_end_0"), val = tensor([2, 77, 1, 280])]; tensor var_12787_end_mask_0 = const()[name = tensor("op_12787_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12787_cast_fp16 = slice_by_index(begin = var_12787_begin_0, end = var_12787_end_0, end_mask = var_12787_end_mask_0, x = transpose_2)[name = tensor("op_12787_cast_fp16")]; tensor var_12791_begin_0 = const()[name = tensor("op_12791_begin_0"), val = tensor([0, 0, 0, 280])]; tensor var_12791_end_0 = const()[name = tensor("op_12791_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_12791_end_mask_0 = const()[name = tensor("op_12791_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12791_cast_fp16 = slice_by_index(begin = var_12791_begin_0, end = var_12791_end_0, end_mask = var_12791_end_mask_0, x = transpose_2)[name = tensor("op_12791_cast_fp16")]; tensor var_12793_begin_0 = const()[name = tensor("op_12793_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12793_end_0 = const()[name = tensor("op_12793_end_0"), val = tensor([2, 40, 1, 77])]; tensor var_12793_end_mask_0 = const()[name = tensor("op_12793_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12793_cast_fp16 = slice_by_index(begin = var_12793_begin_0, end = var_12793_end_0, end_mask = var_12793_end_mask_0, x = v_59_cast_fp16)[name = tensor("op_12793_cast_fp16")]; tensor var_12797_begin_0 = const()[name = tensor("op_12797_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_12797_end_0 = const()[name = tensor("op_12797_end_0"), val = tensor([2, 80, 1, 77])]; tensor var_12797_end_mask_0 = const()[name = tensor("op_12797_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12797_cast_fp16 = slice_by_index(begin = var_12797_begin_0, end = var_12797_end_0, end_mask = var_12797_end_mask_0, x = v_59_cast_fp16)[name = tensor("op_12797_cast_fp16")]; tensor var_12801_begin_0 = const()[name = tensor("op_12801_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_12801_end_0 = const()[name = tensor("op_12801_end_0"), val = tensor([2, 120, 1, 77])]; tensor var_12801_end_mask_0 = const()[name = tensor("op_12801_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12801_cast_fp16 = slice_by_index(begin = var_12801_begin_0, end = var_12801_end_0, end_mask = var_12801_end_mask_0, x = v_59_cast_fp16)[name = tensor("op_12801_cast_fp16")]; tensor var_12805_begin_0 = const()[name = tensor("op_12805_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_12805_end_0 = const()[name = tensor("op_12805_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_12805_end_mask_0 = const()[name = tensor("op_12805_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12805_cast_fp16 = slice_by_index(begin = var_12805_begin_0, end = var_12805_end_0, end_mask = var_12805_end_mask_0, x = v_59_cast_fp16)[name = tensor("op_12805_cast_fp16")]; tensor var_12809_begin_0 = const()[name = tensor("op_12809_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_12809_end_0 = const()[name = tensor("op_12809_end_0"), val = tensor([2, 200, 1, 77])]; tensor var_12809_end_mask_0 = const()[name = tensor("op_12809_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12809_cast_fp16 = slice_by_index(begin = var_12809_begin_0, end = var_12809_end_0, end_mask = var_12809_end_mask_0, x = v_59_cast_fp16)[name = tensor("op_12809_cast_fp16")]; tensor var_12813_begin_0 = const()[name = tensor("op_12813_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_12813_end_0 = const()[name = tensor("op_12813_end_0"), val = tensor([2, 240, 1, 77])]; tensor var_12813_end_mask_0 = const()[name = tensor("op_12813_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12813_cast_fp16 = slice_by_index(begin = var_12813_begin_0, end = var_12813_end_0, end_mask = var_12813_end_mask_0, x = v_59_cast_fp16)[name = tensor("op_12813_cast_fp16")]; tensor var_12817_begin_0 = const()[name = tensor("op_12817_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_12817_end_0 = const()[name = tensor("op_12817_end_0"), val = tensor([2, 280, 1, 77])]; tensor var_12817_end_mask_0 = const()[name = tensor("op_12817_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12817_cast_fp16 = slice_by_index(begin = var_12817_begin_0, end = var_12817_end_0, end_mask = var_12817_end_mask_0, x = v_59_cast_fp16)[name = tensor("op_12817_cast_fp16")]; tensor var_12821_begin_0 = const()[name = tensor("op_12821_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_12821_end_0 = const()[name = tensor("op_12821_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_12821_end_mask_0 = const()[name = tensor("op_12821_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12821_cast_fp16 = slice_by_index(begin = var_12821_begin_0, end = var_12821_end_0, end_mask = var_12821_end_mask_0, x = v_59_cast_fp16)[name = tensor("op_12821_cast_fp16")]; tensor var_12825_equation_0 = const()[name = tensor("op_12825_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12825_cast_fp16 = einsum(equation = var_12825_equation_0, values = (var_12763_cast_fp16, var_12695_cast_fp16))[name = tensor("op_12825_cast_fp16")]; tensor var_12826_to_fp16 = const()[name = tensor("op_12826_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1217_cast_fp16 = mul(x = var_12825_cast_fp16, y = var_12826_to_fp16)[name = tensor("aw_chunk_1217_cast_fp16")]; tensor var_12829_equation_0 = const()[name = tensor("op_12829_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12829_cast_fp16 = einsum(equation = var_12829_equation_0, values = (var_12763_cast_fp16, var_12696_cast_fp16))[name = tensor("op_12829_cast_fp16")]; tensor var_12830_to_fp16 = const()[name = tensor("op_12830_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1219_cast_fp16 = mul(x = var_12829_cast_fp16, y = var_12830_to_fp16)[name = tensor("aw_chunk_1219_cast_fp16")]; tensor var_12833_equation_0 = const()[name = tensor("op_12833_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12833_cast_fp16 = einsum(equation = var_12833_equation_0, values = (var_12763_cast_fp16, var_12697_cast_fp16))[name = tensor("op_12833_cast_fp16")]; tensor var_12834_to_fp16 = const()[name = tensor("op_12834_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1221_cast_fp16 = mul(x = var_12833_cast_fp16, y = var_12834_to_fp16)[name = tensor("aw_chunk_1221_cast_fp16")]; tensor var_12837_equation_0 = const()[name = tensor("op_12837_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12837_cast_fp16 = einsum(equation = var_12837_equation_0, values = (var_12763_cast_fp16, var_12698_cast_fp16))[name = tensor("op_12837_cast_fp16")]; tensor var_12838_to_fp16 = const()[name = tensor("op_12838_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1223_cast_fp16 = mul(x = var_12837_cast_fp16, y = var_12838_to_fp16)[name = tensor("aw_chunk_1223_cast_fp16")]; tensor var_12841_equation_0 = const()[name = tensor("op_12841_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12841_cast_fp16 = einsum(equation = var_12841_equation_0, values = (var_12763_cast_fp16, var_12699_cast_fp16))[name = tensor("op_12841_cast_fp16")]; tensor var_12842_to_fp16 = const()[name = tensor("op_12842_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1225_cast_fp16 = mul(x = var_12841_cast_fp16, y = var_12842_to_fp16)[name = tensor("aw_chunk_1225_cast_fp16")]; tensor var_12845_equation_0 = const()[name = tensor("op_12845_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12845_cast_fp16 = einsum(equation = var_12845_equation_0, values = (var_12763_cast_fp16, var_12700_cast_fp16))[name = tensor("op_12845_cast_fp16")]; tensor var_12846_to_fp16 = const()[name = tensor("op_12846_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1227_cast_fp16 = mul(x = var_12845_cast_fp16, y = var_12846_to_fp16)[name = tensor("aw_chunk_1227_cast_fp16")]; tensor var_12849_equation_0 = const()[name = tensor("op_12849_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12849_cast_fp16 = einsum(equation = var_12849_equation_0, values = (var_12763_cast_fp16, var_12701_cast_fp16))[name = tensor("op_12849_cast_fp16")]; tensor var_12850_to_fp16 = const()[name = tensor("op_12850_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1229_cast_fp16 = mul(x = var_12849_cast_fp16, y = var_12850_to_fp16)[name = tensor("aw_chunk_1229_cast_fp16")]; tensor var_12853_equation_0 = const()[name = tensor("op_12853_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12853_cast_fp16 = einsum(equation = var_12853_equation_0, values = (var_12763_cast_fp16, var_12702_cast_fp16))[name = tensor("op_12853_cast_fp16")]; tensor var_12854_to_fp16 = const()[name = tensor("op_12854_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1231_cast_fp16 = mul(x = var_12853_cast_fp16, y = var_12854_to_fp16)[name = tensor("aw_chunk_1231_cast_fp16")]; tensor var_12857_equation_0 = const()[name = tensor("op_12857_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12857_cast_fp16 = einsum(equation = var_12857_equation_0, values = (var_12767_cast_fp16, var_12703_cast_fp16))[name = tensor("op_12857_cast_fp16")]; tensor var_12858_to_fp16 = const()[name = tensor("op_12858_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1233_cast_fp16 = mul(x = var_12857_cast_fp16, y = var_12858_to_fp16)[name = tensor("aw_chunk_1233_cast_fp16")]; tensor var_12861_equation_0 = const()[name = tensor("op_12861_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12861_cast_fp16 = einsum(equation = var_12861_equation_0, values = (var_12767_cast_fp16, var_12704_cast_fp16))[name = tensor("op_12861_cast_fp16")]; tensor var_12862_to_fp16 = const()[name = tensor("op_12862_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1235_cast_fp16 = mul(x = var_12861_cast_fp16, y = var_12862_to_fp16)[name = tensor("aw_chunk_1235_cast_fp16")]; tensor var_12865_equation_0 = const()[name = tensor("op_12865_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12865_cast_fp16 = einsum(equation = var_12865_equation_0, values = (var_12767_cast_fp16, var_12705_cast_fp16))[name = tensor("op_12865_cast_fp16")]; tensor var_12866_to_fp16 = const()[name = tensor("op_12866_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1237_cast_fp16 = mul(x = var_12865_cast_fp16, y = var_12866_to_fp16)[name = tensor("aw_chunk_1237_cast_fp16")]; tensor var_12869_equation_0 = const()[name = tensor("op_12869_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12869_cast_fp16 = einsum(equation = var_12869_equation_0, values = (var_12767_cast_fp16, var_12706_cast_fp16))[name = tensor("op_12869_cast_fp16")]; tensor var_12870_to_fp16 = const()[name = tensor("op_12870_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1239_cast_fp16 = mul(x = var_12869_cast_fp16, y = var_12870_to_fp16)[name = tensor("aw_chunk_1239_cast_fp16")]; tensor var_12873_equation_0 = const()[name = tensor("op_12873_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12873_cast_fp16 = einsum(equation = var_12873_equation_0, values = (var_12767_cast_fp16, var_12707_cast_fp16))[name = tensor("op_12873_cast_fp16")]; tensor var_12874_to_fp16 = const()[name = tensor("op_12874_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1241_cast_fp16 = mul(x = var_12873_cast_fp16, y = var_12874_to_fp16)[name = tensor("aw_chunk_1241_cast_fp16")]; tensor var_12877_equation_0 = const()[name = tensor("op_12877_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12877_cast_fp16 = einsum(equation = var_12877_equation_0, values = (var_12767_cast_fp16, var_12708_cast_fp16))[name = tensor("op_12877_cast_fp16")]; tensor var_12878_to_fp16 = const()[name = tensor("op_12878_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1243_cast_fp16 = mul(x = var_12877_cast_fp16, y = var_12878_to_fp16)[name = tensor("aw_chunk_1243_cast_fp16")]; tensor var_12881_equation_0 = const()[name = tensor("op_12881_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12881_cast_fp16 = einsum(equation = var_12881_equation_0, values = (var_12767_cast_fp16, var_12709_cast_fp16))[name = tensor("op_12881_cast_fp16")]; tensor var_12882_to_fp16 = const()[name = tensor("op_12882_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1245_cast_fp16 = mul(x = var_12881_cast_fp16, y = var_12882_to_fp16)[name = tensor("aw_chunk_1245_cast_fp16")]; tensor var_12885_equation_0 = const()[name = tensor("op_12885_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12885_cast_fp16 = einsum(equation = var_12885_equation_0, values = (var_12767_cast_fp16, var_12710_cast_fp16))[name = tensor("op_12885_cast_fp16")]; tensor var_12886_to_fp16 = const()[name = tensor("op_12886_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1247_cast_fp16 = mul(x = var_12885_cast_fp16, y = var_12886_to_fp16)[name = tensor("aw_chunk_1247_cast_fp16")]; tensor var_12889_equation_0 = const()[name = tensor("op_12889_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12889_cast_fp16 = einsum(equation = var_12889_equation_0, values = (var_12771_cast_fp16, var_12711_cast_fp16))[name = tensor("op_12889_cast_fp16")]; tensor var_12890_to_fp16 = const()[name = tensor("op_12890_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1249_cast_fp16 = mul(x = var_12889_cast_fp16, y = var_12890_to_fp16)[name = tensor("aw_chunk_1249_cast_fp16")]; tensor var_12893_equation_0 = const()[name = tensor("op_12893_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12893_cast_fp16 = einsum(equation = var_12893_equation_0, values = (var_12771_cast_fp16, var_12712_cast_fp16))[name = tensor("op_12893_cast_fp16")]; tensor var_12894_to_fp16 = const()[name = tensor("op_12894_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1251_cast_fp16 = mul(x = var_12893_cast_fp16, y = var_12894_to_fp16)[name = tensor("aw_chunk_1251_cast_fp16")]; tensor var_12897_equation_0 = const()[name = tensor("op_12897_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12897_cast_fp16 = einsum(equation = var_12897_equation_0, values = (var_12771_cast_fp16, var_12713_cast_fp16))[name = tensor("op_12897_cast_fp16")]; tensor var_12898_to_fp16 = const()[name = tensor("op_12898_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1253_cast_fp16 = mul(x = var_12897_cast_fp16, y = var_12898_to_fp16)[name = tensor("aw_chunk_1253_cast_fp16")]; tensor var_12901_equation_0 = const()[name = tensor("op_12901_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12901_cast_fp16 = einsum(equation = var_12901_equation_0, values = (var_12771_cast_fp16, var_12714_cast_fp16))[name = tensor("op_12901_cast_fp16")]; tensor var_12902_to_fp16 = const()[name = tensor("op_12902_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1255_cast_fp16 = mul(x = var_12901_cast_fp16, y = var_12902_to_fp16)[name = tensor("aw_chunk_1255_cast_fp16")]; tensor var_12905_equation_0 = const()[name = tensor("op_12905_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12905_cast_fp16 = einsum(equation = var_12905_equation_0, values = (var_12771_cast_fp16, var_12715_cast_fp16))[name = tensor("op_12905_cast_fp16")]; tensor var_12906_to_fp16 = const()[name = tensor("op_12906_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1257_cast_fp16 = mul(x = var_12905_cast_fp16, y = var_12906_to_fp16)[name = tensor("aw_chunk_1257_cast_fp16")]; tensor var_12909_equation_0 = const()[name = tensor("op_12909_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12909_cast_fp16 = einsum(equation = var_12909_equation_0, values = (var_12771_cast_fp16, var_12716_cast_fp16))[name = tensor("op_12909_cast_fp16")]; tensor var_12910_to_fp16 = const()[name = tensor("op_12910_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1259_cast_fp16 = mul(x = var_12909_cast_fp16, y = var_12910_to_fp16)[name = tensor("aw_chunk_1259_cast_fp16")]; tensor var_12913_equation_0 = const()[name = tensor("op_12913_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12913_cast_fp16 = einsum(equation = var_12913_equation_0, values = (var_12771_cast_fp16, var_12717_cast_fp16))[name = tensor("op_12913_cast_fp16")]; tensor var_12914_to_fp16 = const()[name = tensor("op_12914_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1261_cast_fp16 = mul(x = var_12913_cast_fp16, y = var_12914_to_fp16)[name = tensor("aw_chunk_1261_cast_fp16")]; tensor var_12917_equation_0 = const()[name = tensor("op_12917_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12917_cast_fp16 = einsum(equation = var_12917_equation_0, values = (var_12771_cast_fp16, var_12718_cast_fp16))[name = tensor("op_12917_cast_fp16")]; tensor var_12918_to_fp16 = const()[name = tensor("op_12918_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1263_cast_fp16 = mul(x = var_12917_cast_fp16, y = var_12918_to_fp16)[name = tensor("aw_chunk_1263_cast_fp16")]; tensor var_12921_equation_0 = const()[name = tensor("op_12921_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12921_cast_fp16 = einsum(equation = var_12921_equation_0, values = (var_12775_cast_fp16, var_12719_cast_fp16))[name = tensor("op_12921_cast_fp16")]; tensor var_12922_to_fp16 = const()[name = tensor("op_12922_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1265_cast_fp16 = mul(x = var_12921_cast_fp16, y = var_12922_to_fp16)[name = tensor("aw_chunk_1265_cast_fp16")]; tensor var_12925_equation_0 = const()[name = tensor("op_12925_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12925_cast_fp16 = einsum(equation = var_12925_equation_0, values = (var_12775_cast_fp16, var_12720_cast_fp16))[name = tensor("op_12925_cast_fp16")]; tensor var_12926_to_fp16 = const()[name = tensor("op_12926_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1267_cast_fp16 = mul(x = var_12925_cast_fp16, y = var_12926_to_fp16)[name = tensor("aw_chunk_1267_cast_fp16")]; tensor var_12929_equation_0 = const()[name = tensor("op_12929_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12929_cast_fp16 = einsum(equation = var_12929_equation_0, values = (var_12775_cast_fp16, var_12721_cast_fp16))[name = tensor("op_12929_cast_fp16")]; tensor var_12930_to_fp16 = const()[name = tensor("op_12930_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1269_cast_fp16 = mul(x = var_12929_cast_fp16, y = var_12930_to_fp16)[name = tensor("aw_chunk_1269_cast_fp16")]; tensor var_12933_equation_0 = const()[name = tensor("op_12933_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12933_cast_fp16 = einsum(equation = var_12933_equation_0, values = (var_12775_cast_fp16, var_12722_cast_fp16))[name = tensor("op_12933_cast_fp16")]; tensor var_12934_to_fp16 = const()[name = tensor("op_12934_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1271_cast_fp16 = mul(x = var_12933_cast_fp16, y = var_12934_to_fp16)[name = tensor("aw_chunk_1271_cast_fp16")]; tensor var_12937_equation_0 = const()[name = tensor("op_12937_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12937_cast_fp16 = einsum(equation = var_12937_equation_0, values = (var_12775_cast_fp16, var_12723_cast_fp16))[name = tensor("op_12937_cast_fp16")]; tensor var_12938_to_fp16 = const()[name = tensor("op_12938_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1273_cast_fp16 = mul(x = var_12937_cast_fp16, y = var_12938_to_fp16)[name = tensor("aw_chunk_1273_cast_fp16")]; tensor var_12941_equation_0 = const()[name = tensor("op_12941_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12941_cast_fp16 = einsum(equation = var_12941_equation_0, values = (var_12775_cast_fp16, var_12724_cast_fp16))[name = tensor("op_12941_cast_fp16")]; tensor var_12942_to_fp16 = const()[name = tensor("op_12942_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1275_cast_fp16 = mul(x = var_12941_cast_fp16, y = var_12942_to_fp16)[name = tensor("aw_chunk_1275_cast_fp16")]; tensor var_12945_equation_0 = const()[name = tensor("op_12945_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12945_cast_fp16 = einsum(equation = var_12945_equation_0, values = (var_12775_cast_fp16, var_12725_cast_fp16))[name = tensor("op_12945_cast_fp16")]; tensor var_12946_to_fp16 = const()[name = tensor("op_12946_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1277_cast_fp16 = mul(x = var_12945_cast_fp16, y = var_12946_to_fp16)[name = tensor("aw_chunk_1277_cast_fp16")]; tensor var_12949_equation_0 = const()[name = tensor("op_12949_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12949_cast_fp16 = einsum(equation = var_12949_equation_0, values = (var_12775_cast_fp16, var_12726_cast_fp16))[name = tensor("op_12949_cast_fp16")]; tensor var_12950_to_fp16 = const()[name = tensor("op_12950_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1279_cast_fp16 = mul(x = var_12949_cast_fp16, y = var_12950_to_fp16)[name = tensor("aw_chunk_1279_cast_fp16")]; tensor var_12953_equation_0 = const()[name = tensor("op_12953_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12953_cast_fp16 = einsum(equation = var_12953_equation_0, values = (var_12779_cast_fp16, var_12727_cast_fp16))[name = tensor("op_12953_cast_fp16")]; tensor var_12954_to_fp16 = const()[name = tensor("op_12954_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1281_cast_fp16 = mul(x = var_12953_cast_fp16, y = var_12954_to_fp16)[name = tensor("aw_chunk_1281_cast_fp16")]; tensor var_12957_equation_0 = const()[name = tensor("op_12957_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12957_cast_fp16 = einsum(equation = var_12957_equation_0, values = (var_12779_cast_fp16, var_12728_cast_fp16))[name = tensor("op_12957_cast_fp16")]; tensor var_12958_to_fp16 = const()[name = tensor("op_12958_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1283_cast_fp16 = mul(x = var_12957_cast_fp16, y = var_12958_to_fp16)[name = tensor("aw_chunk_1283_cast_fp16")]; tensor var_12961_equation_0 = const()[name = tensor("op_12961_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12961_cast_fp16 = einsum(equation = var_12961_equation_0, values = (var_12779_cast_fp16, var_12729_cast_fp16))[name = tensor("op_12961_cast_fp16")]; tensor var_12962_to_fp16 = const()[name = tensor("op_12962_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1285_cast_fp16 = mul(x = var_12961_cast_fp16, y = var_12962_to_fp16)[name = tensor("aw_chunk_1285_cast_fp16")]; tensor var_12965_equation_0 = const()[name = tensor("op_12965_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12965_cast_fp16 = einsum(equation = var_12965_equation_0, values = (var_12779_cast_fp16, var_12730_cast_fp16))[name = tensor("op_12965_cast_fp16")]; tensor var_12966_to_fp16 = const()[name = tensor("op_12966_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1287_cast_fp16 = mul(x = var_12965_cast_fp16, y = var_12966_to_fp16)[name = tensor("aw_chunk_1287_cast_fp16")]; tensor var_12969_equation_0 = const()[name = tensor("op_12969_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12969_cast_fp16 = einsum(equation = var_12969_equation_0, values = (var_12779_cast_fp16, var_12731_cast_fp16))[name = tensor("op_12969_cast_fp16")]; tensor var_12970_to_fp16 = const()[name = tensor("op_12970_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1289_cast_fp16 = mul(x = var_12969_cast_fp16, y = var_12970_to_fp16)[name = tensor("aw_chunk_1289_cast_fp16")]; tensor var_12973_equation_0 = const()[name = tensor("op_12973_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12973_cast_fp16 = einsum(equation = var_12973_equation_0, values = (var_12779_cast_fp16, var_12732_cast_fp16))[name = tensor("op_12973_cast_fp16")]; tensor var_12974_to_fp16 = const()[name = tensor("op_12974_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1291_cast_fp16 = mul(x = var_12973_cast_fp16, y = var_12974_to_fp16)[name = tensor("aw_chunk_1291_cast_fp16")]; tensor var_12977_equation_0 = const()[name = tensor("op_12977_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12977_cast_fp16 = einsum(equation = var_12977_equation_0, values = (var_12779_cast_fp16, var_12733_cast_fp16))[name = tensor("op_12977_cast_fp16")]; tensor var_12978_to_fp16 = const()[name = tensor("op_12978_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1293_cast_fp16 = mul(x = var_12977_cast_fp16, y = var_12978_to_fp16)[name = tensor("aw_chunk_1293_cast_fp16")]; tensor var_12981_equation_0 = const()[name = tensor("op_12981_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12981_cast_fp16 = einsum(equation = var_12981_equation_0, values = (var_12779_cast_fp16, var_12734_cast_fp16))[name = tensor("op_12981_cast_fp16")]; tensor var_12982_to_fp16 = const()[name = tensor("op_12982_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1295_cast_fp16 = mul(x = var_12981_cast_fp16, y = var_12982_to_fp16)[name = tensor("aw_chunk_1295_cast_fp16")]; tensor var_12985_equation_0 = const()[name = tensor("op_12985_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12985_cast_fp16 = einsum(equation = var_12985_equation_0, values = (var_12783_cast_fp16, var_12735_cast_fp16))[name = tensor("op_12985_cast_fp16")]; tensor var_12986_to_fp16 = const()[name = tensor("op_12986_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1297_cast_fp16 = mul(x = var_12985_cast_fp16, y = var_12986_to_fp16)[name = tensor("aw_chunk_1297_cast_fp16")]; tensor var_12989_equation_0 = const()[name = tensor("op_12989_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12989_cast_fp16 = einsum(equation = var_12989_equation_0, values = (var_12783_cast_fp16, var_12736_cast_fp16))[name = tensor("op_12989_cast_fp16")]; tensor var_12990_to_fp16 = const()[name = tensor("op_12990_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1299_cast_fp16 = mul(x = var_12989_cast_fp16, y = var_12990_to_fp16)[name = tensor("aw_chunk_1299_cast_fp16")]; tensor var_12993_equation_0 = const()[name = tensor("op_12993_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12993_cast_fp16 = einsum(equation = var_12993_equation_0, values = (var_12783_cast_fp16, var_12737_cast_fp16))[name = tensor("op_12993_cast_fp16")]; tensor var_12994_to_fp16 = const()[name = tensor("op_12994_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1301_cast_fp16 = mul(x = var_12993_cast_fp16, y = var_12994_to_fp16)[name = tensor("aw_chunk_1301_cast_fp16")]; tensor var_12997_equation_0 = const()[name = tensor("op_12997_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_12997_cast_fp16 = einsum(equation = var_12997_equation_0, values = (var_12783_cast_fp16, var_12738_cast_fp16))[name = tensor("op_12997_cast_fp16")]; tensor var_12998_to_fp16 = const()[name = tensor("op_12998_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1303_cast_fp16 = mul(x = var_12997_cast_fp16, y = var_12998_to_fp16)[name = tensor("aw_chunk_1303_cast_fp16")]; tensor var_13001_equation_0 = const()[name = tensor("op_13001_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13001_cast_fp16 = einsum(equation = var_13001_equation_0, values = (var_12783_cast_fp16, var_12739_cast_fp16))[name = tensor("op_13001_cast_fp16")]; tensor var_13002_to_fp16 = const()[name = tensor("op_13002_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1305_cast_fp16 = mul(x = var_13001_cast_fp16, y = var_13002_to_fp16)[name = tensor("aw_chunk_1305_cast_fp16")]; tensor var_13005_equation_0 = const()[name = tensor("op_13005_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13005_cast_fp16 = einsum(equation = var_13005_equation_0, values = (var_12783_cast_fp16, var_12740_cast_fp16))[name = tensor("op_13005_cast_fp16")]; tensor var_13006_to_fp16 = const()[name = tensor("op_13006_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1307_cast_fp16 = mul(x = var_13005_cast_fp16, y = var_13006_to_fp16)[name = tensor("aw_chunk_1307_cast_fp16")]; tensor var_13009_equation_0 = const()[name = tensor("op_13009_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13009_cast_fp16 = einsum(equation = var_13009_equation_0, values = (var_12783_cast_fp16, var_12741_cast_fp16))[name = tensor("op_13009_cast_fp16")]; tensor var_13010_to_fp16 = const()[name = tensor("op_13010_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1309_cast_fp16 = mul(x = var_13009_cast_fp16, y = var_13010_to_fp16)[name = tensor("aw_chunk_1309_cast_fp16")]; tensor var_13013_equation_0 = const()[name = tensor("op_13013_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13013_cast_fp16 = einsum(equation = var_13013_equation_0, values = (var_12783_cast_fp16, var_12742_cast_fp16))[name = tensor("op_13013_cast_fp16")]; tensor var_13014_to_fp16 = const()[name = tensor("op_13014_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1311_cast_fp16 = mul(x = var_13013_cast_fp16, y = var_13014_to_fp16)[name = tensor("aw_chunk_1311_cast_fp16")]; tensor var_13017_equation_0 = const()[name = tensor("op_13017_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13017_cast_fp16 = einsum(equation = var_13017_equation_0, values = (var_12787_cast_fp16, var_12743_cast_fp16))[name = tensor("op_13017_cast_fp16")]; tensor var_13018_to_fp16 = const()[name = tensor("op_13018_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1313_cast_fp16 = mul(x = var_13017_cast_fp16, y = var_13018_to_fp16)[name = tensor("aw_chunk_1313_cast_fp16")]; tensor var_13021_equation_0 = const()[name = tensor("op_13021_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13021_cast_fp16 = einsum(equation = var_13021_equation_0, values = (var_12787_cast_fp16, var_12744_cast_fp16))[name = tensor("op_13021_cast_fp16")]; tensor var_13022_to_fp16 = const()[name = tensor("op_13022_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1315_cast_fp16 = mul(x = var_13021_cast_fp16, y = var_13022_to_fp16)[name = tensor("aw_chunk_1315_cast_fp16")]; tensor var_13025_equation_0 = const()[name = tensor("op_13025_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13025_cast_fp16 = einsum(equation = var_13025_equation_0, values = (var_12787_cast_fp16, var_12745_cast_fp16))[name = tensor("op_13025_cast_fp16")]; tensor var_13026_to_fp16 = const()[name = tensor("op_13026_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1317_cast_fp16 = mul(x = var_13025_cast_fp16, y = var_13026_to_fp16)[name = tensor("aw_chunk_1317_cast_fp16")]; tensor var_13029_equation_0 = const()[name = tensor("op_13029_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13029_cast_fp16 = einsum(equation = var_13029_equation_0, values = (var_12787_cast_fp16, var_12746_cast_fp16))[name = tensor("op_13029_cast_fp16")]; tensor var_13030_to_fp16 = const()[name = tensor("op_13030_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1319_cast_fp16 = mul(x = var_13029_cast_fp16, y = var_13030_to_fp16)[name = tensor("aw_chunk_1319_cast_fp16")]; tensor var_13033_equation_0 = const()[name = tensor("op_13033_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13033_cast_fp16 = einsum(equation = var_13033_equation_0, values = (var_12787_cast_fp16, var_12747_cast_fp16))[name = tensor("op_13033_cast_fp16")]; tensor var_13034_to_fp16 = const()[name = tensor("op_13034_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1321_cast_fp16 = mul(x = var_13033_cast_fp16, y = var_13034_to_fp16)[name = tensor("aw_chunk_1321_cast_fp16")]; tensor var_13037_equation_0 = const()[name = tensor("op_13037_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13037_cast_fp16 = einsum(equation = var_13037_equation_0, values = (var_12787_cast_fp16, var_12748_cast_fp16))[name = tensor("op_13037_cast_fp16")]; tensor var_13038_to_fp16 = const()[name = tensor("op_13038_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1323_cast_fp16 = mul(x = var_13037_cast_fp16, y = var_13038_to_fp16)[name = tensor("aw_chunk_1323_cast_fp16")]; tensor var_13041_equation_0 = const()[name = tensor("op_13041_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13041_cast_fp16 = einsum(equation = var_13041_equation_0, values = (var_12787_cast_fp16, var_12749_cast_fp16))[name = tensor("op_13041_cast_fp16")]; tensor var_13042_to_fp16 = const()[name = tensor("op_13042_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1325_cast_fp16 = mul(x = var_13041_cast_fp16, y = var_13042_to_fp16)[name = tensor("aw_chunk_1325_cast_fp16")]; tensor var_13045_equation_0 = const()[name = tensor("op_13045_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13045_cast_fp16 = einsum(equation = var_13045_equation_0, values = (var_12787_cast_fp16, var_12750_cast_fp16))[name = tensor("op_13045_cast_fp16")]; tensor var_13046_to_fp16 = const()[name = tensor("op_13046_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1327_cast_fp16 = mul(x = var_13045_cast_fp16, y = var_13046_to_fp16)[name = tensor("aw_chunk_1327_cast_fp16")]; tensor var_13049_equation_0 = const()[name = tensor("op_13049_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13049_cast_fp16 = einsum(equation = var_13049_equation_0, values = (var_12791_cast_fp16, var_12751_cast_fp16))[name = tensor("op_13049_cast_fp16")]; tensor var_13050_to_fp16 = const()[name = tensor("op_13050_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1329_cast_fp16 = mul(x = var_13049_cast_fp16, y = var_13050_to_fp16)[name = tensor("aw_chunk_1329_cast_fp16")]; tensor var_13053_equation_0 = const()[name = tensor("op_13053_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13053_cast_fp16 = einsum(equation = var_13053_equation_0, values = (var_12791_cast_fp16, var_12752_cast_fp16))[name = tensor("op_13053_cast_fp16")]; tensor var_13054_to_fp16 = const()[name = tensor("op_13054_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1331_cast_fp16 = mul(x = var_13053_cast_fp16, y = var_13054_to_fp16)[name = tensor("aw_chunk_1331_cast_fp16")]; tensor var_13057_equation_0 = const()[name = tensor("op_13057_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13057_cast_fp16 = einsum(equation = var_13057_equation_0, values = (var_12791_cast_fp16, var_12753_cast_fp16))[name = tensor("op_13057_cast_fp16")]; tensor var_13058_to_fp16 = const()[name = tensor("op_13058_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1333_cast_fp16 = mul(x = var_13057_cast_fp16, y = var_13058_to_fp16)[name = tensor("aw_chunk_1333_cast_fp16")]; tensor var_13061_equation_0 = const()[name = tensor("op_13061_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13061_cast_fp16 = einsum(equation = var_13061_equation_0, values = (var_12791_cast_fp16, var_12754_cast_fp16))[name = tensor("op_13061_cast_fp16")]; tensor var_13062_to_fp16 = const()[name = tensor("op_13062_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1335_cast_fp16 = mul(x = var_13061_cast_fp16, y = var_13062_to_fp16)[name = tensor("aw_chunk_1335_cast_fp16")]; tensor var_13065_equation_0 = const()[name = tensor("op_13065_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13065_cast_fp16 = einsum(equation = var_13065_equation_0, values = (var_12791_cast_fp16, var_12755_cast_fp16))[name = tensor("op_13065_cast_fp16")]; tensor var_13066_to_fp16 = const()[name = tensor("op_13066_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1337_cast_fp16 = mul(x = var_13065_cast_fp16, y = var_13066_to_fp16)[name = tensor("aw_chunk_1337_cast_fp16")]; tensor var_13069_equation_0 = const()[name = tensor("op_13069_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13069_cast_fp16 = einsum(equation = var_13069_equation_0, values = (var_12791_cast_fp16, var_12756_cast_fp16))[name = tensor("op_13069_cast_fp16")]; tensor var_13070_to_fp16 = const()[name = tensor("op_13070_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1339_cast_fp16 = mul(x = var_13069_cast_fp16, y = var_13070_to_fp16)[name = tensor("aw_chunk_1339_cast_fp16")]; tensor var_13073_equation_0 = const()[name = tensor("op_13073_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13073_cast_fp16 = einsum(equation = var_13073_equation_0, values = (var_12791_cast_fp16, var_12757_cast_fp16))[name = tensor("op_13073_cast_fp16")]; tensor var_13074_to_fp16 = const()[name = tensor("op_13074_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1341_cast_fp16 = mul(x = var_13073_cast_fp16, y = var_13074_to_fp16)[name = tensor("aw_chunk_1341_cast_fp16")]; tensor var_13077_equation_0 = const()[name = tensor("op_13077_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13077_cast_fp16 = einsum(equation = var_13077_equation_0, values = (var_12791_cast_fp16, var_12758_cast_fp16))[name = tensor("op_13077_cast_fp16")]; tensor var_13078_to_fp16 = const()[name = tensor("op_13078_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1343_cast_fp16 = mul(x = var_13077_cast_fp16, y = var_13078_to_fp16)[name = tensor("aw_chunk_1343_cast_fp16")]; tensor var_13080_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1217_cast_fp16)[name = tensor("op_13080_cast_fp16")]; tensor var_13081_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1219_cast_fp16)[name = tensor("op_13081_cast_fp16")]; tensor var_13082_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1221_cast_fp16)[name = tensor("op_13082_cast_fp16")]; tensor var_13083_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1223_cast_fp16)[name = tensor("op_13083_cast_fp16")]; tensor var_13084_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1225_cast_fp16)[name = tensor("op_13084_cast_fp16")]; tensor var_13085_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1227_cast_fp16)[name = tensor("op_13085_cast_fp16")]; tensor var_13086_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1229_cast_fp16)[name = tensor("op_13086_cast_fp16")]; tensor var_13087_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1231_cast_fp16)[name = tensor("op_13087_cast_fp16")]; tensor var_13088_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1233_cast_fp16)[name = tensor("op_13088_cast_fp16")]; tensor var_13089_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1235_cast_fp16)[name = tensor("op_13089_cast_fp16")]; tensor var_13090_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1237_cast_fp16)[name = tensor("op_13090_cast_fp16")]; tensor var_13091_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1239_cast_fp16)[name = tensor("op_13091_cast_fp16")]; tensor var_13092_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1241_cast_fp16)[name = tensor("op_13092_cast_fp16")]; tensor var_13093_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1243_cast_fp16)[name = tensor("op_13093_cast_fp16")]; tensor var_13094_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1245_cast_fp16)[name = tensor("op_13094_cast_fp16")]; tensor var_13095_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1247_cast_fp16)[name = tensor("op_13095_cast_fp16")]; tensor var_13096_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1249_cast_fp16)[name = tensor("op_13096_cast_fp16")]; tensor var_13097_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1251_cast_fp16)[name = tensor("op_13097_cast_fp16")]; tensor var_13098_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1253_cast_fp16)[name = tensor("op_13098_cast_fp16")]; tensor var_13099_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1255_cast_fp16)[name = tensor("op_13099_cast_fp16")]; tensor var_13100_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1257_cast_fp16)[name = tensor("op_13100_cast_fp16")]; tensor var_13101_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1259_cast_fp16)[name = tensor("op_13101_cast_fp16")]; tensor var_13102_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1261_cast_fp16)[name = tensor("op_13102_cast_fp16")]; tensor var_13103_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1263_cast_fp16)[name = tensor("op_13103_cast_fp16")]; tensor var_13104_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1265_cast_fp16)[name = tensor("op_13104_cast_fp16")]; tensor var_13105_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1267_cast_fp16)[name = tensor("op_13105_cast_fp16")]; tensor var_13106_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1269_cast_fp16)[name = tensor("op_13106_cast_fp16")]; tensor var_13107_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1271_cast_fp16)[name = tensor("op_13107_cast_fp16")]; tensor var_13108_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1273_cast_fp16)[name = tensor("op_13108_cast_fp16")]; tensor var_13109_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1275_cast_fp16)[name = tensor("op_13109_cast_fp16")]; tensor var_13110_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1277_cast_fp16)[name = tensor("op_13110_cast_fp16")]; tensor var_13111_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1279_cast_fp16)[name = tensor("op_13111_cast_fp16")]; tensor var_13112_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1281_cast_fp16)[name = tensor("op_13112_cast_fp16")]; tensor var_13113_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1283_cast_fp16)[name = tensor("op_13113_cast_fp16")]; tensor var_13114_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1285_cast_fp16)[name = tensor("op_13114_cast_fp16")]; tensor var_13115_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1287_cast_fp16)[name = tensor("op_13115_cast_fp16")]; tensor var_13116_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1289_cast_fp16)[name = tensor("op_13116_cast_fp16")]; tensor var_13117_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1291_cast_fp16)[name = tensor("op_13117_cast_fp16")]; tensor var_13118_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1293_cast_fp16)[name = tensor("op_13118_cast_fp16")]; tensor var_13119_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1295_cast_fp16)[name = tensor("op_13119_cast_fp16")]; tensor var_13120_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1297_cast_fp16)[name = tensor("op_13120_cast_fp16")]; tensor var_13121_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1299_cast_fp16)[name = tensor("op_13121_cast_fp16")]; tensor var_13122_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1301_cast_fp16)[name = tensor("op_13122_cast_fp16")]; tensor var_13123_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1303_cast_fp16)[name = tensor("op_13123_cast_fp16")]; tensor var_13124_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1305_cast_fp16)[name = tensor("op_13124_cast_fp16")]; tensor var_13125_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1307_cast_fp16)[name = tensor("op_13125_cast_fp16")]; tensor var_13126_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1309_cast_fp16)[name = tensor("op_13126_cast_fp16")]; tensor var_13127_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1311_cast_fp16)[name = tensor("op_13127_cast_fp16")]; tensor var_13128_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1313_cast_fp16)[name = tensor("op_13128_cast_fp16")]; tensor var_13129_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1315_cast_fp16)[name = tensor("op_13129_cast_fp16")]; tensor var_13130_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1317_cast_fp16)[name = tensor("op_13130_cast_fp16")]; tensor var_13131_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1319_cast_fp16)[name = tensor("op_13131_cast_fp16")]; tensor var_13132_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1321_cast_fp16)[name = tensor("op_13132_cast_fp16")]; tensor var_13133_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1323_cast_fp16)[name = tensor("op_13133_cast_fp16")]; tensor var_13134_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1325_cast_fp16)[name = tensor("op_13134_cast_fp16")]; tensor var_13135_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1327_cast_fp16)[name = tensor("op_13135_cast_fp16")]; tensor var_13136_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1329_cast_fp16)[name = tensor("op_13136_cast_fp16")]; tensor var_13137_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1331_cast_fp16)[name = tensor("op_13137_cast_fp16")]; tensor var_13138_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1333_cast_fp16)[name = tensor("op_13138_cast_fp16")]; tensor var_13139_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1335_cast_fp16)[name = tensor("op_13139_cast_fp16")]; tensor var_13140_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1337_cast_fp16)[name = tensor("op_13140_cast_fp16")]; tensor var_13141_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1339_cast_fp16)[name = tensor("op_13141_cast_fp16")]; tensor var_13142_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1341_cast_fp16)[name = tensor("op_13142_cast_fp16")]; tensor var_13143_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1343_cast_fp16)[name = tensor("op_13143_cast_fp16")]; tensor var_13145_equation_0 = const()[name = tensor("op_13145_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13145_cast_fp16 = einsum(equation = var_13145_equation_0, values = (var_12793_cast_fp16, var_13080_cast_fp16))[name = tensor("op_13145_cast_fp16")]; tensor var_13147_equation_0 = const()[name = tensor("op_13147_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13147_cast_fp16 = einsum(equation = var_13147_equation_0, values = (var_12793_cast_fp16, var_13081_cast_fp16))[name = tensor("op_13147_cast_fp16")]; tensor var_13149_equation_0 = const()[name = tensor("op_13149_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13149_cast_fp16 = einsum(equation = var_13149_equation_0, values = (var_12793_cast_fp16, var_13082_cast_fp16))[name = tensor("op_13149_cast_fp16")]; tensor var_13151_equation_0 = const()[name = tensor("op_13151_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13151_cast_fp16 = einsum(equation = var_13151_equation_0, values = (var_12793_cast_fp16, var_13083_cast_fp16))[name = tensor("op_13151_cast_fp16")]; tensor var_13153_equation_0 = const()[name = tensor("op_13153_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13153_cast_fp16 = einsum(equation = var_13153_equation_0, values = (var_12793_cast_fp16, var_13084_cast_fp16))[name = tensor("op_13153_cast_fp16")]; tensor var_13155_equation_0 = const()[name = tensor("op_13155_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13155_cast_fp16 = einsum(equation = var_13155_equation_0, values = (var_12793_cast_fp16, var_13085_cast_fp16))[name = tensor("op_13155_cast_fp16")]; tensor var_13157_equation_0 = const()[name = tensor("op_13157_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13157_cast_fp16 = einsum(equation = var_13157_equation_0, values = (var_12793_cast_fp16, var_13086_cast_fp16))[name = tensor("op_13157_cast_fp16")]; tensor var_13159_equation_0 = const()[name = tensor("op_13159_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13159_cast_fp16 = einsum(equation = var_13159_equation_0, values = (var_12793_cast_fp16, var_13087_cast_fp16))[name = tensor("op_13159_cast_fp16")]; tensor var_13161_equation_0 = const()[name = tensor("op_13161_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13161_cast_fp16 = einsum(equation = var_13161_equation_0, values = (var_12797_cast_fp16, var_13088_cast_fp16))[name = tensor("op_13161_cast_fp16")]; tensor var_13163_equation_0 = const()[name = tensor("op_13163_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13163_cast_fp16 = einsum(equation = var_13163_equation_0, values = (var_12797_cast_fp16, var_13089_cast_fp16))[name = tensor("op_13163_cast_fp16")]; tensor var_13165_equation_0 = const()[name = tensor("op_13165_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13165_cast_fp16 = einsum(equation = var_13165_equation_0, values = (var_12797_cast_fp16, var_13090_cast_fp16))[name = tensor("op_13165_cast_fp16")]; tensor var_13167_equation_0 = const()[name = tensor("op_13167_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13167_cast_fp16 = einsum(equation = var_13167_equation_0, values = (var_12797_cast_fp16, var_13091_cast_fp16))[name = tensor("op_13167_cast_fp16")]; tensor var_13169_equation_0 = const()[name = tensor("op_13169_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13169_cast_fp16 = einsum(equation = var_13169_equation_0, values = (var_12797_cast_fp16, var_13092_cast_fp16))[name = tensor("op_13169_cast_fp16")]; tensor var_13171_equation_0 = const()[name = tensor("op_13171_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13171_cast_fp16 = einsum(equation = var_13171_equation_0, values = (var_12797_cast_fp16, var_13093_cast_fp16))[name = tensor("op_13171_cast_fp16")]; tensor var_13173_equation_0 = const()[name = tensor("op_13173_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13173_cast_fp16 = einsum(equation = var_13173_equation_0, values = (var_12797_cast_fp16, var_13094_cast_fp16))[name = tensor("op_13173_cast_fp16")]; tensor var_13175_equation_0 = const()[name = tensor("op_13175_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13175_cast_fp16 = einsum(equation = var_13175_equation_0, values = (var_12797_cast_fp16, var_13095_cast_fp16))[name = tensor("op_13175_cast_fp16")]; tensor var_13177_equation_0 = const()[name = tensor("op_13177_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13177_cast_fp16 = einsum(equation = var_13177_equation_0, values = (var_12801_cast_fp16, var_13096_cast_fp16))[name = tensor("op_13177_cast_fp16")]; tensor var_13179_equation_0 = const()[name = tensor("op_13179_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13179_cast_fp16 = einsum(equation = var_13179_equation_0, values = (var_12801_cast_fp16, var_13097_cast_fp16))[name = tensor("op_13179_cast_fp16")]; tensor var_13181_equation_0 = const()[name = tensor("op_13181_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13181_cast_fp16 = einsum(equation = var_13181_equation_0, values = (var_12801_cast_fp16, var_13098_cast_fp16))[name = tensor("op_13181_cast_fp16")]; tensor var_13183_equation_0 = const()[name = tensor("op_13183_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13183_cast_fp16 = einsum(equation = var_13183_equation_0, values = (var_12801_cast_fp16, var_13099_cast_fp16))[name = tensor("op_13183_cast_fp16")]; tensor var_13185_equation_0 = const()[name = tensor("op_13185_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13185_cast_fp16 = einsum(equation = var_13185_equation_0, values = (var_12801_cast_fp16, var_13100_cast_fp16))[name = tensor("op_13185_cast_fp16")]; tensor var_13187_equation_0 = const()[name = tensor("op_13187_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13187_cast_fp16 = einsum(equation = var_13187_equation_0, values = (var_12801_cast_fp16, var_13101_cast_fp16))[name = tensor("op_13187_cast_fp16")]; tensor var_13189_equation_0 = const()[name = tensor("op_13189_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13189_cast_fp16 = einsum(equation = var_13189_equation_0, values = (var_12801_cast_fp16, var_13102_cast_fp16))[name = tensor("op_13189_cast_fp16")]; tensor var_13191_equation_0 = const()[name = tensor("op_13191_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13191_cast_fp16 = einsum(equation = var_13191_equation_0, values = (var_12801_cast_fp16, var_13103_cast_fp16))[name = tensor("op_13191_cast_fp16")]; tensor var_13193_equation_0 = const()[name = tensor("op_13193_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13193_cast_fp16 = einsum(equation = var_13193_equation_0, values = (var_12805_cast_fp16, var_13104_cast_fp16))[name = tensor("op_13193_cast_fp16")]; tensor var_13195_equation_0 = const()[name = tensor("op_13195_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13195_cast_fp16 = einsum(equation = var_13195_equation_0, values = (var_12805_cast_fp16, var_13105_cast_fp16))[name = tensor("op_13195_cast_fp16")]; tensor var_13197_equation_0 = const()[name = tensor("op_13197_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13197_cast_fp16 = einsum(equation = var_13197_equation_0, values = (var_12805_cast_fp16, var_13106_cast_fp16))[name = tensor("op_13197_cast_fp16")]; tensor var_13199_equation_0 = const()[name = tensor("op_13199_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13199_cast_fp16 = einsum(equation = var_13199_equation_0, values = (var_12805_cast_fp16, var_13107_cast_fp16))[name = tensor("op_13199_cast_fp16")]; tensor var_13201_equation_0 = const()[name = tensor("op_13201_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13201_cast_fp16 = einsum(equation = var_13201_equation_0, values = (var_12805_cast_fp16, var_13108_cast_fp16))[name = tensor("op_13201_cast_fp16")]; tensor var_13203_equation_0 = const()[name = tensor("op_13203_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13203_cast_fp16 = einsum(equation = var_13203_equation_0, values = (var_12805_cast_fp16, var_13109_cast_fp16))[name = tensor("op_13203_cast_fp16")]; tensor var_13205_equation_0 = const()[name = tensor("op_13205_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13205_cast_fp16 = einsum(equation = var_13205_equation_0, values = (var_12805_cast_fp16, var_13110_cast_fp16))[name = tensor("op_13205_cast_fp16")]; tensor var_13207_equation_0 = const()[name = tensor("op_13207_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13207_cast_fp16 = einsum(equation = var_13207_equation_0, values = (var_12805_cast_fp16, var_13111_cast_fp16))[name = tensor("op_13207_cast_fp16")]; tensor var_13209_equation_0 = const()[name = tensor("op_13209_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13209_cast_fp16 = einsum(equation = var_13209_equation_0, values = (var_12809_cast_fp16, var_13112_cast_fp16))[name = tensor("op_13209_cast_fp16")]; tensor var_13211_equation_0 = const()[name = tensor("op_13211_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13211_cast_fp16 = einsum(equation = var_13211_equation_0, values = (var_12809_cast_fp16, var_13113_cast_fp16))[name = tensor("op_13211_cast_fp16")]; tensor var_13213_equation_0 = const()[name = tensor("op_13213_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13213_cast_fp16 = einsum(equation = var_13213_equation_0, values = (var_12809_cast_fp16, var_13114_cast_fp16))[name = tensor("op_13213_cast_fp16")]; tensor var_13215_equation_0 = const()[name = tensor("op_13215_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13215_cast_fp16 = einsum(equation = var_13215_equation_0, values = (var_12809_cast_fp16, var_13115_cast_fp16))[name = tensor("op_13215_cast_fp16")]; tensor var_13217_equation_0 = const()[name = tensor("op_13217_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13217_cast_fp16 = einsum(equation = var_13217_equation_0, values = (var_12809_cast_fp16, var_13116_cast_fp16))[name = tensor("op_13217_cast_fp16")]; tensor var_13219_equation_0 = const()[name = tensor("op_13219_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13219_cast_fp16 = einsum(equation = var_13219_equation_0, values = (var_12809_cast_fp16, var_13117_cast_fp16))[name = tensor("op_13219_cast_fp16")]; tensor var_13221_equation_0 = const()[name = tensor("op_13221_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13221_cast_fp16 = einsum(equation = var_13221_equation_0, values = (var_12809_cast_fp16, var_13118_cast_fp16))[name = tensor("op_13221_cast_fp16")]; tensor var_13223_equation_0 = const()[name = tensor("op_13223_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13223_cast_fp16 = einsum(equation = var_13223_equation_0, values = (var_12809_cast_fp16, var_13119_cast_fp16))[name = tensor("op_13223_cast_fp16")]; tensor var_13225_equation_0 = const()[name = tensor("op_13225_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13225_cast_fp16 = einsum(equation = var_13225_equation_0, values = (var_12813_cast_fp16, var_13120_cast_fp16))[name = tensor("op_13225_cast_fp16")]; tensor var_13227_equation_0 = const()[name = tensor("op_13227_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13227_cast_fp16 = einsum(equation = var_13227_equation_0, values = (var_12813_cast_fp16, var_13121_cast_fp16))[name = tensor("op_13227_cast_fp16")]; tensor var_13229_equation_0 = const()[name = tensor("op_13229_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13229_cast_fp16 = einsum(equation = var_13229_equation_0, values = (var_12813_cast_fp16, var_13122_cast_fp16))[name = tensor("op_13229_cast_fp16")]; tensor var_13231_equation_0 = const()[name = tensor("op_13231_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13231_cast_fp16 = einsum(equation = var_13231_equation_0, values = (var_12813_cast_fp16, var_13123_cast_fp16))[name = tensor("op_13231_cast_fp16")]; tensor var_13233_equation_0 = const()[name = tensor("op_13233_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13233_cast_fp16 = einsum(equation = var_13233_equation_0, values = (var_12813_cast_fp16, var_13124_cast_fp16))[name = tensor("op_13233_cast_fp16")]; tensor var_13235_equation_0 = const()[name = tensor("op_13235_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13235_cast_fp16 = einsum(equation = var_13235_equation_0, values = (var_12813_cast_fp16, var_13125_cast_fp16))[name = tensor("op_13235_cast_fp16")]; tensor var_13237_equation_0 = const()[name = tensor("op_13237_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13237_cast_fp16 = einsum(equation = var_13237_equation_0, values = (var_12813_cast_fp16, var_13126_cast_fp16))[name = tensor("op_13237_cast_fp16")]; tensor var_13239_equation_0 = const()[name = tensor("op_13239_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13239_cast_fp16 = einsum(equation = var_13239_equation_0, values = (var_12813_cast_fp16, var_13127_cast_fp16))[name = tensor("op_13239_cast_fp16")]; tensor var_13241_equation_0 = const()[name = tensor("op_13241_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13241_cast_fp16 = einsum(equation = var_13241_equation_0, values = (var_12817_cast_fp16, var_13128_cast_fp16))[name = tensor("op_13241_cast_fp16")]; tensor var_13243_equation_0 = const()[name = tensor("op_13243_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13243_cast_fp16 = einsum(equation = var_13243_equation_0, values = (var_12817_cast_fp16, var_13129_cast_fp16))[name = tensor("op_13243_cast_fp16")]; tensor var_13245_equation_0 = const()[name = tensor("op_13245_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13245_cast_fp16 = einsum(equation = var_13245_equation_0, values = (var_12817_cast_fp16, var_13130_cast_fp16))[name = tensor("op_13245_cast_fp16")]; tensor var_13247_equation_0 = const()[name = tensor("op_13247_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13247_cast_fp16 = einsum(equation = var_13247_equation_0, values = (var_12817_cast_fp16, var_13131_cast_fp16))[name = tensor("op_13247_cast_fp16")]; tensor var_13249_equation_0 = const()[name = tensor("op_13249_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13249_cast_fp16 = einsum(equation = var_13249_equation_0, values = (var_12817_cast_fp16, var_13132_cast_fp16))[name = tensor("op_13249_cast_fp16")]; tensor var_13251_equation_0 = const()[name = tensor("op_13251_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13251_cast_fp16 = einsum(equation = var_13251_equation_0, values = (var_12817_cast_fp16, var_13133_cast_fp16))[name = tensor("op_13251_cast_fp16")]; tensor var_13253_equation_0 = const()[name = tensor("op_13253_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13253_cast_fp16 = einsum(equation = var_13253_equation_0, values = (var_12817_cast_fp16, var_13134_cast_fp16))[name = tensor("op_13253_cast_fp16")]; tensor var_13255_equation_0 = const()[name = tensor("op_13255_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13255_cast_fp16 = einsum(equation = var_13255_equation_0, values = (var_12817_cast_fp16, var_13135_cast_fp16))[name = tensor("op_13255_cast_fp16")]; tensor var_13257_equation_0 = const()[name = tensor("op_13257_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13257_cast_fp16 = einsum(equation = var_13257_equation_0, values = (var_12821_cast_fp16, var_13136_cast_fp16))[name = tensor("op_13257_cast_fp16")]; tensor var_13259_equation_0 = const()[name = tensor("op_13259_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13259_cast_fp16 = einsum(equation = var_13259_equation_0, values = (var_12821_cast_fp16, var_13137_cast_fp16))[name = tensor("op_13259_cast_fp16")]; tensor var_13261_equation_0 = const()[name = tensor("op_13261_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13261_cast_fp16 = einsum(equation = var_13261_equation_0, values = (var_12821_cast_fp16, var_13138_cast_fp16))[name = tensor("op_13261_cast_fp16")]; tensor var_13263_equation_0 = const()[name = tensor("op_13263_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13263_cast_fp16 = einsum(equation = var_13263_equation_0, values = (var_12821_cast_fp16, var_13139_cast_fp16))[name = tensor("op_13263_cast_fp16")]; tensor var_13265_equation_0 = const()[name = tensor("op_13265_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13265_cast_fp16 = einsum(equation = var_13265_equation_0, values = (var_12821_cast_fp16, var_13140_cast_fp16))[name = tensor("op_13265_cast_fp16")]; tensor var_13267_equation_0 = const()[name = tensor("op_13267_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13267_cast_fp16 = einsum(equation = var_13267_equation_0, values = (var_12821_cast_fp16, var_13141_cast_fp16))[name = tensor("op_13267_cast_fp16")]; tensor var_13269_equation_0 = const()[name = tensor("op_13269_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13269_cast_fp16 = einsum(equation = var_13269_equation_0, values = (var_12821_cast_fp16, var_13142_cast_fp16))[name = tensor("op_13269_cast_fp16")]; tensor var_13271_equation_0 = const()[name = tensor("op_13271_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13271_cast_fp16 = einsum(equation = var_13271_equation_0, values = (var_12821_cast_fp16, var_13143_cast_fp16))[name = tensor("op_13271_cast_fp16")]; tensor var_13273_interleave_0 = const()[name = tensor("op_13273_interleave_0"), val = tensor(false)]; tensor var_13273_cast_fp16 = concat(axis = var_10347, interleave = var_13273_interleave_0, values = (var_13145_cast_fp16, var_13147_cast_fp16, var_13149_cast_fp16, var_13151_cast_fp16, var_13153_cast_fp16, var_13155_cast_fp16, var_13157_cast_fp16, var_13159_cast_fp16))[name = tensor("op_13273_cast_fp16")]; tensor var_13275_interleave_0 = const()[name = tensor("op_13275_interleave_0"), val = tensor(false)]; tensor var_13275_cast_fp16 = concat(axis = var_10347, interleave = var_13275_interleave_0, values = (var_13161_cast_fp16, var_13163_cast_fp16, var_13165_cast_fp16, var_13167_cast_fp16, var_13169_cast_fp16, var_13171_cast_fp16, var_13173_cast_fp16, var_13175_cast_fp16))[name = tensor("op_13275_cast_fp16")]; tensor var_13277_interleave_0 = const()[name = tensor("op_13277_interleave_0"), val = tensor(false)]; tensor var_13277_cast_fp16 = concat(axis = var_10347, interleave = var_13277_interleave_0, values = (var_13177_cast_fp16, var_13179_cast_fp16, var_13181_cast_fp16, var_13183_cast_fp16, var_13185_cast_fp16, var_13187_cast_fp16, var_13189_cast_fp16, var_13191_cast_fp16))[name = tensor("op_13277_cast_fp16")]; tensor var_13279_interleave_0 = const()[name = tensor("op_13279_interleave_0"), val = tensor(false)]; tensor var_13279_cast_fp16 = concat(axis = var_10347, interleave = var_13279_interleave_0, values = (var_13193_cast_fp16, var_13195_cast_fp16, var_13197_cast_fp16, var_13199_cast_fp16, var_13201_cast_fp16, var_13203_cast_fp16, var_13205_cast_fp16, var_13207_cast_fp16))[name = tensor("op_13279_cast_fp16")]; tensor var_13281_interleave_0 = const()[name = tensor("op_13281_interleave_0"), val = tensor(false)]; tensor var_13281_cast_fp16 = concat(axis = var_10347, interleave = var_13281_interleave_0, values = (var_13209_cast_fp16, var_13211_cast_fp16, var_13213_cast_fp16, var_13215_cast_fp16, var_13217_cast_fp16, var_13219_cast_fp16, var_13221_cast_fp16, var_13223_cast_fp16))[name = tensor("op_13281_cast_fp16")]; tensor var_13283_interleave_0 = const()[name = tensor("op_13283_interleave_0"), val = tensor(false)]; tensor var_13283_cast_fp16 = concat(axis = var_10347, interleave = var_13283_interleave_0, values = (var_13225_cast_fp16, var_13227_cast_fp16, var_13229_cast_fp16, var_13231_cast_fp16, var_13233_cast_fp16, var_13235_cast_fp16, var_13237_cast_fp16, var_13239_cast_fp16))[name = tensor("op_13283_cast_fp16")]; tensor var_13285_interleave_0 = const()[name = tensor("op_13285_interleave_0"), val = tensor(false)]; tensor var_13285_cast_fp16 = concat(axis = var_10347, interleave = var_13285_interleave_0, values = (var_13241_cast_fp16, var_13243_cast_fp16, var_13245_cast_fp16, var_13247_cast_fp16, var_13249_cast_fp16, var_13251_cast_fp16, var_13253_cast_fp16, var_13255_cast_fp16))[name = tensor("op_13285_cast_fp16")]; tensor var_13287_interleave_0 = const()[name = tensor("op_13287_interleave_0"), val = tensor(false)]; tensor var_13287_cast_fp16 = concat(axis = var_10347, interleave = var_13287_interleave_0, values = (var_13257_cast_fp16, var_13259_cast_fp16, var_13261_cast_fp16, var_13263_cast_fp16, var_13265_cast_fp16, var_13267_cast_fp16, var_13269_cast_fp16, var_13271_cast_fp16))[name = tensor("op_13287_cast_fp16")]; tensor input_493_interleave_0 = const()[name = tensor("input_493_interleave_0"), val = tensor(false)]; tensor input_493_cast_fp16 = concat(axis = var_10375, interleave = input_493_interleave_0, values = (var_13273_cast_fp16, var_13275_cast_fp16, var_13277_cast_fp16, var_13279_cast_fp16, var_13281_cast_fp16, var_13283_cast_fp16, var_13285_cast_fp16, var_13287_cast_fp16))[name = tensor("input_493_cast_fp16")]; tensor var_13293 = const()[name = tensor("op_13293"), val = tensor([1, 1])]; tensor var_13295 = const()[name = tensor("op_13295"), val = tensor([1, 1])]; tensor var_13297_pad_type_0 = const()[name = tensor("op_13297_pad_type_0"), val = tensor("custom")]; tensor var_13297_pad_0 = const()[name = tensor("op_13297_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1704357312)))]; tensor up_blocks_3_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1704562176)))]; tensor var_13297_cast_fp16 = conv(bias = up_blocks_3_attentions_1_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_13295, groups = var_10375, pad = var_13297_pad_0, pad_type = var_13297_pad_type_0, strides = var_13293, weight = up_blocks_3_attentions_1_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_493_cast_fp16)[name = tensor("op_13297_cast_fp16")]; tensor inputs_89_cast_fp16 = add(x = var_13297_cast_fp16, y = inputs_87_cast_fp16)[name = tensor("inputs_89_cast_fp16")]; tensor input_495_axes_0 = const()[name = tensor("input_495_axes_0"), val = tensor([1])]; tensor input_495_gamma_0_to_fp16 = const()[name = tensor("input_495_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1704562880)))]; tensor input_495_beta_0_to_fp16 = const()[name = tensor("input_495_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1704563584)))]; tensor var_13307_to_fp16 = const()[name = tensor("op_13307_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_495_cast_fp16 = layer_norm(axes = input_495_axes_0, beta = input_495_beta_0_to_fp16, epsilon = var_13307_to_fp16, gamma = input_495_gamma_0_to_fp16, x = inputs_89_cast_fp16)[name = tensor("input_495_cast_fp16")]; tensor var_13323 = const()[name = tensor("op_13323"), val = tensor([1, 1])]; tensor var_13325 = const()[name = tensor("op_13325"), val = tensor([1, 1])]; tensor var_13327_pad_type_0 = const()[name = tensor("op_13327_pad_type_0"), val = tensor("custom")]; tensor var_13327_pad_0 = const()[name = tensor("op_13327_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1704564288)))]; tensor up_blocks_3_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1706202752)))]; tensor var_13327_cast_fp16 = conv(bias = up_blocks_3_attentions_1_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_13325, groups = var_10375, pad = var_13327_pad_0, pad_type = var_13327_pad_type_0, strides = var_13323, weight = up_blocks_3_attentions_1_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_495_cast_fp16)[name = tensor("op_13327_cast_fp16")]; tensor var_13328_split_sizes_0 = const()[name = tensor("op_13328_split_sizes_0"), val = tensor([1280, 1280])]; tensor var_13328_axis_0 = const()[name = tensor("op_13328_axis_0"), val = tensor(1)]; tensor var_13328_cast_fp16_0, tensor var_13328_cast_fp16_1 = split(axis = var_13328_axis_0, split_sizes = var_13328_split_sizes_0, x = var_13327_cast_fp16)[name = tensor("op_13328_cast_fp16")]; tensor var_13330_mode_0 = const()[name = tensor("op_13330_mode_0"), val = tensor("EXACT")]; tensor var_13330_cast_fp16 = gelu(mode = var_13330_mode_0, x = var_13328_cast_fp16_1)[name = tensor("op_13330_cast_fp16")]; tensor input_497_cast_fp16 = mul(x = var_13328_cast_fp16_0, y = var_13330_cast_fp16)[name = tensor("input_497_cast_fp16")]; tensor var_13334 = const()[name = tensor("op_13334"), val = tensor([1, 1])]; tensor var_13336 = const()[name = tensor("op_13336"), val = tensor([1, 1])]; tensor var_13338_pad_type_0 = const()[name = tensor("op_13338_pad_type_0"), val = tensor("custom")]; tensor var_13338_pad_0 = const()[name = tensor("op_13338_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1706207936)))]; tensor up_blocks_3_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1707027200)))]; tensor var_13338_cast_fp16 = conv(bias = up_blocks_3_attentions_1_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_13336, groups = var_10375, pad = var_13338_pad_0, pad_type = var_13338_pad_type_0, strides = var_13334, weight = up_blocks_3_attentions_1_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_497_cast_fp16)[name = tensor("op_13338_cast_fp16")]; tensor hidden_states_313_cast_fp16 = add(x = var_13338_cast_fp16, y = inputs_89_cast_fp16)[name = tensor("hidden_states_313_cast_fp16")]; tensor var_13340 = const()[name = tensor("op_13340"), val = tensor([2, 320, 64, 64])]; tensor input_499_cast_fp16 = reshape(shape = var_13340, x = hidden_states_313_cast_fp16)[name = tensor("input_499_cast_fp16")]; tensor var_13344 = const()[name = tensor("op_13344"), val = tensor([1, 1])]; tensor var_13346 = const()[name = tensor("op_13346"), val = tensor([1, 1])]; tensor hidden_states_315_pad_type_0 = const()[name = tensor("hidden_states_315_pad_type_0"), val = tensor("custom")]; tensor hidden_states_315_pad_0 = const()[name = tensor("hidden_states_315_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_1_proj_out_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1707027904)))]; tensor up_blocks_3_attentions_1_proj_out_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_1_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1707232768)))]; tensor hidden_states_315_cast_fp16 = conv(bias = up_blocks_3_attentions_1_proj_out_bias_to_fp16, dilations = var_13346, groups = var_10375, pad = hidden_states_315_pad_0, pad_type = hidden_states_315_pad_type_0, strides = var_13344, weight = up_blocks_3_attentions_1_proj_out_weight_to_fp16, x = input_499_cast_fp16)[name = tensor("hidden_states_315_cast_fp16")]; tensor hidden_states_317_cast_fp16 = add(x = hidden_states_315_cast_fp16, y = hidden_states_303_cast_fp16)[name = tensor("hidden_states_317_cast_fp16")]; tensor input_501_interleave_0 = const()[name = tensor("input_501_interleave_0"), val = tensor(false)]; tensor input_501_cast_fp16 = concat(axis = var_10375, interleave = input_501_interleave_0, values = (hidden_states_317_cast_fp16, input_7_cast_fp16))[name = tensor("input_501_cast_fp16")]; tensor reshape_228_shape_0 = const()[name = tensor("reshape_228_shape_0"), val = tensor([2, 32, 20, 64, 64])]; tensor reshape_228_cast_fp16 = reshape(shape = reshape_228_shape_0, x = input_501_cast_fp16)[name = tensor("reshape_228_cast_fp16")]; tensor reduce_mean_171_axes_0 = const()[name = tensor("reduce_mean_171_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_171_keep_dims_0 = const()[name = tensor("reduce_mean_171_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_171_cast_fp16 = reduce_mean(axes = reduce_mean_171_axes_0, keep_dims = reduce_mean_171_keep_dims_0, x = reshape_228_cast_fp16)[name = tensor("reduce_mean_171_cast_fp16")]; tensor sub_114_cast_fp16 = sub(x = reshape_228_cast_fp16, y = reduce_mean_171_cast_fp16)[name = tensor("sub_114_cast_fp16")]; tensor square_57_cast_fp16 = square(x = sub_114_cast_fp16)[name = tensor("square_57_cast_fp16")]; tensor reduce_mean_173_axes_0 = const()[name = tensor("reduce_mean_173_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_173_keep_dims_0 = const()[name = tensor("reduce_mean_173_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_173_cast_fp16 = reduce_mean(axes = reduce_mean_173_axes_0, keep_dims = reduce_mean_173_keep_dims_0, x = square_57_cast_fp16)[name = tensor("reduce_mean_173_cast_fp16")]; tensor add_114_y_0_to_fp16 = const()[name = tensor("add_114_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_114_cast_fp16 = add(x = reduce_mean_173_cast_fp16, y = add_114_y_0_to_fp16)[name = tensor("add_114_cast_fp16")]; tensor sqrt_57_cast_fp16 = sqrt(x = add_114_cast_fp16)[name = tensor("sqrt_57_cast_fp16")]; tensor real_div_57_cast_fp16 = real_div(x = sub_114_cast_fp16, y = sqrt_57_cast_fp16)[name = tensor("real_div_57_cast_fp16")]; tensor reshape_229_shape_0 = const()[name = tensor("reshape_229_shape_0"), val = tensor([2, 640, 64, 64])]; tensor reshape_229_cast_fp16 = reshape(shape = reshape_229_shape_0, x = real_div_57_cast_fp16)[name = tensor("reshape_229_cast_fp16")]; tensor add_115_gamma_0_to_fp16 = const()[name = tensor("add_115_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1707233472)))]; tensor add_115_beta_0_to_fp16 = const()[name = tensor("add_115_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1707234816)))]; tensor add_115_epsilon_0_to_fp16 = const()[name = tensor("add_115_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_115_cast_fp16 = batch_norm(beta = add_115_beta_0_to_fp16, epsilon = add_115_epsilon_0_to_fp16, gamma = add_115_gamma_0_to_fp16, mean = add_15_mean_0_to_fp16, variance = add_15_variance_0_to_fp16, x = reshape_229_cast_fp16)[name = tensor("add_115_cast_fp16")]; tensor input_505_cast_fp16 = silu(x = add_115_cast_fp16)[name = tensor("input_505_cast_fp16")]; tensor var_13364 = const()[name = tensor("op_13364"), val = tensor([1, 1])]; tensor var_13366 = const()[name = tensor("op_13366"), val = tensor([1, 1])]; tensor hidden_states_319_pad_type_0 = const()[name = tensor("hidden_states_319_pad_type_0"), val = tensor("custom")]; tensor hidden_states_319_pad_0 = const()[name = tensor("hidden_states_319_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_3_resnets_2_conv1_weight_to_fp16 = const()[name = tensor("up_blocks_3_resnets_2_conv1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1707236160)))]; tensor up_blocks_3_resnets_2_conv1_bias_to_fp16 = const()[name = tensor("up_blocks_3_resnets_2_conv1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1710922624)))]; tensor hidden_states_319_cast_fp16 = conv(bias = up_blocks_3_resnets_2_conv1_bias_to_fp16, dilations = var_13366, groups = var_10375, pad = hidden_states_319_pad_0, pad_type = hidden_states_319_pad_type_0, strides = var_13364, weight = up_blocks_3_resnets_2_conv1_weight_to_fp16, x = input_505_cast_fp16)[name = tensor("hidden_states_319_cast_fp16")]; tensor var_13372 = const()[name = tensor("op_13372"), val = tensor([1, 1])]; tensor var_13374 = const()[name = tensor("op_13374"), val = tensor([1, 1])]; tensor temb_pad_type_0 = const()[name = tensor("temb_pad_type_0"), val = tensor("custom")]; tensor temb_pad_0 = const()[name = tensor("temb_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_resnets_2_time_emb_proj_weight_to_fp16 = const()[name = tensor("up_blocks_3_resnets_2_time_emb_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1710923328)))]; tensor up_blocks_3_resnets_2_time_emb_proj_bias_to_fp16 = const()[name = tensor("up_blocks_3_resnets_2_time_emb_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1711742592)))]; tensor temb_cast_fp16 = conv(bias = up_blocks_3_resnets_2_time_emb_proj_bias_to_fp16, dilations = var_13374, groups = var_10375, pad = temb_pad_0, pad_type = temb_pad_type_0, strides = var_13372, weight = up_blocks_3_resnets_2_time_emb_proj_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("temb_cast_fp16")]; tensor input_509_cast_fp16 = add(x = hidden_states_319_cast_fp16, y = temb_cast_fp16)[name = tensor("input_509_cast_fp16")]; tensor reshape_232_shape_0 = const()[name = tensor("reshape_232_shape_0"), val = tensor([2, 32, 10, 64, 64])]; tensor reshape_232_cast_fp16 = reshape(shape = reshape_232_shape_0, x = input_509_cast_fp16)[name = tensor("reshape_232_cast_fp16")]; tensor reduce_mean_174_axes_0 = const()[name = tensor("reduce_mean_174_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_174_keep_dims_0 = const()[name = tensor("reduce_mean_174_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_174_cast_fp16 = reduce_mean(axes = reduce_mean_174_axes_0, keep_dims = reduce_mean_174_keep_dims_0, x = reshape_232_cast_fp16)[name = tensor("reduce_mean_174_cast_fp16")]; tensor sub_116_cast_fp16 = sub(x = reshape_232_cast_fp16, y = reduce_mean_174_cast_fp16)[name = tensor("sub_116_cast_fp16")]; tensor square_58_cast_fp16 = square(x = sub_116_cast_fp16)[name = tensor("square_58_cast_fp16")]; tensor reduce_mean_176_axes_0 = const()[name = tensor("reduce_mean_176_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_176_keep_dims_0 = const()[name = tensor("reduce_mean_176_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_176_cast_fp16 = reduce_mean(axes = reduce_mean_176_axes_0, keep_dims = reduce_mean_176_keep_dims_0, x = square_58_cast_fp16)[name = tensor("reduce_mean_176_cast_fp16")]; tensor add_116_y_0_to_fp16 = const()[name = tensor("add_116_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_116_cast_fp16 = add(x = reduce_mean_176_cast_fp16, y = add_116_y_0_to_fp16)[name = tensor("add_116_cast_fp16")]; tensor sqrt_58_cast_fp16 = sqrt(x = add_116_cast_fp16)[name = tensor("sqrt_58_cast_fp16")]; tensor real_div_58_cast_fp16 = real_div(x = sub_116_cast_fp16, y = sqrt_58_cast_fp16)[name = tensor("real_div_58_cast_fp16")]; tensor reshape_233_shape_0 = const()[name = tensor("reshape_233_shape_0"), val = tensor([2, 320, 64, 64])]; tensor reshape_233_cast_fp16 = reshape(shape = reshape_233_shape_0, x = real_div_58_cast_fp16)[name = tensor("reshape_233_cast_fp16")]; tensor add_117_gamma_0_to_fp16 = const()[name = tensor("add_117_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1711743296)))]; tensor add_117_beta_0_to_fp16 = const()[name = tensor("add_117_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1711744000)))]; tensor add_117_epsilon_0_to_fp16 = const()[name = tensor("add_117_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_117_cast_fp16 = batch_norm(beta = add_117_beta_0_to_fp16, epsilon = add_117_epsilon_0_to_fp16, gamma = add_117_gamma_0_to_fp16, mean = add_1_mean_0_to_fp16, variance = add_1_variance_0_to_fp16, x = reshape_233_cast_fp16)[name = tensor("add_117_cast_fp16")]; tensor input_513_cast_fp16 = silu(x = add_117_cast_fp16)[name = tensor("input_513_cast_fp16")]; tensor var_13384 = const()[name = tensor("op_13384"), val = tensor([1, 1])]; tensor var_13386 = const()[name = tensor("op_13386"), val = tensor([1, 1])]; tensor hidden_states_321_pad_type_0 = const()[name = tensor("hidden_states_321_pad_type_0"), val = tensor("custom")]; tensor hidden_states_321_pad_0 = const()[name = tensor("hidden_states_321_pad_0"), val = tensor([1, 1, 1, 1])]; tensor up_blocks_3_resnets_2_conv2_weight_to_fp16 = const()[name = tensor("up_blocks_3_resnets_2_conv2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1711744704)))]; tensor up_blocks_3_resnets_2_conv2_bias_to_fp16 = const()[name = tensor("up_blocks_3_resnets_2_conv2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1713587968)))]; tensor hidden_states_321_cast_fp16 = conv(bias = up_blocks_3_resnets_2_conv2_bias_to_fp16, dilations = var_13386, groups = var_10375, pad = hidden_states_321_pad_0, pad_type = hidden_states_321_pad_type_0, strides = var_13384, weight = up_blocks_3_resnets_2_conv2_weight_to_fp16, x = input_513_cast_fp16)[name = tensor("hidden_states_321_cast_fp16")]; tensor var_13391 = const()[name = tensor("op_13391"), val = tensor([1, 1])]; tensor var_13393 = const()[name = tensor("op_13393"), val = tensor([1, 1])]; tensor x_pad_type_0 = const()[name = tensor("x_pad_type_0"), val = tensor("custom")]; tensor x_pad_0 = const()[name = tensor("x_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_resnets_2_conv_shortcut_weight_to_fp16 = const()[name = tensor("up_blocks_3_resnets_2_conv_shortcut_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1713588672)))]; tensor up_blocks_3_resnets_2_conv_shortcut_bias_to_fp16 = const()[name = tensor("up_blocks_3_resnets_2_conv_shortcut_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1713998336)))]; tensor x_cast_fp16 = conv(bias = up_blocks_3_resnets_2_conv_shortcut_bias_to_fp16, dilations = var_13393, groups = var_10375, pad = x_pad_0, pad_type = x_pad_type_0, strides = var_13391, weight = up_blocks_3_resnets_2_conv_shortcut_weight_to_fp16, x = input_501_cast_fp16)[name = tensor("x_cast_fp16")]; tensor hidden_states_323_cast_fp16 = add(x = x_cast_fp16, y = hidden_states_321_cast_fp16)[name = tensor("hidden_states_323_cast_fp16")]; tensor reshape_236_shape_0 = const()[name = tensor("reshape_236_shape_0"), val = tensor([2, 32, 10, 64, 64])]; tensor reshape_236_cast_fp16 = reshape(shape = reshape_236_shape_0, x = hidden_states_323_cast_fp16)[name = tensor("reshape_236_cast_fp16")]; tensor reduce_mean_177_axes_0 = const()[name = tensor("reduce_mean_177_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_177_keep_dims_0 = const()[name = tensor("reduce_mean_177_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_177_cast_fp16 = reduce_mean(axes = reduce_mean_177_axes_0, keep_dims = reduce_mean_177_keep_dims_0, x = reshape_236_cast_fp16)[name = tensor("reduce_mean_177_cast_fp16")]; tensor sub_118_cast_fp16 = sub(x = reshape_236_cast_fp16, y = reduce_mean_177_cast_fp16)[name = tensor("sub_118_cast_fp16")]; tensor square_59_cast_fp16 = square(x = sub_118_cast_fp16)[name = tensor("square_59_cast_fp16")]; tensor reduce_mean_179_axes_0 = const()[name = tensor("reduce_mean_179_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_179_keep_dims_0 = const()[name = tensor("reduce_mean_179_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_179_cast_fp16 = reduce_mean(axes = reduce_mean_179_axes_0, keep_dims = reduce_mean_179_keep_dims_0, x = square_59_cast_fp16)[name = tensor("reduce_mean_179_cast_fp16")]; tensor add_118_y_0_to_fp16 = const()[name = tensor("add_118_y_0_to_fp16"), val = tensor(0x1.1p-20)]; tensor add_118_cast_fp16 = add(x = reduce_mean_179_cast_fp16, y = add_118_y_0_to_fp16)[name = tensor("add_118_cast_fp16")]; tensor sqrt_59_cast_fp16 = sqrt(x = add_118_cast_fp16)[name = tensor("sqrt_59_cast_fp16")]; tensor real_div_59_cast_fp16 = real_div(x = sub_118_cast_fp16, y = sqrt_59_cast_fp16)[name = tensor("real_div_59_cast_fp16")]; tensor reshape_237_shape_0 = const()[name = tensor("reshape_237_shape_0"), val = tensor([2, 320, 64, 64])]; tensor reshape_237_cast_fp16 = reshape(shape = reshape_237_shape_0, x = real_div_59_cast_fp16)[name = tensor("reshape_237_cast_fp16")]; tensor add_119_gamma_0_to_fp16 = const()[name = tensor("add_119_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1713999040)))]; tensor add_119_beta_0_to_fp16 = const()[name = tensor("add_119_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1713999744)))]; tensor add_119_epsilon_0_to_fp16 = const()[name = tensor("add_119_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_119_cast_fp16 = batch_norm(beta = add_119_beta_0_to_fp16, epsilon = add_119_epsilon_0_to_fp16, gamma = add_119_gamma_0_to_fp16, mean = add_1_mean_0_to_fp16, variance = add_1_variance_0_to_fp16, x = reshape_237_cast_fp16)[name = tensor("add_119_cast_fp16")]; tensor var_13413 = const()[name = tensor("op_13413"), val = tensor([1, 1])]; tensor var_13415 = const()[name = tensor("op_13415"), val = tensor([1, 1])]; tensor hidden_states_325_pad_type_0 = const()[name = tensor("hidden_states_325_pad_type_0"), val = tensor("custom")]; tensor hidden_states_325_pad_0 = const()[name = tensor("hidden_states_325_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_2_proj_in_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_proj_in_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1714000448)))]; tensor up_blocks_3_attentions_2_proj_in_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_proj_in_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1714205312)))]; tensor hidden_states_325_cast_fp16 = conv(bias = up_blocks_3_attentions_2_proj_in_bias_to_fp16, dilations = var_13415, groups = var_10375, pad = hidden_states_325_pad_0, pad_type = hidden_states_325_pad_type_0, strides = var_13413, weight = up_blocks_3_attentions_2_proj_in_weight_to_fp16, x = add_119_cast_fp16)[name = tensor("hidden_states_325_cast_fp16")]; tensor var_13420 = const()[name = tensor("op_13420"), val = tensor([2, 320, 1, 4096])]; tensor inputs_91_cast_fp16 = reshape(shape = var_13420, x = hidden_states_325_cast_fp16)[name = tensor("inputs_91_cast_fp16")]; tensor hidden_states_327_axes_0 = const()[name = tensor("hidden_states_327_axes_0"), val = tensor([1])]; tensor hidden_states_327_gamma_0_to_fp16 = const()[name = tensor("hidden_states_327_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1714206016)))]; tensor hidden_states_327_beta_0_to_fp16 = const()[name = tensor("hidden_states_327_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1714206720)))]; tensor var_13436_to_fp16 = const()[name = tensor("op_13436_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_327_cast_fp16 = layer_norm(axes = hidden_states_327_axes_0, beta = hidden_states_327_beta_0_to_fp16, epsilon = var_13436_to_fp16, gamma = hidden_states_327_gamma_0_to_fp16, x = inputs_91_cast_fp16)[name = tensor("hidden_states_327_cast_fp16")]; tensor var_13451 = const()[name = tensor("op_13451"), val = tensor([1, 1])]; tensor var_13453 = const()[name = tensor("op_13453"), val = tensor([1, 1])]; tensor q_61_pad_type_0 = const()[name = tensor("q_61_pad_type_0"), val = tensor("custom")]; tensor q_61_pad_0 = const()[name = tensor("q_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_2_transformer_blocks_0_attn1_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_transformer_blocks_0_attn1_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1714207424)))]; tensor q_61_cast_fp16 = conv(dilations = var_13453, groups = var_10375, pad = q_61_pad_0, pad_type = q_61_pad_type_0, strides = var_13451, weight = up_blocks_3_attentions_2_transformer_blocks_0_attn1_to_q_weight_to_fp16, x = hidden_states_327_cast_fp16)[name = tensor("q_61_cast_fp16")]; tensor var_13457 = const()[name = tensor("op_13457"), val = tensor([1, 1])]; tensor var_13459 = const()[name = tensor("op_13459"), val = tensor([1, 1])]; tensor k_121_pad_type_0 = const()[name = tensor("k_121_pad_type_0"), val = tensor("custom")]; tensor k_121_pad_0 = const()[name = tensor("k_121_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_2_transformer_blocks_0_attn1_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_transformer_blocks_0_attn1_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1714412288)))]; tensor k_121_cast_fp16 = conv(dilations = var_13459, groups = var_10375, pad = k_121_pad_0, pad_type = k_121_pad_type_0, strides = var_13457, weight = up_blocks_3_attentions_2_transformer_blocks_0_attn1_to_k_weight_to_fp16, x = hidden_states_327_cast_fp16)[name = tensor("k_121_cast_fp16")]; tensor var_13463 = const()[name = tensor("op_13463"), val = tensor([1, 1])]; tensor var_13465 = const()[name = tensor("op_13465"), val = tensor([1, 1])]; tensor v_61_pad_type_0 = const()[name = tensor("v_61_pad_type_0"), val = tensor("custom")]; tensor v_61_pad_0 = const()[name = tensor("v_61_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_2_transformer_blocks_0_attn1_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_transformer_blocks_0_attn1_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1714617152)))]; tensor v_61_cast_fp16 = conv(dilations = var_13465, groups = var_10375, pad = v_61_pad_0, pad_type = v_61_pad_type_0, strides = var_13463, weight = up_blocks_3_attentions_2_transformer_blocks_0_attn1_to_v_weight_to_fp16, x = hidden_states_327_cast_fp16)[name = tensor("v_61_cast_fp16")]; tensor var_13469_begin_0 = const()[name = tensor("op_13469_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13469_end_0 = const()[name = tensor("op_13469_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_13469_end_mask_0 = const()[name = tensor("op_13469_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13469_cast_fp16 = slice_by_index(begin = var_13469_begin_0, end = var_13469_end_0, end_mask = var_13469_end_mask_0, x = q_61_cast_fp16)[name = tensor("op_13469_cast_fp16")]; tensor var_13473_begin_0 = const()[name = tensor("op_13473_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_13473_end_0 = const()[name = tensor("op_13473_end_0"), val = tensor([2, 80, 1, 4096])]; tensor var_13473_end_mask_0 = const()[name = tensor("op_13473_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13473_cast_fp16 = slice_by_index(begin = var_13473_begin_0, end = var_13473_end_0, end_mask = var_13473_end_mask_0, x = q_61_cast_fp16)[name = tensor("op_13473_cast_fp16")]; tensor var_13477_begin_0 = const()[name = tensor("op_13477_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_13477_end_0 = const()[name = tensor("op_13477_end_0"), val = tensor([2, 120, 1, 4096])]; tensor var_13477_end_mask_0 = const()[name = tensor("op_13477_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13477_cast_fp16 = slice_by_index(begin = var_13477_begin_0, end = var_13477_end_0, end_mask = var_13477_end_mask_0, x = q_61_cast_fp16)[name = tensor("op_13477_cast_fp16")]; tensor var_13481_begin_0 = const()[name = tensor("op_13481_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_13481_end_0 = const()[name = tensor("op_13481_end_0"), val = tensor([2, 160, 1, 4096])]; tensor var_13481_end_mask_0 = const()[name = tensor("op_13481_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13481_cast_fp16 = slice_by_index(begin = var_13481_begin_0, end = var_13481_end_0, end_mask = var_13481_end_mask_0, x = q_61_cast_fp16)[name = tensor("op_13481_cast_fp16")]; tensor var_13485_begin_0 = const()[name = tensor("op_13485_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_13485_end_0 = const()[name = tensor("op_13485_end_0"), val = tensor([2, 200, 1, 4096])]; tensor var_13485_end_mask_0 = const()[name = tensor("op_13485_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13485_cast_fp16 = slice_by_index(begin = var_13485_begin_0, end = var_13485_end_0, end_mask = var_13485_end_mask_0, x = q_61_cast_fp16)[name = tensor("op_13485_cast_fp16")]; tensor var_13489_begin_0 = const()[name = tensor("op_13489_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_13489_end_0 = const()[name = tensor("op_13489_end_0"), val = tensor([2, 240, 1, 4096])]; tensor var_13489_end_mask_0 = const()[name = tensor("op_13489_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13489_cast_fp16 = slice_by_index(begin = var_13489_begin_0, end = var_13489_end_0, end_mask = var_13489_end_mask_0, x = q_61_cast_fp16)[name = tensor("op_13489_cast_fp16")]; tensor var_13493_begin_0 = const()[name = tensor("op_13493_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_13493_end_0 = const()[name = tensor("op_13493_end_0"), val = tensor([2, 280, 1, 4096])]; tensor var_13493_end_mask_0 = const()[name = tensor("op_13493_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13493_cast_fp16 = slice_by_index(begin = var_13493_begin_0, end = var_13493_end_0, end_mask = var_13493_end_mask_0, x = q_61_cast_fp16)[name = tensor("op_13493_cast_fp16")]; tensor var_13497_begin_0 = const()[name = tensor("op_13497_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_13497_end_0 = const()[name = tensor("op_13497_end_0"), val = tensor([2, 320, 1, 4096])]; tensor var_13497_end_mask_0 = const()[name = tensor("op_13497_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13497_cast_fp16 = slice_by_index(begin = var_13497_begin_0, end = var_13497_end_0, end_mask = var_13497_end_mask_0, x = q_61_cast_fp16)[name = tensor("op_13497_cast_fp16")]; tensor var_13500_begin_0 = const()[name = tensor("op_13500_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13500_end_0 = const()[name = tensor("op_13500_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_13500_end_mask_0 = const()[name = tensor("op_13500_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13500_cast_fp16 = slice_by_index(begin = var_13500_begin_0, end = var_13500_end_0, end_mask = var_13500_end_mask_0, x = var_13469_cast_fp16)[name = tensor("op_13500_cast_fp16")]; tensor var_13501_begin_0 = const()[name = tensor("op_13501_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_13501_end_0 = const()[name = tensor("op_13501_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_13501_end_mask_0 = const()[name = tensor("op_13501_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13501_cast_fp16 = slice_by_index(begin = var_13501_begin_0, end = var_13501_end_0, end_mask = var_13501_end_mask_0, x = var_13469_cast_fp16)[name = tensor("op_13501_cast_fp16")]; tensor var_13502_begin_0 = const()[name = tensor("op_13502_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_13502_end_0 = const()[name = tensor("op_13502_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_13502_end_mask_0 = const()[name = tensor("op_13502_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13502_cast_fp16 = slice_by_index(begin = var_13502_begin_0, end = var_13502_end_0, end_mask = var_13502_end_mask_0, x = var_13469_cast_fp16)[name = tensor("op_13502_cast_fp16")]; tensor var_13503_begin_0 = const()[name = tensor("op_13503_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_13503_end_0 = const()[name = tensor("op_13503_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_13503_end_mask_0 = const()[name = tensor("op_13503_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13503_cast_fp16 = slice_by_index(begin = var_13503_begin_0, end = var_13503_end_0, end_mask = var_13503_end_mask_0, x = var_13469_cast_fp16)[name = tensor("op_13503_cast_fp16")]; tensor var_13504_begin_0 = const()[name = tensor("op_13504_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_13504_end_0 = const()[name = tensor("op_13504_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_13504_end_mask_0 = const()[name = tensor("op_13504_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13504_cast_fp16 = slice_by_index(begin = var_13504_begin_0, end = var_13504_end_0, end_mask = var_13504_end_mask_0, x = var_13469_cast_fp16)[name = tensor("op_13504_cast_fp16")]; tensor var_13505_begin_0 = const()[name = tensor("op_13505_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_13505_end_0 = const()[name = tensor("op_13505_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_13505_end_mask_0 = const()[name = tensor("op_13505_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13505_cast_fp16 = slice_by_index(begin = var_13505_begin_0, end = var_13505_end_0, end_mask = var_13505_end_mask_0, x = var_13469_cast_fp16)[name = tensor("op_13505_cast_fp16")]; tensor var_13506_begin_0 = const()[name = tensor("op_13506_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_13506_end_0 = const()[name = tensor("op_13506_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_13506_end_mask_0 = const()[name = tensor("op_13506_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13506_cast_fp16 = slice_by_index(begin = var_13506_begin_0, end = var_13506_end_0, end_mask = var_13506_end_mask_0, x = var_13469_cast_fp16)[name = tensor("op_13506_cast_fp16")]; tensor var_13507_begin_0 = const()[name = tensor("op_13507_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_13507_end_0 = const()[name = tensor("op_13507_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_13507_end_mask_0 = const()[name = tensor("op_13507_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13507_cast_fp16 = slice_by_index(begin = var_13507_begin_0, end = var_13507_end_0, end_mask = var_13507_end_mask_0, x = var_13469_cast_fp16)[name = tensor("op_13507_cast_fp16")]; tensor var_13508_begin_0 = const()[name = tensor("op_13508_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13508_end_0 = const()[name = tensor("op_13508_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_13508_end_mask_0 = const()[name = tensor("op_13508_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13508_cast_fp16 = slice_by_index(begin = var_13508_begin_0, end = var_13508_end_0, end_mask = var_13508_end_mask_0, x = var_13473_cast_fp16)[name = tensor("op_13508_cast_fp16")]; tensor var_13509_begin_0 = const()[name = tensor("op_13509_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_13509_end_0 = const()[name = tensor("op_13509_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_13509_end_mask_0 = const()[name = tensor("op_13509_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13509_cast_fp16 = slice_by_index(begin = var_13509_begin_0, end = var_13509_end_0, end_mask = var_13509_end_mask_0, x = var_13473_cast_fp16)[name = tensor("op_13509_cast_fp16")]; tensor var_13510_begin_0 = const()[name = tensor("op_13510_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_13510_end_0 = const()[name = tensor("op_13510_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_13510_end_mask_0 = const()[name = tensor("op_13510_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13510_cast_fp16 = slice_by_index(begin = var_13510_begin_0, end = var_13510_end_0, end_mask = var_13510_end_mask_0, x = var_13473_cast_fp16)[name = tensor("op_13510_cast_fp16")]; tensor var_13511_begin_0 = const()[name = tensor("op_13511_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_13511_end_0 = const()[name = tensor("op_13511_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_13511_end_mask_0 = const()[name = tensor("op_13511_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13511_cast_fp16 = slice_by_index(begin = var_13511_begin_0, end = var_13511_end_0, end_mask = var_13511_end_mask_0, x = var_13473_cast_fp16)[name = tensor("op_13511_cast_fp16")]; tensor var_13512_begin_0 = const()[name = tensor("op_13512_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_13512_end_0 = const()[name = tensor("op_13512_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_13512_end_mask_0 = const()[name = tensor("op_13512_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13512_cast_fp16 = slice_by_index(begin = var_13512_begin_0, end = var_13512_end_0, end_mask = var_13512_end_mask_0, x = var_13473_cast_fp16)[name = tensor("op_13512_cast_fp16")]; tensor var_13513_begin_0 = const()[name = tensor("op_13513_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_13513_end_0 = const()[name = tensor("op_13513_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_13513_end_mask_0 = const()[name = tensor("op_13513_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13513_cast_fp16 = slice_by_index(begin = var_13513_begin_0, end = var_13513_end_0, end_mask = var_13513_end_mask_0, x = var_13473_cast_fp16)[name = tensor("op_13513_cast_fp16")]; tensor var_13514_begin_0 = const()[name = tensor("op_13514_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_13514_end_0 = const()[name = tensor("op_13514_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_13514_end_mask_0 = const()[name = tensor("op_13514_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13514_cast_fp16 = slice_by_index(begin = var_13514_begin_0, end = var_13514_end_0, end_mask = var_13514_end_mask_0, x = var_13473_cast_fp16)[name = tensor("op_13514_cast_fp16")]; tensor var_13515_begin_0 = const()[name = tensor("op_13515_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_13515_end_0 = const()[name = tensor("op_13515_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_13515_end_mask_0 = const()[name = tensor("op_13515_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13515_cast_fp16 = slice_by_index(begin = var_13515_begin_0, end = var_13515_end_0, end_mask = var_13515_end_mask_0, x = var_13473_cast_fp16)[name = tensor("op_13515_cast_fp16")]; tensor var_13516_begin_0 = const()[name = tensor("op_13516_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13516_end_0 = const()[name = tensor("op_13516_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_13516_end_mask_0 = const()[name = tensor("op_13516_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13516_cast_fp16 = slice_by_index(begin = var_13516_begin_0, end = var_13516_end_0, end_mask = var_13516_end_mask_0, x = var_13477_cast_fp16)[name = tensor("op_13516_cast_fp16")]; tensor var_13517_begin_0 = const()[name = tensor("op_13517_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_13517_end_0 = const()[name = tensor("op_13517_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_13517_end_mask_0 = const()[name = tensor("op_13517_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13517_cast_fp16 = slice_by_index(begin = var_13517_begin_0, end = var_13517_end_0, end_mask = var_13517_end_mask_0, x = var_13477_cast_fp16)[name = tensor("op_13517_cast_fp16")]; tensor var_13518_begin_0 = const()[name = tensor("op_13518_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_13518_end_0 = const()[name = tensor("op_13518_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_13518_end_mask_0 = const()[name = tensor("op_13518_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13518_cast_fp16 = slice_by_index(begin = var_13518_begin_0, end = var_13518_end_0, end_mask = var_13518_end_mask_0, x = var_13477_cast_fp16)[name = tensor("op_13518_cast_fp16")]; tensor var_13519_begin_0 = const()[name = tensor("op_13519_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_13519_end_0 = const()[name = tensor("op_13519_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_13519_end_mask_0 = const()[name = tensor("op_13519_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13519_cast_fp16 = slice_by_index(begin = var_13519_begin_0, end = var_13519_end_0, end_mask = var_13519_end_mask_0, x = var_13477_cast_fp16)[name = tensor("op_13519_cast_fp16")]; tensor var_13520_begin_0 = const()[name = tensor("op_13520_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_13520_end_0 = const()[name = tensor("op_13520_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_13520_end_mask_0 = const()[name = tensor("op_13520_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13520_cast_fp16 = slice_by_index(begin = var_13520_begin_0, end = var_13520_end_0, end_mask = var_13520_end_mask_0, x = var_13477_cast_fp16)[name = tensor("op_13520_cast_fp16")]; tensor var_13521_begin_0 = const()[name = tensor("op_13521_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_13521_end_0 = const()[name = tensor("op_13521_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_13521_end_mask_0 = const()[name = tensor("op_13521_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13521_cast_fp16 = slice_by_index(begin = var_13521_begin_0, end = var_13521_end_0, end_mask = var_13521_end_mask_0, x = var_13477_cast_fp16)[name = tensor("op_13521_cast_fp16")]; tensor var_13522_begin_0 = const()[name = tensor("op_13522_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_13522_end_0 = const()[name = tensor("op_13522_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_13522_end_mask_0 = const()[name = tensor("op_13522_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13522_cast_fp16 = slice_by_index(begin = var_13522_begin_0, end = var_13522_end_0, end_mask = var_13522_end_mask_0, x = var_13477_cast_fp16)[name = tensor("op_13522_cast_fp16")]; tensor var_13523_begin_0 = const()[name = tensor("op_13523_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_13523_end_0 = const()[name = tensor("op_13523_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_13523_end_mask_0 = const()[name = tensor("op_13523_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13523_cast_fp16 = slice_by_index(begin = var_13523_begin_0, end = var_13523_end_0, end_mask = var_13523_end_mask_0, x = var_13477_cast_fp16)[name = tensor("op_13523_cast_fp16")]; tensor var_13524_begin_0 = const()[name = tensor("op_13524_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13524_end_0 = const()[name = tensor("op_13524_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_13524_end_mask_0 = const()[name = tensor("op_13524_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13524_cast_fp16 = slice_by_index(begin = var_13524_begin_0, end = var_13524_end_0, end_mask = var_13524_end_mask_0, x = var_13481_cast_fp16)[name = tensor("op_13524_cast_fp16")]; tensor var_13525_begin_0 = const()[name = tensor("op_13525_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_13525_end_0 = const()[name = tensor("op_13525_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_13525_end_mask_0 = const()[name = tensor("op_13525_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13525_cast_fp16 = slice_by_index(begin = var_13525_begin_0, end = var_13525_end_0, end_mask = var_13525_end_mask_0, x = var_13481_cast_fp16)[name = tensor("op_13525_cast_fp16")]; tensor var_13526_begin_0 = const()[name = tensor("op_13526_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_13526_end_0 = const()[name = tensor("op_13526_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_13526_end_mask_0 = const()[name = tensor("op_13526_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13526_cast_fp16 = slice_by_index(begin = var_13526_begin_0, end = var_13526_end_0, end_mask = var_13526_end_mask_0, x = var_13481_cast_fp16)[name = tensor("op_13526_cast_fp16")]; tensor var_13527_begin_0 = const()[name = tensor("op_13527_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_13527_end_0 = const()[name = tensor("op_13527_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_13527_end_mask_0 = const()[name = tensor("op_13527_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13527_cast_fp16 = slice_by_index(begin = var_13527_begin_0, end = var_13527_end_0, end_mask = var_13527_end_mask_0, x = var_13481_cast_fp16)[name = tensor("op_13527_cast_fp16")]; tensor var_13528_begin_0 = const()[name = tensor("op_13528_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_13528_end_0 = const()[name = tensor("op_13528_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_13528_end_mask_0 = const()[name = tensor("op_13528_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13528_cast_fp16 = slice_by_index(begin = var_13528_begin_0, end = var_13528_end_0, end_mask = var_13528_end_mask_0, x = var_13481_cast_fp16)[name = tensor("op_13528_cast_fp16")]; tensor var_13529_begin_0 = const()[name = tensor("op_13529_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_13529_end_0 = const()[name = tensor("op_13529_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_13529_end_mask_0 = const()[name = tensor("op_13529_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13529_cast_fp16 = slice_by_index(begin = var_13529_begin_0, end = var_13529_end_0, end_mask = var_13529_end_mask_0, x = var_13481_cast_fp16)[name = tensor("op_13529_cast_fp16")]; tensor var_13530_begin_0 = const()[name = tensor("op_13530_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_13530_end_0 = const()[name = tensor("op_13530_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_13530_end_mask_0 = const()[name = tensor("op_13530_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13530_cast_fp16 = slice_by_index(begin = var_13530_begin_0, end = var_13530_end_0, end_mask = var_13530_end_mask_0, x = var_13481_cast_fp16)[name = tensor("op_13530_cast_fp16")]; tensor var_13531_begin_0 = const()[name = tensor("op_13531_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_13531_end_0 = const()[name = tensor("op_13531_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_13531_end_mask_0 = const()[name = tensor("op_13531_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13531_cast_fp16 = slice_by_index(begin = var_13531_begin_0, end = var_13531_end_0, end_mask = var_13531_end_mask_0, x = var_13481_cast_fp16)[name = tensor("op_13531_cast_fp16")]; tensor var_13532_begin_0 = const()[name = tensor("op_13532_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13532_end_0 = const()[name = tensor("op_13532_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_13532_end_mask_0 = const()[name = tensor("op_13532_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13532_cast_fp16 = slice_by_index(begin = var_13532_begin_0, end = var_13532_end_0, end_mask = var_13532_end_mask_0, x = var_13485_cast_fp16)[name = tensor("op_13532_cast_fp16")]; tensor var_13533_begin_0 = const()[name = tensor("op_13533_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_13533_end_0 = const()[name = tensor("op_13533_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_13533_end_mask_0 = const()[name = tensor("op_13533_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13533_cast_fp16 = slice_by_index(begin = var_13533_begin_0, end = var_13533_end_0, end_mask = var_13533_end_mask_0, x = var_13485_cast_fp16)[name = tensor("op_13533_cast_fp16")]; tensor var_13534_begin_0 = const()[name = tensor("op_13534_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_13534_end_0 = const()[name = tensor("op_13534_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_13534_end_mask_0 = const()[name = tensor("op_13534_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13534_cast_fp16 = slice_by_index(begin = var_13534_begin_0, end = var_13534_end_0, end_mask = var_13534_end_mask_0, x = var_13485_cast_fp16)[name = tensor("op_13534_cast_fp16")]; tensor var_13535_begin_0 = const()[name = tensor("op_13535_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_13535_end_0 = const()[name = tensor("op_13535_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_13535_end_mask_0 = const()[name = tensor("op_13535_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13535_cast_fp16 = slice_by_index(begin = var_13535_begin_0, end = var_13535_end_0, end_mask = var_13535_end_mask_0, x = var_13485_cast_fp16)[name = tensor("op_13535_cast_fp16")]; tensor var_13536_begin_0 = const()[name = tensor("op_13536_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_13536_end_0 = const()[name = tensor("op_13536_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_13536_end_mask_0 = const()[name = tensor("op_13536_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13536_cast_fp16 = slice_by_index(begin = var_13536_begin_0, end = var_13536_end_0, end_mask = var_13536_end_mask_0, x = var_13485_cast_fp16)[name = tensor("op_13536_cast_fp16")]; tensor var_13537_begin_0 = const()[name = tensor("op_13537_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_13537_end_0 = const()[name = tensor("op_13537_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_13537_end_mask_0 = const()[name = tensor("op_13537_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13537_cast_fp16 = slice_by_index(begin = var_13537_begin_0, end = var_13537_end_0, end_mask = var_13537_end_mask_0, x = var_13485_cast_fp16)[name = tensor("op_13537_cast_fp16")]; tensor var_13538_begin_0 = const()[name = tensor("op_13538_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_13538_end_0 = const()[name = tensor("op_13538_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_13538_end_mask_0 = const()[name = tensor("op_13538_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13538_cast_fp16 = slice_by_index(begin = var_13538_begin_0, end = var_13538_end_0, end_mask = var_13538_end_mask_0, x = var_13485_cast_fp16)[name = tensor("op_13538_cast_fp16")]; tensor var_13539_begin_0 = const()[name = tensor("op_13539_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_13539_end_0 = const()[name = tensor("op_13539_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_13539_end_mask_0 = const()[name = tensor("op_13539_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13539_cast_fp16 = slice_by_index(begin = var_13539_begin_0, end = var_13539_end_0, end_mask = var_13539_end_mask_0, x = var_13485_cast_fp16)[name = tensor("op_13539_cast_fp16")]; tensor var_13540_begin_0 = const()[name = tensor("op_13540_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13540_end_0 = const()[name = tensor("op_13540_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_13540_end_mask_0 = const()[name = tensor("op_13540_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13540_cast_fp16 = slice_by_index(begin = var_13540_begin_0, end = var_13540_end_0, end_mask = var_13540_end_mask_0, x = var_13489_cast_fp16)[name = tensor("op_13540_cast_fp16")]; tensor var_13541_begin_0 = const()[name = tensor("op_13541_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_13541_end_0 = const()[name = tensor("op_13541_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_13541_end_mask_0 = const()[name = tensor("op_13541_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13541_cast_fp16 = slice_by_index(begin = var_13541_begin_0, end = var_13541_end_0, end_mask = var_13541_end_mask_0, x = var_13489_cast_fp16)[name = tensor("op_13541_cast_fp16")]; tensor var_13542_begin_0 = const()[name = tensor("op_13542_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_13542_end_0 = const()[name = tensor("op_13542_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_13542_end_mask_0 = const()[name = tensor("op_13542_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13542_cast_fp16 = slice_by_index(begin = var_13542_begin_0, end = var_13542_end_0, end_mask = var_13542_end_mask_0, x = var_13489_cast_fp16)[name = tensor("op_13542_cast_fp16")]; tensor var_13543_begin_0 = const()[name = tensor("op_13543_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_13543_end_0 = const()[name = tensor("op_13543_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_13543_end_mask_0 = const()[name = tensor("op_13543_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13543_cast_fp16 = slice_by_index(begin = var_13543_begin_0, end = var_13543_end_0, end_mask = var_13543_end_mask_0, x = var_13489_cast_fp16)[name = tensor("op_13543_cast_fp16")]; tensor var_13544_begin_0 = const()[name = tensor("op_13544_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_13544_end_0 = const()[name = tensor("op_13544_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_13544_end_mask_0 = const()[name = tensor("op_13544_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13544_cast_fp16 = slice_by_index(begin = var_13544_begin_0, end = var_13544_end_0, end_mask = var_13544_end_mask_0, x = var_13489_cast_fp16)[name = tensor("op_13544_cast_fp16")]; tensor var_13545_begin_0 = const()[name = tensor("op_13545_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_13545_end_0 = const()[name = tensor("op_13545_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_13545_end_mask_0 = const()[name = tensor("op_13545_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13545_cast_fp16 = slice_by_index(begin = var_13545_begin_0, end = var_13545_end_0, end_mask = var_13545_end_mask_0, x = var_13489_cast_fp16)[name = tensor("op_13545_cast_fp16")]; tensor var_13546_begin_0 = const()[name = tensor("op_13546_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_13546_end_0 = const()[name = tensor("op_13546_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_13546_end_mask_0 = const()[name = tensor("op_13546_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13546_cast_fp16 = slice_by_index(begin = var_13546_begin_0, end = var_13546_end_0, end_mask = var_13546_end_mask_0, x = var_13489_cast_fp16)[name = tensor("op_13546_cast_fp16")]; tensor var_13547_begin_0 = const()[name = tensor("op_13547_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_13547_end_0 = const()[name = tensor("op_13547_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_13547_end_mask_0 = const()[name = tensor("op_13547_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13547_cast_fp16 = slice_by_index(begin = var_13547_begin_0, end = var_13547_end_0, end_mask = var_13547_end_mask_0, x = var_13489_cast_fp16)[name = tensor("op_13547_cast_fp16")]; tensor var_13548_begin_0 = const()[name = tensor("op_13548_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13548_end_0 = const()[name = tensor("op_13548_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_13548_end_mask_0 = const()[name = tensor("op_13548_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13548_cast_fp16 = slice_by_index(begin = var_13548_begin_0, end = var_13548_end_0, end_mask = var_13548_end_mask_0, x = var_13493_cast_fp16)[name = tensor("op_13548_cast_fp16")]; tensor var_13549_begin_0 = const()[name = tensor("op_13549_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_13549_end_0 = const()[name = tensor("op_13549_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_13549_end_mask_0 = const()[name = tensor("op_13549_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13549_cast_fp16 = slice_by_index(begin = var_13549_begin_0, end = var_13549_end_0, end_mask = var_13549_end_mask_0, x = var_13493_cast_fp16)[name = tensor("op_13549_cast_fp16")]; tensor var_13550_begin_0 = const()[name = tensor("op_13550_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_13550_end_0 = const()[name = tensor("op_13550_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_13550_end_mask_0 = const()[name = tensor("op_13550_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13550_cast_fp16 = slice_by_index(begin = var_13550_begin_0, end = var_13550_end_0, end_mask = var_13550_end_mask_0, x = var_13493_cast_fp16)[name = tensor("op_13550_cast_fp16")]; tensor var_13551_begin_0 = const()[name = tensor("op_13551_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_13551_end_0 = const()[name = tensor("op_13551_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_13551_end_mask_0 = const()[name = tensor("op_13551_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13551_cast_fp16 = slice_by_index(begin = var_13551_begin_0, end = var_13551_end_0, end_mask = var_13551_end_mask_0, x = var_13493_cast_fp16)[name = tensor("op_13551_cast_fp16")]; tensor var_13552_begin_0 = const()[name = tensor("op_13552_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_13552_end_0 = const()[name = tensor("op_13552_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_13552_end_mask_0 = const()[name = tensor("op_13552_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13552_cast_fp16 = slice_by_index(begin = var_13552_begin_0, end = var_13552_end_0, end_mask = var_13552_end_mask_0, x = var_13493_cast_fp16)[name = tensor("op_13552_cast_fp16")]; tensor var_13553_begin_0 = const()[name = tensor("op_13553_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_13553_end_0 = const()[name = tensor("op_13553_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_13553_end_mask_0 = const()[name = tensor("op_13553_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13553_cast_fp16 = slice_by_index(begin = var_13553_begin_0, end = var_13553_end_0, end_mask = var_13553_end_mask_0, x = var_13493_cast_fp16)[name = tensor("op_13553_cast_fp16")]; tensor var_13554_begin_0 = const()[name = tensor("op_13554_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_13554_end_0 = const()[name = tensor("op_13554_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_13554_end_mask_0 = const()[name = tensor("op_13554_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13554_cast_fp16 = slice_by_index(begin = var_13554_begin_0, end = var_13554_end_0, end_mask = var_13554_end_mask_0, x = var_13493_cast_fp16)[name = tensor("op_13554_cast_fp16")]; tensor var_13555_begin_0 = const()[name = tensor("op_13555_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_13555_end_0 = const()[name = tensor("op_13555_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_13555_end_mask_0 = const()[name = tensor("op_13555_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13555_cast_fp16 = slice_by_index(begin = var_13555_begin_0, end = var_13555_end_0, end_mask = var_13555_end_mask_0, x = var_13493_cast_fp16)[name = tensor("op_13555_cast_fp16")]; tensor var_13556_begin_0 = const()[name = tensor("op_13556_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13556_end_0 = const()[name = tensor("op_13556_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_13556_end_mask_0 = const()[name = tensor("op_13556_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13556_cast_fp16 = slice_by_index(begin = var_13556_begin_0, end = var_13556_end_0, end_mask = var_13556_end_mask_0, x = var_13497_cast_fp16)[name = tensor("op_13556_cast_fp16")]; tensor var_13557_begin_0 = const()[name = tensor("op_13557_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_13557_end_0 = const()[name = tensor("op_13557_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_13557_end_mask_0 = const()[name = tensor("op_13557_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13557_cast_fp16 = slice_by_index(begin = var_13557_begin_0, end = var_13557_end_0, end_mask = var_13557_end_mask_0, x = var_13497_cast_fp16)[name = tensor("op_13557_cast_fp16")]; tensor var_13558_begin_0 = const()[name = tensor("op_13558_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_13558_end_0 = const()[name = tensor("op_13558_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_13558_end_mask_0 = const()[name = tensor("op_13558_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13558_cast_fp16 = slice_by_index(begin = var_13558_begin_0, end = var_13558_end_0, end_mask = var_13558_end_mask_0, x = var_13497_cast_fp16)[name = tensor("op_13558_cast_fp16")]; tensor var_13559_begin_0 = const()[name = tensor("op_13559_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_13559_end_0 = const()[name = tensor("op_13559_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_13559_end_mask_0 = const()[name = tensor("op_13559_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13559_cast_fp16 = slice_by_index(begin = var_13559_begin_0, end = var_13559_end_0, end_mask = var_13559_end_mask_0, x = var_13497_cast_fp16)[name = tensor("op_13559_cast_fp16")]; tensor var_13560_begin_0 = const()[name = tensor("op_13560_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_13560_end_0 = const()[name = tensor("op_13560_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_13560_end_mask_0 = const()[name = tensor("op_13560_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13560_cast_fp16 = slice_by_index(begin = var_13560_begin_0, end = var_13560_end_0, end_mask = var_13560_end_mask_0, x = var_13497_cast_fp16)[name = tensor("op_13560_cast_fp16")]; tensor var_13561_begin_0 = const()[name = tensor("op_13561_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_13561_end_0 = const()[name = tensor("op_13561_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_13561_end_mask_0 = const()[name = tensor("op_13561_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13561_cast_fp16 = slice_by_index(begin = var_13561_begin_0, end = var_13561_end_0, end_mask = var_13561_end_mask_0, x = var_13497_cast_fp16)[name = tensor("op_13561_cast_fp16")]; tensor var_13562_begin_0 = const()[name = tensor("op_13562_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_13562_end_0 = const()[name = tensor("op_13562_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_13562_end_mask_0 = const()[name = tensor("op_13562_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13562_cast_fp16 = slice_by_index(begin = var_13562_begin_0, end = var_13562_end_0, end_mask = var_13562_end_mask_0, x = var_13497_cast_fp16)[name = tensor("op_13562_cast_fp16")]; tensor var_13563_begin_0 = const()[name = tensor("op_13563_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_13563_end_0 = const()[name = tensor("op_13563_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_13563_end_mask_0 = const()[name = tensor("op_13563_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13563_cast_fp16 = slice_by_index(begin = var_13563_begin_0, end = var_13563_end_0, end_mask = var_13563_end_mask_0, x = var_13497_cast_fp16)[name = tensor("op_13563_cast_fp16")]; tensor k_123_perm_0 = const()[name = tensor("k_123_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_13568_begin_0 = const()[name = tensor("op_13568_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13568_end_0 = const()[name = tensor("op_13568_end_0"), val = tensor([2, 4096, 1, 40])]; tensor var_13568_end_mask_0 = const()[name = tensor("op_13568_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_1 = transpose(perm = k_123_perm_0, x = k_121_cast_fp16)[name = tensor("transpose_1")]; tensor var_13568_cast_fp16 = slice_by_index(begin = var_13568_begin_0, end = var_13568_end_0, end_mask = var_13568_end_mask_0, x = transpose_1)[name = tensor("op_13568_cast_fp16")]; tensor var_13572_begin_0 = const()[name = tensor("op_13572_begin_0"), val = tensor([0, 0, 0, 40])]; tensor var_13572_end_0 = const()[name = tensor("op_13572_end_0"), val = tensor([2, 4096, 1, 80])]; tensor var_13572_end_mask_0 = const()[name = tensor("op_13572_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13572_cast_fp16 = slice_by_index(begin = var_13572_begin_0, end = var_13572_end_0, end_mask = var_13572_end_mask_0, x = transpose_1)[name = tensor("op_13572_cast_fp16")]; tensor var_13576_begin_0 = const()[name = tensor("op_13576_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_13576_end_0 = const()[name = tensor("op_13576_end_0"), val = tensor([2, 4096, 1, 120])]; tensor var_13576_end_mask_0 = const()[name = tensor("op_13576_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13576_cast_fp16 = slice_by_index(begin = var_13576_begin_0, end = var_13576_end_0, end_mask = var_13576_end_mask_0, x = transpose_1)[name = tensor("op_13576_cast_fp16")]; tensor var_13580_begin_0 = const()[name = tensor("op_13580_begin_0"), val = tensor([0, 0, 0, 120])]; tensor var_13580_end_0 = const()[name = tensor("op_13580_end_0"), val = tensor([2, 4096, 1, 160])]; tensor var_13580_end_mask_0 = const()[name = tensor("op_13580_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13580_cast_fp16 = slice_by_index(begin = var_13580_begin_0, end = var_13580_end_0, end_mask = var_13580_end_mask_0, x = transpose_1)[name = tensor("op_13580_cast_fp16")]; tensor var_13584_begin_0 = const()[name = tensor("op_13584_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_13584_end_0 = const()[name = tensor("op_13584_end_0"), val = tensor([2, 4096, 1, 200])]; tensor var_13584_end_mask_0 = const()[name = tensor("op_13584_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13584_cast_fp16 = slice_by_index(begin = var_13584_begin_0, end = var_13584_end_0, end_mask = var_13584_end_mask_0, x = transpose_1)[name = tensor("op_13584_cast_fp16")]; tensor var_13588_begin_0 = const()[name = tensor("op_13588_begin_0"), val = tensor([0, 0, 0, 200])]; tensor var_13588_end_0 = const()[name = tensor("op_13588_end_0"), val = tensor([2, 4096, 1, 240])]; tensor var_13588_end_mask_0 = const()[name = tensor("op_13588_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13588_cast_fp16 = slice_by_index(begin = var_13588_begin_0, end = var_13588_end_0, end_mask = var_13588_end_mask_0, x = transpose_1)[name = tensor("op_13588_cast_fp16")]; tensor var_13592_begin_0 = const()[name = tensor("op_13592_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_13592_end_0 = const()[name = tensor("op_13592_end_0"), val = tensor([2, 4096, 1, 280])]; tensor var_13592_end_mask_0 = const()[name = tensor("op_13592_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13592_cast_fp16 = slice_by_index(begin = var_13592_begin_0, end = var_13592_end_0, end_mask = var_13592_end_mask_0, x = transpose_1)[name = tensor("op_13592_cast_fp16")]; tensor var_13596_begin_0 = const()[name = tensor("op_13596_begin_0"), val = tensor([0, 0, 0, 280])]; tensor var_13596_end_0 = const()[name = tensor("op_13596_end_0"), val = tensor([2, 4096, 1, 320])]; tensor var_13596_end_mask_0 = const()[name = tensor("op_13596_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13596_cast_fp16 = slice_by_index(begin = var_13596_begin_0, end = var_13596_end_0, end_mask = var_13596_end_mask_0, x = transpose_1)[name = tensor("op_13596_cast_fp16")]; tensor var_13598_begin_0 = const()[name = tensor("op_13598_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13598_end_0 = const()[name = tensor("op_13598_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_13598_end_mask_0 = const()[name = tensor("op_13598_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13598_cast_fp16 = slice_by_index(begin = var_13598_begin_0, end = var_13598_end_0, end_mask = var_13598_end_mask_0, x = v_61_cast_fp16)[name = tensor("op_13598_cast_fp16")]; tensor var_13602_begin_0 = const()[name = tensor("op_13602_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_13602_end_0 = const()[name = tensor("op_13602_end_0"), val = tensor([2, 80, 1, 4096])]; tensor var_13602_end_mask_0 = const()[name = tensor("op_13602_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13602_cast_fp16 = slice_by_index(begin = var_13602_begin_0, end = var_13602_end_0, end_mask = var_13602_end_mask_0, x = v_61_cast_fp16)[name = tensor("op_13602_cast_fp16")]; tensor var_13606_begin_0 = const()[name = tensor("op_13606_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_13606_end_0 = const()[name = tensor("op_13606_end_0"), val = tensor([2, 120, 1, 4096])]; tensor var_13606_end_mask_0 = const()[name = tensor("op_13606_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13606_cast_fp16 = slice_by_index(begin = var_13606_begin_0, end = var_13606_end_0, end_mask = var_13606_end_mask_0, x = v_61_cast_fp16)[name = tensor("op_13606_cast_fp16")]; tensor var_13610_begin_0 = const()[name = tensor("op_13610_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_13610_end_0 = const()[name = tensor("op_13610_end_0"), val = tensor([2, 160, 1, 4096])]; tensor var_13610_end_mask_0 = const()[name = tensor("op_13610_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13610_cast_fp16 = slice_by_index(begin = var_13610_begin_0, end = var_13610_end_0, end_mask = var_13610_end_mask_0, x = v_61_cast_fp16)[name = tensor("op_13610_cast_fp16")]; tensor var_13614_begin_0 = const()[name = tensor("op_13614_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_13614_end_0 = const()[name = tensor("op_13614_end_0"), val = tensor([2, 200, 1, 4096])]; tensor var_13614_end_mask_0 = const()[name = tensor("op_13614_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13614_cast_fp16 = slice_by_index(begin = var_13614_begin_0, end = var_13614_end_0, end_mask = var_13614_end_mask_0, x = v_61_cast_fp16)[name = tensor("op_13614_cast_fp16")]; tensor var_13618_begin_0 = const()[name = tensor("op_13618_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_13618_end_0 = const()[name = tensor("op_13618_end_0"), val = tensor([2, 240, 1, 4096])]; tensor var_13618_end_mask_0 = const()[name = tensor("op_13618_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13618_cast_fp16 = slice_by_index(begin = var_13618_begin_0, end = var_13618_end_0, end_mask = var_13618_end_mask_0, x = v_61_cast_fp16)[name = tensor("op_13618_cast_fp16")]; tensor var_13622_begin_0 = const()[name = tensor("op_13622_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_13622_end_0 = const()[name = tensor("op_13622_end_0"), val = tensor([2, 280, 1, 4096])]; tensor var_13622_end_mask_0 = const()[name = tensor("op_13622_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13622_cast_fp16 = slice_by_index(begin = var_13622_begin_0, end = var_13622_end_0, end_mask = var_13622_end_mask_0, x = v_61_cast_fp16)[name = tensor("op_13622_cast_fp16")]; tensor var_13626_begin_0 = const()[name = tensor("op_13626_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_13626_end_0 = const()[name = tensor("op_13626_end_0"), val = tensor([2, 320, 1, 4096])]; tensor var_13626_end_mask_0 = const()[name = tensor("op_13626_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13626_cast_fp16 = slice_by_index(begin = var_13626_begin_0, end = var_13626_end_0, end_mask = var_13626_end_mask_0, x = v_61_cast_fp16)[name = tensor("op_13626_cast_fp16")]; tensor var_13630_equation_0 = const()[name = tensor("op_13630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13630_cast_fp16 = einsum(equation = var_13630_equation_0, values = (var_13568_cast_fp16, var_13500_cast_fp16))[name = tensor("op_13630_cast_fp16")]; tensor var_13631_to_fp16 = const()[name = tensor("op_13631_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1345_cast_fp16 = mul(x = var_13630_cast_fp16, y = var_13631_to_fp16)[name = tensor("aw_chunk_1345_cast_fp16")]; tensor var_13634_equation_0 = const()[name = tensor("op_13634_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13634_cast_fp16 = einsum(equation = var_13634_equation_0, values = (var_13568_cast_fp16, var_13501_cast_fp16))[name = tensor("op_13634_cast_fp16")]; tensor var_13635_to_fp16 = const()[name = tensor("op_13635_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1347_cast_fp16 = mul(x = var_13634_cast_fp16, y = var_13635_to_fp16)[name = tensor("aw_chunk_1347_cast_fp16")]; tensor var_13638_equation_0 = const()[name = tensor("op_13638_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13638_cast_fp16 = einsum(equation = var_13638_equation_0, values = (var_13568_cast_fp16, var_13502_cast_fp16))[name = tensor("op_13638_cast_fp16")]; tensor var_13639_to_fp16 = const()[name = tensor("op_13639_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1349_cast_fp16 = mul(x = var_13638_cast_fp16, y = var_13639_to_fp16)[name = tensor("aw_chunk_1349_cast_fp16")]; tensor var_13642_equation_0 = const()[name = tensor("op_13642_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13642_cast_fp16 = einsum(equation = var_13642_equation_0, values = (var_13568_cast_fp16, var_13503_cast_fp16))[name = tensor("op_13642_cast_fp16")]; tensor var_13643_to_fp16 = const()[name = tensor("op_13643_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1351_cast_fp16 = mul(x = var_13642_cast_fp16, y = var_13643_to_fp16)[name = tensor("aw_chunk_1351_cast_fp16")]; tensor var_13646_equation_0 = const()[name = tensor("op_13646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13646_cast_fp16 = einsum(equation = var_13646_equation_0, values = (var_13568_cast_fp16, var_13504_cast_fp16))[name = tensor("op_13646_cast_fp16")]; tensor var_13647_to_fp16 = const()[name = tensor("op_13647_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1353_cast_fp16 = mul(x = var_13646_cast_fp16, y = var_13647_to_fp16)[name = tensor("aw_chunk_1353_cast_fp16")]; tensor var_13650_equation_0 = const()[name = tensor("op_13650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13650_cast_fp16 = einsum(equation = var_13650_equation_0, values = (var_13568_cast_fp16, var_13505_cast_fp16))[name = tensor("op_13650_cast_fp16")]; tensor var_13651_to_fp16 = const()[name = tensor("op_13651_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1355_cast_fp16 = mul(x = var_13650_cast_fp16, y = var_13651_to_fp16)[name = tensor("aw_chunk_1355_cast_fp16")]; tensor var_13654_equation_0 = const()[name = tensor("op_13654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13654_cast_fp16 = einsum(equation = var_13654_equation_0, values = (var_13568_cast_fp16, var_13506_cast_fp16))[name = tensor("op_13654_cast_fp16")]; tensor var_13655_to_fp16 = const()[name = tensor("op_13655_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1357_cast_fp16 = mul(x = var_13654_cast_fp16, y = var_13655_to_fp16)[name = tensor("aw_chunk_1357_cast_fp16")]; tensor var_13658_equation_0 = const()[name = tensor("op_13658_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13658_cast_fp16 = einsum(equation = var_13658_equation_0, values = (var_13568_cast_fp16, var_13507_cast_fp16))[name = tensor("op_13658_cast_fp16")]; tensor var_13659_to_fp16 = const()[name = tensor("op_13659_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1359_cast_fp16 = mul(x = var_13658_cast_fp16, y = var_13659_to_fp16)[name = tensor("aw_chunk_1359_cast_fp16")]; tensor var_13662_equation_0 = const()[name = tensor("op_13662_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13662_cast_fp16 = einsum(equation = var_13662_equation_0, values = (var_13572_cast_fp16, var_13508_cast_fp16))[name = tensor("op_13662_cast_fp16")]; tensor var_13663_to_fp16 = const()[name = tensor("op_13663_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1361_cast_fp16 = mul(x = var_13662_cast_fp16, y = var_13663_to_fp16)[name = tensor("aw_chunk_1361_cast_fp16")]; tensor var_13666_equation_0 = const()[name = tensor("op_13666_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13666_cast_fp16 = einsum(equation = var_13666_equation_0, values = (var_13572_cast_fp16, var_13509_cast_fp16))[name = tensor("op_13666_cast_fp16")]; tensor var_13667_to_fp16 = const()[name = tensor("op_13667_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1363_cast_fp16 = mul(x = var_13666_cast_fp16, y = var_13667_to_fp16)[name = tensor("aw_chunk_1363_cast_fp16")]; tensor var_13670_equation_0 = const()[name = tensor("op_13670_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13670_cast_fp16 = einsum(equation = var_13670_equation_0, values = (var_13572_cast_fp16, var_13510_cast_fp16))[name = tensor("op_13670_cast_fp16")]; tensor var_13671_to_fp16 = const()[name = tensor("op_13671_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1365_cast_fp16 = mul(x = var_13670_cast_fp16, y = var_13671_to_fp16)[name = tensor("aw_chunk_1365_cast_fp16")]; tensor var_13674_equation_0 = const()[name = tensor("op_13674_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13674_cast_fp16 = einsum(equation = var_13674_equation_0, values = (var_13572_cast_fp16, var_13511_cast_fp16))[name = tensor("op_13674_cast_fp16")]; tensor var_13675_to_fp16 = const()[name = tensor("op_13675_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1367_cast_fp16 = mul(x = var_13674_cast_fp16, y = var_13675_to_fp16)[name = tensor("aw_chunk_1367_cast_fp16")]; tensor var_13678_equation_0 = const()[name = tensor("op_13678_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13678_cast_fp16 = einsum(equation = var_13678_equation_0, values = (var_13572_cast_fp16, var_13512_cast_fp16))[name = tensor("op_13678_cast_fp16")]; tensor var_13679_to_fp16 = const()[name = tensor("op_13679_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1369_cast_fp16 = mul(x = var_13678_cast_fp16, y = var_13679_to_fp16)[name = tensor("aw_chunk_1369_cast_fp16")]; tensor var_13682_equation_0 = const()[name = tensor("op_13682_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13682_cast_fp16 = einsum(equation = var_13682_equation_0, values = (var_13572_cast_fp16, var_13513_cast_fp16))[name = tensor("op_13682_cast_fp16")]; tensor var_13683_to_fp16 = const()[name = tensor("op_13683_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1371_cast_fp16 = mul(x = var_13682_cast_fp16, y = var_13683_to_fp16)[name = tensor("aw_chunk_1371_cast_fp16")]; tensor var_13686_equation_0 = const()[name = tensor("op_13686_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13686_cast_fp16 = einsum(equation = var_13686_equation_0, values = (var_13572_cast_fp16, var_13514_cast_fp16))[name = tensor("op_13686_cast_fp16")]; tensor var_13687_to_fp16 = const()[name = tensor("op_13687_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1373_cast_fp16 = mul(x = var_13686_cast_fp16, y = var_13687_to_fp16)[name = tensor("aw_chunk_1373_cast_fp16")]; tensor var_13690_equation_0 = const()[name = tensor("op_13690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13690_cast_fp16 = einsum(equation = var_13690_equation_0, values = (var_13572_cast_fp16, var_13515_cast_fp16))[name = tensor("op_13690_cast_fp16")]; tensor var_13691_to_fp16 = const()[name = tensor("op_13691_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1375_cast_fp16 = mul(x = var_13690_cast_fp16, y = var_13691_to_fp16)[name = tensor("aw_chunk_1375_cast_fp16")]; tensor var_13694_equation_0 = const()[name = tensor("op_13694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13694_cast_fp16 = einsum(equation = var_13694_equation_0, values = (var_13576_cast_fp16, var_13516_cast_fp16))[name = tensor("op_13694_cast_fp16")]; tensor var_13695_to_fp16 = const()[name = tensor("op_13695_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1377_cast_fp16 = mul(x = var_13694_cast_fp16, y = var_13695_to_fp16)[name = tensor("aw_chunk_1377_cast_fp16")]; tensor var_13698_equation_0 = const()[name = tensor("op_13698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13698_cast_fp16 = einsum(equation = var_13698_equation_0, values = (var_13576_cast_fp16, var_13517_cast_fp16))[name = tensor("op_13698_cast_fp16")]; tensor var_13699_to_fp16 = const()[name = tensor("op_13699_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1379_cast_fp16 = mul(x = var_13698_cast_fp16, y = var_13699_to_fp16)[name = tensor("aw_chunk_1379_cast_fp16")]; tensor var_13702_equation_0 = const()[name = tensor("op_13702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13702_cast_fp16 = einsum(equation = var_13702_equation_0, values = (var_13576_cast_fp16, var_13518_cast_fp16))[name = tensor("op_13702_cast_fp16")]; tensor var_13703_to_fp16 = const()[name = tensor("op_13703_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1381_cast_fp16 = mul(x = var_13702_cast_fp16, y = var_13703_to_fp16)[name = tensor("aw_chunk_1381_cast_fp16")]; tensor var_13706_equation_0 = const()[name = tensor("op_13706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13706_cast_fp16 = einsum(equation = var_13706_equation_0, values = (var_13576_cast_fp16, var_13519_cast_fp16))[name = tensor("op_13706_cast_fp16")]; tensor var_13707_to_fp16 = const()[name = tensor("op_13707_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1383_cast_fp16 = mul(x = var_13706_cast_fp16, y = var_13707_to_fp16)[name = tensor("aw_chunk_1383_cast_fp16")]; tensor var_13710_equation_0 = const()[name = tensor("op_13710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13710_cast_fp16 = einsum(equation = var_13710_equation_0, values = (var_13576_cast_fp16, var_13520_cast_fp16))[name = tensor("op_13710_cast_fp16")]; tensor var_13711_to_fp16 = const()[name = tensor("op_13711_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1385_cast_fp16 = mul(x = var_13710_cast_fp16, y = var_13711_to_fp16)[name = tensor("aw_chunk_1385_cast_fp16")]; tensor var_13714_equation_0 = const()[name = tensor("op_13714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13714_cast_fp16 = einsum(equation = var_13714_equation_0, values = (var_13576_cast_fp16, var_13521_cast_fp16))[name = tensor("op_13714_cast_fp16")]; tensor var_13715_to_fp16 = const()[name = tensor("op_13715_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1387_cast_fp16 = mul(x = var_13714_cast_fp16, y = var_13715_to_fp16)[name = tensor("aw_chunk_1387_cast_fp16")]; tensor var_13718_equation_0 = const()[name = tensor("op_13718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13718_cast_fp16 = einsum(equation = var_13718_equation_0, values = (var_13576_cast_fp16, var_13522_cast_fp16))[name = tensor("op_13718_cast_fp16")]; tensor var_13719_to_fp16 = const()[name = tensor("op_13719_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1389_cast_fp16 = mul(x = var_13718_cast_fp16, y = var_13719_to_fp16)[name = tensor("aw_chunk_1389_cast_fp16")]; tensor var_13722_equation_0 = const()[name = tensor("op_13722_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13722_cast_fp16 = einsum(equation = var_13722_equation_0, values = (var_13576_cast_fp16, var_13523_cast_fp16))[name = tensor("op_13722_cast_fp16")]; tensor var_13723_to_fp16 = const()[name = tensor("op_13723_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1391_cast_fp16 = mul(x = var_13722_cast_fp16, y = var_13723_to_fp16)[name = tensor("aw_chunk_1391_cast_fp16")]; tensor var_13726_equation_0 = const()[name = tensor("op_13726_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13726_cast_fp16 = einsum(equation = var_13726_equation_0, values = (var_13580_cast_fp16, var_13524_cast_fp16))[name = tensor("op_13726_cast_fp16")]; tensor var_13727_to_fp16 = const()[name = tensor("op_13727_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1393_cast_fp16 = mul(x = var_13726_cast_fp16, y = var_13727_to_fp16)[name = tensor("aw_chunk_1393_cast_fp16")]; tensor var_13730_equation_0 = const()[name = tensor("op_13730_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13730_cast_fp16 = einsum(equation = var_13730_equation_0, values = (var_13580_cast_fp16, var_13525_cast_fp16))[name = tensor("op_13730_cast_fp16")]; tensor var_13731_to_fp16 = const()[name = tensor("op_13731_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1395_cast_fp16 = mul(x = var_13730_cast_fp16, y = var_13731_to_fp16)[name = tensor("aw_chunk_1395_cast_fp16")]; tensor var_13734_equation_0 = const()[name = tensor("op_13734_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13734_cast_fp16 = einsum(equation = var_13734_equation_0, values = (var_13580_cast_fp16, var_13526_cast_fp16))[name = tensor("op_13734_cast_fp16")]; tensor var_13735_to_fp16 = const()[name = tensor("op_13735_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1397_cast_fp16 = mul(x = var_13734_cast_fp16, y = var_13735_to_fp16)[name = tensor("aw_chunk_1397_cast_fp16")]; tensor var_13738_equation_0 = const()[name = tensor("op_13738_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13738_cast_fp16 = einsum(equation = var_13738_equation_0, values = (var_13580_cast_fp16, var_13527_cast_fp16))[name = tensor("op_13738_cast_fp16")]; tensor var_13739_to_fp16 = const()[name = tensor("op_13739_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1399_cast_fp16 = mul(x = var_13738_cast_fp16, y = var_13739_to_fp16)[name = tensor("aw_chunk_1399_cast_fp16")]; tensor var_13742_equation_0 = const()[name = tensor("op_13742_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13742_cast_fp16 = einsum(equation = var_13742_equation_0, values = (var_13580_cast_fp16, var_13528_cast_fp16))[name = tensor("op_13742_cast_fp16")]; tensor var_13743_to_fp16 = const()[name = tensor("op_13743_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1401_cast_fp16 = mul(x = var_13742_cast_fp16, y = var_13743_to_fp16)[name = tensor("aw_chunk_1401_cast_fp16")]; tensor var_13746_equation_0 = const()[name = tensor("op_13746_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13746_cast_fp16 = einsum(equation = var_13746_equation_0, values = (var_13580_cast_fp16, var_13529_cast_fp16))[name = tensor("op_13746_cast_fp16")]; tensor var_13747_to_fp16 = const()[name = tensor("op_13747_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1403_cast_fp16 = mul(x = var_13746_cast_fp16, y = var_13747_to_fp16)[name = tensor("aw_chunk_1403_cast_fp16")]; tensor var_13750_equation_0 = const()[name = tensor("op_13750_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13750_cast_fp16 = einsum(equation = var_13750_equation_0, values = (var_13580_cast_fp16, var_13530_cast_fp16))[name = tensor("op_13750_cast_fp16")]; tensor var_13751_to_fp16 = const()[name = tensor("op_13751_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1405_cast_fp16 = mul(x = var_13750_cast_fp16, y = var_13751_to_fp16)[name = tensor("aw_chunk_1405_cast_fp16")]; tensor var_13754_equation_0 = const()[name = tensor("op_13754_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13754_cast_fp16 = einsum(equation = var_13754_equation_0, values = (var_13580_cast_fp16, var_13531_cast_fp16))[name = tensor("op_13754_cast_fp16")]; tensor var_13755_to_fp16 = const()[name = tensor("op_13755_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1407_cast_fp16 = mul(x = var_13754_cast_fp16, y = var_13755_to_fp16)[name = tensor("aw_chunk_1407_cast_fp16")]; tensor var_13758_equation_0 = const()[name = tensor("op_13758_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13758_cast_fp16 = einsum(equation = var_13758_equation_0, values = (var_13584_cast_fp16, var_13532_cast_fp16))[name = tensor("op_13758_cast_fp16")]; tensor var_13759_to_fp16 = const()[name = tensor("op_13759_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1409_cast_fp16 = mul(x = var_13758_cast_fp16, y = var_13759_to_fp16)[name = tensor("aw_chunk_1409_cast_fp16")]; tensor var_13762_equation_0 = const()[name = tensor("op_13762_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13762_cast_fp16 = einsum(equation = var_13762_equation_0, values = (var_13584_cast_fp16, var_13533_cast_fp16))[name = tensor("op_13762_cast_fp16")]; tensor var_13763_to_fp16 = const()[name = tensor("op_13763_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1411_cast_fp16 = mul(x = var_13762_cast_fp16, y = var_13763_to_fp16)[name = tensor("aw_chunk_1411_cast_fp16")]; tensor var_13766_equation_0 = const()[name = tensor("op_13766_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13766_cast_fp16 = einsum(equation = var_13766_equation_0, values = (var_13584_cast_fp16, var_13534_cast_fp16))[name = tensor("op_13766_cast_fp16")]; tensor var_13767_to_fp16 = const()[name = tensor("op_13767_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1413_cast_fp16 = mul(x = var_13766_cast_fp16, y = var_13767_to_fp16)[name = tensor("aw_chunk_1413_cast_fp16")]; tensor var_13770_equation_0 = const()[name = tensor("op_13770_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13770_cast_fp16 = einsum(equation = var_13770_equation_0, values = (var_13584_cast_fp16, var_13535_cast_fp16))[name = tensor("op_13770_cast_fp16")]; tensor var_13771_to_fp16 = const()[name = tensor("op_13771_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1415_cast_fp16 = mul(x = var_13770_cast_fp16, y = var_13771_to_fp16)[name = tensor("aw_chunk_1415_cast_fp16")]; tensor var_13774_equation_0 = const()[name = tensor("op_13774_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13774_cast_fp16 = einsum(equation = var_13774_equation_0, values = (var_13584_cast_fp16, var_13536_cast_fp16))[name = tensor("op_13774_cast_fp16")]; tensor var_13775_to_fp16 = const()[name = tensor("op_13775_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1417_cast_fp16 = mul(x = var_13774_cast_fp16, y = var_13775_to_fp16)[name = tensor("aw_chunk_1417_cast_fp16")]; tensor var_13778_equation_0 = const()[name = tensor("op_13778_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13778_cast_fp16 = einsum(equation = var_13778_equation_0, values = (var_13584_cast_fp16, var_13537_cast_fp16))[name = tensor("op_13778_cast_fp16")]; tensor var_13779_to_fp16 = const()[name = tensor("op_13779_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1419_cast_fp16 = mul(x = var_13778_cast_fp16, y = var_13779_to_fp16)[name = tensor("aw_chunk_1419_cast_fp16")]; tensor var_13782_equation_0 = const()[name = tensor("op_13782_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13782_cast_fp16 = einsum(equation = var_13782_equation_0, values = (var_13584_cast_fp16, var_13538_cast_fp16))[name = tensor("op_13782_cast_fp16")]; tensor var_13783_to_fp16 = const()[name = tensor("op_13783_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1421_cast_fp16 = mul(x = var_13782_cast_fp16, y = var_13783_to_fp16)[name = tensor("aw_chunk_1421_cast_fp16")]; tensor var_13786_equation_0 = const()[name = tensor("op_13786_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13786_cast_fp16 = einsum(equation = var_13786_equation_0, values = (var_13584_cast_fp16, var_13539_cast_fp16))[name = tensor("op_13786_cast_fp16")]; tensor var_13787_to_fp16 = const()[name = tensor("op_13787_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1423_cast_fp16 = mul(x = var_13786_cast_fp16, y = var_13787_to_fp16)[name = tensor("aw_chunk_1423_cast_fp16")]; tensor var_13790_equation_0 = const()[name = tensor("op_13790_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13790_cast_fp16 = einsum(equation = var_13790_equation_0, values = (var_13588_cast_fp16, var_13540_cast_fp16))[name = tensor("op_13790_cast_fp16")]; tensor var_13791_to_fp16 = const()[name = tensor("op_13791_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1425_cast_fp16 = mul(x = var_13790_cast_fp16, y = var_13791_to_fp16)[name = tensor("aw_chunk_1425_cast_fp16")]; tensor var_13794_equation_0 = const()[name = tensor("op_13794_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13794_cast_fp16 = einsum(equation = var_13794_equation_0, values = (var_13588_cast_fp16, var_13541_cast_fp16))[name = tensor("op_13794_cast_fp16")]; tensor var_13795_to_fp16 = const()[name = tensor("op_13795_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1427_cast_fp16 = mul(x = var_13794_cast_fp16, y = var_13795_to_fp16)[name = tensor("aw_chunk_1427_cast_fp16")]; tensor var_13798_equation_0 = const()[name = tensor("op_13798_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13798_cast_fp16 = einsum(equation = var_13798_equation_0, values = (var_13588_cast_fp16, var_13542_cast_fp16))[name = tensor("op_13798_cast_fp16")]; tensor var_13799_to_fp16 = const()[name = tensor("op_13799_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1429_cast_fp16 = mul(x = var_13798_cast_fp16, y = var_13799_to_fp16)[name = tensor("aw_chunk_1429_cast_fp16")]; tensor var_13802_equation_0 = const()[name = tensor("op_13802_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13802_cast_fp16 = einsum(equation = var_13802_equation_0, values = (var_13588_cast_fp16, var_13543_cast_fp16))[name = tensor("op_13802_cast_fp16")]; tensor var_13803_to_fp16 = const()[name = tensor("op_13803_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1431_cast_fp16 = mul(x = var_13802_cast_fp16, y = var_13803_to_fp16)[name = tensor("aw_chunk_1431_cast_fp16")]; tensor var_13806_equation_0 = const()[name = tensor("op_13806_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13806_cast_fp16 = einsum(equation = var_13806_equation_0, values = (var_13588_cast_fp16, var_13544_cast_fp16))[name = tensor("op_13806_cast_fp16")]; tensor var_13807_to_fp16 = const()[name = tensor("op_13807_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1433_cast_fp16 = mul(x = var_13806_cast_fp16, y = var_13807_to_fp16)[name = tensor("aw_chunk_1433_cast_fp16")]; tensor var_13810_equation_0 = const()[name = tensor("op_13810_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13810_cast_fp16 = einsum(equation = var_13810_equation_0, values = (var_13588_cast_fp16, var_13545_cast_fp16))[name = tensor("op_13810_cast_fp16")]; tensor var_13811_to_fp16 = const()[name = tensor("op_13811_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1435_cast_fp16 = mul(x = var_13810_cast_fp16, y = var_13811_to_fp16)[name = tensor("aw_chunk_1435_cast_fp16")]; tensor var_13814_equation_0 = const()[name = tensor("op_13814_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13814_cast_fp16 = einsum(equation = var_13814_equation_0, values = (var_13588_cast_fp16, var_13546_cast_fp16))[name = tensor("op_13814_cast_fp16")]; tensor var_13815_to_fp16 = const()[name = tensor("op_13815_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1437_cast_fp16 = mul(x = var_13814_cast_fp16, y = var_13815_to_fp16)[name = tensor("aw_chunk_1437_cast_fp16")]; tensor var_13818_equation_0 = const()[name = tensor("op_13818_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13818_cast_fp16 = einsum(equation = var_13818_equation_0, values = (var_13588_cast_fp16, var_13547_cast_fp16))[name = tensor("op_13818_cast_fp16")]; tensor var_13819_to_fp16 = const()[name = tensor("op_13819_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1439_cast_fp16 = mul(x = var_13818_cast_fp16, y = var_13819_to_fp16)[name = tensor("aw_chunk_1439_cast_fp16")]; tensor var_13822_equation_0 = const()[name = tensor("op_13822_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13822_cast_fp16 = einsum(equation = var_13822_equation_0, values = (var_13592_cast_fp16, var_13548_cast_fp16))[name = tensor("op_13822_cast_fp16")]; tensor var_13823_to_fp16 = const()[name = tensor("op_13823_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1441_cast_fp16 = mul(x = var_13822_cast_fp16, y = var_13823_to_fp16)[name = tensor("aw_chunk_1441_cast_fp16")]; tensor var_13826_equation_0 = const()[name = tensor("op_13826_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13826_cast_fp16 = einsum(equation = var_13826_equation_0, values = (var_13592_cast_fp16, var_13549_cast_fp16))[name = tensor("op_13826_cast_fp16")]; tensor var_13827_to_fp16 = const()[name = tensor("op_13827_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1443_cast_fp16 = mul(x = var_13826_cast_fp16, y = var_13827_to_fp16)[name = tensor("aw_chunk_1443_cast_fp16")]; tensor var_13830_equation_0 = const()[name = tensor("op_13830_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13830_cast_fp16 = einsum(equation = var_13830_equation_0, values = (var_13592_cast_fp16, var_13550_cast_fp16))[name = tensor("op_13830_cast_fp16")]; tensor var_13831_to_fp16 = const()[name = tensor("op_13831_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1445_cast_fp16 = mul(x = var_13830_cast_fp16, y = var_13831_to_fp16)[name = tensor("aw_chunk_1445_cast_fp16")]; tensor var_13834_equation_0 = const()[name = tensor("op_13834_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13834_cast_fp16 = einsum(equation = var_13834_equation_0, values = (var_13592_cast_fp16, var_13551_cast_fp16))[name = tensor("op_13834_cast_fp16")]; tensor var_13835_to_fp16 = const()[name = tensor("op_13835_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1447_cast_fp16 = mul(x = var_13834_cast_fp16, y = var_13835_to_fp16)[name = tensor("aw_chunk_1447_cast_fp16")]; tensor var_13838_equation_0 = const()[name = tensor("op_13838_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13838_cast_fp16 = einsum(equation = var_13838_equation_0, values = (var_13592_cast_fp16, var_13552_cast_fp16))[name = tensor("op_13838_cast_fp16")]; tensor var_13839_to_fp16 = const()[name = tensor("op_13839_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1449_cast_fp16 = mul(x = var_13838_cast_fp16, y = var_13839_to_fp16)[name = tensor("aw_chunk_1449_cast_fp16")]; tensor var_13842_equation_0 = const()[name = tensor("op_13842_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13842_cast_fp16 = einsum(equation = var_13842_equation_0, values = (var_13592_cast_fp16, var_13553_cast_fp16))[name = tensor("op_13842_cast_fp16")]; tensor var_13843_to_fp16 = const()[name = tensor("op_13843_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1451_cast_fp16 = mul(x = var_13842_cast_fp16, y = var_13843_to_fp16)[name = tensor("aw_chunk_1451_cast_fp16")]; tensor var_13846_equation_0 = const()[name = tensor("op_13846_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13846_cast_fp16 = einsum(equation = var_13846_equation_0, values = (var_13592_cast_fp16, var_13554_cast_fp16))[name = tensor("op_13846_cast_fp16")]; tensor var_13847_to_fp16 = const()[name = tensor("op_13847_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1453_cast_fp16 = mul(x = var_13846_cast_fp16, y = var_13847_to_fp16)[name = tensor("aw_chunk_1453_cast_fp16")]; tensor var_13850_equation_0 = const()[name = tensor("op_13850_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13850_cast_fp16 = einsum(equation = var_13850_equation_0, values = (var_13592_cast_fp16, var_13555_cast_fp16))[name = tensor("op_13850_cast_fp16")]; tensor var_13851_to_fp16 = const()[name = tensor("op_13851_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1455_cast_fp16 = mul(x = var_13850_cast_fp16, y = var_13851_to_fp16)[name = tensor("aw_chunk_1455_cast_fp16")]; tensor var_13854_equation_0 = const()[name = tensor("op_13854_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13854_cast_fp16 = einsum(equation = var_13854_equation_0, values = (var_13596_cast_fp16, var_13556_cast_fp16))[name = tensor("op_13854_cast_fp16")]; tensor var_13855_to_fp16 = const()[name = tensor("op_13855_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1457_cast_fp16 = mul(x = var_13854_cast_fp16, y = var_13855_to_fp16)[name = tensor("aw_chunk_1457_cast_fp16")]; tensor var_13858_equation_0 = const()[name = tensor("op_13858_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13858_cast_fp16 = einsum(equation = var_13858_equation_0, values = (var_13596_cast_fp16, var_13557_cast_fp16))[name = tensor("op_13858_cast_fp16")]; tensor var_13859_to_fp16 = const()[name = tensor("op_13859_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1459_cast_fp16 = mul(x = var_13858_cast_fp16, y = var_13859_to_fp16)[name = tensor("aw_chunk_1459_cast_fp16")]; tensor var_13862_equation_0 = const()[name = tensor("op_13862_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13862_cast_fp16 = einsum(equation = var_13862_equation_0, values = (var_13596_cast_fp16, var_13558_cast_fp16))[name = tensor("op_13862_cast_fp16")]; tensor var_13863_to_fp16 = const()[name = tensor("op_13863_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1461_cast_fp16 = mul(x = var_13862_cast_fp16, y = var_13863_to_fp16)[name = tensor("aw_chunk_1461_cast_fp16")]; tensor var_13866_equation_0 = const()[name = tensor("op_13866_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13866_cast_fp16 = einsum(equation = var_13866_equation_0, values = (var_13596_cast_fp16, var_13559_cast_fp16))[name = tensor("op_13866_cast_fp16")]; tensor var_13867_to_fp16 = const()[name = tensor("op_13867_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1463_cast_fp16 = mul(x = var_13866_cast_fp16, y = var_13867_to_fp16)[name = tensor("aw_chunk_1463_cast_fp16")]; tensor var_13870_equation_0 = const()[name = tensor("op_13870_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13870_cast_fp16 = einsum(equation = var_13870_equation_0, values = (var_13596_cast_fp16, var_13560_cast_fp16))[name = tensor("op_13870_cast_fp16")]; tensor var_13871_to_fp16 = const()[name = tensor("op_13871_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1465_cast_fp16 = mul(x = var_13870_cast_fp16, y = var_13871_to_fp16)[name = tensor("aw_chunk_1465_cast_fp16")]; tensor var_13874_equation_0 = const()[name = tensor("op_13874_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13874_cast_fp16 = einsum(equation = var_13874_equation_0, values = (var_13596_cast_fp16, var_13561_cast_fp16))[name = tensor("op_13874_cast_fp16")]; tensor var_13875_to_fp16 = const()[name = tensor("op_13875_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1467_cast_fp16 = mul(x = var_13874_cast_fp16, y = var_13875_to_fp16)[name = tensor("aw_chunk_1467_cast_fp16")]; tensor var_13878_equation_0 = const()[name = tensor("op_13878_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13878_cast_fp16 = einsum(equation = var_13878_equation_0, values = (var_13596_cast_fp16, var_13562_cast_fp16))[name = tensor("op_13878_cast_fp16")]; tensor var_13879_to_fp16 = const()[name = tensor("op_13879_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1469_cast_fp16 = mul(x = var_13878_cast_fp16, y = var_13879_to_fp16)[name = tensor("aw_chunk_1469_cast_fp16")]; tensor var_13882_equation_0 = const()[name = tensor("op_13882_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_13882_cast_fp16 = einsum(equation = var_13882_equation_0, values = (var_13596_cast_fp16, var_13563_cast_fp16))[name = tensor("op_13882_cast_fp16")]; tensor var_13883_to_fp16 = const()[name = tensor("op_13883_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1471_cast_fp16 = mul(x = var_13882_cast_fp16, y = var_13883_to_fp16)[name = tensor("aw_chunk_1471_cast_fp16")]; tensor var_13885_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1345_cast_fp16)[name = tensor("op_13885_cast_fp16")]; tensor var_13886_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1347_cast_fp16)[name = tensor("op_13886_cast_fp16")]; tensor var_13887_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1349_cast_fp16)[name = tensor("op_13887_cast_fp16")]; tensor var_13888_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1351_cast_fp16)[name = tensor("op_13888_cast_fp16")]; tensor var_13889_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1353_cast_fp16)[name = tensor("op_13889_cast_fp16")]; tensor var_13890_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1355_cast_fp16)[name = tensor("op_13890_cast_fp16")]; tensor var_13891_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1357_cast_fp16)[name = tensor("op_13891_cast_fp16")]; tensor var_13892_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1359_cast_fp16)[name = tensor("op_13892_cast_fp16")]; tensor var_13893_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1361_cast_fp16)[name = tensor("op_13893_cast_fp16")]; tensor var_13894_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1363_cast_fp16)[name = tensor("op_13894_cast_fp16")]; tensor var_13895_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1365_cast_fp16)[name = tensor("op_13895_cast_fp16")]; tensor var_13896_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1367_cast_fp16)[name = tensor("op_13896_cast_fp16")]; tensor var_13897_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1369_cast_fp16)[name = tensor("op_13897_cast_fp16")]; tensor var_13898_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1371_cast_fp16)[name = tensor("op_13898_cast_fp16")]; tensor var_13899_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1373_cast_fp16)[name = tensor("op_13899_cast_fp16")]; tensor var_13900_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1375_cast_fp16)[name = tensor("op_13900_cast_fp16")]; tensor var_13901_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1377_cast_fp16)[name = tensor("op_13901_cast_fp16")]; tensor var_13902_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1379_cast_fp16)[name = tensor("op_13902_cast_fp16")]; tensor var_13903_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1381_cast_fp16)[name = tensor("op_13903_cast_fp16")]; tensor var_13904_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1383_cast_fp16)[name = tensor("op_13904_cast_fp16")]; tensor var_13905_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1385_cast_fp16)[name = tensor("op_13905_cast_fp16")]; tensor var_13906_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1387_cast_fp16)[name = tensor("op_13906_cast_fp16")]; tensor var_13907_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1389_cast_fp16)[name = tensor("op_13907_cast_fp16")]; tensor var_13908_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1391_cast_fp16)[name = tensor("op_13908_cast_fp16")]; tensor var_13909_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1393_cast_fp16)[name = tensor("op_13909_cast_fp16")]; tensor var_13910_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1395_cast_fp16)[name = tensor("op_13910_cast_fp16")]; tensor var_13911_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1397_cast_fp16)[name = tensor("op_13911_cast_fp16")]; tensor var_13912_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1399_cast_fp16)[name = tensor("op_13912_cast_fp16")]; tensor var_13913_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1401_cast_fp16)[name = tensor("op_13913_cast_fp16")]; tensor var_13914_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1403_cast_fp16)[name = tensor("op_13914_cast_fp16")]; tensor var_13915_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1405_cast_fp16)[name = tensor("op_13915_cast_fp16")]; tensor var_13916_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1407_cast_fp16)[name = tensor("op_13916_cast_fp16")]; tensor var_13917_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1409_cast_fp16)[name = tensor("op_13917_cast_fp16")]; tensor var_13918_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1411_cast_fp16)[name = tensor("op_13918_cast_fp16")]; tensor var_13919_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1413_cast_fp16)[name = tensor("op_13919_cast_fp16")]; tensor var_13920_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1415_cast_fp16)[name = tensor("op_13920_cast_fp16")]; tensor var_13921_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1417_cast_fp16)[name = tensor("op_13921_cast_fp16")]; tensor var_13922_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1419_cast_fp16)[name = tensor("op_13922_cast_fp16")]; tensor var_13923_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1421_cast_fp16)[name = tensor("op_13923_cast_fp16")]; tensor var_13924_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1423_cast_fp16)[name = tensor("op_13924_cast_fp16")]; tensor var_13925_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1425_cast_fp16)[name = tensor("op_13925_cast_fp16")]; tensor var_13926_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1427_cast_fp16)[name = tensor("op_13926_cast_fp16")]; tensor var_13927_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1429_cast_fp16)[name = tensor("op_13927_cast_fp16")]; tensor var_13928_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1431_cast_fp16)[name = tensor("op_13928_cast_fp16")]; tensor var_13929_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1433_cast_fp16)[name = tensor("op_13929_cast_fp16")]; tensor var_13930_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1435_cast_fp16)[name = tensor("op_13930_cast_fp16")]; tensor var_13931_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1437_cast_fp16)[name = tensor("op_13931_cast_fp16")]; tensor var_13932_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1439_cast_fp16)[name = tensor("op_13932_cast_fp16")]; tensor var_13933_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1441_cast_fp16)[name = tensor("op_13933_cast_fp16")]; tensor var_13934_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1443_cast_fp16)[name = tensor("op_13934_cast_fp16")]; tensor var_13935_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1445_cast_fp16)[name = tensor("op_13935_cast_fp16")]; tensor var_13936_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1447_cast_fp16)[name = tensor("op_13936_cast_fp16")]; tensor var_13937_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1449_cast_fp16)[name = tensor("op_13937_cast_fp16")]; tensor var_13938_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1451_cast_fp16)[name = tensor("op_13938_cast_fp16")]; tensor var_13939_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1453_cast_fp16)[name = tensor("op_13939_cast_fp16")]; tensor var_13940_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1455_cast_fp16)[name = tensor("op_13940_cast_fp16")]; tensor var_13941_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1457_cast_fp16)[name = tensor("op_13941_cast_fp16")]; tensor var_13942_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1459_cast_fp16)[name = tensor("op_13942_cast_fp16")]; tensor var_13943_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1461_cast_fp16)[name = tensor("op_13943_cast_fp16")]; tensor var_13944_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1463_cast_fp16)[name = tensor("op_13944_cast_fp16")]; tensor var_13945_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1465_cast_fp16)[name = tensor("op_13945_cast_fp16")]; tensor var_13946_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1467_cast_fp16)[name = tensor("op_13946_cast_fp16")]; tensor var_13947_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1469_cast_fp16)[name = tensor("op_13947_cast_fp16")]; tensor var_13948_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1471_cast_fp16)[name = tensor("op_13948_cast_fp16")]; tensor var_13950_equation_0 = const()[name = tensor("op_13950_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13950_cast_fp16 = einsum(equation = var_13950_equation_0, values = (var_13598_cast_fp16, var_13885_cast_fp16))[name = tensor("op_13950_cast_fp16")]; tensor var_13952_equation_0 = const()[name = tensor("op_13952_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13952_cast_fp16 = einsum(equation = var_13952_equation_0, values = (var_13598_cast_fp16, var_13886_cast_fp16))[name = tensor("op_13952_cast_fp16")]; tensor var_13954_equation_0 = const()[name = tensor("op_13954_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13954_cast_fp16 = einsum(equation = var_13954_equation_0, values = (var_13598_cast_fp16, var_13887_cast_fp16))[name = tensor("op_13954_cast_fp16")]; tensor var_13956_equation_0 = const()[name = tensor("op_13956_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13956_cast_fp16 = einsum(equation = var_13956_equation_0, values = (var_13598_cast_fp16, var_13888_cast_fp16))[name = tensor("op_13956_cast_fp16")]; tensor var_13958_equation_0 = const()[name = tensor("op_13958_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13958_cast_fp16 = einsum(equation = var_13958_equation_0, values = (var_13598_cast_fp16, var_13889_cast_fp16))[name = tensor("op_13958_cast_fp16")]; tensor var_13960_equation_0 = const()[name = tensor("op_13960_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13960_cast_fp16 = einsum(equation = var_13960_equation_0, values = (var_13598_cast_fp16, var_13890_cast_fp16))[name = tensor("op_13960_cast_fp16")]; tensor var_13962_equation_0 = const()[name = tensor("op_13962_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13962_cast_fp16 = einsum(equation = var_13962_equation_0, values = (var_13598_cast_fp16, var_13891_cast_fp16))[name = tensor("op_13962_cast_fp16")]; tensor var_13964_equation_0 = const()[name = tensor("op_13964_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13964_cast_fp16 = einsum(equation = var_13964_equation_0, values = (var_13598_cast_fp16, var_13892_cast_fp16))[name = tensor("op_13964_cast_fp16")]; tensor var_13966_equation_0 = const()[name = tensor("op_13966_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13966_cast_fp16 = einsum(equation = var_13966_equation_0, values = (var_13602_cast_fp16, var_13893_cast_fp16))[name = tensor("op_13966_cast_fp16")]; tensor var_13968_equation_0 = const()[name = tensor("op_13968_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13968_cast_fp16 = einsum(equation = var_13968_equation_0, values = (var_13602_cast_fp16, var_13894_cast_fp16))[name = tensor("op_13968_cast_fp16")]; tensor var_13970_equation_0 = const()[name = tensor("op_13970_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13970_cast_fp16 = einsum(equation = var_13970_equation_0, values = (var_13602_cast_fp16, var_13895_cast_fp16))[name = tensor("op_13970_cast_fp16")]; tensor var_13972_equation_0 = const()[name = tensor("op_13972_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13972_cast_fp16 = einsum(equation = var_13972_equation_0, values = (var_13602_cast_fp16, var_13896_cast_fp16))[name = tensor("op_13972_cast_fp16")]; tensor var_13974_equation_0 = const()[name = tensor("op_13974_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13974_cast_fp16 = einsum(equation = var_13974_equation_0, values = (var_13602_cast_fp16, var_13897_cast_fp16))[name = tensor("op_13974_cast_fp16")]; tensor var_13976_equation_0 = const()[name = tensor("op_13976_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13976_cast_fp16 = einsum(equation = var_13976_equation_0, values = (var_13602_cast_fp16, var_13898_cast_fp16))[name = tensor("op_13976_cast_fp16")]; tensor var_13978_equation_0 = const()[name = tensor("op_13978_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13978_cast_fp16 = einsum(equation = var_13978_equation_0, values = (var_13602_cast_fp16, var_13899_cast_fp16))[name = tensor("op_13978_cast_fp16")]; tensor var_13980_equation_0 = const()[name = tensor("op_13980_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13980_cast_fp16 = einsum(equation = var_13980_equation_0, values = (var_13602_cast_fp16, var_13900_cast_fp16))[name = tensor("op_13980_cast_fp16")]; tensor var_13982_equation_0 = const()[name = tensor("op_13982_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13982_cast_fp16 = einsum(equation = var_13982_equation_0, values = (var_13606_cast_fp16, var_13901_cast_fp16))[name = tensor("op_13982_cast_fp16")]; tensor var_13984_equation_0 = const()[name = tensor("op_13984_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13984_cast_fp16 = einsum(equation = var_13984_equation_0, values = (var_13606_cast_fp16, var_13902_cast_fp16))[name = tensor("op_13984_cast_fp16")]; tensor var_13986_equation_0 = const()[name = tensor("op_13986_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13986_cast_fp16 = einsum(equation = var_13986_equation_0, values = (var_13606_cast_fp16, var_13903_cast_fp16))[name = tensor("op_13986_cast_fp16")]; tensor var_13988_equation_0 = const()[name = tensor("op_13988_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13988_cast_fp16 = einsum(equation = var_13988_equation_0, values = (var_13606_cast_fp16, var_13904_cast_fp16))[name = tensor("op_13988_cast_fp16")]; tensor var_13990_equation_0 = const()[name = tensor("op_13990_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13990_cast_fp16 = einsum(equation = var_13990_equation_0, values = (var_13606_cast_fp16, var_13905_cast_fp16))[name = tensor("op_13990_cast_fp16")]; tensor var_13992_equation_0 = const()[name = tensor("op_13992_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13992_cast_fp16 = einsum(equation = var_13992_equation_0, values = (var_13606_cast_fp16, var_13906_cast_fp16))[name = tensor("op_13992_cast_fp16")]; tensor var_13994_equation_0 = const()[name = tensor("op_13994_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13994_cast_fp16 = einsum(equation = var_13994_equation_0, values = (var_13606_cast_fp16, var_13907_cast_fp16))[name = tensor("op_13994_cast_fp16")]; tensor var_13996_equation_0 = const()[name = tensor("op_13996_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13996_cast_fp16 = einsum(equation = var_13996_equation_0, values = (var_13606_cast_fp16, var_13908_cast_fp16))[name = tensor("op_13996_cast_fp16")]; tensor var_13998_equation_0 = const()[name = tensor("op_13998_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_13998_cast_fp16 = einsum(equation = var_13998_equation_0, values = (var_13610_cast_fp16, var_13909_cast_fp16))[name = tensor("op_13998_cast_fp16")]; tensor var_14000_equation_0 = const()[name = tensor("op_14000_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14000_cast_fp16 = einsum(equation = var_14000_equation_0, values = (var_13610_cast_fp16, var_13910_cast_fp16))[name = tensor("op_14000_cast_fp16")]; tensor var_14002_equation_0 = const()[name = tensor("op_14002_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14002_cast_fp16 = einsum(equation = var_14002_equation_0, values = (var_13610_cast_fp16, var_13911_cast_fp16))[name = tensor("op_14002_cast_fp16")]; tensor var_14004_equation_0 = const()[name = tensor("op_14004_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14004_cast_fp16 = einsum(equation = var_14004_equation_0, values = (var_13610_cast_fp16, var_13912_cast_fp16))[name = tensor("op_14004_cast_fp16")]; tensor var_14006_equation_0 = const()[name = tensor("op_14006_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14006_cast_fp16 = einsum(equation = var_14006_equation_0, values = (var_13610_cast_fp16, var_13913_cast_fp16))[name = tensor("op_14006_cast_fp16")]; tensor var_14008_equation_0 = const()[name = tensor("op_14008_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14008_cast_fp16 = einsum(equation = var_14008_equation_0, values = (var_13610_cast_fp16, var_13914_cast_fp16))[name = tensor("op_14008_cast_fp16")]; tensor var_14010_equation_0 = const()[name = tensor("op_14010_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14010_cast_fp16 = einsum(equation = var_14010_equation_0, values = (var_13610_cast_fp16, var_13915_cast_fp16))[name = tensor("op_14010_cast_fp16")]; tensor var_14012_equation_0 = const()[name = tensor("op_14012_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14012_cast_fp16 = einsum(equation = var_14012_equation_0, values = (var_13610_cast_fp16, var_13916_cast_fp16))[name = tensor("op_14012_cast_fp16")]; tensor var_14014_equation_0 = const()[name = tensor("op_14014_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14014_cast_fp16 = einsum(equation = var_14014_equation_0, values = (var_13614_cast_fp16, var_13917_cast_fp16))[name = tensor("op_14014_cast_fp16")]; tensor var_14016_equation_0 = const()[name = tensor("op_14016_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14016_cast_fp16 = einsum(equation = var_14016_equation_0, values = (var_13614_cast_fp16, var_13918_cast_fp16))[name = tensor("op_14016_cast_fp16")]; tensor var_14018_equation_0 = const()[name = tensor("op_14018_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14018_cast_fp16 = einsum(equation = var_14018_equation_0, values = (var_13614_cast_fp16, var_13919_cast_fp16))[name = tensor("op_14018_cast_fp16")]; tensor var_14020_equation_0 = const()[name = tensor("op_14020_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14020_cast_fp16 = einsum(equation = var_14020_equation_0, values = (var_13614_cast_fp16, var_13920_cast_fp16))[name = tensor("op_14020_cast_fp16")]; tensor var_14022_equation_0 = const()[name = tensor("op_14022_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14022_cast_fp16 = einsum(equation = var_14022_equation_0, values = (var_13614_cast_fp16, var_13921_cast_fp16))[name = tensor("op_14022_cast_fp16")]; tensor var_14024_equation_0 = const()[name = tensor("op_14024_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14024_cast_fp16 = einsum(equation = var_14024_equation_0, values = (var_13614_cast_fp16, var_13922_cast_fp16))[name = tensor("op_14024_cast_fp16")]; tensor var_14026_equation_0 = const()[name = tensor("op_14026_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14026_cast_fp16 = einsum(equation = var_14026_equation_0, values = (var_13614_cast_fp16, var_13923_cast_fp16))[name = tensor("op_14026_cast_fp16")]; tensor var_14028_equation_0 = const()[name = tensor("op_14028_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14028_cast_fp16 = einsum(equation = var_14028_equation_0, values = (var_13614_cast_fp16, var_13924_cast_fp16))[name = tensor("op_14028_cast_fp16")]; tensor var_14030_equation_0 = const()[name = tensor("op_14030_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14030_cast_fp16 = einsum(equation = var_14030_equation_0, values = (var_13618_cast_fp16, var_13925_cast_fp16))[name = tensor("op_14030_cast_fp16")]; tensor var_14032_equation_0 = const()[name = tensor("op_14032_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14032_cast_fp16 = einsum(equation = var_14032_equation_0, values = (var_13618_cast_fp16, var_13926_cast_fp16))[name = tensor("op_14032_cast_fp16")]; tensor var_14034_equation_0 = const()[name = tensor("op_14034_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14034_cast_fp16 = einsum(equation = var_14034_equation_0, values = (var_13618_cast_fp16, var_13927_cast_fp16))[name = tensor("op_14034_cast_fp16")]; tensor var_14036_equation_0 = const()[name = tensor("op_14036_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14036_cast_fp16 = einsum(equation = var_14036_equation_0, values = (var_13618_cast_fp16, var_13928_cast_fp16))[name = tensor("op_14036_cast_fp16")]; tensor var_14038_equation_0 = const()[name = tensor("op_14038_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14038_cast_fp16 = einsum(equation = var_14038_equation_0, values = (var_13618_cast_fp16, var_13929_cast_fp16))[name = tensor("op_14038_cast_fp16")]; tensor var_14040_equation_0 = const()[name = tensor("op_14040_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14040_cast_fp16 = einsum(equation = var_14040_equation_0, values = (var_13618_cast_fp16, var_13930_cast_fp16))[name = tensor("op_14040_cast_fp16")]; tensor var_14042_equation_0 = const()[name = tensor("op_14042_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14042_cast_fp16 = einsum(equation = var_14042_equation_0, values = (var_13618_cast_fp16, var_13931_cast_fp16))[name = tensor("op_14042_cast_fp16")]; tensor var_14044_equation_0 = const()[name = tensor("op_14044_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14044_cast_fp16 = einsum(equation = var_14044_equation_0, values = (var_13618_cast_fp16, var_13932_cast_fp16))[name = tensor("op_14044_cast_fp16")]; tensor var_14046_equation_0 = const()[name = tensor("op_14046_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14046_cast_fp16 = einsum(equation = var_14046_equation_0, values = (var_13622_cast_fp16, var_13933_cast_fp16))[name = tensor("op_14046_cast_fp16")]; tensor var_14048_equation_0 = const()[name = tensor("op_14048_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14048_cast_fp16 = einsum(equation = var_14048_equation_0, values = (var_13622_cast_fp16, var_13934_cast_fp16))[name = tensor("op_14048_cast_fp16")]; tensor var_14050_equation_0 = const()[name = tensor("op_14050_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14050_cast_fp16 = einsum(equation = var_14050_equation_0, values = (var_13622_cast_fp16, var_13935_cast_fp16))[name = tensor("op_14050_cast_fp16")]; tensor var_14052_equation_0 = const()[name = tensor("op_14052_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14052_cast_fp16 = einsum(equation = var_14052_equation_0, values = (var_13622_cast_fp16, var_13936_cast_fp16))[name = tensor("op_14052_cast_fp16")]; tensor var_14054_equation_0 = const()[name = tensor("op_14054_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14054_cast_fp16 = einsum(equation = var_14054_equation_0, values = (var_13622_cast_fp16, var_13937_cast_fp16))[name = tensor("op_14054_cast_fp16")]; tensor var_14056_equation_0 = const()[name = tensor("op_14056_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14056_cast_fp16 = einsum(equation = var_14056_equation_0, values = (var_13622_cast_fp16, var_13938_cast_fp16))[name = tensor("op_14056_cast_fp16")]; tensor var_14058_equation_0 = const()[name = tensor("op_14058_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14058_cast_fp16 = einsum(equation = var_14058_equation_0, values = (var_13622_cast_fp16, var_13939_cast_fp16))[name = tensor("op_14058_cast_fp16")]; tensor var_14060_equation_0 = const()[name = tensor("op_14060_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14060_cast_fp16 = einsum(equation = var_14060_equation_0, values = (var_13622_cast_fp16, var_13940_cast_fp16))[name = tensor("op_14060_cast_fp16")]; tensor var_14062_equation_0 = const()[name = tensor("op_14062_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14062_cast_fp16 = einsum(equation = var_14062_equation_0, values = (var_13626_cast_fp16, var_13941_cast_fp16))[name = tensor("op_14062_cast_fp16")]; tensor var_14064_equation_0 = const()[name = tensor("op_14064_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14064_cast_fp16 = einsum(equation = var_14064_equation_0, values = (var_13626_cast_fp16, var_13942_cast_fp16))[name = tensor("op_14064_cast_fp16")]; tensor var_14066_equation_0 = const()[name = tensor("op_14066_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14066_cast_fp16 = einsum(equation = var_14066_equation_0, values = (var_13626_cast_fp16, var_13943_cast_fp16))[name = tensor("op_14066_cast_fp16")]; tensor var_14068_equation_0 = const()[name = tensor("op_14068_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14068_cast_fp16 = einsum(equation = var_14068_equation_0, values = (var_13626_cast_fp16, var_13944_cast_fp16))[name = tensor("op_14068_cast_fp16")]; tensor var_14070_equation_0 = const()[name = tensor("op_14070_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14070_cast_fp16 = einsum(equation = var_14070_equation_0, values = (var_13626_cast_fp16, var_13945_cast_fp16))[name = tensor("op_14070_cast_fp16")]; tensor var_14072_equation_0 = const()[name = tensor("op_14072_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14072_cast_fp16 = einsum(equation = var_14072_equation_0, values = (var_13626_cast_fp16, var_13946_cast_fp16))[name = tensor("op_14072_cast_fp16")]; tensor var_14074_equation_0 = const()[name = tensor("op_14074_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14074_cast_fp16 = einsum(equation = var_14074_equation_0, values = (var_13626_cast_fp16, var_13947_cast_fp16))[name = tensor("op_14074_cast_fp16")]; tensor var_14076_equation_0 = const()[name = tensor("op_14076_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14076_cast_fp16 = einsum(equation = var_14076_equation_0, values = (var_13626_cast_fp16, var_13948_cast_fp16))[name = tensor("op_14076_cast_fp16")]; tensor var_14078_interleave_0 = const()[name = tensor("op_14078_interleave_0"), val = tensor(false)]; tensor var_14078_cast_fp16 = concat(axis = var_10347, interleave = var_14078_interleave_0, values = (var_13950_cast_fp16, var_13952_cast_fp16, var_13954_cast_fp16, var_13956_cast_fp16, var_13958_cast_fp16, var_13960_cast_fp16, var_13962_cast_fp16, var_13964_cast_fp16))[name = tensor("op_14078_cast_fp16")]; tensor var_14080_interleave_0 = const()[name = tensor("op_14080_interleave_0"), val = tensor(false)]; tensor var_14080_cast_fp16 = concat(axis = var_10347, interleave = var_14080_interleave_0, values = (var_13966_cast_fp16, var_13968_cast_fp16, var_13970_cast_fp16, var_13972_cast_fp16, var_13974_cast_fp16, var_13976_cast_fp16, var_13978_cast_fp16, var_13980_cast_fp16))[name = tensor("op_14080_cast_fp16")]; tensor var_14082_interleave_0 = const()[name = tensor("op_14082_interleave_0"), val = tensor(false)]; tensor var_14082_cast_fp16 = concat(axis = var_10347, interleave = var_14082_interleave_0, values = (var_13982_cast_fp16, var_13984_cast_fp16, var_13986_cast_fp16, var_13988_cast_fp16, var_13990_cast_fp16, var_13992_cast_fp16, var_13994_cast_fp16, var_13996_cast_fp16))[name = tensor("op_14082_cast_fp16")]; tensor var_14084_interleave_0 = const()[name = tensor("op_14084_interleave_0"), val = tensor(false)]; tensor var_14084_cast_fp16 = concat(axis = var_10347, interleave = var_14084_interleave_0, values = (var_13998_cast_fp16, var_14000_cast_fp16, var_14002_cast_fp16, var_14004_cast_fp16, var_14006_cast_fp16, var_14008_cast_fp16, var_14010_cast_fp16, var_14012_cast_fp16))[name = tensor("op_14084_cast_fp16")]; tensor var_14086_interleave_0 = const()[name = tensor("op_14086_interleave_0"), val = tensor(false)]; tensor var_14086_cast_fp16 = concat(axis = var_10347, interleave = var_14086_interleave_0, values = (var_14014_cast_fp16, var_14016_cast_fp16, var_14018_cast_fp16, var_14020_cast_fp16, var_14022_cast_fp16, var_14024_cast_fp16, var_14026_cast_fp16, var_14028_cast_fp16))[name = tensor("op_14086_cast_fp16")]; tensor var_14088_interleave_0 = const()[name = tensor("op_14088_interleave_0"), val = tensor(false)]; tensor var_14088_cast_fp16 = concat(axis = var_10347, interleave = var_14088_interleave_0, values = (var_14030_cast_fp16, var_14032_cast_fp16, var_14034_cast_fp16, var_14036_cast_fp16, var_14038_cast_fp16, var_14040_cast_fp16, var_14042_cast_fp16, var_14044_cast_fp16))[name = tensor("op_14088_cast_fp16")]; tensor var_14090_interleave_0 = const()[name = tensor("op_14090_interleave_0"), val = tensor(false)]; tensor var_14090_cast_fp16 = concat(axis = var_10347, interleave = var_14090_interleave_0, values = (var_14046_cast_fp16, var_14048_cast_fp16, var_14050_cast_fp16, var_14052_cast_fp16, var_14054_cast_fp16, var_14056_cast_fp16, var_14058_cast_fp16, var_14060_cast_fp16))[name = tensor("op_14090_cast_fp16")]; tensor var_14092_interleave_0 = const()[name = tensor("op_14092_interleave_0"), val = tensor(false)]; tensor var_14092_cast_fp16 = concat(axis = var_10347, interleave = var_14092_interleave_0, values = (var_14062_cast_fp16, var_14064_cast_fp16, var_14066_cast_fp16, var_14068_cast_fp16, var_14070_cast_fp16, var_14072_cast_fp16, var_14074_cast_fp16, var_14076_cast_fp16))[name = tensor("op_14092_cast_fp16")]; tensor input_517_interleave_0 = const()[name = tensor("input_517_interleave_0"), val = tensor(false)]; tensor input_517_cast_fp16 = concat(axis = var_10375, interleave = input_517_interleave_0, values = (var_14078_cast_fp16, var_14080_cast_fp16, var_14082_cast_fp16, var_14084_cast_fp16, var_14086_cast_fp16, var_14088_cast_fp16, var_14090_cast_fp16, var_14092_cast_fp16))[name = tensor("input_517_cast_fp16")]; tensor var_14098 = const()[name = tensor("op_14098"), val = tensor([1, 1])]; tensor var_14100 = const()[name = tensor("op_14100"), val = tensor([1, 1])]; tensor var_14102_pad_type_0 = const()[name = tensor("op_14102_pad_type_0"), val = tensor("custom")]; tensor var_14102_pad_0 = const()[name = tensor("op_14102_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_2_transformer_blocks_0_attn1_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_transformer_blocks_0_attn1_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1714822016)))]; tensor up_blocks_3_attentions_2_transformer_blocks_0_attn1_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_transformer_blocks_0_attn1_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1715026880)))]; tensor var_14102_cast_fp16 = conv(bias = up_blocks_3_attentions_2_transformer_blocks_0_attn1_to_out_0_bias_to_fp16, dilations = var_14100, groups = var_10375, pad = var_14102_pad_0, pad_type = var_14102_pad_type_0, strides = var_14098, weight = up_blocks_3_attentions_2_transformer_blocks_0_attn1_to_out_0_weight_to_fp16, x = input_517_cast_fp16)[name = tensor("op_14102_cast_fp16")]; tensor inputs_93_cast_fp16 = add(x = var_14102_cast_fp16, y = inputs_91_cast_fp16)[name = tensor("inputs_93_cast_fp16")]; tensor hidden_states_329_axes_0 = const()[name = tensor("hidden_states_329_axes_0"), val = tensor([1])]; tensor hidden_states_329_gamma_0_to_fp16 = const()[name = tensor("hidden_states_329_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1715027584)))]; tensor hidden_states_329_beta_0_to_fp16 = const()[name = tensor("hidden_states_329_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1715028288)))]; tensor var_14112_to_fp16 = const()[name = tensor("op_14112_to_fp16"), val = tensor(0x1.5p-17)]; tensor hidden_states_329_cast_fp16 = layer_norm(axes = hidden_states_329_axes_0, beta = hidden_states_329_beta_0_to_fp16, epsilon = var_14112_to_fp16, gamma = hidden_states_329_gamma_0_to_fp16, x = inputs_93_cast_fp16)[name = tensor("hidden_states_329_cast_fp16")]; tensor var_14127 = const()[name = tensor("op_14127"), val = tensor([1, 1])]; tensor var_14129 = const()[name = tensor("op_14129"), val = tensor([1, 1])]; tensor q_pad_type_0 = const()[name = tensor("q_pad_type_0"), val = tensor("custom")]; tensor q_pad_0 = const()[name = tensor("q_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_2_transformer_blocks_0_attn2_to_q_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_transformer_blocks_0_attn2_to_q_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1715028992)))]; tensor q_cast_fp16 = conv(dilations = var_14129, groups = var_10375, pad = q_pad_0, pad_type = q_pad_type_0, strides = var_14127, weight = up_blocks_3_attentions_2_transformer_blocks_0_attn2_to_q_weight_to_fp16, x = hidden_states_329_cast_fp16)[name = tensor("q_cast_fp16")]; tensor var_14133 = const()[name = tensor("op_14133"), val = tensor([1, 1])]; tensor var_14135 = const()[name = tensor("op_14135"), val = tensor([1, 1])]; tensor k_125_pad_type_0 = const()[name = tensor("k_125_pad_type_0"), val = tensor("custom")]; tensor k_125_pad_0 = const()[name = tensor("k_125_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_2_transformer_blocks_0_attn2_to_k_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_transformer_blocks_0_attn2_to_k_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1715233856)))]; tensor k_125_cast_fp16 = conv(dilations = var_14135, groups = var_10375, pad = k_125_pad_0, pad_type = k_125_pad_type_0, strides = var_14133, weight = up_blocks_3_attentions_2_transformer_blocks_0_attn2_to_k_weight_to_fp16, x = encoder_hidden_states)[name = tensor("k_125_cast_fp16")]; tensor var_14139 = const()[name = tensor("op_14139"), val = tensor([1, 1])]; tensor var_14141 = const()[name = tensor("op_14141"), val = tensor([1, 1])]; tensor v_pad_type_0 = const()[name = tensor("v_pad_type_0"), val = tensor("custom")]; tensor v_pad_0 = const()[name = tensor("v_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_2_transformer_blocks_0_attn2_to_v_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_transformer_blocks_0_attn2_to_v_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1715725440)))]; tensor v_cast_fp16 = conv(dilations = var_14141, groups = var_10375, pad = v_pad_0, pad_type = v_pad_type_0, strides = var_14139, weight = up_blocks_3_attentions_2_transformer_blocks_0_attn2_to_v_weight_to_fp16, x = encoder_hidden_states)[name = tensor("v_cast_fp16")]; tensor var_14145_begin_0 = const()[name = tensor("op_14145_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14145_end_0 = const()[name = tensor("op_14145_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_14145_end_mask_0 = const()[name = tensor("op_14145_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14145_cast_fp16 = slice_by_index(begin = var_14145_begin_0, end = var_14145_end_0, end_mask = var_14145_end_mask_0, x = q_cast_fp16)[name = tensor("op_14145_cast_fp16")]; tensor var_14149_begin_0 = const()[name = tensor("op_14149_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_14149_end_0 = const()[name = tensor("op_14149_end_0"), val = tensor([2, 80, 1, 4096])]; tensor var_14149_end_mask_0 = const()[name = tensor("op_14149_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14149_cast_fp16 = slice_by_index(begin = var_14149_begin_0, end = var_14149_end_0, end_mask = var_14149_end_mask_0, x = q_cast_fp16)[name = tensor("op_14149_cast_fp16")]; tensor var_14153_begin_0 = const()[name = tensor("op_14153_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_14153_end_0 = const()[name = tensor("op_14153_end_0"), val = tensor([2, 120, 1, 4096])]; tensor var_14153_end_mask_0 = const()[name = tensor("op_14153_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14153_cast_fp16 = slice_by_index(begin = var_14153_begin_0, end = var_14153_end_0, end_mask = var_14153_end_mask_0, x = q_cast_fp16)[name = tensor("op_14153_cast_fp16")]; tensor var_14157_begin_0 = const()[name = tensor("op_14157_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_14157_end_0 = const()[name = tensor("op_14157_end_0"), val = tensor([2, 160, 1, 4096])]; tensor var_14157_end_mask_0 = const()[name = tensor("op_14157_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14157_cast_fp16 = slice_by_index(begin = var_14157_begin_0, end = var_14157_end_0, end_mask = var_14157_end_mask_0, x = q_cast_fp16)[name = tensor("op_14157_cast_fp16")]; tensor var_14161_begin_0 = const()[name = tensor("op_14161_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_14161_end_0 = const()[name = tensor("op_14161_end_0"), val = tensor([2, 200, 1, 4096])]; tensor var_14161_end_mask_0 = const()[name = tensor("op_14161_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14161_cast_fp16 = slice_by_index(begin = var_14161_begin_0, end = var_14161_end_0, end_mask = var_14161_end_mask_0, x = q_cast_fp16)[name = tensor("op_14161_cast_fp16")]; tensor var_14165_begin_0 = const()[name = tensor("op_14165_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_14165_end_0 = const()[name = tensor("op_14165_end_0"), val = tensor([2, 240, 1, 4096])]; tensor var_14165_end_mask_0 = const()[name = tensor("op_14165_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14165_cast_fp16 = slice_by_index(begin = var_14165_begin_0, end = var_14165_end_0, end_mask = var_14165_end_mask_0, x = q_cast_fp16)[name = tensor("op_14165_cast_fp16")]; tensor var_14169_begin_0 = const()[name = tensor("op_14169_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_14169_end_0 = const()[name = tensor("op_14169_end_0"), val = tensor([2, 280, 1, 4096])]; tensor var_14169_end_mask_0 = const()[name = tensor("op_14169_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14169_cast_fp16 = slice_by_index(begin = var_14169_begin_0, end = var_14169_end_0, end_mask = var_14169_end_mask_0, x = q_cast_fp16)[name = tensor("op_14169_cast_fp16")]; tensor var_14173_begin_0 = const()[name = tensor("op_14173_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_14173_end_0 = const()[name = tensor("op_14173_end_0"), val = tensor([2, 320, 1, 4096])]; tensor var_14173_end_mask_0 = const()[name = tensor("op_14173_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14173_cast_fp16 = slice_by_index(begin = var_14173_begin_0, end = var_14173_end_0, end_mask = var_14173_end_mask_0, x = q_cast_fp16)[name = tensor("op_14173_cast_fp16")]; tensor var_14176_begin_0 = const()[name = tensor("op_14176_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14176_end_0 = const()[name = tensor("op_14176_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_14176_end_mask_0 = const()[name = tensor("op_14176_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14176_cast_fp16 = slice_by_index(begin = var_14176_begin_0, end = var_14176_end_0, end_mask = var_14176_end_mask_0, x = var_14145_cast_fp16)[name = tensor("op_14176_cast_fp16")]; tensor var_14177_begin_0 = const()[name = tensor("op_14177_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_14177_end_0 = const()[name = tensor("op_14177_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_14177_end_mask_0 = const()[name = tensor("op_14177_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14177_cast_fp16 = slice_by_index(begin = var_14177_begin_0, end = var_14177_end_0, end_mask = var_14177_end_mask_0, x = var_14145_cast_fp16)[name = tensor("op_14177_cast_fp16")]; tensor var_14178_begin_0 = const()[name = tensor("op_14178_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_14178_end_0 = const()[name = tensor("op_14178_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_14178_end_mask_0 = const()[name = tensor("op_14178_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14178_cast_fp16 = slice_by_index(begin = var_14178_begin_0, end = var_14178_end_0, end_mask = var_14178_end_mask_0, x = var_14145_cast_fp16)[name = tensor("op_14178_cast_fp16")]; tensor var_14179_begin_0 = const()[name = tensor("op_14179_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_14179_end_0 = const()[name = tensor("op_14179_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_14179_end_mask_0 = const()[name = tensor("op_14179_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14179_cast_fp16 = slice_by_index(begin = var_14179_begin_0, end = var_14179_end_0, end_mask = var_14179_end_mask_0, x = var_14145_cast_fp16)[name = tensor("op_14179_cast_fp16")]; tensor var_14180_begin_0 = const()[name = tensor("op_14180_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_14180_end_0 = const()[name = tensor("op_14180_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_14180_end_mask_0 = const()[name = tensor("op_14180_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14180_cast_fp16 = slice_by_index(begin = var_14180_begin_0, end = var_14180_end_0, end_mask = var_14180_end_mask_0, x = var_14145_cast_fp16)[name = tensor("op_14180_cast_fp16")]; tensor var_14181_begin_0 = const()[name = tensor("op_14181_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_14181_end_0 = const()[name = tensor("op_14181_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_14181_end_mask_0 = const()[name = tensor("op_14181_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14181_cast_fp16 = slice_by_index(begin = var_14181_begin_0, end = var_14181_end_0, end_mask = var_14181_end_mask_0, x = var_14145_cast_fp16)[name = tensor("op_14181_cast_fp16")]; tensor var_14182_begin_0 = const()[name = tensor("op_14182_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_14182_end_0 = const()[name = tensor("op_14182_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_14182_end_mask_0 = const()[name = tensor("op_14182_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14182_cast_fp16 = slice_by_index(begin = var_14182_begin_0, end = var_14182_end_0, end_mask = var_14182_end_mask_0, x = var_14145_cast_fp16)[name = tensor("op_14182_cast_fp16")]; tensor var_14183_begin_0 = const()[name = tensor("op_14183_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_14183_end_0 = const()[name = tensor("op_14183_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_14183_end_mask_0 = const()[name = tensor("op_14183_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14183_cast_fp16 = slice_by_index(begin = var_14183_begin_0, end = var_14183_end_0, end_mask = var_14183_end_mask_0, x = var_14145_cast_fp16)[name = tensor("op_14183_cast_fp16")]; tensor var_14184_begin_0 = const()[name = tensor("op_14184_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14184_end_0 = const()[name = tensor("op_14184_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_14184_end_mask_0 = const()[name = tensor("op_14184_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14184_cast_fp16 = slice_by_index(begin = var_14184_begin_0, end = var_14184_end_0, end_mask = var_14184_end_mask_0, x = var_14149_cast_fp16)[name = tensor("op_14184_cast_fp16")]; tensor var_14185_begin_0 = const()[name = tensor("op_14185_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_14185_end_0 = const()[name = tensor("op_14185_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_14185_end_mask_0 = const()[name = tensor("op_14185_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14185_cast_fp16 = slice_by_index(begin = var_14185_begin_0, end = var_14185_end_0, end_mask = var_14185_end_mask_0, x = var_14149_cast_fp16)[name = tensor("op_14185_cast_fp16")]; tensor var_14186_begin_0 = const()[name = tensor("op_14186_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_14186_end_0 = const()[name = tensor("op_14186_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_14186_end_mask_0 = const()[name = tensor("op_14186_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14186_cast_fp16 = slice_by_index(begin = var_14186_begin_0, end = var_14186_end_0, end_mask = var_14186_end_mask_0, x = var_14149_cast_fp16)[name = tensor("op_14186_cast_fp16")]; tensor var_14187_begin_0 = const()[name = tensor("op_14187_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_14187_end_0 = const()[name = tensor("op_14187_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_14187_end_mask_0 = const()[name = tensor("op_14187_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14187_cast_fp16 = slice_by_index(begin = var_14187_begin_0, end = var_14187_end_0, end_mask = var_14187_end_mask_0, x = var_14149_cast_fp16)[name = tensor("op_14187_cast_fp16")]; tensor var_14188_begin_0 = const()[name = tensor("op_14188_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_14188_end_0 = const()[name = tensor("op_14188_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_14188_end_mask_0 = const()[name = tensor("op_14188_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14188_cast_fp16 = slice_by_index(begin = var_14188_begin_0, end = var_14188_end_0, end_mask = var_14188_end_mask_0, x = var_14149_cast_fp16)[name = tensor("op_14188_cast_fp16")]; tensor var_14189_begin_0 = const()[name = tensor("op_14189_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_14189_end_0 = const()[name = tensor("op_14189_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_14189_end_mask_0 = const()[name = tensor("op_14189_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14189_cast_fp16 = slice_by_index(begin = var_14189_begin_0, end = var_14189_end_0, end_mask = var_14189_end_mask_0, x = var_14149_cast_fp16)[name = tensor("op_14189_cast_fp16")]; tensor var_14190_begin_0 = const()[name = tensor("op_14190_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_14190_end_0 = const()[name = tensor("op_14190_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_14190_end_mask_0 = const()[name = tensor("op_14190_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14190_cast_fp16 = slice_by_index(begin = var_14190_begin_0, end = var_14190_end_0, end_mask = var_14190_end_mask_0, x = var_14149_cast_fp16)[name = tensor("op_14190_cast_fp16")]; tensor var_14191_begin_0 = const()[name = tensor("op_14191_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_14191_end_0 = const()[name = tensor("op_14191_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_14191_end_mask_0 = const()[name = tensor("op_14191_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14191_cast_fp16 = slice_by_index(begin = var_14191_begin_0, end = var_14191_end_0, end_mask = var_14191_end_mask_0, x = var_14149_cast_fp16)[name = tensor("op_14191_cast_fp16")]; tensor var_14192_begin_0 = const()[name = tensor("op_14192_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14192_end_0 = const()[name = tensor("op_14192_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_14192_end_mask_0 = const()[name = tensor("op_14192_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14192_cast_fp16 = slice_by_index(begin = var_14192_begin_0, end = var_14192_end_0, end_mask = var_14192_end_mask_0, x = var_14153_cast_fp16)[name = tensor("op_14192_cast_fp16")]; tensor var_14193_begin_0 = const()[name = tensor("op_14193_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_14193_end_0 = const()[name = tensor("op_14193_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_14193_end_mask_0 = const()[name = tensor("op_14193_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14193_cast_fp16 = slice_by_index(begin = var_14193_begin_0, end = var_14193_end_0, end_mask = var_14193_end_mask_0, x = var_14153_cast_fp16)[name = tensor("op_14193_cast_fp16")]; tensor var_14194_begin_0 = const()[name = tensor("op_14194_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_14194_end_0 = const()[name = tensor("op_14194_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_14194_end_mask_0 = const()[name = tensor("op_14194_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14194_cast_fp16 = slice_by_index(begin = var_14194_begin_0, end = var_14194_end_0, end_mask = var_14194_end_mask_0, x = var_14153_cast_fp16)[name = tensor("op_14194_cast_fp16")]; tensor var_14195_begin_0 = const()[name = tensor("op_14195_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_14195_end_0 = const()[name = tensor("op_14195_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_14195_end_mask_0 = const()[name = tensor("op_14195_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14195_cast_fp16 = slice_by_index(begin = var_14195_begin_0, end = var_14195_end_0, end_mask = var_14195_end_mask_0, x = var_14153_cast_fp16)[name = tensor("op_14195_cast_fp16")]; tensor var_14196_begin_0 = const()[name = tensor("op_14196_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_14196_end_0 = const()[name = tensor("op_14196_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_14196_end_mask_0 = const()[name = tensor("op_14196_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14196_cast_fp16 = slice_by_index(begin = var_14196_begin_0, end = var_14196_end_0, end_mask = var_14196_end_mask_0, x = var_14153_cast_fp16)[name = tensor("op_14196_cast_fp16")]; tensor var_14197_begin_0 = const()[name = tensor("op_14197_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_14197_end_0 = const()[name = tensor("op_14197_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_14197_end_mask_0 = const()[name = tensor("op_14197_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14197_cast_fp16 = slice_by_index(begin = var_14197_begin_0, end = var_14197_end_0, end_mask = var_14197_end_mask_0, x = var_14153_cast_fp16)[name = tensor("op_14197_cast_fp16")]; tensor var_14198_begin_0 = const()[name = tensor("op_14198_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_14198_end_0 = const()[name = tensor("op_14198_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_14198_end_mask_0 = const()[name = tensor("op_14198_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14198_cast_fp16 = slice_by_index(begin = var_14198_begin_0, end = var_14198_end_0, end_mask = var_14198_end_mask_0, x = var_14153_cast_fp16)[name = tensor("op_14198_cast_fp16")]; tensor var_14199_begin_0 = const()[name = tensor("op_14199_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_14199_end_0 = const()[name = tensor("op_14199_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_14199_end_mask_0 = const()[name = tensor("op_14199_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14199_cast_fp16 = slice_by_index(begin = var_14199_begin_0, end = var_14199_end_0, end_mask = var_14199_end_mask_0, x = var_14153_cast_fp16)[name = tensor("op_14199_cast_fp16")]; tensor var_14200_begin_0 = const()[name = tensor("op_14200_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14200_end_0 = const()[name = tensor("op_14200_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_14200_end_mask_0 = const()[name = tensor("op_14200_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14200_cast_fp16 = slice_by_index(begin = var_14200_begin_0, end = var_14200_end_0, end_mask = var_14200_end_mask_0, x = var_14157_cast_fp16)[name = tensor("op_14200_cast_fp16")]; tensor var_14201_begin_0 = const()[name = tensor("op_14201_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_14201_end_0 = const()[name = tensor("op_14201_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_14201_end_mask_0 = const()[name = tensor("op_14201_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14201_cast_fp16 = slice_by_index(begin = var_14201_begin_0, end = var_14201_end_0, end_mask = var_14201_end_mask_0, x = var_14157_cast_fp16)[name = tensor("op_14201_cast_fp16")]; tensor var_14202_begin_0 = const()[name = tensor("op_14202_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_14202_end_0 = const()[name = tensor("op_14202_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_14202_end_mask_0 = const()[name = tensor("op_14202_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14202_cast_fp16 = slice_by_index(begin = var_14202_begin_0, end = var_14202_end_0, end_mask = var_14202_end_mask_0, x = var_14157_cast_fp16)[name = tensor("op_14202_cast_fp16")]; tensor var_14203_begin_0 = const()[name = tensor("op_14203_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_14203_end_0 = const()[name = tensor("op_14203_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_14203_end_mask_0 = const()[name = tensor("op_14203_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14203_cast_fp16 = slice_by_index(begin = var_14203_begin_0, end = var_14203_end_0, end_mask = var_14203_end_mask_0, x = var_14157_cast_fp16)[name = tensor("op_14203_cast_fp16")]; tensor var_14204_begin_0 = const()[name = tensor("op_14204_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_14204_end_0 = const()[name = tensor("op_14204_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_14204_end_mask_0 = const()[name = tensor("op_14204_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14204_cast_fp16 = slice_by_index(begin = var_14204_begin_0, end = var_14204_end_0, end_mask = var_14204_end_mask_0, x = var_14157_cast_fp16)[name = tensor("op_14204_cast_fp16")]; tensor var_14205_begin_0 = const()[name = tensor("op_14205_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_14205_end_0 = const()[name = tensor("op_14205_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_14205_end_mask_0 = const()[name = tensor("op_14205_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14205_cast_fp16 = slice_by_index(begin = var_14205_begin_0, end = var_14205_end_0, end_mask = var_14205_end_mask_0, x = var_14157_cast_fp16)[name = tensor("op_14205_cast_fp16")]; tensor var_14206_begin_0 = const()[name = tensor("op_14206_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_14206_end_0 = const()[name = tensor("op_14206_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_14206_end_mask_0 = const()[name = tensor("op_14206_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14206_cast_fp16 = slice_by_index(begin = var_14206_begin_0, end = var_14206_end_0, end_mask = var_14206_end_mask_0, x = var_14157_cast_fp16)[name = tensor("op_14206_cast_fp16")]; tensor var_14207_begin_0 = const()[name = tensor("op_14207_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_14207_end_0 = const()[name = tensor("op_14207_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_14207_end_mask_0 = const()[name = tensor("op_14207_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14207_cast_fp16 = slice_by_index(begin = var_14207_begin_0, end = var_14207_end_0, end_mask = var_14207_end_mask_0, x = var_14157_cast_fp16)[name = tensor("op_14207_cast_fp16")]; tensor var_14208_begin_0 = const()[name = tensor("op_14208_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14208_end_0 = const()[name = tensor("op_14208_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_14208_end_mask_0 = const()[name = tensor("op_14208_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14208_cast_fp16 = slice_by_index(begin = var_14208_begin_0, end = var_14208_end_0, end_mask = var_14208_end_mask_0, x = var_14161_cast_fp16)[name = tensor("op_14208_cast_fp16")]; tensor var_14209_begin_0 = const()[name = tensor("op_14209_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_14209_end_0 = const()[name = tensor("op_14209_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_14209_end_mask_0 = const()[name = tensor("op_14209_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14209_cast_fp16 = slice_by_index(begin = var_14209_begin_0, end = var_14209_end_0, end_mask = var_14209_end_mask_0, x = var_14161_cast_fp16)[name = tensor("op_14209_cast_fp16")]; tensor var_14210_begin_0 = const()[name = tensor("op_14210_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_14210_end_0 = const()[name = tensor("op_14210_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_14210_end_mask_0 = const()[name = tensor("op_14210_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14210_cast_fp16 = slice_by_index(begin = var_14210_begin_0, end = var_14210_end_0, end_mask = var_14210_end_mask_0, x = var_14161_cast_fp16)[name = tensor("op_14210_cast_fp16")]; tensor var_14211_begin_0 = const()[name = tensor("op_14211_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_14211_end_0 = const()[name = tensor("op_14211_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_14211_end_mask_0 = const()[name = tensor("op_14211_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14211_cast_fp16 = slice_by_index(begin = var_14211_begin_0, end = var_14211_end_0, end_mask = var_14211_end_mask_0, x = var_14161_cast_fp16)[name = tensor("op_14211_cast_fp16")]; tensor var_14212_begin_0 = const()[name = tensor("op_14212_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_14212_end_0 = const()[name = tensor("op_14212_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_14212_end_mask_0 = const()[name = tensor("op_14212_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14212_cast_fp16 = slice_by_index(begin = var_14212_begin_0, end = var_14212_end_0, end_mask = var_14212_end_mask_0, x = var_14161_cast_fp16)[name = tensor("op_14212_cast_fp16")]; tensor var_14213_begin_0 = const()[name = tensor("op_14213_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_14213_end_0 = const()[name = tensor("op_14213_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_14213_end_mask_0 = const()[name = tensor("op_14213_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14213_cast_fp16 = slice_by_index(begin = var_14213_begin_0, end = var_14213_end_0, end_mask = var_14213_end_mask_0, x = var_14161_cast_fp16)[name = tensor("op_14213_cast_fp16")]; tensor var_14214_begin_0 = const()[name = tensor("op_14214_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_14214_end_0 = const()[name = tensor("op_14214_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_14214_end_mask_0 = const()[name = tensor("op_14214_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14214_cast_fp16 = slice_by_index(begin = var_14214_begin_0, end = var_14214_end_0, end_mask = var_14214_end_mask_0, x = var_14161_cast_fp16)[name = tensor("op_14214_cast_fp16")]; tensor var_14215_begin_0 = const()[name = tensor("op_14215_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_14215_end_0 = const()[name = tensor("op_14215_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_14215_end_mask_0 = const()[name = tensor("op_14215_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14215_cast_fp16 = slice_by_index(begin = var_14215_begin_0, end = var_14215_end_0, end_mask = var_14215_end_mask_0, x = var_14161_cast_fp16)[name = tensor("op_14215_cast_fp16")]; tensor var_14216_begin_0 = const()[name = tensor("op_14216_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14216_end_0 = const()[name = tensor("op_14216_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_14216_end_mask_0 = const()[name = tensor("op_14216_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14216_cast_fp16 = slice_by_index(begin = var_14216_begin_0, end = var_14216_end_0, end_mask = var_14216_end_mask_0, x = var_14165_cast_fp16)[name = tensor("op_14216_cast_fp16")]; tensor var_14217_begin_0 = const()[name = tensor("op_14217_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_14217_end_0 = const()[name = tensor("op_14217_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_14217_end_mask_0 = const()[name = tensor("op_14217_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14217_cast_fp16 = slice_by_index(begin = var_14217_begin_0, end = var_14217_end_0, end_mask = var_14217_end_mask_0, x = var_14165_cast_fp16)[name = tensor("op_14217_cast_fp16")]; tensor var_14218_begin_0 = const()[name = tensor("op_14218_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_14218_end_0 = const()[name = tensor("op_14218_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_14218_end_mask_0 = const()[name = tensor("op_14218_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14218_cast_fp16 = slice_by_index(begin = var_14218_begin_0, end = var_14218_end_0, end_mask = var_14218_end_mask_0, x = var_14165_cast_fp16)[name = tensor("op_14218_cast_fp16")]; tensor var_14219_begin_0 = const()[name = tensor("op_14219_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_14219_end_0 = const()[name = tensor("op_14219_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_14219_end_mask_0 = const()[name = tensor("op_14219_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14219_cast_fp16 = slice_by_index(begin = var_14219_begin_0, end = var_14219_end_0, end_mask = var_14219_end_mask_0, x = var_14165_cast_fp16)[name = tensor("op_14219_cast_fp16")]; tensor var_14220_begin_0 = const()[name = tensor("op_14220_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_14220_end_0 = const()[name = tensor("op_14220_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_14220_end_mask_0 = const()[name = tensor("op_14220_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14220_cast_fp16 = slice_by_index(begin = var_14220_begin_0, end = var_14220_end_0, end_mask = var_14220_end_mask_0, x = var_14165_cast_fp16)[name = tensor("op_14220_cast_fp16")]; tensor var_14221_begin_0 = const()[name = tensor("op_14221_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_14221_end_0 = const()[name = tensor("op_14221_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_14221_end_mask_0 = const()[name = tensor("op_14221_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14221_cast_fp16 = slice_by_index(begin = var_14221_begin_0, end = var_14221_end_0, end_mask = var_14221_end_mask_0, x = var_14165_cast_fp16)[name = tensor("op_14221_cast_fp16")]; tensor var_14222_begin_0 = const()[name = tensor("op_14222_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_14222_end_0 = const()[name = tensor("op_14222_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_14222_end_mask_0 = const()[name = tensor("op_14222_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14222_cast_fp16 = slice_by_index(begin = var_14222_begin_0, end = var_14222_end_0, end_mask = var_14222_end_mask_0, x = var_14165_cast_fp16)[name = tensor("op_14222_cast_fp16")]; tensor var_14223_begin_0 = const()[name = tensor("op_14223_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_14223_end_0 = const()[name = tensor("op_14223_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_14223_end_mask_0 = const()[name = tensor("op_14223_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14223_cast_fp16 = slice_by_index(begin = var_14223_begin_0, end = var_14223_end_0, end_mask = var_14223_end_mask_0, x = var_14165_cast_fp16)[name = tensor("op_14223_cast_fp16")]; tensor var_14224_begin_0 = const()[name = tensor("op_14224_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14224_end_0 = const()[name = tensor("op_14224_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_14224_end_mask_0 = const()[name = tensor("op_14224_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14224_cast_fp16 = slice_by_index(begin = var_14224_begin_0, end = var_14224_end_0, end_mask = var_14224_end_mask_0, x = var_14169_cast_fp16)[name = tensor("op_14224_cast_fp16")]; tensor var_14225_begin_0 = const()[name = tensor("op_14225_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_14225_end_0 = const()[name = tensor("op_14225_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_14225_end_mask_0 = const()[name = tensor("op_14225_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14225_cast_fp16 = slice_by_index(begin = var_14225_begin_0, end = var_14225_end_0, end_mask = var_14225_end_mask_0, x = var_14169_cast_fp16)[name = tensor("op_14225_cast_fp16")]; tensor var_14226_begin_0 = const()[name = tensor("op_14226_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_14226_end_0 = const()[name = tensor("op_14226_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_14226_end_mask_0 = const()[name = tensor("op_14226_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14226_cast_fp16 = slice_by_index(begin = var_14226_begin_0, end = var_14226_end_0, end_mask = var_14226_end_mask_0, x = var_14169_cast_fp16)[name = tensor("op_14226_cast_fp16")]; tensor var_14227_begin_0 = const()[name = tensor("op_14227_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_14227_end_0 = const()[name = tensor("op_14227_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_14227_end_mask_0 = const()[name = tensor("op_14227_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14227_cast_fp16 = slice_by_index(begin = var_14227_begin_0, end = var_14227_end_0, end_mask = var_14227_end_mask_0, x = var_14169_cast_fp16)[name = tensor("op_14227_cast_fp16")]; tensor var_14228_begin_0 = const()[name = tensor("op_14228_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_14228_end_0 = const()[name = tensor("op_14228_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_14228_end_mask_0 = const()[name = tensor("op_14228_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14228_cast_fp16 = slice_by_index(begin = var_14228_begin_0, end = var_14228_end_0, end_mask = var_14228_end_mask_0, x = var_14169_cast_fp16)[name = tensor("op_14228_cast_fp16")]; tensor var_14229_begin_0 = const()[name = tensor("op_14229_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_14229_end_0 = const()[name = tensor("op_14229_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_14229_end_mask_0 = const()[name = tensor("op_14229_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14229_cast_fp16 = slice_by_index(begin = var_14229_begin_0, end = var_14229_end_0, end_mask = var_14229_end_mask_0, x = var_14169_cast_fp16)[name = tensor("op_14229_cast_fp16")]; tensor var_14230_begin_0 = const()[name = tensor("op_14230_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_14230_end_0 = const()[name = tensor("op_14230_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_14230_end_mask_0 = const()[name = tensor("op_14230_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14230_cast_fp16 = slice_by_index(begin = var_14230_begin_0, end = var_14230_end_0, end_mask = var_14230_end_mask_0, x = var_14169_cast_fp16)[name = tensor("op_14230_cast_fp16")]; tensor var_14231_begin_0 = const()[name = tensor("op_14231_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_14231_end_0 = const()[name = tensor("op_14231_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_14231_end_mask_0 = const()[name = tensor("op_14231_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14231_cast_fp16 = slice_by_index(begin = var_14231_begin_0, end = var_14231_end_0, end_mask = var_14231_end_mask_0, x = var_14169_cast_fp16)[name = tensor("op_14231_cast_fp16")]; tensor var_14232_begin_0 = const()[name = tensor("op_14232_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14232_end_0 = const()[name = tensor("op_14232_end_0"), val = tensor([2, 40, 1, 512])]; tensor var_14232_end_mask_0 = const()[name = tensor("op_14232_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14232_cast_fp16 = slice_by_index(begin = var_14232_begin_0, end = var_14232_end_0, end_mask = var_14232_end_mask_0, x = var_14173_cast_fp16)[name = tensor("op_14232_cast_fp16")]; tensor var_14233_begin_0 = const()[name = tensor("op_14233_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_14233_end_0 = const()[name = tensor("op_14233_end_0"), val = tensor([2, 40, 1, 1024])]; tensor var_14233_end_mask_0 = const()[name = tensor("op_14233_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14233_cast_fp16 = slice_by_index(begin = var_14233_begin_0, end = var_14233_end_0, end_mask = var_14233_end_mask_0, x = var_14173_cast_fp16)[name = tensor("op_14233_cast_fp16")]; tensor var_14234_begin_0 = const()[name = tensor("op_14234_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_14234_end_0 = const()[name = tensor("op_14234_end_0"), val = tensor([2, 40, 1, 1536])]; tensor var_14234_end_mask_0 = const()[name = tensor("op_14234_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14234_cast_fp16 = slice_by_index(begin = var_14234_begin_0, end = var_14234_end_0, end_mask = var_14234_end_mask_0, x = var_14173_cast_fp16)[name = tensor("op_14234_cast_fp16")]; tensor var_14235_begin_0 = const()[name = tensor("op_14235_begin_0"), val = tensor([0, 0, 0, 1536])]; tensor var_14235_end_0 = const()[name = tensor("op_14235_end_0"), val = tensor([2, 40, 1, 2048])]; tensor var_14235_end_mask_0 = const()[name = tensor("op_14235_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14235_cast_fp16 = slice_by_index(begin = var_14235_begin_0, end = var_14235_end_0, end_mask = var_14235_end_mask_0, x = var_14173_cast_fp16)[name = tensor("op_14235_cast_fp16")]; tensor var_14236_begin_0 = const()[name = tensor("op_14236_begin_0"), val = tensor([0, 0, 0, 2048])]; tensor var_14236_end_0 = const()[name = tensor("op_14236_end_0"), val = tensor([2, 40, 1, 2560])]; tensor var_14236_end_mask_0 = const()[name = tensor("op_14236_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14236_cast_fp16 = slice_by_index(begin = var_14236_begin_0, end = var_14236_end_0, end_mask = var_14236_end_mask_0, x = var_14173_cast_fp16)[name = tensor("op_14236_cast_fp16")]; tensor var_14237_begin_0 = const()[name = tensor("op_14237_begin_0"), val = tensor([0, 0, 0, 2560])]; tensor var_14237_end_0 = const()[name = tensor("op_14237_end_0"), val = tensor([2, 40, 1, 3072])]; tensor var_14237_end_mask_0 = const()[name = tensor("op_14237_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14237_cast_fp16 = slice_by_index(begin = var_14237_begin_0, end = var_14237_end_0, end_mask = var_14237_end_mask_0, x = var_14173_cast_fp16)[name = tensor("op_14237_cast_fp16")]; tensor var_14238_begin_0 = const()[name = tensor("op_14238_begin_0"), val = tensor([0, 0, 0, 3072])]; tensor var_14238_end_0 = const()[name = tensor("op_14238_end_0"), val = tensor([2, 40, 1, 3584])]; tensor var_14238_end_mask_0 = const()[name = tensor("op_14238_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14238_cast_fp16 = slice_by_index(begin = var_14238_begin_0, end = var_14238_end_0, end_mask = var_14238_end_mask_0, x = var_14173_cast_fp16)[name = tensor("op_14238_cast_fp16")]; tensor var_14239_begin_0 = const()[name = tensor("op_14239_begin_0"), val = tensor([0, 0, 0, 3584])]; tensor var_14239_end_0 = const()[name = tensor("op_14239_end_0"), val = tensor([2, 40, 1, 4096])]; tensor var_14239_end_mask_0 = const()[name = tensor("op_14239_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14239_cast_fp16 = slice_by_index(begin = var_14239_begin_0, end = var_14239_end_0, end_mask = var_14239_end_mask_0, x = var_14173_cast_fp16)[name = tensor("op_14239_cast_fp16")]; tensor k_perm_0 = const()[name = tensor("k_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_14244_begin_0 = const()[name = tensor("op_14244_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14244_end_0 = const()[name = tensor("op_14244_end_0"), val = tensor([2, 77, 1, 40])]; tensor var_14244_end_mask_0 = const()[name = tensor("op_14244_end_mask_0"), val = tensor([true, true, true, false])]; tensor transpose_0 = transpose(perm = k_perm_0, x = k_125_cast_fp16)[name = tensor("transpose_0")]; tensor var_14244_cast_fp16 = slice_by_index(begin = var_14244_begin_0, end = var_14244_end_0, end_mask = var_14244_end_mask_0, x = transpose_0)[name = tensor("op_14244_cast_fp16")]; tensor var_14248_begin_0 = const()[name = tensor("op_14248_begin_0"), val = tensor([0, 0, 0, 40])]; tensor var_14248_end_0 = const()[name = tensor("op_14248_end_0"), val = tensor([2, 77, 1, 80])]; tensor var_14248_end_mask_0 = const()[name = tensor("op_14248_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14248_cast_fp16 = slice_by_index(begin = var_14248_begin_0, end = var_14248_end_0, end_mask = var_14248_end_mask_0, x = transpose_0)[name = tensor("op_14248_cast_fp16")]; tensor var_14252_begin_0 = const()[name = tensor("op_14252_begin_0"), val = tensor([0, 0, 0, 80])]; tensor var_14252_end_0 = const()[name = tensor("op_14252_end_0"), val = tensor([2, 77, 1, 120])]; tensor var_14252_end_mask_0 = const()[name = tensor("op_14252_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14252_cast_fp16 = slice_by_index(begin = var_14252_begin_0, end = var_14252_end_0, end_mask = var_14252_end_mask_0, x = transpose_0)[name = tensor("op_14252_cast_fp16")]; tensor var_14256_begin_0 = const()[name = tensor("op_14256_begin_0"), val = tensor([0, 0, 0, 120])]; tensor var_14256_end_0 = const()[name = tensor("op_14256_end_0"), val = tensor([2, 77, 1, 160])]; tensor var_14256_end_mask_0 = const()[name = tensor("op_14256_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14256_cast_fp16 = slice_by_index(begin = var_14256_begin_0, end = var_14256_end_0, end_mask = var_14256_end_mask_0, x = transpose_0)[name = tensor("op_14256_cast_fp16")]; tensor var_14260_begin_0 = const()[name = tensor("op_14260_begin_0"), val = tensor([0, 0, 0, 160])]; tensor var_14260_end_0 = const()[name = tensor("op_14260_end_0"), val = tensor([2, 77, 1, 200])]; tensor var_14260_end_mask_0 = const()[name = tensor("op_14260_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14260_cast_fp16 = slice_by_index(begin = var_14260_begin_0, end = var_14260_end_0, end_mask = var_14260_end_mask_0, x = transpose_0)[name = tensor("op_14260_cast_fp16")]; tensor var_14264_begin_0 = const()[name = tensor("op_14264_begin_0"), val = tensor([0, 0, 0, 200])]; tensor var_14264_end_0 = const()[name = tensor("op_14264_end_0"), val = tensor([2, 77, 1, 240])]; tensor var_14264_end_mask_0 = const()[name = tensor("op_14264_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14264_cast_fp16 = slice_by_index(begin = var_14264_begin_0, end = var_14264_end_0, end_mask = var_14264_end_mask_0, x = transpose_0)[name = tensor("op_14264_cast_fp16")]; tensor var_14268_begin_0 = const()[name = tensor("op_14268_begin_0"), val = tensor([0, 0, 0, 240])]; tensor var_14268_end_0 = const()[name = tensor("op_14268_end_0"), val = tensor([2, 77, 1, 280])]; tensor var_14268_end_mask_0 = const()[name = tensor("op_14268_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14268_cast_fp16 = slice_by_index(begin = var_14268_begin_0, end = var_14268_end_0, end_mask = var_14268_end_mask_0, x = transpose_0)[name = tensor("op_14268_cast_fp16")]; tensor var_14272_begin_0 = const()[name = tensor("op_14272_begin_0"), val = tensor([0, 0, 0, 280])]; tensor var_14272_end_0 = const()[name = tensor("op_14272_end_0"), val = tensor([2, 77, 1, 320])]; tensor var_14272_end_mask_0 = const()[name = tensor("op_14272_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14272_cast_fp16 = slice_by_index(begin = var_14272_begin_0, end = var_14272_end_0, end_mask = var_14272_end_mask_0, x = transpose_0)[name = tensor("op_14272_cast_fp16")]; tensor var_14274_begin_0 = const()[name = tensor("op_14274_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14274_end_0 = const()[name = tensor("op_14274_end_0"), val = tensor([2, 40, 1, 77])]; tensor var_14274_end_mask_0 = const()[name = tensor("op_14274_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14274_cast_fp16 = slice_by_index(begin = var_14274_begin_0, end = var_14274_end_0, end_mask = var_14274_end_mask_0, x = v_cast_fp16)[name = tensor("op_14274_cast_fp16")]; tensor var_14278_begin_0 = const()[name = tensor("op_14278_begin_0"), val = tensor([0, 40, 0, 0])]; tensor var_14278_end_0 = const()[name = tensor("op_14278_end_0"), val = tensor([2, 80, 1, 77])]; tensor var_14278_end_mask_0 = const()[name = tensor("op_14278_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14278_cast_fp16 = slice_by_index(begin = var_14278_begin_0, end = var_14278_end_0, end_mask = var_14278_end_mask_0, x = v_cast_fp16)[name = tensor("op_14278_cast_fp16")]; tensor var_14282_begin_0 = const()[name = tensor("op_14282_begin_0"), val = tensor([0, 80, 0, 0])]; tensor var_14282_end_0 = const()[name = tensor("op_14282_end_0"), val = tensor([2, 120, 1, 77])]; tensor var_14282_end_mask_0 = const()[name = tensor("op_14282_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14282_cast_fp16 = slice_by_index(begin = var_14282_begin_0, end = var_14282_end_0, end_mask = var_14282_end_mask_0, x = v_cast_fp16)[name = tensor("op_14282_cast_fp16")]; tensor var_14286_begin_0 = const()[name = tensor("op_14286_begin_0"), val = tensor([0, 120, 0, 0])]; tensor var_14286_end_0 = const()[name = tensor("op_14286_end_0"), val = tensor([2, 160, 1, 77])]; tensor var_14286_end_mask_0 = const()[name = tensor("op_14286_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14286_cast_fp16 = slice_by_index(begin = var_14286_begin_0, end = var_14286_end_0, end_mask = var_14286_end_mask_0, x = v_cast_fp16)[name = tensor("op_14286_cast_fp16")]; tensor var_14290_begin_0 = const()[name = tensor("op_14290_begin_0"), val = tensor([0, 160, 0, 0])]; tensor var_14290_end_0 = const()[name = tensor("op_14290_end_0"), val = tensor([2, 200, 1, 77])]; tensor var_14290_end_mask_0 = const()[name = tensor("op_14290_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14290_cast_fp16 = slice_by_index(begin = var_14290_begin_0, end = var_14290_end_0, end_mask = var_14290_end_mask_0, x = v_cast_fp16)[name = tensor("op_14290_cast_fp16")]; tensor var_14294_begin_0 = const()[name = tensor("op_14294_begin_0"), val = tensor([0, 200, 0, 0])]; tensor var_14294_end_0 = const()[name = tensor("op_14294_end_0"), val = tensor([2, 240, 1, 77])]; tensor var_14294_end_mask_0 = const()[name = tensor("op_14294_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14294_cast_fp16 = slice_by_index(begin = var_14294_begin_0, end = var_14294_end_0, end_mask = var_14294_end_mask_0, x = v_cast_fp16)[name = tensor("op_14294_cast_fp16")]; tensor var_14298_begin_0 = const()[name = tensor("op_14298_begin_0"), val = tensor([0, 240, 0, 0])]; tensor var_14298_end_0 = const()[name = tensor("op_14298_end_0"), val = tensor([2, 280, 1, 77])]; tensor var_14298_end_mask_0 = const()[name = tensor("op_14298_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14298_cast_fp16 = slice_by_index(begin = var_14298_begin_0, end = var_14298_end_0, end_mask = var_14298_end_mask_0, x = v_cast_fp16)[name = tensor("op_14298_cast_fp16")]; tensor var_14302_begin_0 = const()[name = tensor("op_14302_begin_0"), val = tensor([0, 280, 0, 0])]; tensor var_14302_end_0 = const()[name = tensor("op_14302_end_0"), val = tensor([2, 320, 1, 77])]; tensor var_14302_end_mask_0 = const()[name = tensor("op_14302_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14302_cast_fp16 = slice_by_index(begin = var_14302_begin_0, end = var_14302_end_0, end_mask = var_14302_end_mask_0, x = v_cast_fp16)[name = tensor("op_14302_cast_fp16")]; tensor var_14306_equation_0 = const()[name = tensor("op_14306_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14306_cast_fp16 = einsum(equation = var_14306_equation_0, values = (var_14244_cast_fp16, var_14176_cast_fp16))[name = tensor("op_14306_cast_fp16")]; tensor var_14307_to_fp16 = const()[name = tensor("op_14307_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1473_cast_fp16 = mul(x = var_14306_cast_fp16, y = var_14307_to_fp16)[name = tensor("aw_chunk_1473_cast_fp16")]; tensor var_14310_equation_0 = const()[name = tensor("op_14310_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14310_cast_fp16 = einsum(equation = var_14310_equation_0, values = (var_14244_cast_fp16, var_14177_cast_fp16))[name = tensor("op_14310_cast_fp16")]; tensor var_14311_to_fp16 = const()[name = tensor("op_14311_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1475_cast_fp16 = mul(x = var_14310_cast_fp16, y = var_14311_to_fp16)[name = tensor("aw_chunk_1475_cast_fp16")]; tensor var_14314_equation_0 = const()[name = tensor("op_14314_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14314_cast_fp16 = einsum(equation = var_14314_equation_0, values = (var_14244_cast_fp16, var_14178_cast_fp16))[name = tensor("op_14314_cast_fp16")]; tensor var_14315_to_fp16 = const()[name = tensor("op_14315_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1477_cast_fp16 = mul(x = var_14314_cast_fp16, y = var_14315_to_fp16)[name = tensor("aw_chunk_1477_cast_fp16")]; tensor var_14318_equation_0 = const()[name = tensor("op_14318_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14318_cast_fp16 = einsum(equation = var_14318_equation_0, values = (var_14244_cast_fp16, var_14179_cast_fp16))[name = tensor("op_14318_cast_fp16")]; tensor var_14319_to_fp16 = const()[name = tensor("op_14319_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1479_cast_fp16 = mul(x = var_14318_cast_fp16, y = var_14319_to_fp16)[name = tensor("aw_chunk_1479_cast_fp16")]; tensor var_14322_equation_0 = const()[name = tensor("op_14322_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14322_cast_fp16 = einsum(equation = var_14322_equation_0, values = (var_14244_cast_fp16, var_14180_cast_fp16))[name = tensor("op_14322_cast_fp16")]; tensor var_14323_to_fp16 = const()[name = tensor("op_14323_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1481_cast_fp16 = mul(x = var_14322_cast_fp16, y = var_14323_to_fp16)[name = tensor("aw_chunk_1481_cast_fp16")]; tensor var_14326_equation_0 = const()[name = tensor("op_14326_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14326_cast_fp16 = einsum(equation = var_14326_equation_0, values = (var_14244_cast_fp16, var_14181_cast_fp16))[name = tensor("op_14326_cast_fp16")]; tensor var_14327_to_fp16 = const()[name = tensor("op_14327_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1483_cast_fp16 = mul(x = var_14326_cast_fp16, y = var_14327_to_fp16)[name = tensor("aw_chunk_1483_cast_fp16")]; tensor var_14330_equation_0 = const()[name = tensor("op_14330_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14330_cast_fp16 = einsum(equation = var_14330_equation_0, values = (var_14244_cast_fp16, var_14182_cast_fp16))[name = tensor("op_14330_cast_fp16")]; tensor var_14331_to_fp16 = const()[name = tensor("op_14331_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1485_cast_fp16 = mul(x = var_14330_cast_fp16, y = var_14331_to_fp16)[name = tensor("aw_chunk_1485_cast_fp16")]; tensor var_14334_equation_0 = const()[name = tensor("op_14334_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14334_cast_fp16 = einsum(equation = var_14334_equation_0, values = (var_14244_cast_fp16, var_14183_cast_fp16))[name = tensor("op_14334_cast_fp16")]; tensor var_14335_to_fp16 = const()[name = tensor("op_14335_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1487_cast_fp16 = mul(x = var_14334_cast_fp16, y = var_14335_to_fp16)[name = tensor("aw_chunk_1487_cast_fp16")]; tensor var_14338_equation_0 = const()[name = tensor("op_14338_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14338_cast_fp16 = einsum(equation = var_14338_equation_0, values = (var_14248_cast_fp16, var_14184_cast_fp16))[name = tensor("op_14338_cast_fp16")]; tensor var_14339_to_fp16 = const()[name = tensor("op_14339_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1489_cast_fp16 = mul(x = var_14338_cast_fp16, y = var_14339_to_fp16)[name = tensor("aw_chunk_1489_cast_fp16")]; tensor var_14342_equation_0 = const()[name = tensor("op_14342_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14342_cast_fp16 = einsum(equation = var_14342_equation_0, values = (var_14248_cast_fp16, var_14185_cast_fp16))[name = tensor("op_14342_cast_fp16")]; tensor var_14343_to_fp16 = const()[name = tensor("op_14343_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1491_cast_fp16 = mul(x = var_14342_cast_fp16, y = var_14343_to_fp16)[name = tensor("aw_chunk_1491_cast_fp16")]; tensor var_14346_equation_0 = const()[name = tensor("op_14346_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14346_cast_fp16 = einsum(equation = var_14346_equation_0, values = (var_14248_cast_fp16, var_14186_cast_fp16))[name = tensor("op_14346_cast_fp16")]; tensor var_14347_to_fp16 = const()[name = tensor("op_14347_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1493_cast_fp16 = mul(x = var_14346_cast_fp16, y = var_14347_to_fp16)[name = tensor("aw_chunk_1493_cast_fp16")]; tensor var_14350_equation_0 = const()[name = tensor("op_14350_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14350_cast_fp16 = einsum(equation = var_14350_equation_0, values = (var_14248_cast_fp16, var_14187_cast_fp16))[name = tensor("op_14350_cast_fp16")]; tensor var_14351_to_fp16 = const()[name = tensor("op_14351_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1495_cast_fp16 = mul(x = var_14350_cast_fp16, y = var_14351_to_fp16)[name = tensor("aw_chunk_1495_cast_fp16")]; tensor var_14354_equation_0 = const()[name = tensor("op_14354_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14354_cast_fp16 = einsum(equation = var_14354_equation_0, values = (var_14248_cast_fp16, var_14188_cast_fp16))[name = tensor("op_14354_cast_fp16")]; tensor var_14355_to_fp16 = const()[name = tensor("op_14355_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1497_cast_fp16 = mul(x = var_14354_cast_fp16, y = var_14355_to_fp16)[name = tensor("aw_chunk_1497_cast_fp16")]; tensor var_14358_equation_0 = const()[name = tensor("op_14358_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14358_cast_fp16 = einsum(equation = var_14358_equation_0, values = (var_14248_cast_fp16, var_14189_cast_fp16))[name = tensor("op_14358_cast_fp16")]; tensor var_14359_to_fp16 = const()[name = tensor("op_14359_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1499_cast_fp16 = mul(x = var_14358_cast_fp16, y = var_14359_to_fp16)[name = tensor("aw_chunk_1499_cast_fp16")]; tensor var_14362_equation_0 = const()[name = tensor("op_14362_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14362_cast_fp16 = einsum(equation = var_14362_equation_0, values = (var_14248_cast_fp16, var_14190_cast_fp16))[name = tensor("op_14362_cast_fp16")]; tensor var_14363_to_fp16 = const()[name = tensor("op_14363_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1501_cast_fp16 = mul(x = var_14362_cast_fp16, y = var_14363_to_fp16)[name = tensor("aw_chunk_1501_cast_fp16")]; tensor var_14366_equation_0 = const()[name = tensor("op_14366_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14366_cast_fp16 = einsum(equation = var_14366_equation_0, values = (var_14248_cast_fp16, var_14191_cast_fp16))[name = tensor("op_14366_cast_fp16")]; tensor var_14367_to_fp16 = const()[name = tensor("op_14367_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1503_cast_fp16 = mul(x = var_14366_cast_fp16, y = var_14367_to_fp16)[name = tensor("aw_chunk_1503_cast_fp16")]; tensor var_14370_equation_0 = const()[name = tensor("op_14370_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14370_cast_fp16 = einsum(equation = var_14370_equation_0, values = (var_14252_cast_fp16, var_14192_cast_fp16))[name = tensor("op_14370_cast_fp16")]; tensor var_14371_to_fp16 = const()[name = tensor("op_14371_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1505_cast_fp16 = mul(x = var_14370_cast_fp16, y = var_14371_to_fp16)[name = tensor("aw_chunk_1505_cast_fp16")]; tensor var_14374_equation_0 = const()[name = tensor("op_14374_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14374_cast_fp16 = einsum(equation = var_14374_equation_0, values = (var_14252_cast_fp16, var_14193_cast_fp16))[name = tensor("op_14374_cast_fp16")]; tensor var_14375_to_fp16 = const()[name = tensor("op_14375_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1507_cast_fp16 = mul(x = var_14374_cast_fp16, y = var_14375_to_fp16)[name = tensor("aw_chunk_1507_cast_fp16")]; tensor var_14378_equation_0 = const()[name = tensor("op_14378_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14378_cast_fp16 = einsum(equation = var_14378_equation_0, values = (var_14252_cast_fp16, var_14194_cast_fp16))[name = tensor("op_14378_cast_fp16")]; tensor var_14379_to_fp16 = const()[name = tensor("op_14379_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1509_cast_fp16 = mul(x = var_14378_cast_fp16, y = var_14379_to_fp16)[name = tensor("aw_chunk_1509_cast_fp16")]; tensor var_14382_equation_0 = const()[name = tensor("op_14382_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14382_cast_fp16 = einsum(equation = var_14382_equation_0, values = (var_14252_cast_fp16, var_14195_cast_fp16))[name = tensor("op_14382_cast_fp16")]; tensor var_14383_to_fp16 = const()[name = tensor("op_14383_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1511_cast_fp16 = mul(x = var_14382_cast_fp16, y = var_14383_to_fp16)[name = tensor("aw_chunk_1511_cast_fp16")]; tensor var_14386_equation_0 = const()[name = tensor("op_14386_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14386_cast_fp16 = einsum(equation = var_14386_equation_0, values = (var_14252_cast_fp16, var_14196_cast_fp16))[name = tensor("op_14386_cast_fp16")]; tensor var_14387_to_fp16 = const()[name = tensor("op_14387_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1513_cast_fp16 = mul(x = var_14386_cast_fp16, y = var_14387_to_fp16)[name = tensor("aw_chunk_1513_cast_fp16")]; tensor var_14390_equation_0 = const()[name = tensor("op_14390_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14390_cast_fp16 = einsum(equation = var_14390_equation_0, values = (var_14252_cast_fp16, var_14197_cast_fp16))[name = tensor("op_14390_cast_fp16")]; tensor var_14391_to_fp16 = const()[name = tensor("op_14391_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1515_cast_fp16 = mul(x = var_14390_cast_fp16, y = var_14391_to_fp16)[name = tensor("aw_chunk_1515_cast_fp16")]; tensor var_14394_equation_0 = const()[name = tensor("op_14394_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14394_cast_fp16 = einsum(equation = var_14394_equation_0, values = (var_14252_cast_fp16, var_14198_cast_fp16))[name = tensor("op_14394_cast_fp16")]; tensor var_14395_to_fp16 = const()[name = tensor("op_14395_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1517_cast_fp16 = mul(x = var_14394_cast_fp16, y = var_14395_to_fp16)[name = tensor("aw_chunk_1517_cast_fp16")]; tensor var_14398_equation_0 = const()[name = tensor("op_14398_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14398_cast_fp16 = einsum(equation = var_14398_equation_0, values = (var_14252_cast_fp16, var_14199_cast_fp16))[name = tensor("op_14398_cast_fp16")]; tensor var_14399_to_fp16 = const()[name = tensor("op_14399_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1519_cast_fp16 = mul(x = var_14398_cast_fp16, y = var_14399_to_fp16)[name = tensor("aw_chunk_1519_cast_fp16")]; tensor var_14402_equation_0 = const()[name = tensor("op_14402_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14402_cast_fp16 = einsum(equation = var_14402_equation_0, values = (var_14256_cast_fp16, var_14200_cast_fp16))[name = tensor("op_14402_cast_fp16")]; tensor var_14403_to_fp16 = const()[name = tensor("op_14403_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1521_cast_fp16 = mul(x = var_14402_cast_fp16, y = var_14403_to_fp16)[name = tensor("aw_chunk_1521_cast_fp16")]; tensor var_14406_equation_0 = const()[name = tensor("op_14406_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14406_cast_fp16 = einsum(equation = var_14406_equation_0, values = (var_14256_cast_fp16, var_14201_cast_fp16))[name = tensor("op_14406_cast_fp16")]; tensor var_14407_to_fp16 = const()[name = tensor("op_14407_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1523_cast_fp16 = mul(x = var_14406_cast_fp16, y = var_14407_to_fp16)[name = tensor("aw_chunk_1523_cast_fp16")]; tensor var_14410_equation_0 = const()[name = tensor("op_14410_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14410_cast_fp16 = einsum(equation = var_14410_equation_0, values = (var_14256_cast_fp16, var_14202_cast_fp16))[name = tensor("op_14410_cast_fp16")]; tensor var_14411_to_fp16 = const()[name = tensor("op_14411_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1525_cast_fp16 = mul(x = var_14410_cast_fp16, y = var_14411_to_fp16)[name = tensor("aw_chunk_1525_cast_fp16")]; tensor var_14414_equation_0 = const()[name = tensor("op_14414_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14414_cast_fp16 = einsum(equation = var_14414_equation_0, values = (var_14256_cast_fp16, var_14203_cast_fp16))[name = tensor("op_14414_cast_fp16")]; tensor var_14415_to_fp16 = const()[name = tensor("op_14415_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1527_cast_fp16 = mul(x = var_14414_cast_fp16, y = var_14415_to_fp16)[name = tensor("aw_chunk_1527_cast_fp16")]; tensor var_14418_equation_0 = const()[name = tensor("op_14418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14418_cast_fp16 = einsum(equation = var_14418_equation_0, values = (var_14256_cast_fp16, var_14204_cast_fp16))[name = tensor("op_14418_cast_fp16")]; tensor var_14419_to_fp16 = const()[name = tensor("op_14419_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1529_cast_fp16 = mul(x = var_14418_cast_fp16, y = var_14419_to_fp16)[name = tensor("aw_chunk_1529_cast_fp16")]; tensor var_14422_equation_0 = const()[name = tensor("op_14422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14422_cast_fp16 = einsum(equation = var_14422_equation_0, values = (var_14256_cast_fp16, var_14205_cast_fp16))[name = tensor("op_14422_cast_fp16")]; tensor var_14423_to_fp16 = const()[name = tensor("op_14423_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1531_cast_fp16 = mul(x = var_14422_cast_fp16, y = var_14423_to_fp16)[name = tensor("aw_chunk_1531_cast_fp16")]; tensor var_14426_equation_0 = const()[name = tensor("op_14426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14426_cast_fp16 = einsum(equation = var_14426_equation_0, values = (var_14256_cast_fp16, var_14206_cast_fp16))[name = tensor("op_14426_cast_fp16")]; tensor var_14427_to_fp16 = const()[name = tensor("op_14427_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1533_cast_fp16 = mul(x = var_14426_cast_fp16, y = var_14427_to_fp16)[name = tensor("aw_chunk_1533_cast_fp16")]; tensor var_14430_equation_0 = const()[name = tensor("op_14430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14430_cast_fp16 = einsum(equation = var_14430_equation_0, values = (var_14256_cast_fp16, var_14207_cast_fp16))[name = tensor("op_14430_cast_fp16")]; tensor var_14431_to_fp16 = const()[name = tensor("op_14431_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1535_cast_fp16 = mul(x = var_14430_cast_fp16, y = var_14431_to_fp16)[name = tensor("aw_chunk_1535_cast_fp16")]; tensor var_14434_equation_0 = const()[name = tensor("op_14434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14434_cast_fp16 = einsum(equation = var_14434_equation_0, values = (var_14260_cast_fp16, var_14208_cast_fp16))[name = tensor("op_14434_cast_fp16")]; tensor var_14435_to_fp16 = const()[name = tensor("op_14435_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1537_cast_fp16 = mul(x = var_14434_cast_fp16, y = var_14435_to_fp16)[name = tensor("aw_chunk_1537_cast_fp16")]; tensor var_14438_equation_0 = const()[name = tensor("op_14438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14438_cast_fp16 = einsum(equation = var_14438_equation_0, values = (var_14260_cast_fp16, var_14209_cast_fp16))[name = tensor("op_14438_cast_fp16")]; tensor var_14439_to_fp16 = const()[name = tensor("op_14439_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1539_cast_fp16 = mul(x = var_14438_cast_fp16, y = var_14439_to_fp16)[name = tensor("aw_chunk_1539_cast_fp16")]; tensor var_14442_equation_0 = const()[name = tensor("op_14442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14442_cast_fp16 = einsum(equation = var_14442_equation_0, values = (var_14260_cast_fp16, var_14210_cast_fp16))[name = tensor("op_14442_cast_fp16")]; tensor var_14443_to_fp16 = const()[name = tensor("op_14443_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1541_cast_fp16 = mul(x = var_14442_cast_fp16, y = var_14443_to_fp16)[name = tensor("aw_chunk_1541_cast_fp16")]; tensor var_14446_equation_0 = const()[name = tensor("op_14446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14446_cast_fp16 = einsum(equation = var_14446_equation_0, values = (var_14260_cast_fp16, var_14211_cast_fp16))[name = tensor("op_14446_cast_fp16")]; tensor var_14447_to_fp16 = const()[name = tensor("op_14447_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1543_cast_fp16 = mul(x = var_14446_cast_fp16, y = var_14447_to_fp16)[name = tensor("aw_chunk_1543_cast_fp16")]; tensor var_14450_equation_0 = const()[name = tensor("op_14450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14450_cast_fp16 = einsum(equation = var_14450_equation_0, values = (var_14260_cast_fp16, var_14212_cast_fp16))[name = tensor("op_14450_cast_fp16")]; tensor var_14451_to_fp16 = const()[name = tensor("op_14451_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1545_cast_fp16 = mul(x = var_14450_cast_fp16, y = var_14451_to_fp16)[name = tensor("aw_chunk_1545_cast_fp16")]; tensor var_14454_equation_0 = const()[name = tensor("op_14454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14454_cast_fp16 = einsum(equation = var_14454_equation_0, values = (var_14260_cast_fp16, var_14213_cast_fp16))[name = tensor("op_14454_cast_fp16")]; tensor var_14455_to_fp16 = const()[name = tensor("op_14455_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1547_cast_fp16 = mul(x = var_14454_cast_fp16, y = var_14455_to_fp16)[name = tensor("aw_chunk_1547_cast_fp16")]; tensor var_14458_equation_0 = const()[name = tensor("op_14458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14458_cast_fp16 = einsum(equation = var_14458_equation_0, values = (var_14260_cast_fp16, var_14214_cast_fp16))[name = tensor("op_14458_cast_fp16")]; tensor var_14459_to_fp16 = const()[name = tensor("op_14459_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1549_cast_fp16 = mul(x = var_14458_cast_fp16, y = var_14459_to_fp16)[name = tensor("aw_chunk_1549_cast_fp16")]; tensor var_14462_equation_0 = const()[name = tensor("op_14462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14462_cast_fp16 = einsum(equation = var_14462_equation_0, values = (var_14260_cast_fp16, var_14215_cast_fp16))[name = tensor("op_14462_cast_fp16")]; tensor var_14463_to_fp16 = const()[name = tensor("op_14463_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1551_cast_fp16 = mul(x = var_14462_cast_fp16, y = var_14463_to_fp16)[name = tensor("aw_chunk_1551_cast_fp16")]; tensor var_14466_equation_0 = const()[name = tensor("op_14466_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14466_cast_fp16 = einsum(equation = var_14466_equation_0, values = (var_14264_cast_fp16, var_14216_cast_fp16))[name = tensor("op_14466_cast_fp16")]; tensor var_14467_to_fp16 = const()[name = tensor("op_14467_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1553_cast_fp16 = mul(x = var_14466_cast_fp16, y = var_14467_to_fp16)[name = tensor("aw_chunk_1553_cast_fp16")]; tensor var_14470_equation_0 = const()[name = tensor("op_14470_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14470_cast_fp16 = einsum(equation = var_14470_equation_0, values = (var_14264_cast_fp16, var_14217_cast_fp16))[name = tensor("op_14470_cast_fp16")]; tensor var_14471_to_fp16 = const()[name = tensor("op_14471_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1555_cast_fp16 = mul(x = var_14470_cast_fp16, y = var_14471_to_fp16)[name = tensor("aw_chunk_1555_cast_fp16")]; tensor var_14474_equation_0 = const()[name = tensor("op_14474_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14474_cast_fp16 = einsum(equation = var_14474_equation_0, values = (var_14264_cast_fp16, var_14218_cast_fp16))[name = tensor("op_14474_cast_fp16")]; tensor var_14475_to_fp16 = const()[name = tensor("op_14475_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1557_cast_fp16 = mul(x = var_14474_cast_fp16, y = var_14475_to_fp16)[name = tensor("aw_chunk_1557_cast_fp16")]; tensor var_14478_equation_0 = const()[name = tensor("op_14478_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14478_cast_fp16 = einsum(equation = var_14478_equation_0, values = (var_14264_cast_fp16, var_14219_cast_fp16))[name = tensor("op_14478_cast_fp16")]; tensor var_14479_to_fp16 = const()[name = tensor("op_14479_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1559_cast_fp16 = mul(x = var_14478_cast_fp16, y = var_14479_to_fp16)[name = tensor("aw_chunk_1559_cast_fp16")]; tensor var_14482_equation_0 = const()[name = tensor("op_14482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14482_cast_fp16 = einsum(equation = var_14482_equation_0, values = (var_14264_cast_fp16, var_14220_cast_fp16))[name = tensor("op_14482_cast_fp16")]; tensor var_14483_to_fp16 = const()[name = tensor("op_14483_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1561_cast_fp16 = mul(x = var_14482_cast_fp16, y = var_14483_to_fp16)[name = tensor("aw_chunk_1561_cast_fp16")]; tensor var_14486_equation_0 = const()[name = tensor("op_14486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14486_cast_fp16 = einsum(equation = var_14486_equation_0, values = (var_14264_cast_fp16, var_14221_cast_fp16))[name = tensor("op_14486_cast_fp16")]; tensor var_14487_to_fp16 = const()[name = tensor("op_14487_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1563_cast_fp16 = mul(x = var_14486_cast_fp16, y = var_14487_to_fp16)[name = tensor("aw_chunk_1563_cast_fp16")]; tensor var_14490_equation_0 = const()[name = tensor("op_14490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14490_cast_fp16 = einsum(equation = var_14490_equation_0, values = (var_14264_cast_fp16, var_14222_cast_fp16))[name = tensor("op_14490_cast_fp16")]; tensor var_14491_to_fp16 = const()[name = tensor("op_14491_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1565_cast_fp16 = mul(x = var_14490_cast_fp16, y = var_14491_to_fp16)[name = tensor("aw_chunk_1565_cast_fp16")]; tensor var_14494_equation_0 = const()[name = tensor("op_14494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14494_cast_fp16 = einsum(equation = var_14494_equation_0, values = (var_14264_cast_fp16, var_14223_cast_fp16))[name = tensor("op_14494_cast_fp16")]; tensor var_14495_to_fp16 = const()[name = tensor("op_14495_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1567_cast_fp16 = mul(x = var_14494_cast_fp16, y = var_14495_to_fp16)[name = tensor("aw_chunk_1567_cast_fp16")]; tensor var_14498_equation_0 = const()[name = tensor("op_14498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14498_cast_fp16 = einsum(equation = var_14498_equation_0, values = (var_14268_cast_fp16, var_14224_cast_fp16))[name = tensor("op_14498_cast_fp16")]; tensor var_14499_to_fp16 = const()[name = tensor("op_14499_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1569_cast_fp16 = mul(x = var_14498_cast_fp16, y = var_14499_to_fp16)[name = tensor("aw_chunk_1569_cast_fp16")]; tensor var_14502_equation_0 = const()[name = tensor("op_14502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14502_cast_fp16 = einsum(equation = var_14502_equation_0, values = (var_14268_cast_fp16, var_14225_cast_fp16))[name = tensor("op_14502_cast_fp16")]; tensor var_14503_to_fp16 = const()[name = tensor("op_14503_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1571_cast_fp16 = mul(x = var_14502_cast_fp16, y = var_14503_to_fp16)[name = tensor("aw_chunk_1571_cast_fp16")]; tensor var_14506_equation_0 = const()[name = tensor("op_14506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14506_cast_fp16 = einsum(equation = var_14506_equation_0, values = (var_14268_cast_fp16, var_14226_cast_fp16))[name = tensor("op_14506_cast_fp16")]; tensor var_14507_to_fp16 = const()[name = tensor("op_14507_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1573_cast_fp16 = mul(x = var_14506_cast_fp16, y = var_14507_to_fp16)[name = tensor("aw_chunk_1573_cast_fp16")]; tensor var_14510_equation_0 = const()[name = tensor("op_14510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14510_cast_fp16 = einsum(equation = var_14510_equation_0, values = (var_14268_cast_fp16, var_14227_cast_fp16))[name = tensor("op_14510_cast_fp16")]; tensor var_14511_to_fp16 = const()[name = tensor("op_14511_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1575_cast_fp16 = mul(x = var_14510_cast_fp16, y = var_14511_to_fp16)[name = tensor("aw_chunk_1575_cast_fp16")]; tensor var_14514_equation_0 = const()[name = tensor("op_14514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14514_cast_fp16 = einsum(equation = var_14514_equation_0, values = (var_14268_cast_fp16, var_14228_cast_fp16))[name = tensor("op_14514_cast_fp16")]; tensor var_14515_to_fp16 = const()[name = tensor("op_14515_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1577_cast_fp16 = mul(x = var_14514_cast_fp16, y = var_14515_to_fp16)[name = tensor("aw_chunk_1577_cast_fp16")]; tensor var_14518_equation_0 = const()[name = tensor("op_14518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14518_cast_fp16 = einsum(equation = var_14518_equation_0, values = (var_14268_cast_fp16, var_14229_cast_fp16))[name = tensor("op_14518_cast_fp16")]; tensor var_14519_to_fp16 = const()[name = tensor("op_14519_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1579_cast_fp16 = mul(x = var_14518_cast_fp16, y = var_14519_to_fp16)[name = tensor("aw_chunk_1579_cast_fp16")]; tensor var_14522_equation_0 = const()[name = tensor("op_14522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14522_cast_fp16 = einsum(equation = var_14522_equation_0, values = (var_14268_cast_fp16, var_14230_cast_fp16))[name = tensor("op_14522_cast_fp16")]; tensor var_14523_to_fp16 = const()[name = tensor("op_14523_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1581_cast_fp16 = mul(x = var_14522_cast_fp16, y = var_14523_to_fp16)[name = tensor("aw_chunk_1581_cast_fp16")]; tensor var_14526_equation_0 = const()[name = tensor("op_14526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14526_cast_fp16 = einsum(equation = var_14526_equation_0, values = (var_14268_cast_fp16, var_14231_cast_fp16))[name = tensor("op_14526_cast_fp16")]; tensor var_14527_to_fp16 = const()[name = tensor("op_14527_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1583_cast_fp16 = mul(x = var_14526_cast_fp16, y = var_14527_to_fp16)[name = tensor("aw_chunk_1583_cast_fp16")]; tensor var_14530_equation_0 = const()[name = tensor("op_14530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14530_cast_fp16 = einsum(equation = var_14530_equation_0, values = (var_14272_cast_fp16, var_14232_cast_fp16))[name = tensor("op_14530_cast_fp16")]; tensor var_14531_to_fp16 = const()[name = tensor("op_14531_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1585_cast_fp16 = mul(x = var_14530_cast_fp16, y = var_14531_to_fp16)[name = tensor("aw_chunk_1585_cast_fp16")]; tensor var_14534_equation_0 = const()[name = tensor("op_14534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14534_cast_fp16 = einsum(equation = var_14534_equation_0, values = (var_14272_cast_fp16, var_14233_cast_fp16))[name = tensor("op_14534_cast_fp16")]; tensor var_14535_to_fp16 = const()[name = tensor("op_14535_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1587_cast_fp16 = mul(x = var_14534_cast_fp16, y = var_14535_to_fp16)[name = tensor("aw_chunk_1587_cast_fp16")]; tensor var_14538_equation_0 = const()[name = tensor("op_14538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14538_cast_fp16 = einsum(equation = var_14538_equation_0, values = (var_14272_cast_fp16, var_14234_cast_fp16))[name = tensor("op_14538_cast_fp16")]; tensor var_14539_to_fp16 = const()[name = tensor("op_14539_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1589_cast_fp16 = mul(x = var_14538_cast_fp16, y = var_14539_to_fp16)[name = tensor("aw_chunk_1589_cast_fp16")]; tensor var_14542_equation_0 = const()[name = tensor("op_14542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14542_cast_fp16 = einsum(equation = var_14542_equation_0, values = (var_14272_cast_fp16, var_14235_cast_fp16))[name = tensor("op_14542_cast_fp16")]; tensor var_14543_to_fp16 = const()[name = tensor("op_14543_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1591_cast_fp16 = mul(x = var_14542_cast_fp16, y = var_14543_to_fp16)[name = tensor("aw_chunk_1591_cast_fp16")]; tensor var_14546_equation_0 = const()[name = tensor("op_14546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14546_cast_fp16 = einsum(equation = var_14546_equation_0, values = (var_14272_cast_fp16, var_14236_cast_fp16))[name = tensor("op_14546_cast_fp16")]; tensor var_14547_to_fp16 = const()[name = tensor("op_14547_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1593_cast_fp16 = mul(x = var_14546_cast_fp16, y = var_14547_to_fp16)[name = tensor("aw_chunk_1593_cast_fp16")]; tensor var_14550_equation_0 = const()[name = tensor("op_14550_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14550_cast_fp16 = einsum(equation = var_14550_equation_0, values = (var_14272_cast_fp16, var_14237_cast_fp16))[name = tensor("op_14550_cast_fp16")]; tensor var_14551_to_fp16 = const()[name = tensor("op_14551_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1595_cast_fp16 = mul(x = var_14550_cast_fp16, y = var_14551_to_fp16)[name = tensor("aw_chunk_1595_cast_fp16")]; tensor var_14554_equation_0 = const()[name = tensor("op_14554_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14554_cast_fp16 = einsum(equation = var_14554_equation_0, values = (var_14272_cast_fp16, var_14238_cast_fp16))[name = tensor("op_14554_cast_fp16")]; tensor var_14555_to_fp16 = const()[name = tensor("op_14555_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_1597_cast_fp16 = mul(x = var_14554_cast_fp16, y = var_14555_to_fp16)[name = tensor("aw_chunk_1597_cast_fp16")]; tensor var_14558_equation_0 = const()[name = tensor("op_14558_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor var_14558_cast_fp16 = einsum(equation = var_14558_equation_0, values = (var_14272_cast_fp16, var_14239_cast_fp16))[name = tensor("op_14558_cast_fp16")]; tensor var_14559_to_fp16 = const()[name = tensor("op_14559_to_fp16"), val = tensor(0x1.43cp-3)]; tensor aw_chunk_cast_fp16 = mul(x = var_14558_cast_fp16, y = var_14559_to_fp16)[name = tensor("aw_chunk_cast_fp16")]; tensor var_14561_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1473_cast_fp16)[name = tensor("op_14561_cast_fp16")]; tensor var_14562_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1475_cast_fp16)[name = tensor("op_14562_cast_fp16")]; tensor var_14563_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1477_cast_fp16)[name = tensor("op_14563_cast_fp16")]; tensor var_14564_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1479_cast_fp16)[name = tensor("op_14564_cast_fp16")]; tensor var_14565_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1481_cast_fp16)[name = tensor("op_14565_cast_fp16")]; tensor var_14566_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1483_cast_fp16)[name = tensor("op_14566_cast_fp16")]; tensor var_14567_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1485_cast_fp16)[name = tensor("op_14567_cast_fp16")]; tensor var_14568_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1487_cast_fp16)[name = tensor("op_14568_cast_fp16")]; tensor var_14569_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1489_cast_fp16)[name = tensor("op_14569_cast_fp16")]; tensor var_14570_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1491_cast_fp16)[name = tensor("op_14570_cast_fp16")]; tensor var_14571_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1493_cast_fp16)[name = tensor("op_14571_cast_fp16")]; tensor var_14572_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1495_cast_fp16)[name = tensor("op_14572_cast_fp16")]; tensor var_14573_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1497_cast_fp16)[name = tensor("op_14573_cast_fp16")]; tensor var_14574_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1499_cast_fp16)[name = tensor("op_14574_cast_fp16")]; tensor var_14575_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1501_cast_fp16)[name = tensor("op_14575_cast_fp16")]; tensor var_14576_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1503_cast_fp16)[name = tensor("op_14576_cast_fp16")]; tensor var_14577_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1505_cast_fp16)[name = tensor("op_14577_cast_fp16")]; tensor var_14578_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1507_cast_fp16)[name = tensor("op_14578_cast_fp16")]; tensor var_14579_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1509_cast_fp16)[name = tensor("op_14579_cast_fp16")]; tensor var_14580_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1511_cast_fp16)[name = tensor("op_14580_cast_fp16")]; tensor var_14581_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1513_cast_fp16)[name = tensor("op_14581_cast_fp16")]; tensor var_14582_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1515_cast_fp16)[name = tensor("op_14582_cast_fp16")]; tensor var_14583_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1517_cast_fp16)[name = tensor("op_14583_cast_fp16")]; tensor var_14584_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1519_cast_fp16)[name = tensor("op_14584_cast_fp16")]; tensor var_14585_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1521_cast_fp16)[name = tensor("op_14585_cast_fp16")]; tensor var_14586_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1523_cast_fp16)[name = tensor("op_14586_cast_fp16")]; tensor var_14587_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1525_cast_fp16)[name = tensor("op_14587_cast_fp16")]; tensor var_14588_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1527_cast_fp16)[name = tensor("op_14588_cast_fp16")]; tensor var_14589_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1529_cast_fp16)[name = tensor("op_14589_cast_fp16")]; tensor var_14590_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1531_cast_fp16)[name = tensor("op_14590_cast_fp16")]; tensor var_14591_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1533_cast_fp16)[name = tensor("op_14591_cast_fp16")]; tensor var_14592_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1535_cast_fp16)[name = tensor("op_14592_cast_fp16")]; tensor var_14593_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1537_cast_fp16)[name = tensor("op_14593_cast_fp16")]; tensor var_14594_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1539_cast_fp16)[name = tensor("op_14594_cast_fp16")]; tensor var_14595_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1541_cast_fp16)[name = tensor("op_14595_cast_fp16")]; tensor var_14596_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1543_cast_fp16)[name = tensor("op_14596_cast_fp16")]; tensor var_14597_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1545_cast_fp16)[name = tensor("op_14597_cast_fp16")]; tensor var_14598_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1547_cast_fp16)[name = tensor("op_14598_cast_fp16")]; tensor var_14599_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1549_cast_fp16)[name = tensor("op_14599_cast_fp16")]; tensor var_14600_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1551_cast_fp16)[name = tensor("op_14600_cast_fp16")]; tensor var_14601_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1553_cast_fp16)[name = tensor("op_14601_cast_fp16")]; tensor var_14602_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1555_cast_fp16)[name = tensor("op_14602_cast_fp16")]; tensor var_14603_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1557_cast_fp16)[name = tensor("op_14603_cast_fp16")]; tensor var_14604_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1559_cast_fp16)[name = tensor("op_14604_cast_fp16")]; tensor var_14605_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1561_cast_fp16)[name = tensor("op_14605_cast_fp16")]; tensor var_14606_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1563_cast_fp16)[name = tensor("op_14606_cast_fp16")]; tensor var_14607_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1565_cast_fp16)[name = tensor("op_14607_cast_fp16")]; tensor var_14608_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1567_cast_fp16)[name = tensor("op_14608_cast_fp16")]; tensor var_14609_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1569_cast_fp16)[name = tensor("op_14609_cast_fp16")]; tensor var_14610_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1571_cast_fp16)[name = tensor("op_14610_cast_fp16")]; tensor var_14611_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1573_cast_fp16)[name = tensor("op_14611_cast_fp16")]; tensor var_14612_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1575_cast_fp16)[name = tensor("op_14612_cast_fp16")]; tensor var_14613_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1577_cast_fp16)[name = tensor("op_14613_cast_fp16")]; tensor var_14614_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1579_cast_fp16)[name = tensor("op_14614_cast_fp16")]; tensor var_14615_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1581_cast_fp16)[name = tensor("op_14615_cast_fp16")]; tensor var_14616_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1583_cast_fp16)[name = tensor("op_14616_cast_fp16")]; tensor var_14617_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1585_cast_fp16)[name = tensor("op_14617_cast_fp16")]; tensor var_14618_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1587_cast_fp16)[name = tensor("op_14618_cast_fp16")]; tensor var_14619_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1589_cast_fp16)[name = tensor("op_14619_cast_fp16")]; tensor var_14620_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1591_cast_fp16)[name = tensor("op_14620_cast_fp16")]; tensor var_14621_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1593_cast_fp16)[name = tensor("op_14621_cast_fp16")]; tensor var_14622_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1595_cast_fp16)[name = tensor("op_14622_cast_fp16")]; tensor var_14623_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_1597_cast_fp16)[name = tensor("op_14623_cast_fp16")]; tensor var_14624_cast_fp16 = softmax(axis = var_10375, x = aw_chunk_cast_fp16)[name = tensor("op_14624_cast_fp16")]; tensor var_14626_equation_0 = const()[name = tensor("op_14626_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14626_cast_fp16 = einsum(equation = var_14626_equation_0, values = (var_14274_cast_fp16, var_14561_cast_fp16))[name = tensor("op_14626_cast_fp16")]; tensor var_14628_equation_0 = const()[name = tensor("op_14628_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14628_cast_fp16 = einsum(equation = var_14628_equation_0, values = (var_14274_cast_fp16, var_14562_cast_fp16))[name = tensor("op_14628_cast_fp16")]; tensor var_14630_equation_0 = const()[name = tensor("op_14630_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14630_cast_fp16 = einsum(equation = var_14630_equation_0, values = (var_14274_cast_fp16, var_14563_cast_fp16))[name = tensor("op_14630_cast_fp16")]; tensor var_14632_equation_0 = const()[name = tensor("op_14632_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14632_cast_fp16 = einsum(equation = var_14632_equation_0, values = (var_14274_cast_fp16, var_14564_cast_fp16))[name = tensor("op_14632_cast_fp16")]; tensor var_14634_equation_0 = const()[name = tensor("op_14634_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14634_cast_fp16 = einsum(equation = var_14634_equation_0, values = (var_14274_cast_fp16, var_14565_cast_fp16))[name = tensor("op_14634_cast_fp16")]; tensor var_14636_equation_0 = const()[name = tensor("op_14636_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14636_cast_fp16 = einsum(equation = var_14636_equation_0, values = (var_14274_cast_fp16, var_14566_cast_fp16))[name = tensor("op_14636_cast_fp16")]; tensor var_14638_equation_0 = const()[name = tensor("op_14638_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14638_cast_fp16 = einsum(equation = var_14638_equation_0, values = (var_14274_cast_fp16, var_14567_cast_fp16))[name = tensor("op_14638_cast_fp16")]; tensor var_14640_equation_0 = const()[name = tensor("op_14640_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14640_cast_fp16 = einsum(equation = var_14640_equation_0, values = (var_14274_cast_fp16, var_14568_cast_fp16))[name = tensor("op_14640_cast_fp16")]; tensor var_14642_equation_0 = const()[name = tensor("op_14642_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14642_cast_fp16 = einsum(equation = var_14642_equation_0, values = (var_14278_cast_fp16, var_14569_cast_fp16))[name = tensor("op_14642_cast_fp16")]; tensor var_14644_equation_0 = const()[name = tensor("op_14644_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14644_cast_fp16 = einsum(equation = var_14644_equation_0, values = (var_14278_cast_fp16, var_14570_cast_fp16))[name = tensor("op_14644_cast_fp16")]; tensor var_14646_equation_0 = const()[name = tensor("op_14646_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14646_cast_fp16 = einsum(equation = var_14646_equation_0, values = (var_14278_cast_fp16, var_14571_cast_fp16))[name = tensor("op_14646_cast_fp16")]; tensor var_14648_equation_0 = const()[name = tensor("op_14648_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14648_cast_fp16 = einsum(equation = var_14648_equation_0, values = (var_14278_cast_fp16, var_14572_cast_fp16))[name = tensor("op_14648_cast_fp16")]; tensor var_14650_equation_0 = const()[name = tensor("op_14650_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14650_cast_fp16 = einsum(equation = var_14650_equation_0, values = (var_14278_cast_fp16, var_14573_cast_fp16))[name = tensor("op_14650_cast_fp16")]; tensor var_14652_equation_0 = const()[name = tensor("op_14652_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14652_cast_fp16 = einsum(equation = var_14652_equation_0, values = (var_14278_cast_fp16, var_14574_cast_fp16))[name = tensor("op_14652_cast_fp16")]; tensor var_14654_equation_0 = const()[name = tensor("op_14654_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14654_cast_fp16 = einsum(equation = var_14654_equation_0, values = (var_14278_cast_fp16, var_14575_cast_fp16))[name = tensor("op_14654_cast_fp16")]; tensor var_14656_equation_0 = const()[name = tensor("op_14656_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14656_cast_fp16 = einsum(equation = var_14656_equation_0, values = (var_14278_cast_fp16, var_14576_cast_fp16))[name = tensor("op_14656_cast_fp16")]; tensor var_14658_equation_0 = const()[name = tensor("op_14658_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14658_cast_fp16 = einsum(equation = var_14658_equation_0, values = (var_14282_cast_fp16, var_14577_cast_fp16))[name = tensor("op_14658_cast_fp16")]; tensor var_14660_equation_0 = const()[name = tensor("op_14660_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14660_cast_fp16 = einsum(equation = var_14660_equation_0, values = (var_14282_cast_fp16, var_14578_cast_fp16))[name = tensor("op_14660_cast_fp16")]; tensor var_14662_equation_0 = const()[name = tensor("op_14662_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14662_cast_fp16 = einsum(equation = var_14662_equation_0, values = (var_14282_cast_fp16, var_14579_cast_fp16))[name = tensor("op_14662_cast_fp16")]; tensor var_14664_equation_0 = const()[name = tensor("op_14664_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14664_cast_fp16 = einsum(equation = var_14664_equation_0, values = (var_14282_cast_fp16, var_14580_cast_fp16))[name = tensor("op_14664_cast_fp16")]; tensor var_14666_equation_0 = const()[name = tensor("op_14666_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14666_cast_fp16 = einsum(equation = var_14666_equation_0, values = (var_14282_cast_fp16, var_14581_cast_fp16))[name = tensor("op_14666_cast_fp16")]; tensor var_14668_equation_0 = const()[name = tensor("op_14668_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14668_cast_fp16 = einsum(equation = var_14668_equation_0, values = (var_14282_cast_fp16, var_14582_cast_fp16))[name = tensor("op_14668_cast_fp16")]; tensor var_14670_equation_0 = const()[name = tensor("op_14670_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14670_cast_fp16 = einsum(equation = var_14670_equation_0, values = (var_14282_cast_fp16, var_14583_cast_fp16))[name = tensor("op_14670_cast_fp16")]; tensor var_14672_equation_0 = const()[name = tensor("op_14672_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14672_cast_fp16 = einsum(equation = var_14672_equation_0, values = (var_14282_cast_fp16, var_14584_cast_fp16))[name = tensor("op_14672_cast_fp16")]; tensor var_14674_equation_0 = const()[name = tensor("op_14674_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14674_cast_fp16 = einsum(equation = var_14674_equation_0, values = (var_14286_cast_fp16, var_14585_cast_fp16))[name = tensor("op_14674_cast_fp16")]; tensor var_14676_equation_0 = const()[name = tensor("op_14676_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14676_cast_fp16 = einsum(equation = var_14676_equation_0, values = (var_14286_cast_fp16, var_14586_cast_fp16))[name = tensor("op_14676_cast_fp16")]; tensor var_14678_equation_0 = const()[name = tensor("op_14678_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14678_cast_fp16 = einsum(equation = var_14678_equation_0, values = (var_14286_cast_fp16, var_14587_cast_fp16))[name = tensor("op_14678_cast_fp16")]; tensor var_14680_equation_0 = const()[name = tensor("op_14680_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14680_cast_fp16 = einsum(equation = var_14680_equation_0, values = (var_14286_cast_fp16, var_14588_cast_fp16))[name = tensor("op_14680_cast_fp16")]; tensor var_14682_equation_0 = const()[name = tensor("op_14682_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14682_cast_fp16 = einsum(equation = var_14682_equation_0, values = (var_14286_cast_fp16, var_14589_cast_fp16))[name = tensor("op_14682_cast_fp16")]; tensor var_14684_equation_0 = const()[name = tensor("op_14684_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14684_cast_fp16 = einsum(equation = var_14684_equation_0, values = (var_14286_cast_fp16, var_14590_cast_fp16))[name = tensor("op_14684_cast_fp16")]; tensor var_14686_equation_0 = const()[name = tensor("op_14686_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14686_cast_fp16 = einsum(equation = var_14686_equation_0, values = (var_14286_cast_fp16, var_14591_cast_fp16))[name = tensor("op_14686_cast_fp16")]; tensor var_14688_equation_0 = const()[name = tensor("op_14688_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14688_cast_fp16 = einsum(equation = var_14688_equation_0, values = (var_14286_cast_fp16, var_14592_cast_fp16))[name = tensor("op_14688_cast_fp16")]; tensor var_14690_equation_0 = const()[name = tensor("op_14690_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14690_cast_fp16 = einsum(equation = var_14690_equation_0, values = (var_14290_cast_fp16, var_14593_cast_fp16))[name = tensor("op_14690_cast_fp16")]; tensor var_14692_equation_0 = const()[name = tensor("op_14692_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14692_cast_fp16 = einsum(equation = var_14692_equation_0, values = (var_14290_cast_fp16, var_14594_cast_fp16))[name = tensor("op_14692_cast_fp16")]; tensor var_14694_equation_0 = const()[name = tensor("op_14694_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14694_cast_fp16 = einsum(equation = var_14694_equation_0, values = (var_14290_cast_fp16, var_14595_cast_fp16))[name = tensor("op_14694_cast_fp16")]; tensor var_14696_equation_0 = const()[name = tensor("op_14696_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14696_cast_fp16 = einsum(equation = var_14696_equation_0, values = (var_14290_cast_fp16, var_14596_cast_fp16))[name = tensor("op_14696_cast_fp16")]; tensor var_14698_equation_0 = const()[name = tensor("op_14698_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14698_cast_fp16 = einsum(equation = var_14698_equation_0, values = (var_14290_cast_fp16, var_14597_cast_fp16))[name = tensor("op_14698_cast_fp16")]; tensor var_14700_equation_0 = const()[name = tensor("op_14700_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14700_cast_fp16 = einsum(equation = var_14700_equation_0, values = (var_14290_cast_fp16, var_14598_cast_fp16))[name = tensor("op_14700_cast_fp16")]; tensor var_14702_equation_0 = const()[name = tensor("op_14702_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14702_cast_fp16 = einsum(equation = var_14702_equation_0, values = (var_14290_cast_fp16, var_14599_cast_fp16))[name = tensor("op_14702_cast_fp16")]; tensor var_14704_equation_0 = const()[name = tensor("op_14704_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14704_cast_fp16 = einsum(equation = var_14704_equation_0, values = (var_14290_cast_fp16, var_14600_cast_fp16))[name = tensor("op_14704_cast_fp16")]; tensor var_14706_equation_0 = const()[name = tensor("op_14706_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14706_cast_fp16 = einsum(equation = var_14706_equation_0, values = (var_14294_cast_fp16, var_14601_cast_fp16))[name = tensor("op_14706_cast_fp16")]; tensor var_14708_equation_0 = const()[name = tensor("op_14708_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14708_cast_fp16 = einsum(equation = var_14708_equation_0, values = (var_14294_cast_fp16, var_14602_cast_fp16))[name = tensor("op_14708_cast_fp16")]; tensor var_14710_equation_0 = const()[name = tensor("op_14710_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14710_cast_fp16 = einsum(equation = var_14710_equation_0, values = (var_14294_cast_fp16, var_14603_cast_fp16))[name = tensor("op_14710_cast_fp16")]; tensor var_14712_equation_0 = const()[name = tensor("op_14712_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14712_cast_fp16 = einsum(equation = var_14712_equation_0, values = (var_14294_cast_fp16, var_14604_cast_fp16))[name = tensor("op_14712_cast_fp16")]; tensor var_14714_equation_0 = const()[name = tensor("op_14714_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14714_cast_fp16 = einsum(equation = var_14714_equation_0, values = (var_14294_cast_fp16, var_14605_cast_fp16))[name = tensor("op_14714_cast_fp16")]; tensor var_14716_equation_0 = const()[name = tensor("op_14716_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14716_cast_fp16 = einsum(equation = var_14716_equation_0, values = (var_14294_cast_fp16, var_14606_cast_fp16))[name = tensor("op_14716_cast_fp16")]; tensor var_14718_equation_0 = const()[name = tensor("op_14718_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14718_cast_fp16 = einsum(equation = var_14718_equation_0, values = (var_14294_cast_fp16, var_14607_cast_fp16))[name = tensor("op_14718_cast_fp16")]; tensor var_14720_equation_0 = const()[name = tensor("op_14720_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14720_cast_fp16 = einsum(equation = var_14720_equation_0, values = (var_14294_cast_fp16, var_14608_cast_fp16))[name = tensor("op_14720_cast_fp16")]; tensor var_14722_equation_0 = const()[name = tensor("op_14722_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14722_cast_fp16 = einsum(equation = var_14722_equation_0, values = (var_14298_cast_fp16, var_14609_cast_fp16))[name = tensor("op_14722_cast_fp16")]; tensor var_14724_equation_0 = const()[name = tensor("op_14724_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14724_cast_fp16 = einsum(equation = var_14724_equation_0, values = (var_14298_cast_fp16, var_14610_cast_fp16))[name = tensor("op_14724_cast_fp16")]; tensor var_14726_equation_0 = const()[name = tensor("op_14726_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14726_cast_fp16 = einsum(equation = var_14726_equation_0, values = (var_14298_cast_fp16, var_14611_cast_fp16))[name = tensor("op_14726_cast_fp16")]; tensor var_14728_equation_0 = const()[name = tensor("op_14728_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14728_cast_fp16 = einsum(equation = var_14728_equation_0, values = (var_14298_cast_fp16, var_14612_cast_fp16))[name = tensor("op_14728_cast_fp16")]; tensor var_14730_equation_0 = const()[name = tensor("op_14730_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14730_cast_fp16 = einsum(equation = var_14730_equation_0, values = (var_14298_cast_fp16, var_14613_cast_fp16))[name = tensor("op_14730_cast_fp16")]; tensor var_14732_equation_0 = const()[name = tensor("op_14732_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14732_cast_fp16 = einsum(equation = var_14732_equation_0, values = (var_14298_cast_fp16, var_14614_cast_fp16))[name = tensor("op_14732_cast_fp16")]; tensor var_14734_equation_0 = const()[name = tensor("op_14734_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14734_cast_fp16 = einsum(equation = var_14734_equation_0, values = (var_14298_cast_fp16, var_14615_cast_fp16))[name = tensor("op_14734_cast_fp16")]; tensor var_14736_equation_0 = const()[name = tensor("op_14736_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14736_cast_fp16 = einsum(equation = var_14736_equation_0, values = (var_14298_cast_fp16, var_14616_cast_fp16))[name = tensor("op_14736_cast_fp16")]; tensor var_14738_equation_0 = const()[name = tensor("op_14738_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14738_cast_fp16 = einsum(equation = var_14738_equation_0, values = (var_14302_cast_fp16, var_14617_cast_fp16))[name = tensor("op_14738_cast_fp16")]; tensor var_14740_equation_0 = const()[name = tensor("op_14740_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14740_cast_fp16 = einsum(equation = var_14740_equation_0, values = (var_14302_cast_fp16, var_14618_cast_fp16))[name = tensor("op_14740_cast_fp16")]; tensor var_14742_equation_0 = const()[name = tensor("op_14742_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14742_cast_fp16 = einsum(equation = var_14742_equation_0, values = (var_14302_cast_fp16, var_14619_cast_fp16))[name = tensor("op_14742_cast_fp16")]; tensor var_14744_equation_0 = const()[name = tensor("op_14744_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14744_cast_fp16 = einsum(equation = var_14744_equation_0, values = (var_14302_cast_fp16, var_14620_cast_fp16))[name = tensor("op_14744_cast_fp16")]; tensor var_14746_equation_0 = const()[name = tensor("op_14746_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14746_cast_fp16 = einsum(equation = var_14746_equation_0, values = (var_14302_cast_fp16, var_14621_cast_fp16))[name = tensor("op_14746_cast_fp16")]; tensor var_14748_equation_0 = const()[name = tensor("op_14748_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14748_cast_fp16 = einsum(equation = var_14748_equation_0, values = (var_14302_cast_fp16, var_14622_cast_fp16))[name = tensor("op_14748_cast_fp16")]; tensor var_14750_equation_0 = const()[name = tensor("op_14750_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14750_cast_fp16 = einsum(equation = var_14750_equation_0, values = (var_14302_cast_fp16, var_14623_cast_fp16))[name = tensor("op_14750_cast_fp16")]; tensor var_14752_equation_0 = const()[name = tensor("op_14752_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14752_cast_fp16 = einsum(equation = var_14752_equation_0, values = (var_14302_cast_fp16, var_14624_cast_fp16))[name = tensor("op_14752_cast_fp16")]; tensor var_14754_interleave_0 = const()[name = tensor("op_14754_interleave_0"), val = tensor(false)]; tensor var_14754_cast_fp16 = concat(axis = var_10347, interleave = var_14754_interleave_0, values = (var_14626_cast_fp16, var_14628_cast_fp16, var_14630_cast_fp16, var_14632_cast_fp16, var_14634_cast_fp16, var_14636_cast_fp16, var_14638_cast_fp16, var_14640_cast_fp16))[name = tensor("op_14754_cast_fp16")]; tensor var_14756_interleave_0 = const()[name = tensor("op_14756_interleave_0"), val = tensor(false)]; tensor var_14756_cast_fp16 = concat(axis = var_10347, interleave = var_14756_interleave_0, values = (var_14642_cast_fp16, var_14644_cast_fp16, var_14646_cast_fp16, var_14648_cast_fp16, var_14650_cast_fp16, var_14652_cast_fp16, var_14654_cast_fp16, var_14656_cast_fp16))[name = tensor("op_14756_cast_fp16")]; tensor var_14758_interleave_0 = const()[name = tensor("op_14758_interleave_0"), val = tensor(false)]; tensor var_14758_cast_fp16 = concat(axis = var_10347, interleave = var_14758_interleave_0, values = (var_14658_cast_fp16, var_14660_cast_fp16, var_14662_cast_fp16, var_14664_cast_fp16, var_14666_cast_fp16, var_14668_cast_fp16, var_14670_cast_fp16, var_14672_cast_fp16))[name = tensor("op_14758_cast_fp16")]; tensor var_14760_interleave_0 = const()[name = tensor("op_14760_interleave_0"), val = tensor(false)]; tensor var_14760_cast_fp16 = concat(axis = var_10347, interleave = var_14760_interleave_0, values = (var_14674_cast_fp16, var_14676_cast_fp16, var_14678_cast_fp16, var_14680_cast_fp16, var_14682_cast_fp16, var_14684_cast_fp16, var_14686_cast_fp16, var_14688_cast_fp16))[name = tensor("op_14760_cast_fp16")]; tensor var_14762_interleave_0 = const()[name = tensor("op_14762_interleave_0"), val = tensor(false)]; tensor var_14762_cast_fp16 = concat(axis = var_10347, interleave = var_14762_interleave_0, values = (var_14690_cast_fp16, var_14692_cast_fp16, var_14694_cast_fp16, var_14696_cast_fp16, var_14698_cast_fp16, var_14700_cast_fp16, var_14702_cast_fp16, var_14704_cast_fp16))[name = tensor("op_14762_cast_fp16")]; tensor var_14764_interleave_0 = const()[name = tensor("op_14764_interleave_0"), val = tensor(false)]; tensor var_14764_cast_fp16 = concat(axis = var_10347, interleave = var_14764_interleave_0, values = (var_14706_cast_fp16, var_14708_cast_fp16, var_14710_cast_fp16, var_14712_cast_fp16, var_14714_cast_fp16, var_14716_cast_fp16, var_14718_cast_fp16, var_14720_cast_fp16))[name = tensor("op_14764_cast_fp16")]; tensor var_14766_interleave_0 = const()[name = tensor("op_14766_interleave_0"), val = tensor(false)]; tensor var_14766_cast_fp16 = concat(axis = var_10347, interleave = var_14766_interleave_0, values = (var_14722_cast_fp16, var_14724_cast_fp16, var_14726_cast_fp16, var_14728_cast_fp16, var_14730_cast_fp16, var_14732_cast_fp16, var_14734_cast_fp16, var_14736_cast_fp16))[name = tensor("op_14766_cast_fp16")]; tensor var_14768_interleave_0 = const()[name = tensor("op_14768_interleave_0"), val = tensor(false)]; tensor var_14768_cast_fp16 = concat(axis = var_10347, interleave = var_14768_interleave_0, values = (var_14738_cast_fp16, var_14740_cast_fp16, var_14742_cast_fp16, var_14744_cast_fp16, var_14746_cast_fp16, var_14748_cast_fp16, var_14750_cast_fp16, var_14752_cast_fp16))[name = tensor("op_14768_cast_fp16")]; tensor input_519_interleave_0 = const()[name = tensor("input_519_interleave_0"), val = tensor(false)]; tensor input_519_cast_fp16 = concat(axis = var_10375, interleave = input_519_interleave_0, values = (var_14754_cast_fp16, var_14756_cast_fp16, var_14758_cast_fp16, var_14760_cast_fp16, var_14762_cast_fp16, var_14764_cast_fp16, var_14766_cast_fp16, var_14768_cast_fp16))[name = tensor("input_519_cast_fp16")]; tensor var_14774 = const()[name = tensor("op_14774"), val = tensor([1, 1])]; tensor var_14776 = const()[name = tensor("op_14776"), val = tensor([1, 1])]; tensor var_14778_pad_type_0 = const()[name = tensor("op_14778_pad_type_0"), val = tensor("custom")]; tensor var_14778_pad_0 = const()[name = tensor("op_14778_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_2_transformer_blocks_0_attn2_to_out_0_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_transformer_blocks_0_attn2_to_out_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1716217024)))]; tensor up_blocks_3_attentions_2_transformer_blocks_0_attn2_to_out_0_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_transformer_blocks_0_attn2_to_out_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1716421888)))]; tensor var_14778_cast_fp16 = conv(bias = up_blocks_3_attentions_2_transformer_blocks_0_attn2_to_out_0_bias_to_fp16, dilations = var_14776, groups = var_10375, pad = var_14778_pad_0, pad_type = var_14778_pad_type_0, strides = var_14774, weight = up_blocks_3_attentions_2_transformer_blocks_0_attn2_to_out_0_weight_to_fp16, x = input_519_cast_fp16)[name = tensor("op_14778_cast_fp16")]; tensor inputs_cast_fp16 = add(x = var_14778_cast_fp16, y = inputs_93_cast_fp16)[name = tensor("inputs_cast_fp16")]; tensor input_521_axes_0 = const()[name = tensor("input_521_axes_0"), val = tensor([1])]; tensor input_521_gamma_0_to_fp16 = const()[name = tensor("input_521_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1716422592)))]; tensor input_521_beta_0_to_fp16 = const()[name = tensor("input_521_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1716423296)))]; tensor var_14788_to_fp16 = const()[name = tensor("op_14788_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_521_cast_fp16 = layer_norm(axes = input_521_axes_0, beta = input_521_beta_0_to_fp16, epsilon = var_14788_to_fp16, gamma = input_521_gamma_0_to_fp16, x = inputs_cast_fp16)[name = tensor("input_521_cast_fp16")]; tensor var_14804 = const()[name = tensor("op_14804"), val = tensor([1, 1])]; tensor var_14806 = const()[name = tensor("op_14806"), val = tensor([1, 1])]; tensor var_14808_pad_type_0 = const()[name = tensor("op_14808_pad_type_0"), val = tensor("custom")]; tensor var_14808_pad_0 = const()[name = tensor("op_14808_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_2_transformer_blocks_0_ff_net_0_proj_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_transformer_blocks_0_ff_net_0_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1716424000)))]; tensor up_blocks_3_attentions_2_transformer_blocks_0_ff_net_0_proj_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_transformer_blocks_0_ff_net_0_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1718062464)))]; tensor var_14808_cast_fp16 = conv(bias = up_blocks_3_attentions_2_transformer_blocks_0_ff_net_0_proj_bias_to_fp16, dilations = var_14806, groups = var_10375, pad = var_14808_pad_0, pad_type = var_14808_pad_type_0, strides = var_14804, weight = up_blocks_3_attentions_2_transformer_blocks_0_ff_net_0_proj_weight_to_fp16, x = input_521_cast_fp16)[name = tensor("op_14808_cast_fp16")]; tensor var_14809_split_sizes_0 = const()[name = tensor("op_14809_split_sizes_0"), val = tensor([1280, 1280])]; tensor var_14809_axis_0 = const()[name = tensor("op_14809_axis_0"), val = tensor(1)]; tensor var_14809_cast_fp16_0, tensor var_14809_cast_fp16_1 = split(axis = var_14809_axis_0, split_sizes = var_14809_split_sizes_0, x = var_14808_cast_fp16)[name = tensor("op_14809_cast_fp16")]; tensor var_14811_mode_0 = const()[name = tensor("op_14811_mode_0"), val = tensor("EXACT")]; tensor var_14811_cast_fp16 = gelu(mode = var_14811_mode_0, x = var_14809_cast_fp16_1)[name = tensor("op_14811_cast_fp16")]; tensor input_523_cast_fp16 = mul(x = var_14809_cast_fp16_0, y = var_14811_cast_fp16)[name = tensor("input_523_cast_fp16")]; tensor var_14815 = const()[name = tensor("op_14815"), val = tensor([1, 1])]; tensor var_14817 = const()[name = tensor("op_14817"), val = tensor([1, 1])]; tensor var_14819_pad_type_0 = const()[name = tensor("op_14819_pad_type_0"), val = tensor("custom")]; tensor var_14819_pad_0 = const()[name = tensor("op_14819_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_2_transformer_blocks_0_ff_net_2_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_transformer_blocks_0_ff_net_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1718067648)))]; tensor up_blocks_3_attentions_2_transformer_blocks_0_ff_net_2_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_transformer_blocks_0_ff_net_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1718886912)))]; tensor var_14819_cast_fp16 = conv(bias = up_blocks_3_attentions_2_transformer_blocks_0_ff_net_2_bias_to_fp16, dilations = var_14817, groups = var_10375, pad = var_14819_pad_0, pad_type = var_14819_pad_type_0, strides = var_14815, weight = up_blocks_3_attentions_2_transformer_blocks_0_ff_net_2_weight_to_fp16, x = input_523_cast_fp16)[name = tensor("op_14819_cast_fp16")]; tensor hidden_states_333_cast_fp16 = add(x = var_14819_cast_fp16, y = inputs_cast_fp16)[name = tensor("hidden_states_333_cast_fp16")]; tensor var_14821 = const()[name = tensor("op_14821"), val = tensor([2, 320, 64, 64])]; tensor input_525_cast_fp16 = reshape(shape = var_14821, x = hidden_states_333_cast_fp16)[name = tensor("input_525_cast_fp16")]; tensor var_14825 = const()[name = tensor("op_14825"), val = tensor([1, 1])]; tensor var_14827 = const()[name = tensor("op_14827"), val = tensor([1, 1])]; tensor hidden_states_pad_type_0 = const()[name = tensor("hidden_states_pad_type_0"), val = tensor("custom")]; tensor hidden_states_pad_0 = const()[name = tensor("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; tensor up_blocks_3_attentions_2_proj_out_weight_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_proj_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1718887616)))]; tensor up_blocks_3_attentions_2_proj_out_bias_to_fp16 = const()[name = tensor("up_blocks_3_attentions_2_proj_out_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1719092480)))]; tensor hidden_states_cast_fp16 = conv(bias = up_blocks_3_attentions_2_proj_out_bias_to_fp16, dilations = var_14827, groups = var_10375, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = var_14825, weight = up_blocks_3_attentions_2_proj_out_weight_to_fp16, x = input_525_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; tensor input_527_cast_fp16 = add(x = hidden_states_cast_fp16, y = hidden_states_323_cast_fp16)[name = tensor("input_527_cast_fp16")]; tensor reshape_240_shape_0 = const()[name = tensor("reshape_240_shape_0"), val = tensor([2, 32, 10, 64, 64])]; tensor reshape_240_cast_fp16 = reshape(shape = reshape_240_shape_0, x = input_527_cast_fp16)[name = tensor("reshape_240_cast_fp16")]; tensor reduce_mean_180_axes_0 = const()[name = tensor("reduce_mean_180_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_180_keep_dims_0 = const()[name = tensor("reduce_mean_180_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_180_cast_fp16 = reduce_mean(axes = reduce_mean_180_axes_0, keep_dims = reduce_mean_180_keep_dims_0, x = reshape_240_cast_fp16)[name = tensor("reduce_mean_180_cast_fp16")]; tensor sub_120_cast_fp16 = sub(x = reshape_240_cast_fp16, y = reduce_mean_180_cast_fp16)[name = tensor("sub_120_cast_fp16")]; tensor square_60_cast_fp16 = square(x = sub_120_cast_fp16)[name = tensor("square_60_cast_fp16")]; tensor reduce_mean_182_axes_0 = const()[name = tensor("reduce_mean_182_axes_0"), val = tensor([2, 3, 4])]; tensor reduce_mean_182_keep_dims_0 = const()[name = tensor("reduce_mean_182_keep_dims_0"), val = tensor(true)]; tensor reduce_mean_182_cast_fp16 = reduce_mean(axes = reduce_mean_182_axes_0, keep_dims = reduce_mean_182_keep_dims_0, x = square_60_cast_fp16)[name = tensor("reduce_mean_182_cast_fp16")]; tensor add_120_y_0_to_fp16 = const()[name = tensor("add_120_y_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_120_cast_fp16 = add(x = reduce_mean_182_cast_fp16, y = add_120_y_0_to_fp16)[name = tensor("add_120_cast_fp16")]; tensor sqrt_60_cast_fp16 = sqrt(x = add_120_cast_fp16)[name = tensor("sqrt_60_cast_fp16")]; tensor real_div_60_cast_fp16 = real_div(x = sub_120_cast_fp16, y = sqrt_60_cast_fp16)[name = tensor("real_div_60_cast_fp16")]; tensor reshape_241_shape_0 = const()[name = tensor("reshape_241_shape_0"), val = tensor([2, 320, 64, 64])]; tensor reshape_241_cast_fp16 = reshape(shape = reshape_241_shape_0, x = real_div_60_cast_fp16)[name = tensor("reshape_241_cast_fp16")]; tensor add_121_gamma_0_to_fp16 = const()[name = tensor("add_121_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1719093184)))]; tensor add_121_beta_0_to_fp16 = const()[name = tensor("add_121_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1719093888)))]; tensor add_121_epsilon_0_to_fp16 = const()[name = tensor("add_121_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor add_121_cast_fp16 = batch_norm(beta = add_121_beta_0_to_fp16, epsilon = add_121_epsilon_0_to_fp16, gamma = add_121_gamma_0_to_fp16, mean = add_1_mean_0_to_fp16, variance = add_1_variance_0_to_fp16, x = reshape_241_cast_fp16)[name = tensor("add_121_cast_fp16")]; tensor input_cast_fp16 = silu(x = add_121_cast_fp16)[name = tensor("input_cast_fp16")]; tensor var_14841 = const()[name = tensor("op_14841"), val = tensor(1)]; tensor var_14844 = const()[name = tensor("op_14844"), val = tensor([1, 1])]; tensor var_14846 = const()[name = tensor("op_14846"), val = tensor([1, 1])]; tensor var_14848_pad_type_0 = const()[name = tensor("op_14848_pad_type_0"), val = tensor("custom")]; tensor var_14848_pad_0 = const()[name = tensor("op_14848_pad_0"), val = tensor([1, 1, 1, 1])]; tensor conv_out_weight_to_fp16 = const()[name = tensor("conv_out_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1719094592)))]; tensor conv_out_bias_to_fp16 = const()[name = tensor("conv_out_bias_to_fp16"), val = tensor([-0x1.b34p-10, -0x1.8b8p-10, -0x1.cfp-13, -0x1.c34p-9])]; tensor var_14848_cast_fp16 = conv(bias = conv_out_bias_to_fp16, dilations = var_14846, groups = var_14841, pad = var_14848_pad_0, pad_type = var_14848_pad_type_0, strides = var_14844, weight = conv_out_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_14848_cast_fp16")]; tensor var_14848_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("op_14848_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; tensor noise_pred = cast(dtype = var_14848_cast_fp16_to_fp32_dtype_0, x = var_14848_cast_fp16)[name = tensor("cast_133")]; } -> (noise_pred); }