0-hero
/

gpt2-pos-encoding-experiment-100B

Model card Files Files and versions Community

gpt2-pos-encoding-experiment-100B / .triton /dump /d7a12c0ba96f8920b8147157303ee99f /triton_.llir

0-hero

Add files using upload-large-folder tool

0def249 verified 5 months ago

raw

history blame

18.5 kB

	; ModuleID = 'LLVMDialectModule'
	source_filename = "LLVMDialectModule"

	; Externally defined, zero-length byte array in address space 3. The kernel in
	; this module indexes it as float slots and accesses it with st.shared/ld.shared.
	@global_smem = external addrspace(3) global [0 x i8]
	; NUL-terminated option name passed to @__nvvm_reflect; the result selects between
	; the .ftz and non-.ftz rsqrt intrinsics.
	@.str = private unnamed_addr constant [11 x i8] c"__CUDA_FTZ\00", align 1

	; Kernel entry point (annotated "kernel" with maxntidx = 64 in !nvvm.annotations).
	;
	; Work split: each block covers 256 consecutive elements starting at ctaid.x * 256;
	; each thread covers 4 of them at in-block offset (tid.x * 4) & 252.
	;   %0      global pointer, read as float, indexed by the block-wide element index
	;   %1      global pointer, read as bf16 (i16 + cvt.f32.bf16), same index
	;   %2      global pointer, read as bf16, same index
	;   %3      global pointer, read as float, indexed by the in-block offset only
	;   %4      global pointer, written as bf16, same index as %0..%2
	;   %5, %6  i32 arguments that are not referenced in this body
	;
	; Per element: x = (%1[i] + %0[i]) + %2[i]
	;              mean = sum(x) / 256,  var = sum((x - mean)^2) / 256
	;              out  = (x - mean) * rsqrt(var + ~1e-5) * %3[offset], rounded to bf16
	; NOTE(review): this has the shape of a fused add + layer-norm (scale only, no bias);
	; confirm against the generating Python source named in the debug metadata.
	define void @triton__0d1d2d3d4d5de6de(ptr addrspace(1) %0, ptr addrspace(1) %1, ptr addrspace(1) %2, ptr addrspace(1) %3, ptr addrspace(1) %4, i32 %5, i32 %6) local_unnamed_addr !dbg !7 {
	; --- Index computation ---------------------------------------------------
	; %9  = tid.x & 31        (lane within the warp)
	; %11 = (tid.x >> 5) & 1  (warp index, 0 or 1)
	; %12 = (tid.x << 2) & 252 (in-block element offset, multiple of 4)
	%8 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !dbg !10
	%9 = and i32 %8, 31, !dbg !10
	%10 = lshr i32 %8, 5, !dbg !10
	%11 = and i32 %10, 1, !dbg !10
	%urem = shl i32 %8, 2, !dbg !10
	%12 = and i32 %urem, 252, !dbg !10
	; %15 = ctaid.x * 256 + %12 (the low 8 bits of %14 are zero, so `or` acts as add).
	%13 = tail call i32 asm "mov.u32 $0, %ctaid.x;", "=r"() #6, !dbg !11
	%14 = shl i32 %13, 8, !dbg !12
	%15 = or i32 %14, %12, !dbg !13
	%16 = sext i32 %15 to i64, !dbg !14
	; --- Load 4 floats from %0 (one 128-bit vector load, predicate constant true) ---
	%17 = getelementptr float, ptr addrspace(1) %0, i64 %16, !dbg !14
	%18 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];\0A\09@!$7 mov.u32 $0, $6;\0A\09@!$9 mov.u32 $1, $8;\0A\09@!$11 mov.u32 $2, $10;\0A\09@!$13 mov.u32 $3, $12;", "=r,=r,=r,=r,l,b,r,b,r,b,r,b,r,b"(ptr addrspace(1) %17, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !15
	%19 = extractvalue { i32, i32, i32, i32 } %18, 0, !dbg !15
	%20 = extractvalue { i32, i32, i32, i32 } %18, 1, !dbg !15
	%21 = extractvalue { i32, i32, i32, i32 } %18, 2, !dbg !15
	%22 = extractvalue { i32, i32, i32, i32 } %18, 3, !dbg !15
	%23 = bitcast i32 %21 to float, !dbg !15
	%24 = bitcast i32 %22 to float, !dbg !15
	; --- Load 4 bf16 values from %1 as two 32-bit words, split into 16-bit halves,
	;     and widen each half to f32 (%33..%36) ---
	%25 = getelementptr i16, ptr addrspace(1) %1, i64 %16, !dbg !16
	%26 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %25, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !17
	%27 = extractvalue { i32, i32 } %26, 0, !dbg !17
	%28 = extractvalue { i32, i32 } %26, 1, !dbg !17
	%29 = trunc i32 %27 to i16, !dbg !17
	%extelt.offset = lshr i32 %27, 16, !dbg !17
	%30 = trunc i32 %extelt.offset to i16, !dbg !17
	%31 = trunc i32 %28 to i16, !dbg !17
	%extelt.offset1 = lshr i32 %28, 16, !dbg !17
	%32 = trunc i32 %extelt.offset1 to i16, !dbg !17
	%33 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %29) #6, !dbg !18
	%34 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %30) #6, !dbg !18
	%35 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %31) #6, !dbg !18
	%36 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %32) #6, !dbg !18
	; --- Same load/unpack/widen sequence for %2 (%45..%48) ---
	%37 = getelementptr i16, ptr addrspace(1) %2, i64 %16, !dbg !19
	%38 = tail call { i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09@$3 ld.global.v2.b32 { $0, $1 }, [ $2 + 0 ];\0A\09@!$5 mov.u32 $0, $4;\0A\09@!$7 mov.u32 $1, $6;", "=r,=r,l,b,r,b,r,b"(ptr addrspace(1) %37, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !20
	%39 = extractvalue { i32, i32 } %38, 0, !dbg !20
	%40 = extractvalue { i32, i32 } %38, 1, !dbg !20
	%41 = trunc i32 %39 to i16, !dbg !20
	%extelt.offset2 = lshr i32 %39, 16, !dbg !20
	%42 = trunc i32 %extelt.offset2 to i16, !dbg !20
	%43 = trunc i32 %40 to i16, !dbg !20
	%extelt.offset3 = lshr i32 %40, 16, !dbg !20
	%44 = trunc i32 %extelt.offset3 to i16, !dbg !20
	%45 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %41) #6, !dbg !21
	%46 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %42) #6, !dbg !21
	%47 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %43) #6, !dbg !21
	%48 = tail call float asm "cvt.f32.bf16 $0, $1;", "=r,h"(i16 %44) #6, !dbg !21
	; --- Load 4 floats from %3, indexed by the in-block offset %12 only (no block
	;     term), with the L1::evict_last qualifier. The values are unpacked later,
	;     in the __nv_rsqrtf.exit block. ---
	%49 = zext nneg i32 %12 to i64, !dbg !22
	%50 = getelementptr float, ptr addrspace(1) %3, i64 %49, !dbg !22
	%51 = tail call { i32, i32, i32, i32 } asm sideeffect "mov.u32 $0, 0x0;\0A\09mov.u32 $1, 0x0;\0A\09mov.u32 $2, 0x0;\0A\09mov.u32 $3, 0x0;\0A\09@$5 ld.global.L1::evict_last.v4.b32 { $0, $1, $2, $3 }, [ $4 + 0 ];\0A\09@!$7 mov.u32 $0, $6;\0A\09@!$9 mov.u32 $1, $8;\0A\09@!$11 mov.u32 $2, $10;\0A\09@!$13 mov.u32 $3, $12;", "=r,=r,=r,=r,l,b,r,b,r,b,r,b,r,b"(ptr addrspace(1) %50, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true, i32 0, i1 true) #6, !dbg !23
	; --- x = (%1 + %0) + %2 for the thread's 4 elements.
	;     Elements 0 and 1 are handled as a <2 x float> (%62); elements 2 and 3 as
	;     scalars (%63, %64). ---
	%52 = fadd float %35, %23, !dbg !24
	%53 = fadd float %36, %24, !dbg !24
	%54 = insertelement <2 x i32> poison, i32 %19, i64 0, !dbg !15
	%55 = insertelement <2 x i32> %54, i32 %20, i64 1, !dbg !15
	%56 = bitcast <2 x i32> %55 to <2 x float>, !dbg !15
	%57 = insertelement <2 x float> poison, float %33, i64 0, !dbg !24
	%58 = insertelement <2 x float> %57, float %34, i64 1, !dbg !24
	%59 = fadd <2 x float> %58, %56, !dbg !24
	%60 = insertelement <2 x float> poison, float %45, i64 0, !dbg !25
	%61 = insertelement <2 x float> %60, float %46, i64 1, !dbg !25
	%62 = fadd <2 x float> %59, %61, !dbg !25
	%63 = fadd float %52, %47, !dbg !25
	%64 = fadd float %53, %48, !dbg !25
	; --- First reduction: sum of x ------------------------------------------
	; Per-thread partial sum of the 4 elements (%69).
	%65 = extractelement <2 x float> %62, i64 0, !dbg !26
	%66 = extractelement <2 x float> %62, i64 1, !dbg !26
	%67 = fadd float %65, %66, !dbg !26
	%68 = fadd float %67, %63, !dbg !26
	%69 = fadd float %68, %64, !dbg !26
	; Butterfly shuffles with offsets 16, 8, 4, 2, 1 (mask -1) accumulate the
	; partial sums across the warp; the result is %89.
	%70 = bitcast float %69 to i32, !dbg !32
	%71 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %70, i32 16, i32 31), !dbg !32
	%72 = bitcast i32 %71 to float, !dbg !32
	%73 = fadd float %69, %72, !dbg !26
	%74 = bitcast float %73 to i32, !dbg !32
	%75 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %74, i32 8, i32 31), !dbg !32
	%76 = bitcast i32 %75 to float, !dbg !32
	%77 = fadd float %73, %76, !dbg !26
	%78 = bitcast float %77 to i32, !dbg !32
	%79 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %78, i32 4, i32 31), !dbg !32
	%80 = bitcast i32 %79 to float, !dbg !32
	%81 = fadd float %77, %80, !dbg !26
	%82 = bitcast float %81 to i32, !dbg !32
	%83 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %82, i32 2, i32 31), !dbg !32
	%84 = bitcast i32 %83 to float, !dbg !32
	%85 = fadd float %81, %84, !dbg !26
	%86 = bitcast float %85 to i32, !dbg !32
	%87 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %86, i32 1, i32 31), !dbg !32
	%88 = bitcast i32 %87 to float, !dbg !32
	%89 = fadd float %85, %88, !dbg !26
	; Lane 0 of each warp (%90) stores the warp's sum into shared slot [warp index] (%92).
	%90 = icmp eq i32 %9, 0, !dbg !32
	%91 = zext nneg i32 %11 to i64, !dbg !32
	%92 = getelementptr float, ptr addrspace(3) @global_smem, i64 %91, !dbg !32
	tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %92, float %89, i1 %90) #6, !dbg !32
	tail call void @llvm.nvvm.barrier0(), !dbg !32
	; Threads with tid.x < 2 (%93) reload one slot each and combine the two slots with
	; a single offset-1 shuffle. The store back is predicated on %103
	; (tid.x < 2 and tid.x even), and writes slot [tid.x].
	%93 = icmp slt i32 %8, 2, !dbg !32
	%94 = sext i32 %8 to i64, !dbg !32
	%95 = getelementptr float, ptr addrspace(3) @global_smem, i64 %94, !dbg !32
	%96 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %95, i1 %93) #6, !dbg !32
	%97 = bitcast float %96 to i32, !dbg !32
	%98 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %97, i32 1, i32 31), !dbg !32
	%99 = bitcast i32 %98 to float, !dbg !32
	%100 = fadd float %96, %99, !dbg !26
	%101 = and i32 %8, 1, !dbg !32
	%102 = icmp eq i32 %101, 0, !dbg !32
	%103 = and i1 %93, %102, !dbg !32
	tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %95, float %100, i1 %103) #6, !dbg !32
	tail call void @llvm.nvvm.barrier0(), !dbg !32
	; Every thread reads the total from shared slot 0 and divides by 256: %106 = mean.
	%104 = load float, ptr addrspace(3) @global_smem, align 4, !dbg !32
	%105 = fadd float %104, 0.000000e+00, !dbg !34
	%106 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %105, float 2.560000e+02) #6, !dbg !38
	; --- Centered values d = x - mean (%107..%110) and their squares (%111..%114) ---
	%107 = fsub float %65, %106, !dbg !39
	%108 = fsub float %66, %106, !dbg !39
	%109 = fsub float %63, %106, !dbg !39
	%110 = fsub float %64, %106, !dbg !39
	%111 = fmul float %107, %107, !dbg !40
	%112 = fmul float %108, %108, !dbg !40
	%113 = fmul float %109, %109, !dbg !40
	%114 = fmul float %110, %110, !dbg !40
	; Barrier between the read of shared slot 0 above and the stores of the second
	; reduction below, which reuse the same slots.
	tail call void @llvm.nvvm.barrier0(), !dbg !41
	; --- Second reduction: sum of d^2, same structure as the first ------------
	%115 = fadd float %111, %112, !dbg !43
	%116 = fadd float %113, %115, !dbg !43
	%117 = fadd float %114, %116, !dbg !43
	%118 = bitcast float %117 to i32, !dbg !41
	%119 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %118, i32 16, i32 31), !dbg !41
	%120 = bitcast i32 %119 to float, !dbg !41
	%121 = fadd float %117, %120, !dbg !43
	%122 = bitcast float %121 to i32, !dbg !41
	%123 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %122, i32 8, i32 31), !dbg !41
	%124 = bitcast i32 %123 to float, !dbg !41
	%125 = fadd float %121, %124, !dbg !43
	%126 = bitcast float %125 to i32, !dbg !41
	%127 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %126, i32 4, i32 31), !dbg !41
	%128 = bitcast i32 %127 to float, !dbg !41
	%129 = fadd float %125, %128, !dbg !43
	%130 = bitcast float %129 to i32, !dbg !41
	%131 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %130, i32 2, i32 31), !dbg !41
	%132 = bitcast i32 %131 to float, !dbg !41
	%133 = fadd float %129, %132, !dbg !43
	%134 = bitcast float %133 to i32, !dbg !41
	%135 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %134, i32 1, i32 31), !dbg !41
	%136 = bitcast i32 %135 to float, !dbg !41
	%137 = fadd float %133, %136, !dbg !43
	tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %92, float %137, i1 %90) #6, !dbg !41
	tail call void @llvm.nvvm.barrier0(), !dbg !41
	%138 = tail call float asm sideeffect "@$2 ld.shared.b32 $0, [ $1 + 0 ];", "=r,r,b"(ptr addrspace(3) %95, i1 %93) #6, !dbg !41
	%139 = bitcast float %138 to i32, !dbg !41
	%140 = tail call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 -1, i32 %139, i32 1, i32 31), !dbg !41
	%141 = bitcast i32 %140 to float, !dbg !41
	%142 = fadd float %138, %141, !dbg !43
	tail call void asm sideeffect "@$2 st.shared.b32 [ $0 + 0 ], $1;", "r,r,b"(ptr addrspace(3) %95, float %142, i1 %103) #6, !dbg !41
	tail call void @llvm.nvvm.barrier0(), !dbg !41
	; %145 = sum(d^2) / 256; %146 adds the constant 0x3EE4F8B580000000 (about 1e-5).
	%143 = load float, ptr addrspace(3) @global_smem, align 4, !dbg !41
	%144 = fadd float %143, 0.000000e+00, !dbg !46
	%145 = tail call float asm "div.full.f32 $0, $1, $2;", "=r,r,r"(float %144, float 2.560000e+02) #6, !dbg !48
	%146 = fadd float %145, 0x3EE4F8B580000000, !dbg !49
	; --- Reciprocal square root of %146: __nvvm_reflect("__CUDA_FTZ") != 0 takes the
	;     .ftz intrinsic (block 148), otherwise the non-.ftz one (block 150). ---
	%147 = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6, !dbg !50
	%.not.i = icmp eq i32 %147, 0, !dbg !50
	br i1 %.not.i, label %150, label %148, !dbg !50

	148: ; preds = %7
	%149 = tail call float @llvm.nvvm.rsqrt.approx.ftz.f(float %146), !dbg !50
	br label %__nv_rsqrtf.exit, !dbg !50

	150: ; preds = %7
	%151 = tail call float @llvm.nvvm.rsqrt.approx.f(float %146), !dbg !50
	br label %__nv_rsqrtf.exit, !dbg !50

	__nv_rsqrtf.exit: ; preds = %148, %150
	; %.0.i = rsqrt result from whichever branch ran.
	%.0.i = phi float [ %149, %148 ], [ %151, %150 ], !dbg !50
	; Unpack the 4 floats loaded from %3: %159, %157, %155, %153 are elements 0..3.
	%152 = extractvalue { i32, i32, i32, i32 } %51, 3, !dbg !23
	%153 = bitcast i32 %152 to float, !dbg !23
	%154 = extractvalue { i32, i32, i32, i32 } %51, 2, !dbg !23
	%155 = bitcast i32 %154 to float, !dbg !23
	%156 = extractvalue { i32, i32, i32, i32 } %51, 1, !dbg !23
	%157 = bitcast i32 %156 to float, !dbg !23
	%158 = extractvalue { i32, i32, i32, i32 } %51, 0, !dbg !23
	%159 = bitcast i32 %158 to float, !dbg !23
	; Scale the centered values by the rsqrt result, then by the matching %3 element.
	%160 = fmul float %107, %.0.i, !dbg !51
	%161 = fmul float %108, %.0.i, !dbg !51
	%162 = fmul float %109, %.0.i, !dbg !51
	%163 = fmul float %110, %.0.i, !dbg !51
	%164 = fmul float %160, %159, !dbg !52
	%165 = fmul float %161, %157, !dbg !52
	%166 = fmul float %162, %155, !dbg !52
	%167 = fmul float %163, %153, !dbg !52
	; --- Convert to bf16 (cvt.rn), pack pairs into 32-bit words, and store both words
	;     to %4 with one vector store (predicate constant true). ---
	%168 = getelementptr i16, ptr addrspace(1) %4, i64 %16, !dbg !53
	%169 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %164) #6, !dbg !54
	%170 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %165) #6, !dbg !54
	%171 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %166) #6, !dbg !54
	%172 = tail call i16 asm "cvt.rn.bf16.f32 $0, $1;", "=h,r"(float %167) #6, !dbg !54
	%173 = insertelement <2 x i16> undef, i16 %169, i64 0, !dbg !54
	%174 = insertelement <2 x i16> %173, i16 %170, i64 1, !dbg !54
	%175 = bitcast <2 x i16> %174 to i32, !dbg !54
	%176 = insertelement <2 x i16> undef, i16 %171, i64 0, !dbg !54
	%177 = insertelement <2 x i16> %176, i16 %172, i64 1, !dbg !54
	%178 = bitcast <2 x i16> %177 to i32, !dbg !54
	tail call void asm sideeffect "@$3 st.global.v2.b32 [ $2 + 0 ], { $0, $1 };", "r,r,l,b"(i32 %175, i32 %178, ptr addrspace(1) %168, i1 true) #6, !dbg !54
	ret void, !dbg !55
	}

	; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
	; Reads the PTX special register tid.x (thread index within the block, x dimension).
	declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0

	; Function Attrs: convergent nocallback nounwind memory(inaccessiblemem: readwrite)
	; Butterfly warp shuffle. In this module it is always called as
	; (i32 -1, value, offset, i32 31) with offsets 16, 8, 4, 2, 1.
	declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32) #1

	; Function Attrs: convergent nocallback nounwind
	; Barrier intrinsic; the kernel calls it between its shared-memory stores and loads.
	declare void @llvm.nvvm.barrier0() #2

	; Function Attrs: alwaysinline nounwind
	; float __nv_rsqrtf(float x)
	; Approximate reciprocal square root of %x. Queries __nvvm_reflect("__CUDA_FTZ")
	; and uses the .ftz intrinsic when the answer is non-zero, the plain one otherwise.
	define float @__nv_rsqrtf(float %x) local_unnamed_addr #3 {
	entry:
	%ftz.flag = tail call i32 @__nvvm_reflect(ptr nonnull @.str) #6
	%use.ftz = icmp ne i32 %ftz.flag, 0
	br i1 %use.ftz, label %rsqrt.ftz, label %rsqrt.noftz

	rsqrt.ftz: ; preds = %entry
	%r.ftz = tail call float @llvm.nvvm.rsqrt.approx.ftz.f(float %x)
	br label %done

	rsqrt.noftz: ; preds = %entry
	%r.noftz = tail call float @llvm.nvvm.rsqrt.approx.f(float %x)
	br label %done

	done: ; preds = %rsqrt.noftz, %rsqrt.ftz
	%result = phi float [ %r.ftz, %rsqrt.ftz ], [ %r.noftz, %rsqrt.noftz ]
	ret float %result
	}

	; Takes a pointer to an option-name string and returns an i32; callers in this
	; module pass @.str ("__CUDA_FTZ") and compare the result with 0.
	declare i32 @__nvvm_reflect(ptr) local_unnamed_addr #4

	; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
	; Approximate reciprocal square root, .ftz variant (taken when the reflect result is non-zero).
	declare float @llvm.nvvm.rsqrt.approx.ftz.f(float) #5

	; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
	; Approximate reciprocal square root, non-.ftz variant (taken when the reflect result is zero).
	declare float @llvm.nvvm.rsqrt.approx.f(float) #5

	; Attribute groups, listed with the declarations/definitions that reference them.
	; #0: @llvm.nvvm.read.ptx.sreg.tid.x
	attributes #0 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
	; #1: @llvm.nvvm.shfl.sync.bfly.i32
	attributes #1 = { convergent nocallback nounwind memory(inaccessiblemem: readwrite) }
	; #2: @llvm.nvvm.barrier0
	attributes #2 = { convergent nocallback nounwind }
	; #3: @__nv_rsqrtf (alwaysinline)
	attributes #3 = { alwaysinline nounwind "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
	; #4: @__nvvm_reflect
	attributes #4 = { "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
	; #5: @llvm.nvvm.rsqrt.approx.ftz.f and @llvm.nvvm.rsqrt.approx.f
	attributes #5 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
	; #6: applied at call sites (inline asm and @__nvvm_reflect calls)
	attributes #6 = { nounwind }

	; Named module-level metadata.
	!llvm.module.flags = !{!0, !1}
	!llvm.dbg.cu = !{!2}
	; The list references !4 and !5 twice each.
	!nvvm.annotations = !{!4, !5, !5, !4}
	!llvm.ident = !{!6}

	!0 = !{i32 2, !"Debug Info Version", i32 3}
	; NOTE(review): presumably the value __nvvm_reflect("__CUDA_FTZ") is resolved to;
	; confirm against the NVPTX backend documentation.
	!1 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
	!2 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "triton", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
	; Source file that the kernel's debug locations refer to.
	!3 = !DIFile(filename: "cdohrmmhfsykzlva6pepxaa7gf7klw7w5jzorpspyaldhfg3acr2.py", directory: "/tmp/torchinductor_root/do")
	; Marks @triton__0d1d2d3d4d5de6de as a kernel.
	!4 = !{ptr @triton__0d1d2d3d4d5de6de, !"kernel", i32 1}
	; maxntidx = 64 for the kernel.
	!5 = !{ptr @triton__0d1d2d3d4d5de6de, !"maxntidx", i32 64}
	!6 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"}
	; Debug-info subprogram for the kernel; attached to the definition via `!dbg !7`.
	; spFlags values are combined with a bare `|` (a backslash is not valid here).
	!7 = distinct !DISubprogram(name: "triton__0d1d2d3d4d5de6de", linkageName: "triton__0d1d2d3d4d5de6de", scope: !3, file: !3, line: 18, type: !8, scopeLine: 18, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
	; Subroutine type for the kernel's subprogram; its type list (!9) is empty.
	!8 = !DISubroutineType(cc: DW_CC_normal, types: !9)
	!9 = !{}
	; !10-!25: source locations scoped directly to the kernel's subprogram (!7).
	!10 = !DILocation(line: 26, column: 26, scope: !7)
	!11 = !DILocation(line: 23, column: 28, scope: !7)
	!12 = !DILocation(line: 30, column: 40, scope: !7)
	!13 = !DILocation(line: 30, column: 36, scope: !7)
	!14 = !DILocation(line: 30, column: 30, scope: !7)
	!15 = !DILocation(line: 30, column: 46, scope: !7)
	!16 = !DILocation(line: 31, column: 30, scope: !7)
	!17 = !DILocation(line: 31, column: 46, scope: !7)
	!18 = !DILocation(line: 31, column: 67, scope: !7)
	!19 = !DILocation(line: 32, column: 30, scope: !7)
	!20 = !DILocation(line: 32, column: 46, scope: !7)
	!21 = !DILocation(line: 32, column: 67, scope: !7)
	!22 = !DILocation(line: 33, column: 31, scope: !7)
	!23 = !DILocation(line: 33, column: 36, scope: !7)
	!24 = !DILocation(line: 35, column: 18, scope: !7)
	!25 = !DILocation(line: 37, column: 18, scope: !7)
	; !26-!33: locations for the first reduction; scopes !27/!29 use file !28
	; (triton/language/standard.py), inlined at line 42 (!31, !33).
	!26 = !DILocation(line: 233, column: 15, scope: !27, inlinedAt: !30)
	!27 = distinct !DILexicalBlockFile(scope: !29, file: !28, discriminator: 0)
	!28 = !DIFile(filename: "standard.py", directory: "/usr/local/lib/python3.10/dist-packages/triton/language")
	!29 = distinct !DILexicalBlockFile(scope: !7, file: !28, discriminator: 0)
	!30 = !DILocation(line: 243, column: 36, scope: !27, inlinedAt: !31)
	!31 = !DILocation(line: 42, column: 59, scope: !27)
	!32 = !DILocation(line: 243, column: 36, scope: !29, inlinedAt: !33)
	!33 = !DILocation(line: 42, column: 59, scope: !29)
	; !34-!37: location in file !36 (torch/_inductor/triton_helpers.py), inlined at line 42.
	!34 = !DILocation(line: 8, column: 15, scope: !35, inlinedAt: !37)
	!35 = distinct !DILexicalBlockFile(scope: !7, file: !36, discriminator: 0)
	!36 = !DIFile(filename: "triton_helpers.py", directory: "/usr/local/lib/python3.10/dist-packages/torch/_inductor")
	!37 = !DILocation(line: 42, column: 45, scope: !35)
	!38 = !DILocation(line: 45, column: 20, scope: !7)
	!39 = !DILocation(line: 46, column: 19, scope: !7)
	!40 = !DILocation(line: 47, column: 20, scope: !7)
	; !41-!47: same inlined scopes as above, for the second reduction (inlined at line 50).
	!41 = !DILocation(line: 243, column: 36, scope: !29, inlinedAt: !42)
	!42 = !DILocation(line: 50, column: 59, scope: !29)
	!43 = !DILocation(line: 233, column: 15, scope: !27, inlinedAt: !44)
	!44 = !DILocation(line: 243, column: 36, scope: !27, inlinedAt: !45)
	!45 = !DILocation(line: 50, column: 59, scope: !27)
	!46 = !DILocation(line: 8, column: 15, scope: !35, inlinedAt: !47)
	!47 = !DILocation(line: 50, column: 45, scope: !35)
	; !48-!55: remaining locations scoped directly to the kernel's subprogram (!7).
	!48 = !DILocation(line: 53, column: 20, scope: !7)
	!49 = !DILocation(line: 55, column: 20, scope: !7)
	!50 = !DILocation(line: 56, column: 26, scope: !7)
	!51 = !DILocation(line: 57, column: 20, scope: !7)
	!52 = !DILocation(line: 58, column: 20, scope: !7)
	!53 = !DILocation(line: 60, column: 25, scope: !7)
	!54 = !DILocation(line: 60, column: 48, scope: !7)
	!55 = !DILocation(line: 60, column: 4, scope: !7)