; RUN: opt -S -passes='early-cse<memssa>' %s -o %t
; RUN: FileCheck --check-prefixes=CSE,CHECK %s < %t
; Finish compiling to verify that dxil-op-lower removes the globals entirely.
; RUN: opt -mtriple=dxil-pc-shadermodel6.0-compute -S -dxil-op-lower %t -o - | FileCheck --check-prefixes=DXOP,CHECK %s
; RUN: opt -mtriple=dxil-pc-shadermodel6.6-compute -S -dxil-op-lower %t -o - | FileCheck --check-prefixes=DXOP,CHECK %s
; RUN: llc -mtriple=dxil-pc-shadermodel6.0-compute --filetype=asm -o - %t | FileCheck --check-prefixes=DXOP,CHECK %s
; RUN: llc -mtriple=dxil-pc-shadermodel6.6-compute --filetype=asm -o - %t | FileCheck --check-prefixes=DXOP,CHECK %s

; Ensure that EarlyCSE is able to eliminate unneeded loads of resource globals across typedBufferLoad.
; Also that DXILOpLowering eliminates the globals entirely.
;
; Prefix usage in this file:
;   CSE   - checked only against the early-cse output (first two RUN lines).
;   DXOP  - checked only against the dxil-op-lower / llc outputs.
;   CHECK - checked against every output.

%"class.hlsl::RWBuffer" = type { target("dx.TypedBuffer", <4 x float>, 1, 0, 0) }

; After lowering, neither resource global may remain in the module.
; DXOP-NOT: @In = global
; DXOP-NOT: @Out = global
@In = global %"class.hlsl::RWBuffer" zeroinitializer, align 4
@Out = global %"class.hlsl::RWBuffer" zeroinitializer, align 4

; CHECK-LABEL: define void @main()
define void @main() local_unnamed_addr #0 {
entry:
  ; Each handle is created from its binding and stored to the matching global.
  ; DXOP: %In_h.i1 = call %dx.types.Handle @dx.op.createHandle
  ; DXOP: %Out_h.i2 = call %dx.types.Handle @dx.op.createHandle
  %In_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false)
  store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %In_h.i, ptr @In, align 4
  %Out_h.i = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v4f32_1_0_0t(i32 4, i32 1, i32 1, i32 0, i1 false)
  store target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, ptr @Out, align 4
  ; CSE: call i32 @llvm.dx.flattened.thread.id.in.group()
  %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
  ; The first reload of @In follows the store above and must not survive.
  ; CHECK-NOT: load {{.*}} ptr @In
  %1 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
  ; CSE: call noundef { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t
  %load = call noundef {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %1, i32 %0)
  %2 = extractvalue {<4 x float>, i1} %load, 0
  ; The second reload of @In sits after the typed buffer load call and must
  ; not survive either.
  ; CHECK-NOT: load {{.*}} ptr @In
  %3 = load target("dx.TypedBuffer", <4 x float>, 1, 0, 0), ptr @In, align 4
  %load2 = call noundef {<4 x float>, i1} @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %3, i32 %0)
  %4 = extractvalue {<4 x float>, i1} %load2, 0
  %add.i = fadd <4 x float> %2, %4
  call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %Out_h.i, i32 %0, <4 x float> %add.i)
  ; CHECK: ret void
  ret void
}

; Capture the attribute groups attached to the typed buffer load and store
; declarations so their memory effects can be verified below.
; CSE-DAG: declare { <4 x float>, i1 } @llvm.dx.resource.load.typedbuffer.v4f32.tdx.TypedBuffer_v4f32_1_0_0t(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32) [[ROAttr:#[0-9]+]]
; CSE-DAG: declare void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v4f32_1_0_0t.v4f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <4 x float>) [[WOAttr:#[0-9]+]]

attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }

; Just need to split up the DAG searches.
; CSE: attributes #0

; CSE-DAG: attributes [[ROAttr]] = { {{.*}} memory(read) }
; CSE-DAG: attributes [[WOAttr]] = { {{.*}} memory(write) }