; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV

; Purpose: verify that a call to the OpenCL builtin async_work_group_copy is
; lowered to an OpGroupAsyncCopy instruction by the SPIR-V backend.

; TODO: This test currently fails with LLVM_ENABLE_EXPENSIVE_CHECKS enabled
; XFAIL: expensive_checks

; The first pattern captures the operand that follows the result type of
; OpGroupAsyncCopy as "Scope"; the second requires that same id to be defined
; by an OpConstant. Both are DAG checks, so the two lines may match in either
; order in the output.
; CHECK-SPIRV-DAG: %[[#]] = OpGroupAsyncCopy %[[#]] %[[#Scope:]]
; CHECK-SPIRV-DAG: %[[#Scope]] = OpConstant %[[#]]

%opencl.event_t = type opaque

; Kernel under test. It makes three passes over its buffers:
;   1. zero-fills localBuffer[get_local_id(0) * copiesPerWorkItem + i],
;   2. copies src[get_global_id(0) * copiesPerWorkItem + i] into
;      localBuffer[get_local_id(0) * copiesPerWorkItem + i],
;   3. calls async_work_group_copy from localBuffer to
;      dst + copiesPerWorkgroup * get_group_id(0) and waits on the event.
; Both loops run i from 0 while i < copiesPerWorkItem, and each loop is
; followed by a call to barrier(1).
define spir_kernel void @test_fn(<2 x i8> addrspace(1)* %src, <2 x i8> addrspace(1)* %dst, <2 x i8> addrspace(3)* %localBuffer, i32 %copiesPerWorkgroup, i32 %copiesPerWorkItem) {
entry:
  ; Spill every argument to a stack slot (unoptimized, -O0 style IR).
  %src.addr = alloca <2 x i8> addrspace(1)*, align 4
  %dst.addr = alloca <2 x i8> addrspace(1)*, align 4
  %localBuffer.addr = alloca <2 x i8> addrspace(3)*, align 4
  %copiesPerWorkgroup.addr = alloca i32, align 4
  %copiesPerWorkItem.addr = alloca i32, align 4
  %i = alloca i32, align 4
  %event = alloca %opencl.event_t*, align 4
  store <2 x i8> addrspace(1)* %src, <2 x i8> addrspace(1)** %src.addr, align 4
  store <2 x i8> addrspace(1)* %dst, <2 x i8> addrspace(1)** %dst.addr, align 4
  store <2 x i8> addrspace(3)* %localBuffer, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
  store i32 %copiesPerWorkgroup, i32* %copiesPerWorkgroup.addr, align 4
  store i32 %copiesPerWorkItem, i32* %copiesPerWorkItem.addr, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond

; First loop header: continue while i < copiesPerWorkItem (signed compare).
for.cond:                                         ; preds = %for.inc, %entry
  %0 = load i32, i32* %i, align 4
  %1 = load i32, i32* %copiesPerWorkItem.addr, align 4
  %cmp = icmp slt i32 %0, %1
  br i1 %cmp, label %for.body, label %for.end

; First loop body: store a zero vector at
; localBuffer[get_local_id(0) * copiesPerWorkItem + i].
for.body:                                         ; preds = %for.cond
  %call = call spir_func i32 @_Z12get_local_idj(i32 0)
  %2 = load i32, i32* %copiesPerWorkItem.addr, align 4
  %mul = mul i32 %call, %2
  %3 = load i32, i32* %i, align 4
  %add = add i32 %mul, %3
  %4 = load <2 x i8> addrspace(3)*, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
  %arrayidx = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %4, i32 %add
  store <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* %arrayidx, align 2
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %5 = load i32, i32* %i, align 4
  %inc = add nsw i32 %5, 1
  store i32 %inc, i32* %i, align 4
  br label %for.cond

; Between the loops: call barrier(1), then reset i to 0 for the second loop.
for.end:                                          ; preds = %for.cond
  call spir_func void @_Z7barrierj(i32 1)
  store i32 0, i32* %i, align 4
  br label %for.cond1

; Second loop header: continue while i < copiesPerWorkItem (signed compare).
for.cond1:                                        ; preds = %for.inc12, %for.end
  %6 = load i32, i32* %i, align 4
  %7 = load i32, i32* %copiesPerWorkItem.addr, align 4
  %cmp2 = icmp slt i32 %6, %7
  br i1 %cmp2, label %for.body3, label %for.end14

; Second loop body: load src[get_global_id(0) * copiesPerWorkItem + i] and
; store it to localBuffer[get_local_id(0) * copiesPerWorkItem + i].
for.body3:                                        ; preds = %for.cond1
  %call4 = call spir_func i32 @_Z13get_global_idj(i32 0)
  %8 = load i32, i32* %copiesPerWorkItem.addr, align 4
  %mul5 = mul i32 %call4, %8
  %9 = load i32, i32* %i, align 4
  %add6 = add i32 %mul5, %9
  %10 = load <2 x i8> addrspace(1)*, <2 x i8> addrspace(1)** %src.addr, align 4
  %arrayidx7 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %10, i32 %add6
  %11 = load <2 x i8>, <2 x i8> addrspace(1)* %arrayidx7, align 2
  %call8 = call spir_func i32 @_Z12get_local_idj(i32 0)
  %12 = load i32, i32* %copiesPerWorkItem.addr, align 4
  %mul9 = mul i32 %call8, %12
  %13 = load i32, i32* %i, align 4
  %add10 = add i32 %mul9, %13
  %14 = load <2 x i8> addrspace(3)*, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
  %arrayidx11 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %14, i32 %add10
  store <2 x i8> %11, <2 x i8> addrspace(3)* %arrayidx11, align 2
  br label %for.inc12

for.inc12:                                        ; preds = %for.body3
  %15 = load i32, i32* %i, align 4
  %inc13 = add nsw i32 %15, 1
  store i32 %inc13, i32* %i, align 4
  br label %for.cond1

; Epilogue: call barrier(1), then issue the async copy that the check lines
; above are looking for, and wait on the event it returns.
for.end14:                                        ; preds = %for.cond1
  call spir_func void @_Z7barrierj(i32 1)
  %16 = load <2 x i8> addrspace(1)*, <2 x i8> addrspace(1)** %dst.addr, align 4
  %17 = load i32, i32* %copiesPerWorkgroup.addr, align 4
  %call15 = call spir_func i32 @_Z12get_group_idj(i32 0)
  %mul16 = mul i32 %17, %call15
  ; Destination pointer is dst + copiesPerWorkgroup * get_group_id(0).
  %add.ptr = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %16, i32 %mul16
  %18 = load <2 x i8> addrspace(3)*, <2 x i8> addrspace(3)** %localBuffer.addr, align 4
  %19 = load i32, i32* %copiesPerWorkgroup.addr, align 4
  ; Copy copiesPerWorkgroup elements from localBuffer, passing a null event.
  %call17 = call spir_func %opencl.event_t* @_Z21async_work_group_copyPU3AS1Dv2_cPKU3AS3S_j9ocl_event(<2 x i8> addrspace(1)* %add.ptr, <2 x i8> addrspace(3)* %18, i32 %19, %opencl.event_t* null)
  store %opencl.event_t* %call17, %opencl.event_t** %event, align 4
  ; wait_group_events is declared with an addrspace(4) pointer, so the stack
  ; slot holding the event is cast to that address space before the call.
  %20 = addrspacecast %opencl.event_t** %event to %opencl.event_t* addrspace(4)*
  call spir_func void @_Z17wait_group_eventsiPU3AS49ocl_event(i32 1, %opencl.event_t* addrspace(4)* %20)
  ret void
}

; Itanium-mangled OpenCL builtins referenced by the kernel above.
declare spir_func i32 @_Z12get_local_idj(i32)

declare spir_func void @_Z7barrierj(i32)

declare spir_func i32 @_Z13get_global_idj(i32)

declare spir_func %opencl.event_t* @_Z21async_work_group_copyPU3AS1Dv2_cPKU3AS3S_j9ocl_event(<2 x i8> addrspace(1)*, <2 x i8> addrspace(3)*, i32, %opencl.event_t*)

declare spir_func i32 @_Z12get_group_idj(i32)

declare spir_func void @_Z17wait_group_eventsiPU3AS49ocl_event(i32, %opencl.event_t* addrspace(4)*)