; Test lowering of OpenCL 2.0 device-side enqueue (enqueue_kernel) to SPIR-V:
; block literals (stack and global), block-invoke kernels, event wait lists,
; and local-size varargs must map onto OpEnqueueKernel operands.
; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV

; CHECK-SPIRV: OpEntryPoint Kernel %[[#BlockKer1:]] "__device_side_enqueue_block_invoke_kernel"
; CHECK-SPIRV: OpEntryPoint Kernel %[[#BlockKer2:]] "__device_side_enqueue_block_invoke_2_kernel"
; CHECK-SPIRV: OpEntryPoint Kernel %[[#BlockKer3:]] "__device_side_enqueue_block_invoke_3_kernel"
; CHECK-SPIRV: OpEntryPoint Kernel %[[#BlockKer4:]] "__device_side_enqueue_block_invoke_4_kernel"
; CHECK-SPIRV: OpEntryPoint Kernel %[[#BlockKer5:]] "__device_side_enqueue_block_invoke_5_kernel"
; CHECK-SPIRV: OpName %[[#BlockGlb1:]] "__block_literal_global"
; CHECK-SPIRV: OpName %[[#BlockGlb2:]] "__block_literal_global.1"

; CHECK-SPIRV: %[[#Int32Ty:]] = OpTypeInt 32
; CHECK-SPIRV: %[[#Int8Ty:]] = OpTypeInt 8
; CHECK-SPIRV: %[[#VoidTy:]] = OpTypeVoid
; CHECK-SPIRV: %[[#Int8PtrGenTy:]] = OpTypePointer Generic %[[#Int8Ty]]
; CHECK-SPIRV: %[[#EventTy:]] = OpTypeDeviceEvent
; CHECK-SPIRV: %[[#EventPtrTy:]] = OpTypePointer Generic %[[#EventTy]]
; CHECK-SPIRV: %[[#Int32LocPtrTy:]] = OpTypePointer Function %[[#Int32Ty]]
; CHECK-SPIRV: %[[#BlockStructTy:]] = OpTypeStruct
; CHECK-SPIRV: %[[#BlockStructLocPtrTy:]] = OpTypePointer Function %[[#BlockStructTy]]
; CHECK-SPIRV: %[[#BlockTy1:]] = OpTypeFunction %[[#VoidTy]] %[[#Int8PtrGenTy]]
; CHECK-SPIRV: %[[#BlockTy2:]] = OpTypeFunction %[[#VoidTy]] %[[#Int8PtrGenTy]]
; CHECK-SPIRV: %[[#BlockTy3:]] = OpTypeFunction %[[#VoidTy]] %[[#Int8PtrGenTy]]

; CHECK-SPIRV: %[[#ConstInt0:]] = OpConstant %[[#Int32Ty]] 0
; CHECK-SPIRV: %[[#EventNull:]] = OpConstantNull %[[#EventPtrTy]]
; CHECK-SPIRV: %[[#ConstInt21:]] = OpConstant %[[#Int32Ty]] 21
; CHECK-SPIRV: %[[#ConstInt8:]] = OpConstant %[[#Int32Ty]] 8
; CHECK-SPIRV: %[[#ConstInt24:]] = OpConstant %[[#Int32Ty]] 24
; CHECK-SPIRV: %[[#ConstInt12:]] = OpConstant %[[#Int32Ty]] 12
; CHECK-SPIRV: %[[#ConstInt2:]] = OpConstant %[[#Int32Ty]] 2

;; OpenCL source used to generate this IR:
;; typedef struct {int a;} ndrange_t;
;; #define NULL ((void*)0)

;; kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) {
;;   queue_t default_queue;
;;   unsigned flags = 0;
;;   ndrange_t ndrange;
;;   clk_event_t clk_event;
;;   clk_event_t event_wait_list;
;;   clk_event_t event_wait_list2[] = {clk_event};

;; Emits block literal on stack and block kernel.

; CHECK-SPIRV: %[[#BlockLitPtr1:]] = OpBitcast %[[#BlockStructLocPtrTy]]
; CHECK-SPIRV-NEXT: %[[#BlockLit1:]] = OpPtrCastToGeneric %[[#Int8PtrGenTy]] %[[#BlockLitPtr1]]
; CHECK-SPIRV-NEXT: %[[#]] = OpEnqueueKernel %[[#Int32Ty]] %[[#]] %[[#]] %[[#]] %[[#ConstInt0]] %[[#EventNull]] %[[#EventNull]] %[[#BlockKer1]] %[[#BlockLit1]] %[[#ConstInt21]] %[[#ConstInt8]]

;;   enqueue_kernel(default_queue, flags, ndrange,
;;                  ^(void) {
;;                    a[i] = c0;
;;                  });

;; Emits block literal on stack and block kernel.

; CHECK-SPIRV: %[[#Event1:]] = OpPtrCastToGeneric %[[#EventPtrTy]]
; CHECK-SPIRV: %[[#Event2:]] = OpPtrCastToGeneric %[[#EventPtrTy]]
; CHECK-SPIRV: %[[#BlockLitPtr2:]] = OpBitcast %[[#BlockStructLocPtrTy]]
; CHECK-SPIRV-NEXT: %[[#BlockLit2:]] = OpPtrCastToGeneric %[[#Int8PtrGenTy]] %[[#BlockLitPtr2]]
; CHECK-SPIRV-NEXT: %[[#]] = OpEnqueueKernel %[[#Int32Ty]] %[[#]] %[[#]] %[[#]] %[[#ConstInt2]] %[[#Event1]] %[[#Event2]] %[[#BlockKer2]] %[[#BlockLit2]] %[[#ConstInt24]] %[[#ConstInt8]]

;;   enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event,
;;                  ^(void) {
;;                    a[i] = b[i];
;;                  });

;;   char c;
;; Emits global block literal and block kernel.

; CHECK-SPIRV: %[[#Event1:]] = OpPtrCastToGeneric %[[#EventPtrTy]]
; CHECK-SPIRV: %[[#Event2:]] = OpPtrCastToGeneric %[[#EventPtrTy]]
; CHECK-SPIRV: %[[#BlockLit3Tmp:]] = OpBitcast %[[#]] %[[#BlockGlb1]]
; CHECK-SPIRV: %[[#BlockLit3:]] = OpPtrCastToGeneric %[[#Int8PtrGenTy]] %[[#BlockLit3Tmp]]
; CHECK-SPIRV: %[[#LocalBuf31:]] = OpPtrAccessChain %[[#Int32LocPtrTy]]
; CHECK-SPIRV: %[[#]] = OpEnqueueKernel %[[#Int32Ty]] %[[#]] %[[#]] %[[#]] %[[#ConstInt2]] %[[#Event1]] %[[#Event2]] %[[#BlockKer3]] %[[#BlockLit3]] %[[#ConstInt12]] %[[#ConstInt8]] %[[#LocalBuf31]]

;;   enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
;;                  ^(local void *p) {
;;                    return;
;;                  },
;;                  c);

;; Emits global block literal and block kernel.

; CHECK-SPIRV: %[[#BlockLit4Tmp:]] = OpBitcast %[[#]] %[[#BlockGlb2]]
; CHECK-SPIRV: %[[#BlockLit4:]] = OpPtrCastToGeneric %[[#Int8PtrGenTy]] %[[#BlockLit4Tmp]]
; CHECK-SPIRV: %[[#LocalBuf41:]] = OpPtrAccessChain %[[#Int32LocPtrTy]]
; CHECK-SPIRV-NEXT: %[[#LocalBuf42:]] = OpPtrAccessChain %[[#Int32LocPtrTy]]
; CHECK-SPIRV-NEXT: %[[#LocalBuf43:]] = OpPtrAccessChain %[[#Int32LocPtrTy]]
; CHECK-SPIRV-NEXT: %[[#]] = OpEnqueueKernel %[[#Int32Ty]] %[[#]] %[[#]] %[[#]] %[[#ConstInt0]] %[[#EventNull]] %[[#EventNull]] %[[#BlockKer4]] %[[#BlockLit4]] %[[#ConstInt12]] %[[#ConstInt8]] %[[#LocalBuf41]] %[[#LocalBuf42]] %[[#LocalBuf43]]

;;   enqueue_kernel(default_queue, flags, ndrange,
;;                  ^(local void *p1, local void *p2, local void *p3) {
;;                    return;
;;                  },
;;                  1, 2, 4);

;; Emits block literal on stack and block kernel.

; CHECK-SPIRV: %[[#Event1:]] = OpPtrCastToGeneric %[[#EventPtrTy]]
; CHECK-SPIRV: %[[#BlockLit5Tmp:]] = OpBitcast %[[#BlockStructLocPtrTy]]
; CHECK-SPIRV-NEXT: %[[#BlockLit5:]] = OpPtrCastToGeneric %[[#Int8PtrGenTy]] %[[#BlockLit5Tmp]]
; CHECK-SPIRV-NEXT: %[[#]] = OpEnqueueKernel %[[#Int32Ty]] %[[#]] %[[#]] %[[#]] %[[#ConstInt0]] %[[#EventNull]] %[[#Event1]] %[[#BlockKer5]] %[[#BlockLit5]] %[[#ConstInt24]] %[[#ConstInt8]]

;;   enqueue_kernel(default_queue, flags, ndrange, 0, NULL, &clk_event,
;;                  ^(void) {
;;                    a[i] = b[i];
;;                  });
;; }

; CHECK-SPIRV-DAG: %[[#BlockKer1]] = OpFunction %[[#VoidTy]] None %[[#BlockTy1]]
; CHECK-SPIRV-DAG: %[[#BlockKer2]] = OpFunction %[[#VoidTy]] None %[[#BlockTy1]]
; CHECK-SPIRV-DAG: %[[#BlockKer3]] = OpFunction %[[#VoidTy]] None %[[#BlockTy3]]
; CHECK-SPIRV-DAG: %[[#BlockKer4]] = OpFunction %[[#VoidTy]] None %[[#BlockTy2]]
; CHECK-SPIRV-DAG: %[[#BlockKer5]] = OpFunction %[[#VoidTy]] None %[[#BlockTy1]]

%opencl.queue_t = type opaque
%struct.ndrange_t = type { i32 }
%opencl.clk_event_t = type opaque
%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }

@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3 to i8*) to i8 addrspace(4)*) }, align 4
@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4 to i8*) to i8 addrspace(4)*) }, align 4

define dso_local spir_kernel void @device_side_enqueue(i32 addrspace(1)* noundef %a, i32 addrspace(1)* noundef %b, i32 noundef %i, i8 noundef signext %c0) {
entry:
  %a.addr = alloca i32 addrspace(1)*, align 4
  %b.addr = alloca i32 addrspace(1)*, align 4
  %i.addr = alloca i32, align 4
  %c0.addr = alloca i8, align 1
  %default_queue = alloca %opencl.queue_t*, align 4
  %flags = alloca i32, align 4
  %ndrange = alloca %struct.ndrange_t, align 4
  %clk_event = alloca %opencl.clk_event_t*, align 4
  %event_wait_list = alloca %opencl.clk_event_t*, align 4
  %event_wait_list2 = alloca [1 x %opencl.clk_event_t*], align 4
  %tmp = alloca %struct.ndrange_t, align 4
  %block = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, align 4
  %tmp3 = alloca %struct.ndrange_t, align 4
  %block4 = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4
  %c = alloca i8, align 1
  %tmp11 = alloca %struct.ndrange_t, align 4
  %block_sizes = alloca [1 x i32], align 4
  %tmp12 = alloca %struct.ndrange_t, align 4
  %block_sizes13 = alloca [3 x i32], align 4
  %tmp14 = alloca %struct.ndrange_t, align 4
  %block15 = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4
  store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4
  store i32 addrspace(1)* %b, i32 addrspace(1)** %b.addr, align 4
  store i32 %i, i32* %i.addr, align 4
  store i8 %c0, i8* %c0.addr, align 1
  store i32 0, i32* %flags, align 4
  %arrayinit.begin = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
  %0 = load %opencl.clk_event_t*, %opencl.clk_event_t** %clk_event, align 4
  store %opencl.clk_event_t* %0, %opencl.clk_event_t** %arrayinit.begin, align 4
  %1 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
  %2 = load i32, i32* %flags, align 4
  %3 = bitcast %struct.ndrange_t* %tmp to i8*
  %4 = bitcast %struct.ndrange_t* %ndrange to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 %4, i32 4, i1 false)
  %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0
  store i32 21, i32* %block.size, align 4
  %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1
  store i32 4, i32* %block.align, align 4
  %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2
  store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke, align 4
  %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3
  %5 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
  store i32 addrspace(1)* %5, i32 addrspace(1)** %block.captured, align 4
  %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4
  %6 = load i32, i32* %i.addr, align 4
  store i32 %6, i32* %block.captured1, align 4
  %block.captured2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 5
  %7 = load i8, i8* %c0.addr, align 1
  store i8 %7, i8* %block.captured2, align 4
  %8 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block to %struct.__opencl_block_literal_generic*
  %9 = addrspacecast %struct.__opencl_block_literal_generic* %8 to i8 addrspace(4)*
  %10 = call spir_func i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval(%struct.ndrange_t) %tmp, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9)
  %11 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
  %12 = load i32, i32* %flags, align 4
  %13 = bitcast %struct.ndrange_t* %tmp3 to i8*
  %14 = bitcast %struct.ndrange_t* %ndrange to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %13, i8* align 4 %14, i32 4, i1 false)
  %15 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)*
  %16 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
  %block.size5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block4, i32 0, i32 0
  store i32 24, i32* %block.size5, align 4
  %block.align6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block4, i32 0, i32 1
  store i32 4, i32* %block.align6, align 4
  %block.invoke7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block4, i32 0, i32 2
  store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2 to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke7, align 4
  %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block4, i32 0, i32 3
  %17 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
  store i32 addrspace(1)* %17, i32 addrspace(1)** %block.captured8, align 4
  %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block4, i32 0, i32 4
  %18 = load i32, i32* %i.addr, align 4
  store i32 %18, i32* %block.captured9, align 4
  %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block4, i32 0, i32 5
  %19 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 4
  store i32 addrspace(1)* %19, i32 addrspace(1)** %block.captured10, align 4
  %20 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block4 to %struct.__opencl_block_literal_generic*
  %21 = addrspacecast %struct.__opencl_block_literal_generic* %20 to i8 addrspace(4)*
  %22 = call spir_func i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %11, i32 %12, %struct.ndrange_t* %tmp3, i32 2, %opencl.clk_event_t* addrspace(4)* %15, %opencl.clk_event_t* addrspace(4)* %16, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %21)
  %23 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
  %24 = load i32, i32* %flags, align 4
  %25 = bitcast %struct.ndrange_t* %tmp11 to i8*
  %26 = bitcast %struct.ndrange_t* %ndrange to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 4, i1 false)
  %arraydecay = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0
  %27 = addrspacecast %opencl.clk_event_t** %arraydecay to %opencl.clk_event_t* addrspace(4)*
  %28 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
  %29 = getelementptr [1 x i32], [1 x i32]* %block_sizes, i32 0, i32 0
  %30 = load i8, i8* %c, align 1
  %31 = zext i8 %30 to i32
  store i32 %31, i32* %29, align 4
  %32 = call spir_func i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %23, i32 %24, %struct.ndrange_t* %tmp11, i32 2, %opencl.clk_event_t* addrspace(4)* %27, %opencl.clk_event_t* addrspace(4)* %28, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %29)
  %33 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
  %34 = load i32, i32* %flags, align 4
  %35 = bitcast %struct.ndrange_t* %tmp12 to i8*
  %36 = bitcast %struct.ndrange_t* %ndrange to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %35, i8* align 4 %36, i32 4, i1 false)
  %37 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 0
  store i32 1, i32* %37, align 4
  %38 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 1
  store i32 2, i32* %38, align 4
  %39 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 2
  store i32 4, i32* %39, align 4
  %40 = call spir_func i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, %struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %37)
  %41 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4
  %42 = load i32, i32* %flags, align 4
  %43 = bitcast %struct.ndrange_t* %tmp14 to i8*
  %44 = bitcast %struct.ndrange_t* %ndrange to i8*
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %43, i8* align 4 %44, i32 4, i1 false)
  %45 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)*
  %block.size16 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block15, i32 0, i32 0
  store i32 24, i32* %block.size16, align 4
  %block.align17 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block15, i32 0, i32 1
  store i32 4, i32* %block.align17, align 4
  %block.invoke18 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block15, i32 0, i32 2
  store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_5 to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke18, align 4
  %block.captured19 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block15, i32 0, i32 3
  %46 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4
  store i32 addrspace(1)* %46, i32 addrspace(1)** %block.captured19, align 4
  %block.captured20 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block15, i32 0, i32 4
  %47 = load i32, i32* %i.addr, align 4
  store i32 %47, i32* %block.captured20, align 4
  %block.captured21 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block15, i32 0, i32 5
  %48 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 4
  store i32 addrspace(1)* %48, i32 addrspace(1)** %block.captured21, align 4
  %49 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block15 to %struct.__opencl_block_literal_generic*
  %50 = addrspacecast %struct.__opencl_block_literal_generic* %49 to i8 addrspace(4)*
  %51 = call spir_func i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %41, i32 %42, %struct.ndrange_t* %tmp14, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* %45, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_5_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %50)
  ret void
}

declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i32, i1 immarg)

define internal spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* noundef %.block_descriptor) {
entry:
  %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
  %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4
  store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*
  store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4
  %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 5
  %0 = load i8, i8 addrspace(4)* %block.capture.addr, align 4
  %conv = sext i8 %0 to i32
  %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3
  %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr1, align 4
  %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4
  %2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4
  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2
  store i32 %conv, i32 addrspace(1)* %arrayidx, align 4
  ret void
}

define spir_kernel void @__device_side_enqueue_block_invoke_kernel(i8 addrspace(4)* %0) {
entry:
  call spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %0)
  ret void
}

declare spir_func i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i8 addrspace(4)*, i8 addrspace(4)*)

define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* noundef %.block_descriptor) {
entry:
  %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
  %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4
  store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
  store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4
  %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 5
  %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr, align 4
  %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
  %1 = load i32, i32 addrspace(4)* %block.capture.addr1, align 4
  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %1
  %2 = load i32, i32 addrspace(1)* %arrayidx, align 4
  %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
  %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr2, align 4
  %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
  %4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4
  %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4
  store i32 %2, i32 addrspace(1)* %arrayidx4, align 4
  ret void
}

define spir_kernel void @__device_side_enqueue_block_invoke_2_kernel(i8 addrspace(4)* %0) {
entry:
  call spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %0)
  ret void
}

declare spir_func i32 @__enqueue_kernel_basic_events(%opencl.queue_t*, i32, %struct.ndrange_t*, i32, %opencl.clk_event_t* addrspace(4)*, %opencl.clk_event_t* addrspace(4)*, i8 addrspace(4)*, i8 addrspace(4)*)

define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspace(4)* noundef %.block_descriptor, i8 addrspace(3)* noundef %p) {
entry:
  %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
  %p.addr = alloca i8 addrspace(3)*, align 4
  %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
  store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
  store i8 addrspace(3)* %p, i8 addrspace(3)** %p.addr, align 4
  store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
  ret void
}

define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)* %0, i8 addrspace(3)* %1) {
entry:
  call spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspace(4)* %0, i8 addrspace(3)* %1)
  ret void
}

declare spir_func i32 @__enqueue_kernel_events_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*, i32, %opencl.clk_event_t* addrspace(4)*, %opencl.clk_event_t* addrspace(4)*, i8 addrspace(4)*, i8 addrspace(4)*, i32, i32*)

define internal spir_func void @__device_side_enqueue_block_invoke_4(i8 addrspace(4)* noundef %.block_descriptor, i8 addrspace(3)* noundef %p1, i8 addrspace(3)* noundef %p2, i8 addrspace(3)* noundef %p3) {
entry:
  %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
  %p1.addr = alloca i8 addrspace(3)*, align 4
  %p2.addr = alloca i8 addrspace(3)*, align 4
  %p3.addr = alloca i8 addrspace(3)*, align 4
  %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4
  store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*
  store i8 addrspace(3)* %p1, i8 addrspace(3)** %p1.addr, align 4
  store i8 addrspace(3)* %p2, i8 addrspace(3)** %p2.addr, align 4
  store i8 addrspace(3)* %p3, i8 addrspace(3)** %p3.addr, align 4
  store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4
  ret void
}

define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)* %0, i8 addrspace(3)* %1, i8 addrspace(3)* %2, i8 addrspace(3)* %3) {
entry:
  call spir_func void @__device_side_enqueue_block_invoke_4(i8 addrspace(4)* %0, i8 addrspace(3)* %1, i8 addrspace(3)* %2, i8 addrspace(3)* %3)
  ret void
}

declare spir_func i32 @__enqueue_kernel_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*, i8 addrspace(4)*, i8 addrspace(4)*, i32, i32*)

define internal spir_func void @__device_side_enqueue_block_invoke_5(i8 addrspace(4)* noundef %.block_descriptor) {
entry:
  %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4
  %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4
  store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4
  %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*
  store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4
  %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 5
  %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr, align 4
  %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
  %1 = load i32, i32 addrspace(4)* %block.capture.addr1, align 4
  %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %1
  %2 = load i32, i32 addrspace(1)* %arrayidx, align 4
  %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3
  %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr2, align 4
  %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4
  %4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4
  %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4
  store i32 %2, i32 addrspace(1)* %arrayidx4, align 4
  ret void
}

define spir_kernel void @__device_side_enqueue_block_invoke_5_kernel(i8 addrspace(4)* %0) {
entry:
  call spir_func void @__device_side_enqueue_block_invoke_5(i8 addrspace(4)* %0)
  ret void
}