// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86
// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefix=CHECK-LIFETIMES

// Checks the LLVM IR emitted for the OpenCL device-side enqueue built-ins
// (enqueue_kernel and the get_kernel_* queries) and for the block literals,
// invoke functions and block kernels they rely on.
//
// The -O0 runs verify the generated calls and their operands. The per-target
// prefixes pick the queue/event representation (target extension types on the
// SPIR triples, plain pointers on x86_64) and the element width of the
// local-size arrays (i32 on the 32-bit triple, i64 on the 64-bit triples).
// The -O1 runs only verify the lifetime markers placed around those arrays.

#pragma OPENCL EXTENSION cl_khr_subgroups : enable

typedef void (^bl_t)(local void *);
typedef struct {int a;} ndrange_t;

// For a block global variable, first emit the block literal as a global variable, then emit the block variable itself.
// COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr [[INV_G:@[^ ]+]] to ptr addrspace(4)) }
// COMMON: @block_G ={{.*}} addrspace(1) constant ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BL_GLOBAL]] to ptr addrspace(4))

// For anonymous blocks without captures, emit block literals as global variable.
// COMMON: [[BLG0:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
// COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
// COMMON: [[BLG2:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
// COMMON: [[BLG3:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
// COMMON: [[BLG4:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
// COMMON: [[BLG5:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
// COMMON: [[BLG6:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
// COMMON: [[BLG7:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
// COMMON: [[BLG8:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr [[INVG8:@[^ ]+]] to ptr addrspace(4)) }
// COMMON: [[BLG9:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr [[INVG9:@[^ ]+]] to ptr addrspace(4)) }
// COMMON: [[BLG10:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
// COMMON: [[BLG11:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }

// Emits block literal [[BL_GLOBAL]], invoke function [[INV_G]] and global block variable @block_G
// COMMON: define internal {{(spir_func )?}}void [[INV_G]](ptr addrspace(4) %{{.*}}, ptr addrspace(3) %{{.*}})
const bl_t block_G = (bl_t) ^ (local void *a) {};

// Plain helper called from the capturing block block_C below; stores id at out[id].
void callee(int id, __global int *out) {
  out[id] = id;
}

// Kernel under test. It walks through the enqueue_kernel forms (basic, with
// events, with local-size arguments, with both) and the kernel query
// built-ins, using both capturing blocks (literal emitted on the stack) and
// non-capturing blocks (literal emitted as a global).
// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i)
kernel void device_side_enqueue(global int *a, global int *b, int i) {
  // SPIR: %default_queue = alloca target("spirv.Queue")
  // X86: %default_queue = alloca ptr
  queue_t default_queue;
  // COMMON: %flags = alloca i32
  unsigned flags = 0;
  // COMMON: %ndrange = alloca %struct.ndrange_t
  ndrange_t ndrange;
  // SPIR: %clk_event = alloca target("spirv.DeviceEvent")
  // X86: %clk_event = alloca ptr
  clk_event_t clk_event;
  // SPIR: %event_wait_list = alloca target("spirv.DeviceEvent")
  // X86: %event_wait_list = alloca ptr
  clk_event_t event_wait_list;
  // SPIR: %event_wait_list2 = alloca [1 x target("spirv.DeviceEvent")]
  // X86: %event_wait_list2 = alloca [1 x ptr]
  clk_event_t event_wait_list2[] = {clk_event};

  // COMMON: [[NDR:%[a-z0-9]+]] = alloca %struct.ndrange_t, align 4

  // B32: %[[BLOCK_SIZES1:.*]] = alloca [1 x i32]
  // B64: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64]
  // CHECK-LIFETIMES: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64]
  // B32: %[[BLOCK_SIZES2:.*]] = alloca [1 x i32]
  // B64: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64]
  // CHECK-LIFETIMES: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64]
  // B32: %[[BLOCK_SIZES3:.*]] = alloca [1 x i32]
  // B64: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64]
  // CHECK-LIFETIMES: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64]
  // B32: %[[BLOCK_SIZES4:.*]] = alloca [1 x i32]
  // B64: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64]
  // CHECK-LIFETIMES: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64]
  // B32: %[[BLOCK_SIZES5:.*]] = alloca [1 x i32]
  // B64: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64]
  // CHECK-LIFETIMES: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64]
  // B32: %[[BLOCK_SIZES6:.*]] = alloca [3 x i32]
  // B64: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64]
  // CHECK-LIFETIMES: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64]
  // B32: %[[BLOCK_SIZES7:.*]] = alloca [1 x i32]
  // B64: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64]
  // CHECK-LIFETIMES: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64]

  // Emits block literal on stack and block kernel [[INVLK1]].
  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
  // COMMON: store ptr addrspace(4) addrspacecast (ptr [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to ptr addrspace(4)), ptr %block.invoke
  // COMMON: [[BL_I8:%[0-9]+]] ={{.*}} addrspacecast ptr %block to ptr addrspace(4)
  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_basic(
  // SPIR-SAME: target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // X86-SAME: ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVLK1:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) [[BL_I8]])
  enqueue_kernel(default_queue, flags, ndrange,
                 ^(void) {
                   a[i] = b[i];
                 });

  // Emits block literal on stack and block kernel [[INVLK2]].
  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
  // COMMON: [[WAIT_EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr %event_wait_list to ptr addrspace(4)
  // COMMON: [[EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr %clk_event to ptr addrspace(4)
  // COMMON: store ptr addrspace(4) addrspacecast (ptr [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to ptr addrspace(4)), ptr %block.invoke
  // COMMON: [[BL_I8:%[0-9]+]] ={{.*}} addrspacecast ptr %block4 to ptr addrspace(4)
  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_basic_events
  // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
  // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]], ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVLK2:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) [[BL_I8]])
  enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event,
                 ^(void) {
                   a[i] = b[i];
                 });

  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_basic_events
  // SPIR-SAME: (target("spirv.Queue") {{%[0-9]+}}, i32 {{%[0-9]+}}, ptr {{.*}}, i32 1, ptr addrspace(4) null, ptr addrspace(4) null,
  // X86-SAME: (ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, ptr {{.*}}, i32 1, ptr addrspace(4) null, ptr addrspace(4) null,
  enqueue_kernel(default_queue, flags, ndrange, 1, 0, 0,
                 ^(void) {
                   return;
                 });

  // Emits global block literal [[BLG1]] and block kernel [[INVGK1]].
  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
  // CHECK-LIFETIMES: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES1]])
  // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES1]])
  // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES1]], i32 0, i32 0
  // B32: store i32 256, ptr %[[TMP]], align 4
  // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES1]], i32 0, i32 0
  // B64: store i64 256, ptr %[[TMP]], align 8
  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
  // SPIR-SAME: target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // X86-SAME: ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK1:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG1]] to ptr addrspace(4)), i32 1,
  // B32-SAME: ptr %[[TMP]])
  // B64-SAME: ptr %[[TMP]])
  enqueue_kernel(default_queue, flags, ndrange,
                 ^(local void *p) {
                   return;
                 },
                 256);

  char c;
  // Emits global block literal [[BLG2]] and block kernel [[INVGK2]].
  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES2]])
  // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES2]])
  // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES2]], i32 0, i32 0
  // B32: store i32 %{{.*}}, ptr %[[TMP]], align 4
  // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES2]], i32 0, i32 0
  // B64: store i64 %{{.*}}, ptr %[[TMP]], align 8
  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
  // SPIR-SAME: target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // X86-SAME: ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK2:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG2]] to ptr addrspace(4)), i32 1,
  // B32-SAME: ptr %[[TMP]])
  // B64-SAME: ptr %[[TMP]])
  enqueue_kernel(default_queue, flags, ndrange,
                 ^(local void *p) {
                   return;
                 },
                 c);

  // Emits global block literal [[BLG3]] and block kernel [[INVGK3]].
  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
  // SPIR: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x target("spirv.DeviceEvent")], ptr %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
  // X86: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x ptr], ptr %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
  // COMMON: [[WAIT_EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr [[AD]] to ptr addrspace(4)
  // COMMON: [[EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr %clk_event to ptr addrspace(4)
  // CHECK-LIFETIMES: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES3]])
  // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_events_varargs(
  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES3]])
  // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES3]], i32 0, i32 0
  // B32: store i32 256, ptr %[[TMP]], align 4
  // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES3]], i32 0, i32 0
  // B64: store i64 256, ptr %[[TMP]], align 8
  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_events_varargs
  // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
  // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]], ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK3:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG3]] to ptr addrspace(4)), i32 1,
  // B32-SAME: ptr %[[TMP]])
  // B64-SAME: ptr %[[TMP]])
  enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
                 ^(local void *p) {
                   return;
                 },
                 256);

  // Emits global block literal [[BLG4]] and block kernel [[INVGK4]].
  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
  // SPIR: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x target("spirv.DeviceEvent")], ptr %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
  // X86: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x ptr], ptr %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
  // COMMON: [[WAIT_EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr [[AD]] to ptr addrspace(4)
  // COMMON: [[EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr %clk_event to ptr addrspace(4)
  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES4]])
  // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_events_varargs(
  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES4]])
  // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES4]], i32 0, i32 0
  // B32: store i32 %{{.*}}, ptr %[[TMP]], align 4
  // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES4]], i32 0, i32 0
  // B64: store i64 %{{.*}}, ptr %[[TMP]], align 8
  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_events_varargs
  // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
  // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]], ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK4:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG4]] to ptr addrspace(4)), i32 1,
  // B32-SAME: ptr %[[TMP]])
  // B64-SAME: ptr %[[TMP]])
  enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
                 ^(local void *p) {
                   return;
                 },
                 c);

  long l;
  // Emits global block literal [[BLG5]] and block kernel [[INVGK5]].
  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES5]])
  // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES5]])
  // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES5]], i32 0, i32 0
  // B32: store i32 %{{.*}}, ptr %[[TMP]], align 4
  // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES5]], i32 0, i32 0
  // B64: store i64 %{{.*}}, ptr %[[TMP]], align 8
  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs
  // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK5:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG5]] to ptr addrspace(4)), i32 1,
  // B32-SAME: ptr %[[TMP]])
  // B64-SAME: ptr %[[TMP]])
  enqueue_kernel(default_queue, flags, ndrange,
                 ^(local void *p) {
                   return;
                 },
                 l);

  // Emits global block literal [[BLG6]] and block kernel [[INVGK6]].
  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %[[BLOCK_SIZES6]])
  // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %[[BLOCK_SIZES6]])
  // B32: %[[TMP:.*]] = getelementptr [3 x i32], ptr %[[BLOCK_SIZES6]], i32 0, i32 0
  // B32: store i32 1, ptr %[[TMP]], align 4
  // B32: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i32], ptr %[[BLOCK_SIZES6]], i32 0, i32 1
  // B32: store i32 2, ptr %[[BLOCK_SIZES62]], align 4
  // B32: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i32], ptr %[[BLOCK_SIZES6]], i32 0, i32 2
  // B32: store i32 4, ptr %[[BLOCK_SIZES63]], align 4
  // B64: %[[TMP:.*]] = getelementptr [3 x i64], ptr %[[BLOCK_SIZES6]], i32 0, i32 0
  // B64: store i64 1, ptr %[[TMP]], align 8
  // B64: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i64], ptr %[[BLOCK_SIZES6]], i32 0, i32 1
  // B64: store i64 2, ptr %[[BLOCK_SIZES62]], align 8
  // B64: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i64], ptr %[[BLOCK_SIZES6]], i32 0, i32 2
  // B64: store i64 4, ptr %[[BLOCK_SIZES63]], align 8
  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs
  // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK6:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG6]] to ptr addrspace(4)), i32 3,
  // B32-SAME: ptr %[[TMP]])
  // B64-SAME: ptr %[[TMP]])
  enqueue_kernel(default_queue, flags, ndrange,
                 ^(local void *p1, local void *p2, local void *p3) {
                   return;
                 },
                 1, 2, 4);

  // Emits global block literal [[BLG7]] and block kernel [[INVGK7]].
  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES7]])
  // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES7]])
  // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES7]], i32 0, i32 0
  // B32: store i32 0, ptr %[[TMP]], align 4
  // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES7]], i32 0, i32 0
  // B64: store i64 4294967296, ptr %[[TMP]], align 8
  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs
  // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK7:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG7]] to ptr addrspace(4)), i32 1,
  // B32-SAME: ptr %[[TMP]])
  // B64-SAME: ptr %[[TMP]])
  enqueue_kernel(default_queue, flags, ndrange,
                 ^(local void *p) {
                   return;
                 },
                 4294967296L);

  // Emits global block literal [[BLG8]] and invoke function [[INVG8]].
  // The full type of these expressions are long (and repeated elsewhere), so we
  // capture it as part of the regex for convenience and clarity.
  // COMMON: store ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)), ptr %block_A
  void (^const block_A)(void) = ^{
    return;
  };

  // Emits global block literal [[BLG9]] and invoke function [[INVG9]].
  // COMMON: store ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG9]] to ptr addrspace(4)), ptr %block_B
  void (^const block_B)(local void *) = ^(local void *a) {
    return;
  };

  // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
  // COMMON: call {{(spir_func )?}}void @__device_side_enqueue_block_invoke_11(ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4))) [[INVOKE_ATTR:#[0-9]+]]
  block_A();

  // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_basic(
  // SPIR-SAME: target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // X86-SAME: ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK8:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)))
  enqueue_kernel(default_queue, flags, ndrange, block_A);

  // Uses block kernel [[INVGK8]] and global block literal [[BLG8]].
  // COMMON: call {{(spir_func )?}}i32 @__get_kernel_work_group_size_impl(
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK8]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)))
  unsigned size = get_kernel_work_group_size(block_A);

  // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
  // COMMON: call {{(spir_func )?}}void @__device_side_enqueue_block_invoke_11(ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)))
  block_A();

  // Make sure that block invoke function is resolved correctly after sequence of assignments.
  // COMMON: store ptr addrspace(4)
  // COMMON-SAME: addrspacecast (ptr addrspace(1)
  // COMMON-SAME: [[BL_GLOBAL]]
  // COMMON-SAME: to ptr addrspace(4)),
  // COMMON-SAME: ptr %b1,
  bl_t b1 = block_G;
  // COMMON: store ptr addrspace(4)
  // COMMON-SAME: addrspacecast (ptr addrspace(1)
  // COMMON-SAME: [[BL_GLOBAL]]
  // COMMON-SAME: to ptr addrspace(4)),
  // COMMON-SAME: ptr %b2,
  bl_t b2 = b1;
  // COMMON: call {{(spir_func )?}}void @block_G_block_invoke(ptr addrspace(4) addrspacecast (ptr addrspace(1)
  // COMMON-SAME: [[BL_GLOBAL]]
  // COMMON-SAME: to ptr addrspace(4)), ptr addrspace(3) null)
  b2(0);
  // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
  // COMMON: call {{(spir_func )?}}i32 @__get_kernel_preferred_work_group_size_multiple_impl(
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INV_G_K:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BL_GLOBAL]] to ptr addrspace(4)))
  size = get_kernel_preferred_work_group_size_multiple(b2);

  void (^block_C)(void) = ^{
    callee(i, a);
  };
  // Emits block literal on stack and block kernel [[INVLK3]].
  // COMMON: store ptr addrspace(4) addrspacecast (ptr [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to ptr addrspace(4)), ptr %block.invoke
  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
  // COMMON: [[BL_I8:%[0-9]+]] ={{.*}} addrspacecast ptr {{.*}} to ptr addrspace(4)
  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_basic(
  // SPIR-SAME: target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // X86-SAME: ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVLK3:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) [[BL_I8]])
  enqueue_kernel(default_queue, flags, ndrange, block_C);

  // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INVG9]].
  // COMMON: call {{(spir_func )?}}i32 @__get_kernel_work_group_size_impl(
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK9:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG9]] to ptr addrspace(4)))
  size = get_kernel_work_group_size(block_B);

  // Uses global block literal [[BLG8]] and block kernel [[INVGK8]]. Make sure no redundant block literal and invoke functions are emitted.
  // COMMON: call {{(spir_func )?}}i32 @__get_kernel_preferred_work_group_size_multiple_impl(
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK8]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)))
  size = get_kernel_preferred_work_group_size_multiple(block_A);

  // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
  // COMMON: call {{(spir_func )?}}i32 @__get_kernel_preferred_work_group_size_multiple_impl(
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INV_G_K:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BL_GLOBAL]] to ptr addrspace(4)))
  size = get_kernel_preferred_work_group_size_multiple(block_G);

  // Emits global block literal [[BLG10]] and block kernel [[INVGK10]].
  // COMMON: call {{(spir_func )?}}i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(ptr {{[^,]+}},
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK10:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG10]] to ptr addrspace(4)))
  size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){});

  // Emits global block literal [[BLG11]] and block kernel [[INVGK11]].
  // COMMON: call {{(spir_func )?}}i32 @__get_kernel_sub_group_count_for_ndrange_impl(ptr {{[^,]+}},
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK11:[^ ]+_kernel]] to ptr addrspace(4)),
  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG11]] to ptr addrspace(4)))
  size = get_kernel_sub_group_count_for_ndrange(ndrange, ^(){});
}

// COMMON: define spir_kernel void [[INVLK1]](ptr addrspace(4) %0) #{{[0-9]+}} {
// COMMON: entry:
// COMMON: call {{(spir_func )?}}void @__device_side_enqueue_block_invoke(ptr addrspace(4) %0)
// COMMON: ret void
// COMMON: }
// COMMON: define spir_kernel void [[INVLK2]](ptr addrspace(4){{.*}})
// COMMON: define spir_kernel void [[INVGK1]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}}) [[INVOKE_KERNEL_ATTR:#[0-9]+]]
// COMMON: define spir_kernel void [[INVGK2]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
// COMMON: define spir_kernel void [[INVGK3]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
// COMMON: define spir_kernel void [[INVGK4]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
// COMMON: define spir_kernel void [[INVGK5]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
// COMMON: define spir_kernel void [[INVGK6]](ptr addrspace(4) %0, ptr addrspace(3) %1, ptr addrspace(3) %2, ptr addrspace(3) %3) #{{[0-9]+}} {
// COMMON: entry:
// COMMON: call {{(spir_func )?}}void @__device_side_enqueue_block_invoke_9(ptr addrspace(4) %0, ptr addrspace(3) %1, ptr addrspace(3) %2, ptr addrspace(3) %3)
// COMMON: ret void
// COMMON: }
// COMMON: define spir_kernel void [[INVGK7]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
// COMMON: define internal {{(spir_func )?}}void [[INVG8]](ptr addrspace(4){{.*}}) [[INVG8_INVOKE_FUNC_ATTR:#[0-9]+]]
// COMMON: define internal {{(spir_func )?}}void [[INVG9]](ptr addrspace(4){{.*}}, ptr addrspace(3) %{{.*}})
// COMMON: define spir_kernel void [[INVGK8]](ptr addrspace(4){{.*}})
// COMMON: define spir_kernel void [[INV_G_K]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
// COMMON: define spir_kernel void [[INVLK3]](ptr addrspace(4){{.*}})
// COMMON: define spir_kernel void [[INVGK9]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
// COMMON: define spir_kernel void [[INVGK10]](ptr addrspace(4){{.*}})
// COMMON: define spir_kernel void [[INVGK11]](ptr addrspace(4){{.*}})

// SPIR: attributes [[INVG8_INVOKE_FUNC_ATTR]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// SPIR: attributes [[INVOKE_KERNEL_ATTR]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// X86: attributes [[INVG8_INVOKE_FUNC_ATTR]] = { convergent noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="{{[^"]*}}" }
// X86: attributes [[INVOKE_KERNEL_ATTR]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="{{[^"]*}}" }