xref: /llvm-project/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl (revision 4b50ec43d03d9ba9b43edd9a4743951f6498b964)
1// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR
2// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR
3// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES
4// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR
5// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR
6// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES
7// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86
8// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86
9// RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefix=CHECK-LIFETIMES
10
11#pragma OPENCL EXTENSION cl_khr_subgroups : enable
12
// Block type used by the test: takes a `local void *` and returns void.
13typedef void (^bl_t)(local void *);
// ndrange_t as declared locally by this test: a struct holding a single int.
14typedef struct {int a;} ndrange_t;
15
16// For a block global variable, first emit the block literal as a global variable, then emit the block variable itself.
17// COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr [[INV_G:@[^ ]+]] to ptr addrspace(4)) }
18// COMMON: @block_G ={{.*}} addrspace(1) constant ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BL_GLOBAL]] to ptr addrspace(4))
19
20// For anonymous blocks without captures, emit block literals as global variable.
21// COMMON: [[BLG0:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
22// COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
23// COMMON: [[BLG2:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
24// COMMON: [[BLG3:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
25// COMMON: [[BLG4:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
26// COMMON: [[BLG5:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
27// COMMON: [[BLG6:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
28// COMMON: [[BLG7:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
29// COMMON: [[BLG8:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr [[INVG8:@[^ ]+]] to ptr addrspace(4)) }
30// COMMON: [[BLG9:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr [[INVG9:@[^ ]+]] to ptr addrspace(4)) }
31// COMMON: [[BLG10:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
32// COMMON: [[BLG11:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
33
34// Emits block literal [[BL_GLOBAL]], invoke function [[INV_G]] and global block variable @block_G
35// COMMON: define internal {{(spir_func )?}}void [[INV_G]](ptr addrspace(4) %{{.*}}, ptr addrspace(3) %{{.*}})
36const bl_t block_G = (bl_t) ^ (local void *a) {};
37
// Plain helper that writes `id` into out[id]; it is called from the block_C
// literal inside device_side_enqueue.
38void callee(int id, __global int *out) {
39  out[id] = id;
40}
41
42// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i)
43kernel void device_side_enqueue(global int *a, global int *b, int i) {
44  // SPIR: %default_queue = alloca target("spirv.Queue")
45  // X86: %default_queue = alloca ptr
46  queue_t default_queue;
47  // COMMON: %flags = alloca i32
48  unsigned flags = 0;
49  // COMMON: %ndrange = alloca %struct.ndrange_t
50  ndrange_t ndrange;
51  // SPIR: %clk_event = alloca target("spirv.DeviceEvent")
52  // X86: %clk_event = alloca ptr
53  clk_event_t clk_event;
54  // SPIR: %event_wait_list = alloca target("spirv.DeviceEvent")
55  // X86: %event_wait_list = alloca ptr
56  clk_event_t event_wait_list;
57  // SPIR: %event_wait_list2 = alloca [1 x target("spirv.DeviceEvent")]
58  // X86: %event_wait_list2 = alloca [1 x ptr]
59  clk_event_t event_wait_list2[] = {clk_event};
60
61  // COMMON: [[NDR:%[a-z0-9]+]] = alloca %struct.ndrange_t, align 4
62
63  // B32: %[[BLOCK_SIZES1:.*]] = alloca [1 x i32]
64  // B64: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64]
65  // CHECK-LIFETIMES: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64]
66  // B32: %[[BLOCK_SIZES2:.*]] = alloca [1 x i32]
67  // B64: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64]
68  // CHECK-LIFETIMES: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64]
69  // B32: %[[BLOCK_SIZES3:.*]] = alloca [1 x i32]
70  // B64: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64]
71  // CHECK-LIFETIMES: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64]
72  // B32: %[[BLOCK_SIZES4:.*]] = alloca [1 x i32]
73  // B64: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64]
74  // CHECK-LIFETIMES: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64]
75  // B32: %[[BLOCK_SIZES5:.*]] = alloca [1 x i32]
76  // B64: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64]
77  // CHECK-LIFETIMES: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64]
78  // B32: %[[BLOCK_SIZES6:.*]] = alloca [3 x i32]
79  // B64: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64]
80  // CHECK-LIFETIMES: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64]
81  // B32: %[[BLOCK_SIZES7:.*]] = alloca [1 x i32]
82  // B64: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64]
83  // CHECK-LIFETIMES: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64]
84
85  // Emits block literal on stack and block kernel [[INVLK1]].
86  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
87  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
88  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
89  // COMMON: store ptr addrspace(4) addrspacecast (ptr [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to ptr addrspace(4)), ptr %block.invoke
90  // COMMON: [[BL_I8:%[0-9]+]] ={{.*}} addrspacecast ptr %block to ptr addrspace(4)
91  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_basic(
92  // SPIR-SAME: target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
93  // X86-SAME: ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
94  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVLK1:[^ ]+_kernel]] to ptr addrspace(4)),
95  // COMMON-SAME: ptr addrspace(4) [[BL_I8]])
96  enqueue_kernel(default_queue, flags, ndrange,
97                 ^(void) {
98                   a[i] = b[i];
99                 });
100
101  // Emits block literal on stack and block kernel [[INVLK2]].
102  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
103  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
104  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
105  // COMMON: [[WAIT_EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr %event_wait_list to ptr addrspace(4)
106  // COMMON: [[EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr %clk_event to ptr addrspace(4)
107  // COMMON: store ptr addrspace(4) addrspacecast (ptr [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to ptr addrspace(4)), ptr %block.invoke
108  // COMMON: [[BL_I8:%[0-9]+]] ={{.*}} addrspacecast ptr %block4 to ptr addrspace(4)
109  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_basic_events
110  // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]],  ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
111  // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]],  ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
112  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVLK2:[^ ]+_kernel]] to ptr addrspace(4)),
113  // COMMON-SAME: ptr addrspace(4) [[BL_I8]])
114  enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event,
115                 ^(void) {
116                   a[i] = b[i];
117                 });
118
119  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_basic_events
120  // SPIR-SAME: (target("spirv.Queue") {{%[0-9]+}}, i32 {{%[0-9]+}}, ptr {{.*}}, i32 1, ptr addrspace(4) null, ptr addrspace(4) null,
121  // X86-SAME: (ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, ptr {{.*}}, i32 1, ptr addrspace(4) null, ptr addrspace(4) null,
122  enqueue_kernel(default_queue, flags, ndrange, 1, 0, 0,
123                 ^(void) {
124                   return;
125                 });
126
127  // Emits global block literal [[BLG1]] and block kernel [[INVGK1]].
128  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
129  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
130  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
131  // CHECK-LIFETIMES: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES1]])
132  // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
133  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES1]])
134  // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES1]], i32 0, i32 0
135  // B32: store i32 256, ptr %[[TMP]], align 4
136  // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES1]], i32 0, i32 0
137  // B64: store i64 256, ptr %[[TMP]], align 8
138  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
139  // SPIR-SAME: target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
140  // X86-SAME: ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
141  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK1:[^ ]+_kernel]] to ptr addrspace(4)),
142  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG1]] to ptr addrspace(4)), i32 1,
143  // B32-SAME: ptr %[[TMP]])
144  // B64-SAME: ptr %[[TMP]])
145  enqueue_kernel(default_queue, flags, ndrange,
146                 ^(local void *p) {
147                   return;
148                 },
149                 256);
150
151  char c;
152  // Emits global block literal [[BLG2]] and block kernel [[INVGK2]].
153  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
154  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
155  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
156  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES2]])
157  // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
158  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES2]])
159  // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES2]], i32 0, i32 0
160  // B32: store i32 %{{.*}}, ptr %[[TMP]], align 4
161  // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES2]], i32 0, i32 0
162  // B64: store i64 %{{.*}}, ptr %[[TMP]], align 8
163  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
164  // SPIR-SAME: target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
165  // X86-SAME: ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
166  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK2:[^ ]+_kernel]] to ptr addrspace(4)),
167  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG2]] to ptr addrspace(4)), i32 1,
168  // B32-SAME: ptr %[[TMP]])
169  // B64-SAME: ptr %[[TMP]])
170  enqueue_kernel(default_queue, flags, ndrange,
171                 ^(local void *p) {
172                   return;
173                 },
174                 c);
175
176  // Emits global block literal [[BLG3]] and block kernel [[INVGK3]].
177  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
178  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
179  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
180  // SPIR: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x target("spirv.DeviceEvent")], ptr %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
181  // X86: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x ptr], ptr %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
182  // COMMON: [[WAIT_EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr [[AD]] to ptr addrspace(4)
183  // COMMON: [[EVNT:%[0-9]+]]  ={{.*}} addrspacecast ptr %clk_event to ptr addrspace(4)
184  // CHECK-LIFETIMES: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES3]])
185  // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_events_varargs(
186  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES3]])
187  // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES3]], i32 0, i32 0
188  // B32: store i32 256, ptr %[[TMP]], align 4
189  // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES3]], i32 0, i32 0
190  // B64: store i64 256, ptr %[[TMP]], align 8
191  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_events_varargs
192  // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]],  ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
193  // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]],  ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
194  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK3:[^ ]+_kernel]] to ptr addrspace(4)),
195  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG3]] to ptr addrspace(4)), i32 1,
196  // B32-SAME: ptr %[[TMP]])
197  // B64-SAME: ptr %[[TMP]])
198  enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
199                 ^(local void *p) {
200                   return;
201                 },
202                 256);
203
204  // Emits global block literal [[BLG4]] and block kernel [[INVGK4]].
205  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
206  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
207  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
208  // SPIR: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x target("spirv.DeviceEvent")], ptr %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
209  // X86: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x ptr], ptr %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
210  // COMMON: [[WAIT_EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr [[AD]] to ptr addrspace(4)
211  // COMMON: [[EVNT:%[0-9]+]]  ={{.*}} addrspacecast ptr %clk_event to ptr addrspace(4)
212  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES4]])
213  // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_events_varargs(
214  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES4]])
215  // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES4]], i32 0, i32 0
216  // B32: store i32 %{{.*}}, ptr %[[TMP]], align 4
217  // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES4]], i32 0, i32 0
218  // B64: store i64 %{{.*}}, ptr %[[TMP]], align 8
219  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_events_varargs
220  // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]],  ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
221  // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]],  ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
222  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK4:[^ ]+_kernel]] to ptr addrspace(4)),
223  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG4]] to ptr addrspace(4)), i32 1,
224  // B32-SAME: ptr %[[TMP]])
225  // B64-SAME: ptr %[[TMP]])
226  enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
227                 ^(local void *p) {
228                   return;
229                 },
230                 c);
231
232  long l;
233  // Emits global block literal [[BLG5]] and block kernel [[INVGK5]].
234  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
235  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
236  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
237  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES5]])
238  // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
239  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES5]])
240  // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES5]], i32 0, i32 0
241  // B32: store i32 %{{.*}}, ptr %[[TMP]], align 4
242  // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES5]], i32 0, i32 0
243  // B64: store i64 %{{.*}}, ptr %[[TMP]], align 8
244  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs
245  // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
246  // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
247  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK5:[^ ]+_kernel]] to ptr addrspace(4)),
248  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG5]] to ptr addrspace(4)), i32 1,
249  // B32-SAME: ptr %[[TMP]])
250  // B64-SAME: ptr %[[TMP]])
251  enqueue_kernel(default_queue, flags, ndrange,
252                 ^(local void *p) {
253                   return;
254                 },
255                 l);
256
257  // Emits global block literal [[BLG6]] and block kernel [[INVGK6]].
258  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
259  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
260  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
261  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %[[BLOCK_SIZES6]])
262  // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
263  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %[[BLOCK_SIZES6]])
264  // B32: %[[TMP:.*]] = getelementptr [3 x i32], ptr %[[BLOCK_SIZES6]], i32 0, i32 0
265  // B32: store i32 1, ptr %[[TMP]], align 4
266  // B32: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i32], ptr %[[BLOCK_SIZES6]], i32 0, i32 1
267  // B32: store i32 2, ptr %[[BLOCK_SIZES62]], align 4
268  // B32: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i32], ptr %[[BLOCK_SIZES6]], i32 0, i32 2
269  // B32: store i32 4, ptr %[[BLOCK_SIZES63]], align 4
270  // B64: %[[TMP:.*]] = getelementptr [3 x i64], ptr %[[BLOCK_SIZES6]], i32 0, i32 0
271  // B64: store i64 1, ptr %[[TMP]], align 8
272  // B64: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i64], ptr %[[BLOCK_SIZES6]], i32 0, i32 1
273  // B64: store i64 2, ptr %[[BLOCK_SIZES62]], align 8
274  // B64: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i64], ptr %[[BLOCK_SIZES6]], i32 0, i32 2
275  // B64: store i64 4, ptr %[[BLOCK_SIZES63]], align 8
276  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs
277  // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
278  // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
279  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK6:[^ ]+_kernel]] to ptr addrspace(4)),
280  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG6]] to ptr addrspace(4)), i32 3,
281  // B32-SAME: ptr %[[TMP]])
282  // B64-SAME: ptr %[[TMP]])
283  enqueue_kernel(default_queue, flags, ndrange,
284                 ^(local void *p1, local void *p2, local void *p3) {
285                   return;
286                 },
287                 1, 2, 4);
288
289  // Emits global block literal [[BLG7]] and block kernel [[INVGK7]].
290  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
291  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
292  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
293  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES7]])
294  // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
295  // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES7]])
296  // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES7]], i32 0, i32 0
297  // B32: store i32 0, ptr %[[TMP]], align 4
298  // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES7]], i32 0, i32 0
299  // B64: store i64 4294967296, ptr %[[TMP]], align 8
300  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs
301  // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
302  // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
303  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK7:[^ ]+_kernel]] to ptr addrspace(4)),
304  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG7]] to ptr addrspace(4)), i32 1,
305  // B32-SAME: ptr %[[TMP]])
306  // B64-SAME: ptr %[[TMP]])
307  enqueue_kernel(default_queue, flags, ndrange,
308                 ^(local void *p) {
309                   return;
310                 },
311                 4294967296L);
312
313  // Emits global block literal [[BLG8]] and invoke function [[INVG8]].
314  // The full type of these expressions is long (and repeated elsewhere), so we
315  // capture it as part of the regex for convenience and clarity.
316  // COMMON: store ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)), ptr %block_A
317  void (^const block_A)(void) = ^{
318    return;
319  };
320
321  // Emits global block literal [[BLG9]] and invoke function [[INVG9]].
322  // COMMON: store ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG9]] to ptr addrspace(4)), ptr %block_B
323  void (^const block_B)(local void *) = ^(local void *a) {
324    return;
325  };
326
327  // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
328  // COMMON: call {{(spir_func )?}}void @__device_side_enqueue_block_invoke_11(ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4))) [[INVOKE_ATTR:#[0-9]+]]
329  block_A();
330
331  // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
332  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
333  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
334  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
335  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_basic(
336  // SPIR-SAME: target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
337  // X86-SAME: ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
338  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK8:[^ ]+_kernel]] to ptr addrspace(4)),
339  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)))
340  enqueue_kernel(default_queue, flags, ndrange, block_A);
341
342  // Uses block kernel [[INVGK8]] and global block literal [[BLG8]].
343  // COMMON: call {{(spir_func )?}}i32 @__get_kernel_work_group_size_impl(
344  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK8]] to ptr addrspace(4)),
345  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)))
346  unsigned size = get_kernel_work_group_size(block_A);
347
348  // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
349  // COMMON: call {{(spir_func )?}}void @__device_side_enqueue_block_invoke_11(ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)))
350  block_A();
351
352  // Make sure that the block invoke function is resolved correctly after a sequence of assignments.
353  // COMMON: store ptr addrspace(4)
354  // COMMON-SAME: addrspacecast (ptr addrspace(1)
355  // COMMON-SAME: [[BL_GLOBAL]]
356  // COMMON-SAME: to ptr addrspace(4)),
357  // COMMON-SAME: ptr %b1,
358  bl_t b1 = block_G;
359  // COMMON: store ptr addrspace(4)
360  // COMMON-SAME: addrspacecast (ptr addrspace(1)
361  // COMMON-SAME: [[BL_GLOBAL]]
362  // COMMON-SAME: to ptr addrspace(4)),
363  // COMMON-SAME: ptr %b2,
364  bl_t b2 = b1;
365  // COMMON: call {{(spir_func )?}}void @block_G_block_invoke(ptr addrspace(4) addrspacecast (ptr addrspace(1)
366  // COMMON-SAME: [[BL_GLOBAL]]
367  // COMMON-SAME: to ptr addrspace(4)), ptr addrspace(3) null)
368  b2(0);
369  // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
370  // COMMON: call {{(spir_func )?}}i32 @__get_kernel_preferred_work_group_size_multiple_impl(
371  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INV_G_K:[^ ]+_kernel]] to ptr addrspace(4)),
372  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BL_GLOBAL]] to ptr addrspace(4)))
373  size = get_kernel_preferred_work_group_size_multiple(b2);
374
375  void (^block_C)(void) = ^{
376    callee(i, a);
377  };
378  // Emits block literal on stack and block kernel [[INVLK3]].
379  // COMMON: store ptr addrspace(4) addrspacecast (ptr [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to ptr addrspace(4)), ptr %block.invoke
380  // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
381  // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
382  // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
383  // COMMON: [[BL_I8:%[0-9]+]] ={{.*}} addrspacecast ptr {{.*}} to ptr addrspace(4)
384  // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_basic(
385  // SPIR-SAME: target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
386  // X86-SAME: ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
387  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVLK3:[^ ]+_kernel]] to ptr addrspace(4)),
388  // COMMON-SAME: ptr addrspace(4) [[BL_I8]])
389  enqueue_kernel(default_queue, flags, ndrange, block_C);
390
391  // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INVG9]].
392  // COMMON: call {{(spir_func )?}}i32 @__get_kernel_work_group_size_impl(
393  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK9:[^ ]+_kernel]] to ptr addrspace(4)),
394  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG9]] to ptr addrspace(4)))
395  size = get_kernel_work_group_size(block_B);
396
397  // Uses global block literal [[BLG8]] and block kernel [[INVGK8]]. Make sure no redundant block literal and invoke functions are emitted.
398  // COMMON: call {{(spir_func )?}}i32 @__get_kernel_preferred_work_group_size_multiple_impl(
399  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK8]] to ptr addrspace(4)),
400  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)))
401  size = get_kernel_preferred_work_group_size_multiple(block_A);
402
403  // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
404  // COMMON: call {{(spir_func )?}}i32 @__get_kernel_preferred_work_group_size_multiple_impl(
405  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INV_G_K]] to ptr addrspace(4)),
406  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BL_GLOBAL]] to ptr addrspace(4)))
407  size = get_kernel_preferred_work_group_size_multiple(block_G);
408
409  // Emits global block literal [[BLG10]] and block kernel [[INVGK10]].
410  // COMMON: call {{(spir_func )?}}i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(ptr {{[^,]+}},
411  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK10:[^ ]+_kernel]] to ptr addrspace(4)),
412  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG10]] to ptr addrspace(4)))
413  size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){});
414
415  // Emits global block literal [[BLG11]] and block kernel [[INVGK11]].
416  // COMMON: call {{(spir_func )?}}i32 @__get_kernel_sub_group_count_for_ndrange_impl(ptr {{[^,]+}},
417  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK11:[^ ]+_kernel]] to ptr addrspace(4)),
418  // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG11]] to ptr addrspace(4)))
419  size = get_kernel_sub_group_count_for_ndrange(ndrange, ^(){});
420}
421
422// COMMON: define spir_kernel void [[INVLK1]](ptr addrspace(4) %0) #{{[0-9]+}} {
423// COMMON: entry:
424// COMMON:  call {{(spir_func )?}}void @__device_side_enqueue_block_invoke(ptr addrspace(4) %0)
425// COMMON:  ret void
426// COMMON: }
427// COMMON: define spir_kernel void [[INVLK2]](ptr addrspace(4){{.*}})
428// COMMON: define spir_kernel void [[INVGK1]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})  [[INVOKE_KERNEL_ATTR:#[0-9]+]]
429// COMMON: define spir_kernel void [[INVGK2]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
430// COMMON: define spir_kernel void [[INVGK3]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
431// COMMON: define spir_kernel void [[INVGK4]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
432// COMMON: define spir_kernel void [[INVGK5]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
433// COMMON: define spir_kernel void [[INVGK6]](ptr addrspace(4) %0, ptr addrspace(3) %1, ptr addrspace(3) %2, ptr addrspace(3) %3) #{{[0-9]+}} {
434// COMMON: entry:
435// COMMON:  call {{(spir_func )?}}void @__device_side_enqueue_block_invoke_9(ptr addrspace(4) %0, ptr addrspace(3) %1, ptr addrspace(3) %2, ptr addrspace(3) %3)
436// COMMON:  ret void
437// COMMON: }
438// COMMON: define spir_kernel void [[INVGK7]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
439// COMMON: define internal {{(spir_func )?}}void [[INVG8]](ptr addrspace(4){{.*}}) [[INVG8_INVOKE_FUNC_ATTR:#[0-9]+]]
440// COMMON: define internal {{(spir_func )?}}void [[INVG9]](ptr addrspace(4){{.*}}, ptr addrspace(3) %{{.*}})
441// COMMON: define spir_kernel void [[INVGK8]](ptr addrspace(4){{.*}})
442// COMMON: define spir_kernel void [[INV_G_K]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
443// COMMON: define spir_kernel void [[INVLK3]](ptr addrspace(4){{.*}})
444// COMMON: define spir_kernel void [[INVGK9]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
445// COMMON: define spir_kernel void [[INVGK10]](ptr addrspace(4){{.*}})
446// COMMON: define spir_kernel void [[INVGK11]](ptr addrspace(4){{.*}})
447
448// SPIR: attributes [[INVG8_INVOKE_FUNC_ATTR]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
449// SPIR: attributes [[INVOKE_KERNEL_ATTR]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
450// X86: attributes [[INVG8_INVOKE_FUNC_ATTR]] = { convergent noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="{{[^"]*}}" }
451// X86: attributes [[INVOKE_KERNEL_ATTR]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="{{[^"]*}}" }
452