;; #pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable
;; #pragma OPENCL EXTENSION cl_khr_fp16 : enable
;; #pragma OPENCL EXTENSION cl_khr_fp64 : enable
;;
;; kernel void testClusteredArithmeticChar(global char* dst)
;; {
;;     char v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticUChar(global uchar* dst)
;; {
;;     uchar v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticShort(global short* dst)
;; {
;;     short v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticUShort(global ushort* dst)
;; {
;;     ushort v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticInt(global int* dst)
;; {
;;     int v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticUInt(global uint* dst)
;; {
;;     uint v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticLong(global long* dst)
;; {
;;     long v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticULong(global ulong* dst)
;; {
;;     ulong v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticFloat(global float* dst)
;; {
;;     float v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticHalf(global half* dst)
;; {
;;     half v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticDouble(global double* dst)
;; {
;;     double v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseChar(global char* dst)
;; {
;;     char v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseUChar(global uchar* dst)
;; {
;;     uchar v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseShort(global short* dst)
;; {
;;     short v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseUShort(global ushort* dst)
;; {
;;     ushort v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseInt(global int* dst)
;; {
;;     int v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseUInt(global uint* dst)
;; {
;;     uint v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseLong(global long* dst)
;; {
;;     long v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseULong(global ulong* dst)
;; {
;;     ulong v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredLogical(global int* dst)
;; {
;;     int v = 0;
;;     dst[0] = sub_group_clustered_reduce_logical_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_logical_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_logical_xor(v, 2);
;; }

; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
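; This test checks that each clustered sub-group reduction builtin above
; lowers to the matching OpGroupNonUniform* instruction with the
; ClusteredReduce group operation, which requires the
; GroupNonUniformClustered capability.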

; CHECK-SPIRV-DAG: OpCapability GroupNonUniformClustered

; CHECK-SPIRV-DAG: %[[#bool:]] = OpTypeBool
; CHECK-SPIRV-DAG: %[[#char:]] = OpTypeInt 8 0
; CHECK-SPIRV-DAG: %[[#short:]] = OpTypeInt 16 0
; CHECK-SPIRV-DAG: %[[#int:]] = OpTypeInt 32 0
; CHECK-SPIRV-DAG: %[[#long:]] = OpTypeInt 64 0
; CHECK-SPIRV-DAG: %[[#half:]] = OpTypeFloat 16
; CHECK-SPIRV-DAG: %[[#float:]] = OpTypeFloat 32
; CHECK-SPIRV-DAG: %[[#double:]] = OpTypeFloat 64

; CHECK-SPIRV-DAG: %[[#false:]] = OpConstantFalse %[[#bool]]
; CHECK-SPIRV-DAG: %[[#ScopeSubgroup:]] = OpConstant %[[#int]] 3
; CHECK-SPIRV-DAG: %[[#char_0:]] = OpConstant %[[#char]] 0
; CHECK-SPIRV-DAG: %[[#short_0:]] = OpConstant %[[#short]] 0
; CHECK-SPIRV-DAG: %[[#int_0:]] = OpConstant %[[#int]] 0
; CHECK-SPIRV-DAG: %[[#int_2:]] = OpConstant %[[#int]] 2
; CHECK-SPIRV-DAG: %[[#long_0:]] = OpConstantNull %[[#long]]
; CHECK-SPIRV-DAG: %[[#half_0:]] = OpConstant %[[#half]] 0
; CHECK-SPIRV-DAG: %[[#float_0:]] = OpConstant %[[#float]] 0
; CHECK-SPIRV-DAG: %[[#double_0:]] = OpConstant %[[#double]] 0
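; %[[#ScopeSubgroup]] is the SPIR-V Scope enumerant for Subgroup, whose fixed
; value is 3; %[[#int_2]] is the cluster size passed as the second argument to
; every builtin. Note that the 64-bit zero is matched as OpConstantNull rather
; than OpConstant.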

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_addcj(i8 signext 0, i32 2)
  store i8 %2, i8 addrspace(1)* %0, align 1
  %3 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_mulcj(i8 signext 0, i32 2)
  %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
  store i8 %3, i8 addrspace(1)* %4, align 1
  %5 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_mincj(i8 signext 0, i32 2)
  %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2
  store i8 %5, i8 addrspace(1)* %6, align 1
  %7 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_maxcj(i8 signext 0, i32 2)
  %8 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 3
  store i8 %7, i8 addrspace(1)* %8, align 1
  ret void
}

declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_addcj(i8 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_mulcj(i8 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_mincj(i8 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_maxcj(i8 signext, i32) local_unnamed_addr
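; The builtin overloads differ only in their Itanium-mangled parameter types
; (c/h/s/t/i/j/l/m for char/uchar/short/ushort/int/uint/long/ulong, plus a
; trailing j for the uint cluster size), and that signedness is what selects
; the signed SMin/SMax versus the unsigned UMin/UMax opcodes below.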

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticUChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_addhj(i8 zeroext 0, i32 2)
  store i8 %2, i8 addrspace(1)* %0, align 1
  %3 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_mulhj(i8 zeroext 0, i32 2)
  %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
  store i8 %3, i8 addrspace(1)* %4, align 1
  %5 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_minhj(i8 zeroext 0, i32 2)
  %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2
  store i8 %5, i8 addrspace(1)* %6, align 1
  %7 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_maxhj(i8 zeroext 0, i32 2)
  %8 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 3
  store i8 %7, i8 addrspace(1)* %8, align 1
  ret void
}

declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_addhj(i8 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_mulhj(i8 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_minhj(i8 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_maxhj(i8 zeroext, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_addsj(i16 signext 0, i32 2)
  store i16 %2, i16 addrspace(1)* %0, align 2
  %3 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_mulsj(i16 signext 0, i32 2)
  %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
  store i16 %3, i16 addrspace(1)* %4, align 2
  %5 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_minsj(i16 signext 0, i32 2)
  %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2
  store i16 %5, i16 addrspace(1)* %6, align 2
  %7 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_maxsj(i16 signext 0, i32 2)
  %8 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 3
  store i16 %7, i16 addrspace(1)* %8, align 2
  ret void
}

declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_addsj(i16 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_mulsj(i16 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_minsj(i16 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_maxsj(i16 signext, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticUShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_addtj(i16 zeroext 0, i32 2)
  store i16 %2, i16 addrspace(1)* %0, align 2
  %3 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_multj(i16 zeroext 0, i32 2)
  %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
  store i16 %3, i16 addrspace(1)* %4, align 2
  %5 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_mintj(i16 zeroext 0, i32 2)
  %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2
  store i16 %5, i16 addrspace(1)* %6, align 2
  %7 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_maxtj(i16 zeroext 0, i32 2)
  %8 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 3
  store i16 %7, i16 addrspace(1)* %8, align 2
  ret void
}

declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_addtj(i16 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_multj(i16 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_mintj(i16 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_maxtj(i16 zeroext, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_addij(i32 0, i32 2)
  store i32 %2, i32 addrspace(1)* %0, align 4
  %3 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_mulij(i32 0, i32 2)
  %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
  store i32 %3, i32 addrspace(1)* %4, align 4
  %5 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_minij(i32 0, i32 2)
  %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
  store i32 %5, i32 addrspace(1)* %6, align 4
  %7 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_maxij(i32 0, i32 2)
  %8 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 3
  store i32 %7, i32 addrspace(1)* %8, align 4
  ret void
}

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_addij(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_mulij(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_minij(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_maxij(i32, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticUInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_addjj(i32 0, i32 2)
  store i32 %2, i32 addrspace(1)* %0, align 4
  %3 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_muljj(i32 0, i32 2)
  %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
  store i32 %3, i32 addrspace(1)* %4, align 4
  %5 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_minjj(i32 0, i32 2)
  %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
  store i32 %5, i32 addrspace(1)* %6, align 4
  %7 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_maxjj(i32 0, i32 2)
  %8 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 3
  store i32 %7, i32 addrspace(1)* %8, align 4
  ret void
}

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_addjj(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_muljj(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_minjj(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_maxjj(i32, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticLong(i64 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_addlj(i64 0, i32 2)
  store i64 %2, i64 addrspace(1)* %0, align 8
  %3 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_mullj(i64 0, i32 2)
  %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
  store i64 %3, i64 addrspace(1)* %4, align 8
  %5 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_minlj(i64 0, i32 2)
  %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2
  store i64 %5, i64 addrspace(1)* %6, align 8
  %7 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_maxlj(i64 0, i32 2)
  %8 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 3
  store i64 %7, i64 addrspace(1)* %8, align 8
  ret void
}

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_addlj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_mullj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_minlj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_maxlj(i64, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticULong(i64 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_addmj(i64 0, i32 2)
  store i64 %2, i64 addrspace(1)* %0, align 8
  %3 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_mulmj(i64 0, i32 2)
  %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
  store i64 %3, i64 addrspace(1)* %4, align 8
  %5 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_minmj(i64 0, i32 2)
  %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2
  store i64 %5, i64 addrspace(1)* %6, align 8
  %7 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_maxmj(i64 0, i32 2)
  %8 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 3
  store i64 %7, i64 addrspace(1)* %8, align 8
  ret void
}

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_addmj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_mulmj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_minmj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_maxmj(i64, i32) local_unnamed_addr
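; For the floating-point overloads (mangled f/Dh/d for float/half/double) the
; reductions lower to the OpGroupNonUniformF* forms below; the half and double
; cases rely on the cl_khr_fp16 and cl_khr_fp64 pragmas in the source above.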

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#float]] %[[#ScopeSubgroup]] ClusteredReduce %[[#float_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#float]] %[[#ScopeSubgroup]] ClusteredReduce %[[#float_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#float]] %[[#ScopeSubgroup]] ClusteredReduce %[[#float_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#float]] %[[#ScopeSubgroup]] ClusteredReduce %[[#float_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticFloat(float addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func float @_Z30sub_group_clustered_reduce_addfj(float 0.000000e+00, i32 2)
  store float %2, float addrspace(1)* %0, align 4
  %3 = tail call spir_func float @_Z30sub_group_clustered_reduce_mulfj(float 0.000000e+00, i32 2)
  %4 = getelementptr inbounds float, float addrspace(1)* %0, i64 1
  store float %3, float addrspace(1)* %4, align 4
  %5 = tail call spir_func float @_Z30sub_group_clustered_reduce_minfj(float 0.000000e+00, i32 2)
  %6 = getelementptr inbounds float, float addrspace(1)* %0, i64 2
  store float %5, float addrspace(1)* %6, align 4
  %7 = tail call spir_func float @_Z30sub_group_clustered_reduce_maxfj(float 0.000000e+00, i32 2)
  %8 = getelementptr inbounds float, float addrspace(1)* %0, i64 3
  store float %7, float addrspace(1)* %8, align 4
  ret void
}

declare dso_local spir_func float @_Z30sub_group_clustered_reduce_addfj(float, i32) local_unnamed_addr

declare dso_local spir_func float @_Z30sub_group_clustered_reduce_mulfj(float, i32) local_unnamed_addr

declare dso_local spir_func float @_Z30sub_group_clustered_reduce_minfj(float, i32) local_unnamed_addr

declare dso_local spir_func float @_Z30sub_group_clustered_reduce_maxfj(float, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#half]] %[[#ScopeSubgroup]] ClusteredReduce %[[#half_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#half]] %[[#ScopeSubgroup]] ClusteredReduce %[[#half_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#half]] %[[#ScopeSubgroup]] ClusteredReduce %[[#half_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#half]] %[[#ScopeSubgroup]] ClusteredReduce %[[#half_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticHalf(half addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func half @_Z30sub_group_clustered_reduce_addDhj(half 0xH0000, i32 2)
  store half %2, half addrspace(1)* %0, align 2
  %3 = tail call spir_func half @_Z30sub_group_clustered_reduce_mulDhj(half 0xH0000, i32 2)
  %4 = getelementptr inbounds half, half addrspace(1)* %0, i64 1
  store half %3, half addrspace(1)* %4, align 2
  %5 = tail call spir_func half @_Z30sub_group_clustered_reduce_minDhj(half 0xH0000, i32 2)
  %6 = getelementptr inbounds half, half addrspace(1)* %0, i64 2
  store half %5, half addrspace(1)* %6, align 2
  %7 = tail call spir_func half @_Z30sub_group_clustered_reduce_maxDhj(half 0xH0000, i32 2)
  %8 = getelementptr inbounds half, half addrspace(1)* %0, i64 3
  store half %7, half addrspace(1)* %8, align 2
  ret void
}

declare dso_local spir_func half @_Z30sub_group_clustered_reduce_addDhj(half, i32) local_unnamed_addr

declare dso_local spir_func half @_Z30sub_group_clustered_reduce_mulDhj(half, i32) local_unnamed_addr

declare dso_local spir_func half @_Z30sub_group_clustered_reduce_minDhj(half, i32) local_unnamed_addr

declare dso_local spir_func half @_Z30sub_group_clustered_reduce_maxDhj(half, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#double]] %[[#ScopeSubgroup]] ClusteredReduce %[[#double_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#double]] %[[#ScopeSubgroup]] ClusteredReduce %[[#double_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#double]] %[[#ScopeSubgroup]] ClusteredReduce %[[#double_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#double]] %[[#ScopeSubgroup]] ClusteredReduce %[[#double_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticDouble(double addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func double @_Z30sub_group_clustered_reduce_adddj(double 0.000000e+00, i32 2)
  store double %2, double addrspace(1)* %0, align 8
  %3 = tail call spir_func double @_Z30sub_group_clustered_reduce_muldj(double 0.000000e+00, i32 2)
  %4 = getelementptr inbounds double, double addrspace(1)* %0, i64 1
  store double %3, double addrspace(1)* %4, align 8
  %5 = tail call spir_func double @_Z30sub_group_clustered_reduce_mindj(double 0.000000e+00, i32 2)
  %6 = getelementptr inbounds double, double addrspace(1)* %0, i64 2
  store double %5, double addrspace(1)* %6, align 8
  %7 = tail call spir_func double @_Z30sub_group_clustered_reduce_maxdj(double 0.000000e+00, i32 2)
  %8 = getelementptr inbounds double, double addrspace(1)* %0, i64 3
  store double %7, double addrspace(1)* %8, align 8
  ret void
}

declare dso_local spir_func double @_Z30sub_group_clustered_reduce_adddj(double, i32) local_unnamed_addr

declare dso_local spir_func double @_Z30sub_group_clustered_reduce_muldj(double, i32) local_unnamed_addr

declare dso_local spir_func double @_Z30sub_group_clustered_reduce_mindj(double, i32) local_unnamed_addr

declare dso_local spir_func double @_Z30sub_group_clustered_reduce_maxdj(double, i32) local_unnamed_addr
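; The bitwise and/or/xor reductions below keep their integer operand type and
; lower to OpGroupNonUniformBitwiseAnd/Or/Xor.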

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr  %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_andcj(i8 signext 0, i32 2)
  store i8 %2, i8 addrspace(1)* %0, align 1
  %3 = tail call spir_func signext i8 @_Z29sub_group_clustered_reduce_orcj(i8 signext 0, i32 2)
  %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
  store i8 %3, i8 addrspace(1)* %4, align 1
  %5 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_xorcj(i8 signext 0, i32 2)
  %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2
  store i8 %5, i8 addrspace(1)* %6, align 1
  ret void
}

declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_andcj(i8 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i8 @_Z29sub_group_clustered_reduce_orcj(i8 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_xorcj(i8 signext, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr  %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseUChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_andhj(i8 zeroext 0, i32 2)
  store i8 %2, i8 addrspace(1)* %0, align 1
  %3 = tail call spir_func zeroext i8 @_Z29sub_group_clustered_reduce_orhj(i8 zeroext 0, i32 2)
  %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
  store i8 %3, i8 addrspace(1)* %4, align 1
  %5 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_xorhj(i8 zeroext 0, i32 2)
  %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2
  store i8 %5, i8 addrspace(1)* %6, align 1
  ret void
}

declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_andhj(i8 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i8 @_Z29sub_group_clustered_reduce_orhj(i8 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_xorhj(i8 zeroext, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr  %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_andsj(i16 signext 0, i32 2)
  store i16 %2, i16 addrspace(1)* %0, align 2
  %3 = tail call spir_func signext i16 @_Z29sub_group_clustered_reduce_orsj(i16 signext 0, i32 2)
  %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
  store i16 %3, i16 addrspace(1)* %4, align 2
  %5 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_xorsj(i16 signext 0, i32 2)
  %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2
  store i16 %5, i16 addrspace(1)* %6, align 2
  ret void
}

declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_andsj(i16 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i16 @_Z29sub_group_clustered_reduce_orsj(i16 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_xorsj(i16 signext, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr  %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseUShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_andtj(i16 zeroext 0, i32 2)
  store i16 %2, i16 addrspace(1)* %0, align 2
  %3 = tail call spir_func zeroext i16 @_Z29sub_group_clustered_reduce_ortj(i16 zeroext 0, i32 2)
  %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
  store i16 %3, i16 addrspace(1)* %4, align 2
  %5 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_xortj(i16 zeroext 0, i32 2)
  %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2
  store i16 %5, i16 addrspace(1)* %6, align 2
  ret void
}

declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_andtj(i16 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i16 @_Z29sub_group_clustered_reduce_ortj(i16 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_xortj(i16 zeroext, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr  %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_andij(i32 0, i32 2)
  store i32 %2, i32 addrspace(1)* %0, align 4
  %3 = tail call spir_func i32 @_Z29sub_group_clustered_reduce_orij(i32 0, i32 2)
  %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
  store i32 %3, i32 addrspace(1)* %4, align 4
  %5 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_xorij(i32 0, i32 2)
  %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
  store i32 %5, i32 addrspace(1)* %6, align 4
  ret void
}

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_andij(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z29sub_group_clustered_reduce_orij(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_xorij(i32, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr  %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseUInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_andjj(i32 0, i32 2)
  store i32 %2, i32 addrspace(1)* %0, align 4
  %3 = tail call spir_func i32 @_Z29sub_group_clustered_reduce_orjj(i32 0, i32 2)
  %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
  store i32 %3, i32 addrspace(1)* %4, align 4
  %5 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_xorjj(i32 0, i32 2)
  %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
  store i32 %5, i32 addrspace(1)* %6, align 4
  ret void
}

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_andjj(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z29sub_group_clustered_reduce_orjj(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_xorjj(i32, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr  %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseLong(i64 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_andlj(i64 0, i32 2)
  store i64 %2, i64 addrspace(1)* %0, align 8
  %3 = tail call spir_func i64 @_Z29sub_group_clustered_reduce_orlj(i64 0, i32 2)
  %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
  store i64 %3, i64 addrspace(1)* %4, align 8
  %5 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_xorlj(i64 0, i32 2)
  %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2
  store i64 %5, i64 addrspace(1)* %6, align 8
  ret void
}

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_andlj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z29sub_group_clustered_reduce_orlj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_xorlj(i64, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr  %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseULong(i64 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_andmj(i64 0, i32 2)
  store i64 %2, i64 addrspace(1)* %0, align 8
  %3 = tail call spir_func i64 @_Z29sub_group_clustered_reduce_ormj(i64 0, i32 2)
  %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
  store i64 %3, i64 addrspace(1)* %4, align 8
  %5 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_xormj(i64 0, i32 2)
  %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2
  store i64 %5, i64 addrspace(1)* %6, align 8
  ret void
}

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_andmj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z29sub_group_clustered_reduce_ormj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_xormj(i64, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalAnd %[[#bool]] %[[#ScopeSubgroup]] ClusteredReduce %[[#false]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalOr  %[[#bool]] %[[#ScopeSubgroup]] ClusteredReduce %[[#false]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalXor %[[#bool]] %[[#ScopeSubgroup]] ClusteredReduce %[[#false]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd
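; The logical variants take and return int in OpenCL C, but the SPIR-V
; instructions operate on OpTypeBool, so the zero argument becomes %[[#false]]
; (OpConstantFalse); the boolean result is converted back to i32 for the
; store, which is not checked here.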

define dso_local spir_kernel void @testClusteredLogical(i32 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i32 @_Z38sub_group_clustered_reduce_logical_andij(i32 0, i32 2)
  store i32 %2, i32 addrspace(1)* %0, align 4
  %3 = tail call spir_func i32 @_Z37sub_group_clustered_reduce_logical_orij(i32 0, i32 2)
  %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
  store i32 %3, i32 addrspace(1)* %4, align 4
  %5 = tail call spir_func i32 @_Z38sub_group_clustered_reduce_logical_xorij(i32 0, i32 2)
  %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
  store i32 %5, i32 addrspace(1)* %6, align 4
  ret void
}

declare dso_local spir_func i32 @_Z38sub_group_clustered_reduce_logical_andij(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z37sub_group_clustered_reduce_logical_orij(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z38sub_group_clustered_reduce_logical_xorij(i32, i32) local_unnamed_addr