;; #pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable
;; #pragma OPENCL EXTENSION cl_khr_fp16 : enable
;; #pragma OPENCL EXTENSION cl_khr_fp64 : enable
;;
;; kernel void testClusteredArithmeticChar(global char* dst)
;; {
;;     char v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticUChar(global uchar* dst)
;; {
;;     uchar v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticShort(global short* dst)
;; {
;;     short v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticUShort(global ushort* dst)
;; {
;;     ushort v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticInt(global int* dst)
;; {
;;     int v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticUInt(global uint* dst)
;; {
;;     uint v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticLong(global long* dst)
;; {
;;     long v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticULong(global ulong* dst)
;; {
;;     ulong v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticFloat(global float* dst)
;; {
;;     float v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticHalf(global half* dst)
;; {
;;     half v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredArithmeticDouble(global double* dst)
;; {
;;     double v = 0;
;;     dst[0] = sub_group_clustered_reduce_add(v, 2);
;;     dst[1] = sub_group_clustered_reduce_mul(v, 2);
;;     dst[2] = sub_group_clustered_reduce_min(v, 2);
;;     dst[3] = sub_group_clustered_reduce_max(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseChar(global char* dst)
;; {
;;     char v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseUChar(global uchar* dst)
;; {
;;     uchar v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseShort(global short* dst)
;; {
;;     short v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseUShort(global ushort* dst)
;; {
;;     ushort v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseInt(global int* dst)
;; {
;;     int v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseUInt(global uint* dst)
;; {
;;     uint v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseLong(global long* dst)
;; {
;;     long v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredBitwiseULong(global ulong* dst)
;; {
;;     ulong v = 0;
;;     dst[0] = sub_group_clustered_reduce_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_xor(v, 2);
;; }
;;
;; kernel void testClusteredLogical(global int* dst)
;; {
;;     int v = 0;
;;     dst[0] = sub_group_clustered_reduce_logical_and(v, 2);
;;     dst[1] = sub_group_clustered_reduce_logical_or(v, 2);
;;     dst[2] = sub_group_clustered_reduce_logical_xor(v, 2);
;; }
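
; This test checks that the cl_khr_subgroup_clustered_reduce builtins in the
; OpenCL C source above are lowered to the corresponding SPIR-V
; OpGroupNonUniform* instructions with the ClusteredReduce group operation
; and a cluster size of 2.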

; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV

; CHECK-SPIRV-DAG: OpCapability GroupNonUniformClustered

; CHECK-SPIRV-DAG: %[[#bool:]] = OpTypeBool
; CHECK-SPIRV-DAG: %[[#char:]] = OpTypeInt 8 0
; CHECK-SPIRV-DAG: %[[#short:]] = OpTypeInt 16 0
; CHECK-SPIRV-DAG: %[[#int:]] = OpTypeInt 32 0
; CHECK-SPIRV-DAG: %[[#long:]] = OpTypeInt 64 0
; CHECK-SPIRV-DAG: %[[#half:]] = OpTypeFloat 16
; CHECK-SPIRV-DAG: %[[#float:]] = OpTypeFloat 32
; CHECK-SPIRV-DAG: %[[#double:]] = OpTypeFloat 64

; CHECK-SPIRV-DAG: %[[#false:]] = OpConstantFalse %[[#bool]]
; CHECK-SPIRV-DAG: %[[#ScopeSubgroup:]] = OpConstant %[[#int]] 3
; CHECK-SPIRV-DAG: %[[#char_0:]] = OpConstant %[[#char]] 0
; CHECK-SPIRV-DAG: %[[#short_0:]] = OpConstant %[[#short]] 0
; CHECK-SPIRV-DAG: %[[#int_0:]] = OpConstant %[[#int]] 0
; CHECK-SPIRV-DAG: %[[#int_2:]] = OpConstant %[[#int]] 2
; CHECK-SPIRV-DAG: %[[#long_0:]] = OpConstantNull %[[#long]]
; CHECK-SPIRV-DAG: %[[#half_0:]] = OpConstant %[[#half]] 0
; CHECK-SPIRV-DAG: %[[#float_0:]] = OpConstant %[[#float]] 0
; CHECK-SPIRV-DAG: %[[#double_0:]] = OpConstant %[[#double]] 0
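
; The DAG checks above match types and constants in any order. Note that the
; 64-bit zero is expected as OpConstantNull rather than OpConstant 0, and the
; Subgroup scope is the integer constant 3.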

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_addcj(i8 signext 0, i32 2)
  store i8 %2, i8 addrspace(1)* %0, align 1
  %3 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_mulcj(i8 signext 0, i32 2)
  %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
  store i8 %3, i8 addrspace(1)* %4, align 1
  %5 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_mincj(i8 signext 0, i32 2)
  %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2
  store i8 %5, i8 addrspace(1)* %6, align 1
  %7 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_maxcj(i8 signext 0, i32 2)
  %8 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 3
  store i8 %7, i8 addrspace(1)* %8, align 1
  ret void
}

declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_addcj(i8 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_mulcj(i8 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_mincj(i8 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_maxcj(i8 signext, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticUChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_addhj(i8 zeroext 0, i32 2)
  store i8 %2, i8 addrspace(1)* %0, align 1
  %3 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_mulhj(i8 zeroext 0, i32 2)
  %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
  store i8 %3, i8 addrspace(1)* %4, align 1
  %5 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_minhj(i8 zeroext 0, i32 2)
  %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2
  store i8 %5, i8 addrspace(1)* %6, align 1
  %7 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_maxhj(i8 zeroext 0, i32 2)
  %8 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 3
  store i8 %7, i8 addrspace(1)* %8, align 1
  ret void
}

declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_addhj(i8 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_mulhj(i8 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_minhj(i8 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_maxhj(i8 zeroext, i32) local_unnamed_addr
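
; For unsigned types the builtins differ only in the mangled parameter type
; (h = uchar, t = ushort, j = uint, m = ulong), and min/max are expected to
; select the unsigned opcodes OpGroupNonUniformUMin/UMax instead of SMin/SMax.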

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_addsj(i16 signext 0, i32 2)
  store i16 %2, i16 addrspace(1)* %0, align 2
  %3 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_mulsj(i16 signext 0, i32 2)
  %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
  store i16 %3, i16 addrspace(1)* %4, align 2
  %5 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_minsj(i16 signext 0, i32 2)
  %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2
  store i16 %5, i16 addrspace(1)* %6, align 2
  %7 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_maxsj(i16 signext 0, i32 2)
  %8 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 3
  store i16 %7, i16 addrspace(1)* %8, align 2
  ret void
}

declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_addsj(i16 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_mulsj(i16 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_minsj(i16 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_maxsj(i16 signext, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticUShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_addtj(i16 zeroext 0, i32 2)
  store i16 %2, i16 addrspace(1)* %0, align 2
  %3 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_multj(i16 zeroext 0, i32 2)
  %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
  store i16 %3, i16 addrspace(1)* %4, align 2
  %5 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_mintj(i16 zeroext 0, i32 2)
  %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2
  store i16 %5, i16 addrspace(1)* %6, align 2
  %7 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_maxtj(i16 zeroext 0, i32 2)
  %8 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 3
  store i16 %7, i16 addrspace(1)* %8, align 2
  ret void
}

declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_addtj(i16 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_multj(i16 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_mintj(i16 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_maxtj(i16 zeroext, i32) local_unnamed_addr
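
; The 32- and 64-bit integer kernels below follow the same pattern; only the
; result type and the mangling suffix change.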

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_addij(i32 0, i32 2)
  store i32 %2, i32 addrspace(1)* %0, align 4
  %3 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_mulij(i32 0, i32 2)
  %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
  store i32 %3, i32 addrspace(1)* %4, align 4
  %5 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_minij(i32 0, i32 2)
  %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
  store i32 %5, i32 addrspace(1)* %6, align 4
  %7 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_maxij(i32 0, i32 2)
  %8 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 3
  store i32 %7, i32 addrspace(1)* %8, align 4
  ret void
}

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_addij(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_mulij(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_minij(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_maxij(i32, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticUInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_addjj(i32 0, i32 2)
  store i32 %2, i32 addrspace(1)* %0, align 4
  %3 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_muljj(i32 0, i32 2)
  %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
  store i32 %3, i32 addrspace(1)* %4, align 4
  %5 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_minjj(i32 0, i32 2)
  %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
  store i32 %5, i32 addrspace(1)* %6, align 4
  %7 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_maxjj(i32 0, i32 2)
  %8 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 3
  store i32 %7, i32 addrspace(1)* %8, align 4
  ret void
}

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_addjj(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_muljj(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_minjj(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_maxjj(i32, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMin %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformSMax %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticLong(i64 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_addlj(i64 0, i32 2)
  store i64 %2, i64 addrspace(1)* %0, align 8
  %3 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_mullj(i64 0, i32 2)
  %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
  store i64 %3, i64 addrspace(1)* %4, align 8
  %5 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_minlj(i64 0, i32 2)
  %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2
  store i64 %5, i64 addrspace(1)* %6, align 8
  %7 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_maxlj(i64 0, i32 2)
  %8 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 3
  store i64 %7, i64 addrspace(1)* %8, align 8
  ret void
}

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_addlj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_mullj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_minlj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_maxlj(i64, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIAdd %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformIMul %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMin %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformUMax %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticULong(i64 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_addmj(i64 0, i32 2)
  store i64 %2, i64 addrspace(1)* %0, align 8
  %3 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_mulmj(i64 0, i32 2)
  %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
  store i64 %3, i64 addrspace(1)* %4, align 8
  %5 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_minmj(i64 0, i32 2)
  %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2
  store i64 %5, i64 addrspace(1)* %6, align 8
  %7 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_maxmj(i64 0, i32 2)
  %8 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 3
  store i64 %7, i64 addrspace(1)* %8, align 8
  ret void
}

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_addmj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_mulmj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_minmj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_maxmj(i64, i32) local_unnamed_addr
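
; Floating-point reductions are expected to use the F-variant opcodes
; (OpGroupNonUniformFAdd/FMul/FMin/FMax) for float, half, and double.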

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#float]] %[[#ScopeSubgroup]] ClusteredReduce %[[#float_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#float]] %[[#ScopeSubgroup]] ClusteredReduce %[[#float_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#float]] %[[#ScopeSubgroup]] ClusteredReduce %[[#float_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#float]] %[[#ScopeSubgroup]] ClusteredReduce %[[#float_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticFloat(float addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func float @_Z30sub_group_clustered_reduce_addfj(float 0.000000e+00, i32 2)
  store float %2, float addrspace(1)* %0, align 4
  %3 = tail call spir_func float @_Z30sub_group_clustered_reduce_mulfj(float 0.000000e+00, i32 2)
  %4 = getelementptr inbounds float, float addrspace(1)* %0, i64 1
  store float %3, float addrspace(1)* %4, align 4
  %5 = tail call spir_func float @_Z30sub_group_clustered_reduce_minfj(float 0.000000e+00, i32 2)
  %6 = getelementptr inbounds float, float addrspace(1)* %0, i64 2
  store float %5, float addrspace(1)* %6, align 4
  %7 = tail call spir_func float @_Z30sub_group_clustered_reduce_maxfj(float 0.000000e+00, i32 2)
  %8 = getelementptr inbounds float, float addrspace(1)* %0, i64 3
  store float %7, float addrspace(1)* %8, align 4
  ret void
}

declare dso_local spir_func float @_Z30sub_group_clustered_reduce_addfj(float, i32) local_unnamed_addr

declare dso_local spir_func float @_Z30sub_group_clustered_reduce_mulfj(float, i32) local_unnamed_addr

declare dso_local spir_func float @_Z30sub_group_clustered_reduce_minfj(float, i32) local_unnamed_addr

declare dso_local spir_func float @_Z30sub_group_clustered_reduce_maxfj(float, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#half]] %[[#ScopeSubgroup]] ClusteredReduce %[[#half_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#half]] %[[#ScopeSubgroup]] ClusteredReduce %[[#half_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#half]] %[[#ScopeSubgroup]] ClusteredReduce %[[#half_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#half]] %[[#ScopeSubgroup]] ClusteredReduce %[[#half_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticHalf(half addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func half @_Z30sub_group_clustered_reduce_addDhj(half 0xH0000, i32 2)
  store half %2, half addrspace(1)* %0, align 2
  %3 = tail call spir_func half @_Z30sub_group_clustered_reduce_mulDhj(half 0xH0000, i32 2)
  %4 = getelementptr inbounds half, half addrspace(1)* %0, i64 1
  store half %3, half addrspace(1)* %4, align 2
  %5 = tail call spir_func half @_Z30sub_group_clustered_reduce_minDhj(half 0xH0000, i32 2)
  %6 = getelementptr inbounds half, half addrspace(1)* %0, i64 2
  store half %5, half addrspace(1)* %6, align 2
  %7 = tail call spir_func half @_Z30sub_group_clustered_reduce_maxDhj(half 0xH0000, i32 2)
  %8 = getelementptr inbounds half, half addrspace(1)* %0, i64 3
  store half %7, half addrspace(1)* %8, align 2
  ret void
}

declare dso_local spir_func half @_Z30sub_group_clustered_reduce_addDhj(half, i32) local_unnamed_addr

declare dso_local spir_func half @_Z30sub_group_clustered_reduce_mulDhj(half, i32) local_unnamed_addr

declare dso_local spir_func half @_Z30sub_group_clustered_reduce_minDhj(half, i32) local_unnamed_addr

declare dso_local spir_func half @_Z30sub_group_clustered_reduce_maxDhj(half, i32) local_unnamed_addr
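
; 0xH0000 in the half kernel above is the IEEE 754 binary16 encoding of +0.0,
; and Dh is the Itanium mangling for the half type.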

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFAdd %[[#double]] %[[#ScopeSubgroup]] ClusteredReduce %[[#double_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMul %[[#double]] %[[#ScopeSubgroup]] ClusteredReduce %[[#double_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMin %[[#double]] %[[#ScopeSubgroup]] ClusteredReduce %[[#double_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformFMax %[[#double]] %[[#ScopeSubgroup]] ClusteredReduce %[[#double_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredArithmeticDouble(double addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func double @_Z30sub_group_clustered_reduce_adddj(double 0.000000e+00, i32 2)
  store double %2, double addrspace(1)* %0, align 8
  %3 = tail call spir_func double @_Z30sub_group_clustered_reduce_muldj(double 0.000000e+00, i32 2)
  %4 = getelementptr inbounds double, double addrspace(1)* %0, i64 1
  store double %3, double addrspace(1)* %4, align 8
  %5 = tail call spir_func double @_Z30sub_group_clustered_reduce_mindj(double 0.000000e+00, i32 2)
  %6 = getelementptr inbounds double, double addrspace(1)* %0, i64 2
  store double %5, double addrspace(1)* %6, align 8
  %7 = tail call spir_func double @_Z30sub_group_clustered_reduce_maxdj(double 0.000000e+00, i32 2)
  %8 = getelementptr inbounds double, double addrspace(1)* %0, i64 3
  store double %7, double addrspace(1)* %8, align 8
  ret void
}

declare dso_local spir_func double @_Z30sub_group_clustered_reduce_adddj(double, i32) local_unnamed_addr

declare dso_local spir_func double @_Z30sub_group_clustered_reduce_muldj(double, i32) local_unnamed_addr

declare dso_local spir_func double @_Z30sub_group_clustered_reduce_mindj(double, i32) local_unnamed_addr

declare dso_local spir_func double @_Z30sub_group_clustered_reduce_maxdj(double, i32) local_unnamed_addr
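
; Bitwise reductions map to OpGroupNonUniformBitwiseAnd/Or/Xor. The _or
; builtins carry a _Z29 mangled-name length prefix rather than _Z30 because
; "or" is one character shorter than "and"/"xor".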

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_andcj(i8 signext 0, i32 2)
  store i8 %2, i8 addrspace(1)* %0, align 1
  %3 = tail call spir_func signext i8 @_Z29sub_group_clustered_reduce_orcj(i8 signext 0, i32 2)
  %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
  store i8 %3, i8 addrspace(1)* %4, align 1
  %5 = tail call spir_func signext i8 @_Z30sub_group_clustered_reduce_xorcj(i8 signext 0, i32 2)
  %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2
  store i8 %5, i8 addrspace(1)* %6, align 1
  ret void
}

declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_andcj(i8 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i8 @_Z29sub_group_clustered_reduce_orcj(i8 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i8 @_Z30sub_group_clustered_reduce_xorcj(i8 signext, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#char]] %[[#ScopeSubgroup]] ClusteredReduce %[[#char_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseUChar(i8 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_andhj(i8 zeroext 0, i32 2)
  store i8 %2, i8 addrspace(1)* %0, align 1
  %3 = tail call spir_func zeroext i8 @_Z29sub_group_clustered_reduce_orhj(i8 zeroext 0, i32 2)
  %4 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 1
  store i8 %3, i8 addrspace(1)* %4, align 1
  %5 = tail call spir_func zeroext i8 @_Z30sub_group_clustered_reduce_xorhj(i8 zeroext 0, i32 2)
  %6 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 2
  store i8 %5, i8 addrspace(1)* %6, align 1
  ret void
}

declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_andhj(i8 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i8 @_Z29sub_group_clustered_reduce_orhj(i8 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i8 @_Z30sub_group_clustered_reduce_xorhj(i8 zeroext, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_andsj(i16 signext 0, i32 2)
  store i16 %2, i16 addrspace(1)* %0, align 2
  %3 = tail call spir_func signext i16 @_Z29sub_group_clustered_reduce_orsj(i16 signext 0, i32 2)
  %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
  store i16 %3, i16 addrspace(1)* %4, align 2
  %5 = tail call spir_func signext i16 @_Z30sub_group_clustered_reduce_xorsj(i16 signext 0, i32 2)
  %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2
  store i16 %5, i16 addrspace(1)* %6, align 2
  ret void
}

declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_andsj(i16 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i16 @_Z29sub_group_clustered_reduce_orsj(i16 signext, i32) local_unnamed_addr

declare dso_local spir_func signext i16 @_Z30sub_group_clustered_reduce_xorsj(i16 signext, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#short]] %[[#ScopeSubgroup]] ClusteredReduce %[[#short_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseUShort(i16 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_andtj(i16 zeroext 0, i32 2)
  store i16 %2, i16 addrspace(1)* %0, align 2
  %3 = tail call spir_func zeroext i16 @_Z29sub_group_clustered_reduce_ortj(i16 zeroext 0, i32 2)
  %4 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 1
  store i16 %3, i16 addrspace(1)* %4, align 2
  %5 = tail call spir_func zeroext i16 @_Z30sub_group_clustered_reduce_xortj(i16 zeroext 0, i32 2)
  %6 = getelementptr inbounds i16, i16 addrspace(1)* %0, i64 2
  store i16 %5, i16 addrspace(1)* %6, align 2
  ret void
}

declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_andtj(i16 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i16 @_Z29sub_group_clustered_reduce_ortj(i16 zeroext, i32) local_unnamed_addr

declare dso_local spir_func zeroext i16 @_Z30sub_group_clustered_reduce_xortj(i16 zeroext, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_andij(i32 0, i32 2)
  store i32 %2, i32 addrspace(1)* %0, align 4
  %3 = tail call spir_func i32 @_Z29sub_group_clustered_reduce_orij(i32 0, i32 2)
  %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
  store i32 %3, i32 addrspace(1)* %4, align 4
  %5 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_xorij(i32 0, i32 2)
  %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
  store i32 %5, i32 addrspace(1)* %6, align 4
  ret void
}

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_andij(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z29sub_group_clustered_reduce_orij(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_xorij(i32, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#int]] %[[#ScopeSubgroup]] ClusteredReduce %[[#int_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseUInt(i32 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_andjj(i32 0, i32 2)
  store i32 %2, i32 addrspace(1)* %0, align 4
  %3 = tail call spir_func i32 @_Z29sub_group_clustered_reduce_orjj(i32 0, i32 2)
  %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
  store i32 %3, i32 addrspace(1)* %4, align 4
  %5 = tail call spir_func i32 @_Z30sub_group_clustered_reduce_xorjj(i32 0, i32 2)
  %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
  store i32 %5, i32 addrspace(1)* %6, align 4
  ret void
}

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_andjj(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z29sub_group_clustered_reduce_orjj(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z30sub_group_clustered_reduce_xorjj(i32, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseLong(i64 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_andlj(i64 0, i32 2)
  store i64 %2, i64 addrspace(1)* %0, align 8
  %3 = tail call spir_func i64 @_Z29sub_group_clustered_reduce_orlj(i64 0, i32 2)
  %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
  store i64 %3, i64 addrspace(1)* %4, align 8
  %5 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_xorlj(i64 0, i32 2)
  %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2
  store i64 %5, i64 addrspace(1)* %6, align 8
  ret void
}

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_andlj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z29sub_group_clustered_reduce_orlj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_xorlj(i64, i32) local_unnamed_addr

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseAnd %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseOr %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBitwiseXor %[[#long]] %[[#ScopeSubgroup]] ClusteredReduce %[[#long_0]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredBitwiseULong(i64 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_andmj(i64 0, i32 2)
  store i64 %2, i64 addrspace(1)* %0, align 8
  %3 = tail call spir_func i64 @_Z29sub_group_clustered_reduce_ormj(i64 0, i32 2)
  %4 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 1
  store i64 %3, i64 addrspace(1)* %4, align 8
  %5 = tail call spir_func i64 @_Z30sub_group_clustered_reduce_xormj(i64 0, i32 2)
  %6 = getelementptr inbounds i64, i64 addrspace(1)* %0, i64 2
  store i64 %5, i64 addrspace(1)* %6, align 8
  ret void
}

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_andmj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z29sub_group_clustered_reduce_ormj(i64, i32) local_unnamed_addr

declare dso_local spir_func i64 @_Z30sub_group_clustered_reduce_xormj(i64, i32) local_unnamed_addr
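
; The logical reductions take int arguments at the OpenCL C level, but the
; checks below expect them to be lowered to booleans: the result type is
; OpTypeBool and the i32 0 argument becomes OpConstantFalse.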

; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalAnd %[[#bool]] %[[#ScopeSubgroup]] ClusteredReduce %[[#false]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalOr %[[#bool]] %[[#ScopeSubgroup]] ClusteredReduce %[[#false]] %[[#int_2]]
; CHECK-SPIRV: %[[#]] = OpGroupNonUniformLogicalXor %[[#bool]] %[[#ScopeSubgroup]] ClusteredReduce %[[#false]] %[[#int_2]]
; CHECK-SPIRV: OpFunctionEnd

define dso_local spir_kernel void @testClusteredLogical(i32 addrspace(1)* nocapture) local_unnamed_addr {
  %2 = tail call spir_func i32 @_Z38sub_group_clustered_reduce_logical_andij(i32 0, i32 2)
  store i32 %2, i32 addrspace(1)* %0, align 4
  %3 = tail call spir_func i32 @_Z37sub_group_clustered_reduce_logical_orij(i32 0, i32 2)
  %4 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1
  store i32 %3, i32 addrspace(1)* %4, align 4
  %5 = tail call spir_func i32 @_Z38sub_group_clustered_reduce_logical_xorij(i32 0, i32 2)
  %6 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2
  store i32 %5, i32 addrspace(1)* %6, align 4
  ret void
}

declare dso_local spir_func i32 @_Z38sub_group_clustered_reduce_logical_andij(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z37sub_group_clustered_reduce_logical_orij(i32, i32) local_unnamed_addr

declare dso_local spir_func i32 @_Z38sub_group_clustered_reduce_logical_xorij(i32, i32) local_unnamed_addr