; Verifies that calls to the OpenCL work_group_broadcast builtin are lowered to
; OpGroupBroadcast by the SPIR-V backend, for both 64-bit and 32-bit targets.
; When more than one local-id coordinate is supplied, the coordinates are
; expected to be packed into a vector with OpCompositeConstruct first.

; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val --target-env spv1.4 %}

; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val --target-env spv1.4 %}

; Types and constants shared by both kernels below. The i32 constant 2 is the
; first operand of every OpGroupBroadcast checked in this file (the execution
; scope operand; 2 is Workgroup in the SPIR-V Scope enumeration).
; CHECK-SPIRV: OpCapability Groups
; CHECK-SPIRV-DAG: %[[#Int32Ty:]] = OpTypeInt 32 0
; CHECK-SPIRV-DAG: %[[#Int64Ty:]] = OpTypeInt 64 0
; CHECK-SPIRV-DAG: %[[#Float32Ty:]] = OpTypeFloat 32
; CHECK-SPIRV-DAG: %[[#Vec2Int32Ty:]] = OpTypeVector %[[#Int32Ty]] 2
; CHECK-SPIRV-DAG: %[[#Vec3Int32Ty:]] = OpTypeVector %[[#Int32Ty]] 3
; CHECK-SPIRV-DAG: %[[#Vec2Int64Ty:]] = OpTypeVector %[[#Int64Ty]] 2
; CHECK-SPIRV-DAG: %[[#C2:]] = OpConstant %[[#Int32Ty]] 2

; Kernel 1: one broadcast per overload arity. The 1D form passes the local id
; as a scalar; the 2D and 3D forms pass it as a 2- and 3-component vector.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: %[[#Val:]] = OpFunctionParameter %[[#Int32Ty]]
; CHECK-SPIRV: %[[#X:]] = OpFunctionParameter %[[#Int32Ty]]
; CHECK-SPIRV: %[[#Y:]] = OpFunctionParameter %[[#Int32Ty]]
; CHECK-SPIRV: %[[#Z:]] = OpFunctionParameter %[[#Int32Ty]]
; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#Int32Ty]] %[[#C2]] %[[#Val]] %[[#X]]
; CHECK-SPIRV: %[[#XY:]] = OpCompositeConstruct %[[#Vec2Int32Ty]] %[[#X]] %[[#Y]]
; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#Int32Ty]] %[[#C2]] %[[#Val]] %[[#XY]]
; CHECK-SPIRV: %[[#XYZ:]] = OpCompositeConstruct %[[#Vec3Int32Ty]] %[[#X]] %[[#Y]] %[[#Z]]
; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#Int32Ty]] %[[#C2]] %[[#Val]] %[[#XYZ]]
define spir_kernel void @test_broadcast_xyz(i32 noundef %a, i32 noundef %x, i32 noundef %y, i32 noundef %z) {
entry:
  ; Each call must name the overload whose mangled parameter list matches the
  ; argument count (jj / jjj / jjjj), so that it is a direct call to the
  ; corresponding declaration below.
  %call1 = call spir_func i32 @_Z20work_group_broadcastjj(i32 noundef %a, i32 noundef %x)
  %call2 = call spir_func i32 @_Z20work_group_broadcastjjj(i32 noundef %a, i32 noundef %x, i32 noundef %y)
  %call3 = call spir_func i32 @_Z20work_group_broadcastjjjj(i32 noundef %a, i32 noundef %x, i32 noundef %y, i32 noundef %z)
  ret void
}

declare spir_func i32 @_Z20work_group_broadcastjj(i32, i32)
declare spir_func i32 @_Z20work_group_broadcastjjj(i32, i32, i32)
declare spir_func i32 @_Z20work_group_broadcastjjjj(i32, i32, i32, i32)

; Kernel 2: a float value loaded from memory is broadcast using two 64-bit
; local-id coordinates, which are expected to be rebuilt into a 2 x i64 vector.
; CHECK-SPIRV: OpFunction
; CHECK-SPIRV: OpInBoundsPtrAccessChain
; CHECK-SPIRV: %[[#LoadedVal:]] = OpLoad %[[#Float32Ty]] %[[#]]
; CHECK-SPIRV: %[[#IdX:]] = OpCompositeExtract %[[#Int64Ty]] %[[#]] 0
; CHECK-SPIRV: %[[#IdY:]] = OpCompositeExtract %[[#Int64Ty]] %[[#]] 1
; CHECK-SPIRV: %[[#LocIdsVec:]] = OpCompositeConstruct %[[#Vec2Int64Ty]] %[[#IdX]] %[[#IdY]]
; CHECK-SPIRV: %[[#]] = OpGroupBroadcast %[[#Float32Ty]] %[[#C2]] %[[#LoadedVal]] %[[#LocIdsVec]]
define spir_kernel void @test_wg_broadcast_2D(ptr addrspace(1) %input, ptr addrspace(1) %output) #0 !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_type_qual !10 !kernel_arg_base_type !9 !spirv.ParameterDecorations !11 {
entry:
  ; %call = get_global_id(0)
  %0 = call spir_func i64 @_Z13get_global_idj(i32 0) #1
  %1 = insertelement <3 x i64> undef, i64 %0, i32 0
  %2 = call spir_func i64 @_Z13get_global_idj(i32 1) #1
  %3 = insertelement <3 x i64> %1, i64 %2, i32 1
  %4 = call spir_func i64 @_Z13get_global_idj(i32 2) #1
  %5 = insertelement <3 x i64> %3, i64 %4, i32 2
  %call = extractelement <3 x i64> %5, i32 0
  ; %call1 = get_global_id(1)
  %6 = call spir_func i64 @_Z13get_global_idj(i32 0) #1
  %7 = insertelement <3 x i64> undef, i64 %6, i32 0
  %8 = call spir_func i64 @_Z13get_global_idj(i32 1) #1
  %9 = insertelement <3 x i64> %7, i64 %8, i32 1
  %10 = call spir_func i64 @_Z13get_global_idj(i32 2) #1
  %11 = insertelement <3 x i64> %9, i64 %10, i32 2
  %call1 = extractelement <3 x i64> %11, i32 1
  ; %rem = get_group_id(0) % get_local_size(0)
  %12 = call spir_func i64 @_Z12get_group_idj(i32 0) #1
  %13 = insertelement <3 x i64> undef, i64 %12, i32 0
  %14 = call spir_func i64 @_Z12get_group_idj(i32 1) #1
  %15 = insertelement <3 x i64> %13, i64 %14, i32 1
  %16 = call spir_func i64 @_Z12get_group_idj(i32 2) #1
  %17 = insertelement <3 x i64> %15, i64 %16, i32 2
  %call2 = extractelement <3 x i64> %17, i32 0
  %18 = call spir_func i64 @_Z14get_local_sizej(i32 0) #1
  %19 = insertelement <3 x i64> undef, i64 %18, i32 0
  %20 = call spir_func i64 @_Z14get_local_sizej(i32 1) #1
  %21 = insertelement <3 x i64> %19, i64 %20, i32 1
  %22 = call spir_func i64 @_Z14get_local_sizej(i32 2) #1
  %23 = insertelement <3 x i64> %21, i64 %22, i32 2
  %call3 = extractelement <3 x i64> %23, i32 0
  %rem = urem i64 %call2, %call3
  ; %rem6 = get_group_id(1) % get_local_size(1)
  %24 = call spir_func i64 @_Z12get_group_idj(i32 0) #1
  %25 = insertelement <3 x i64> undef, i64 %24, i32 0
  %26 = call spir_func i64 @_Z12get_group_idj(i32 1) #1
  %27 = insertelement <3 x i64> %25, i64 %26, i32 1
  %28 = call spir_func i64 @_Z12get_group_idj(i32 2) #1
  %29 = insertelement <3 x i64> %27, i64 %28, i32 2
  %call4 = extractelement <3 x i64> %29, i32 1
  %30 = call spir_func i64 @_Z14get_local_sizej(i32 0) #1
  %31 = insertelement <3 x i64> undef, i64 %30, i32 0
  %32 = call spir_func i64 @_Z14get_local_sizej(i32 1) #1
  %33 = insertelement <3 x i64> %31, i64 %32, i32 1
  %34 = call spir_func i64 @_Z14get_local_sizej(i32 2) #1
  %35 = insertelement <3 x i64> %33, i64 %34, i32 2
  %call5 = extractelement <3 x i64> %35, i32 1
  %rem6 = urem i64 %call4, %call5
  ; %add = get_global_id(1) * get_global_size(0) + get_global_id(0), used as
  ; the element index into both %input and %output.
  %36 = call spir_func i64 @_Z15get_global_sizej(i32 0) #1
  %37 = insertelement <3 x i64> undef, i64 %36, i32 0
  %38 = call spir_func i64 @_Z15get_global_sizej(i32 1) #1
  %39 = insertelement <3 x i64> %37, i64 %38, i32 1
  %40 = call spir_func i64 @_Z15get_global_sizej(i32 2) #1
  %41 = insertelement <3 x i64> %39, i64 %40, i32 2
  %call7 = extractelement <3 x i64> %41, i32 0
  %mul = mul i64 %call1, %call7
  %add = add i64 %mul, %call
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %input, i64 %add
  %42 = load float, ptr addrspace(1) %arrayidx, align 4
  ; Pack (%rem, %rem6) into a 2 x i64 vector, then pull the two lanes back out
  ; as the scalar coordinate arguments of the broadcast call.
  %.splatinsert = insertelement <2 x i64> undef, i64 %rem, i32 0
  %.splat = shufflevector <2 x i64> %.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
  %43 = insertelement <2 x i64> %.splat, i64 %rem6, i32 1
  %44 = extractelement <2 x i64> %43, i32 0
  %45 = extractelement <2 x i64> %43, i32 1
  %call8 = call spir_func float @_Z20work_group_broadcastfmm(float %42, i64 %44, i64 %45) #2
  %arrayidx9 = getelementptr inbounds float, ptr addrspace(1) %output, i64 %add
  store float %call8, ptr addrspace(1) %arrayidx9, align 4
  ret void
}

; Function Attrs: nounwind willreturn memory(none)
declare spir_func i64 @_Z13get_global_idj(i32) #1

; Function Attrs: nounwind willreturn memory(none)
declare spir_func i64 @_Z12get_group_idj(i32) #1

; Function Attrs: nounwind willreturn memory(none)
declare spir_func i64 @_Z14get_local_sizej(i32) #1

; Function Attrs: nounwind willreturn memory(none)
declare spir_func i64 @_Z15get_global_sizej(i32) #1

; Function Attrs: convergent nounwind
declare spir_func float @_Z20work_group_broadcastfmm(float, i64, i64) #2

attributes #0 = { nounwind }
attributes #1 = { nounwind willreturn memory(none) }
attributes #2 = { convergent nounwind }

!spirv.MemoryModel = !{!0}
!opencl.enable.FP_CONTRACT = !{}
!spirv.Source = !{!1}
!opencl.spir.version = !{!2}
!opencl.ocl.version = !{!3}
!opencl.used.extensions = !{!4}
!opencl.used.optional.core.features = !{!5}
!spirv.Generator = !{!6}

!0 = !{i32 2, i32 2}
!1 = !{i32 3, i32 300000}
!2 = !{i32 2, i32 0}
!3 = !{i32 3, i32 0}
!4 = !{!"cl_khr_subgroups"}
!5 = !{}
!6 = !{i16 6, i16 14}
!7 = !{i32 1, i32 1}
!8 = !{!"none", !"none"}
!9 = !{!"float*", !"float*"}
!10 = !{!"", !""}
!11 = !{!5, !5}