; RUN: llc -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=GCN,O2 %s
; RUN: llc -O0 -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Checks AMDGPU code generation for `and` masks applied to the results of the
; llvm.amdgcn.workitem.id.{x,y,z} intrinsics.
;
; The kernels below expect no and_b32 instruction in the output of either llc
; invocation. The two non-kernel functions expect exactly one v_and_b32_e32
; with mask 0x3ff; that expectation is only checked for the invocation that
; does not pass -O0.

; Kernel whose flat work-group size is limited to 64..128 (attribute group #0).
; Each work-item ID is masked with 127 before being stored.
; GCN-LABEL: {{^}}zext_grp_size_128:
; GCN-NOT: and_b32
define amdgpu_kernel void @zext_grp_size_128(ptr addrspace(1) nocapture %arg) #0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 127
  store i32 %tmp1, ptr addrspace(1) %arg, align 4
  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
  %tmp3 = and i32 %tmp2, 127
  %tmp4 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
  store i32 %tmp3, ptr addrspace(1) %tmp4, align 4
  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
  %tmp6 = and i32 %tmp5, 127
  %tmp7 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 2
  store i32 %tmp6, ptr addrspace(1) %tmp7, align 4
  ret void
}

; Kernel with a required work-group size of 32x4x1 (metadata !0).
; The x, y and z IDs are masked with 31, 3 and 1 respectively.
; GCN-LABEL: {{^}}zext_grp_size_32x4x1:
; GCN-NOT: and_b32
define amdgpu_kernel void @zext_grp_size_32x4x1(ptr addrspace(1) nocapture %arg) #0 !reqd_work_group_size !0 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 31
  store i32 %tmp1, ptr addrspace(1) %arg, align 4
  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
  %tmp3 = and i32 %tmp2, 3
  %tmp4 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
  store i32 %tmp3, ptr addrspace(1) %tmp4, align 4
  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
  %tmp6 = and i32 %tmp5, 1
  %tmp7 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 2
  store i32 %tmp6, ptr addrspace(1) %tmp7, align 4
  ret void
}

; Kernel with a required work-group size of 1x1x1 (metadata !1).
; Only the x ID is read; it is masked with 1 before being stored.
; GCN-LABEL: {{^}}zext_grp_size_1x1x1:
; GCN-NOT: and_b32

; When EarlyCSE is not run this call produces a range max with 0 active bits,
; which is a special case as an AssertZext from width 0 is invalid.
define amdgpu_kernel void @zext_grp_size_1x1x1(ptr addrspace(1) nocapture %arg) #0 !reqd_work_group_size !1 {
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 1
  store i32 %tmp1, ptr addrspace(1) %arg, align 4
  ret void
}

; Kernel whose flat work-group size is fixed at 512 (attribute group #1).
; Each work-item ID is masked with 65535 before being stored.
; GCN-LABEL: {{^}}zext_grp_size_512:
; GCN-NOT: and_b32
define amdgpu_kernel void @zext_grp_size_512(ptr addrspace(1) nocapture %arg) #1 {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = and i32 %tmp, 65535
  store i32 %tmp1, ptr addrspace(1) %arg, align 4
  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y()
  %tmp3 = and i32 %tmp2, 65535
  %tmp4 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
  store i32 %tmp3, ptr addrspace(1) %tmp4, align 4
  %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z()
  %tmp6 = and i32 %tmp5, 65535
  %tmp7 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 2
  store i32 %tmp6, ptr addrspace(1) %tmp7, align 4
  ret void
}

; Non-kernel function with a flat work-group size of 64..128 (attribute
; group #0). The x ID is masked with 1023; a single v_and_b32_e32 with 0x3ff
; and no other and_b32 is expected when -O0 is not passed.
; GCN-LABEL: {{^}}func_test_workitem_id_x_known_max_range:
; O2-NOT: and_b32
; O2: v_and_b32_e32 v{{[0-9]+}}, 0x3ff,
; O2-NOT: and_b32
define void @func_test_workitem_id_x_known_max_range(ptr addrspace(1) nocapture %out) #0 {
entry:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %and = and i32 %id, 1023
  store i32 %and, ptr addrspace(1) %out, align 4
  ret void
}

; Non-kernel function with no work-group-size attribute (attribute group #4).
; The x ID is masked with 1023; a single v_and_b32_e32 with 0x3ff and no other
; and_b32 is expected when -O0 is not passed.
; GCN-LABEL: {{^}}func_test_workitem_id_x_default_range:
; O2-NOT: and_b32
; O2: v_and_b32_e32 v{{[0-9]+}}, 0x3ff,
; O2-NOT: and_b32
define void @func_test_workitem_id_x_default_range(ptr addrspace(1) nocapture %out) #4 {
entry:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %and = and i32 %id, 1023
  store i32 %and, ptr addrspace(1) %out, align 4
  ret void
}

; Work-item ID intrinsics used by every function above.
declare i32 @llvm.amdgcn.workitem.id.x() #2

declare i32 @llvm.amdgcn.workitem.id.y() #2

declare i32 @llvm.amdgcn.workitem.id.z() #2

; #0: flat work-group size between 64 and 128.
; #1: flat work-group size of exactly 512.
; #2: attributes of the intrinsic declarations.
; #4: no work-group-size constraint.
attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,128" }
attributes #1 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { nounwind readnone }
attributes #4 = { nounwind }

; Required work-group sizes referenced via !reqd_work_group_size.
!0 = !{i32 32, i32 4, i32 1}
!1 = !{i32 1, i32 1, i32 1}