xref: /llvm-project/llvm/test/CodeGen/AMDGPU/zext-lid.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; RUN: llc -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefixes=GCN,O2 %s
2; RUN: llc -O0 -mtriple=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
3
; Kernel using attribute group #0 (flat work-group size "64,128").
; Each work-item id (x, y, z) is masked with 127 and stored to consecutive
; i32 slots of %arg. The checks below require that no and_b32 instruction
; shows up in the generated code for this kernel.
; GCN-LABEL: {{^}}zext_grp_size_128:
; GCN-NOT: and_b32
define amdgpu_kernel void @zext_grp_size_128(ptr addrspace(1) nocapture %arg) #0 {
entry:
  ; x id -> arg[0]
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %lo.x = and i32 %id.x, 127
  store i32 %lo.x, ptr addrspace(1) %arg, align 4

  ; y id -> arg[1]
  %id.y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %lo.y = and i32 %id.y, 127
  %slot.y = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
  store i32 %lo.y, ptr addrspace(1) %slot.y, align 4

  ; z id -> arg[2]
  %id.z = tail call i32 @llvm.amdgcn.workitem.id.z()
  %lo.z = and i32 %id.z, 127
  %slot.z = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 2
  store i32 %lo.z, ptr addrspace(1) %slot.z, align 4
  ret void
}
21
; Kernel using attribute group #0 plus !reqd_work_group_size !0 (32, 4, 1).
; The x, y and z work-item ids are masked with 31, 3 and 1 respectively and
; stored to arg[0..2]. The checks below require that no and_b32 instruction
; is emitted.
; GCN-LABEL: {{^}}zext_grp_size_32x4x1:
; GCN-NOT: and_b32
define amdgpu_kernel void @zext_grp_size_32x4x1(ptr addrspace(1) nocapture %arg) #0 !reqd_work_group_size !0 {
entry:
  ; x id, mask 31 -> arg[0]
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %lo.x = and i32 %id.x, 31
  store i32 %lo.x, ptr addrspace(1) %arg, align 4

  ; y id, mask 3 -> arg[1]
  %id.y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %lo.y = and i32 %id.y, 3
  %slot.y = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
  store i32 %lo.y, ptr addrspace(1) %slot.y, align 4

  ; z id, mask 1 -> arg[2]
  %id.z = tail call i32 @llvm.amdgcn.workitem.id.z()
  %lo.z = and i32 %id.z, 1
  %slot.z = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 2
  store i32 %lo.z, ptr addrspace(1) %slot.z, align 4
  ret void
}
39
; Kernel using attribute group #0 plus !reqd_work_group_size !1 (1, 1, 1).
; Only the x work-item id is read; it is masked with 1 and stored to arg[0].
;
; Special case: when EarlyCSE is not run, the workitem.id.x call below
; produces a range max with 0 active bits, and an AssertZext from width 0
; is invalid.
; GCN-LABEL: {{^}}zext_grp_size_1x1x1:
; GCN-NOT: and_b32
define amdgpu_kernel void @zext_grp_size_1x1x1(ptr addrspace(1) nocapture %arg) #0 !reqd_work_group_size !1 {
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %lo.x = and i32 %id.x, 1
  store i32 %lo.x, ptr addrspace(1) %arg, align 4
  ret void
}
51
; Kernel using attribute group #1 (flat work-group size "512,512").
; Each work-item id (x, y, z) is masked with 65535 and stored to arg[0..2].
; The checks below require that no and_b32 instruction is emitted.
; GCN-LABEL: {{^}}zext_grp_size_512:
; GCN-NOT: and_b32
define amdgpu_kernel void @zext_grp_size_512(ptr addrspace(1) nocapture %arg) #1 {
entry:
  ; x id -> arg[0]
  %id.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %lo.x = and i32 %id.x, 65535
  store i32 %lo.x, ptr addrspace(1) %arg, align 4

  ; y id -> arg[1]
  %id.y = tail call i32 @llvm.amdgcn.workitem.id.y()
  %lo.y = and i32 %id.y, 65535
  %slot.y = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 1
  store i32 %lo.y, ptr addrspace(1) %slot.y, align 4

  ; z id -> arg[2]
  %id.z = tail call i32 @llvm.amdgcn.workitem.id.z()
  %lo.z = and i32 %id.z, 65535
  %slot.z = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 2
  store i32 %lo.z, ptr addrspace(1) %slot.z, align 4
  ret void
}
69
; Non-kernel function using attribute group #0 (flat work-group size "64,128").
; Reads the x work-item id, masks it with 1023 and stores it to %out.
; In the optimized run the checks below expect exactly one v_and_b32_e32 with
; the immediate 0x3ff and no other and_b32; the -O0 run only matches the label.
; NOTE(review): presumably the single AND is how a callable function extracts
; its x id - confirm against the AMDGPU function calling convention.
; GCN-LABEL: {{^}}func_test_workitem_id_x_known_max_range:
; O2-NOT: and_b32
; O2: v_and_b32_e32 v{{[0-9]+}}, 0x3ff,
; O2-NOT: and_b32
define void @func_test_workitem_id_x_known_max_range(ptr addrspace(1) nocapture %out) #0 {
entry:
  %wi.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %masked = and i32 %wi.x, 1023
  store i32 %masked, ptr addrspace(1) %out, align 4
  ret void
}
81
; Non-kernel function using attribute group #4 (nounwind only, so no explicit
; flat work-group size is given).
; Reads the x work-item id, masks it with 1023 and stores it to %out.
; In the optimized run the checks below expect exactly one v_and_b32_e32 with
; the immediate 0x3ff and no other and_b32; the -O0 run only matches the label.
; GCN-LABEL: {{^}}func_test_workitem_id_x_default_range:
; O2-NOT: and_b32
; O2: v_and_b32_e32 v{{[0-9]+}}, 0x3ff,
; O2-NOT: and_b32
define void @func_test_workitem_id_x_default_range(ptr addrspace(1) nocapture %out) #4 {
entry:
  %wi.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %masked = and i32 %wi.x, 1023
  store i32 %masked, ptr addrspace(1) %out, align 4
  ret void
}
93
; Work-item id intrinsics called by the test functions in this file, one per
; dimension. All three share attribute group #2.
declare i32 @llvm.amdgcn.workitem.id.x() #2
declare i32 @llvm.amdgcn.workitem.id.y() #2
declare i32 @llvm.amdgcn.workitem.id.z() #2
99
; Attribute groups.
;   #0 - flat work-group size "64,128"; used by the 128, 32x4x1 and 1x1x1
;        kernels and by func_test_workitem_id_x_known_max_range.
;   #1 - flat work-group size "512,512"; used by zext_grp_size_512.
;   #2 - attached to the workitem.id intrinsic declarations.
;   #3 - not referenced by anything visible in this file.
;   #4 - plain nounwind; used by func_test_workitem_id_x_default_range.
attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,128" }
attributes #1 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { nounwind readnone }
attributes #4 = { nounwind }
105
; Metadata nodes attached to kernels through !reqd_work_group_size.
!0 = !{i32 32, i32 4, i32 1} ; used by zext_grp_size_32x4x1
!1 = !{i32 1, i32 1, i32 1}  ; used by zext_grp_size_1x1x1
108