xref: /llvm-project/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll (revision 890e481358d6cb4e81629742eda32f9a1d6785d2)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; gfx8 requires knowing that no overflow can happen in order to fold the offset into the addressing mode
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
5
; Baseline case: a variable index into a [32 x i32] addrspace(5) alloca
; followed by a constant 4-element step, with no flags on either
; getelementptr. %val is stored volatile to the resulting address.
; Per the autogenerated checks below, the gfx803 run keeps the +16 as a
; separate v_add_u32 before the store, while the gfx900 run folds it into the
; store's immediate (offset:16).
6define void @gep_noflags_alloca(i32 %idx, i32 %val) #0 {
7; GFX8-LABEL: gep_noflags_alloca:
8; GFX8:       ; %bb.0:
9; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
11; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
12; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
13; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
14; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
15; GFX8-NEXT:    s_waitcnt vmcnt(0)
16; GFX8-NEXT:    s_setpc_b64 s[30:31]
17;
18; GFX9-LABEL: gep_noflags_alloca:
19; GFX9:       ; %bb.0:
20; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21; GFX9-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
22; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
23; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
24; GFX9-NEXT:    s_waitcnt vmcnt(0)
25; GFX9-NEXT:    s_setpc_b64 s[30:31]
26  %alloca = alloca [32 x i32], addrspace(5)
  ; Variable element index; this is the GEP whose flags the sibling tests vary.
27  %gep0 = getelementptr [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
  ; Constant step of 4 x i32 = 16 bytes; the 16 seen in the checks above.
28  %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
29  store volatile i32 %val, ptr addrspace(5) %gep1
30  ret void
31}
32
; Variable index into a [32 x i32] addrspace(5) alloca using an `inbounds`
; getelementptr, followed by a flag-less constant 4-element step and a
; volatile store of %val.
; Per the autogenerated checks below, the gfx803 run keeps the +16 as a
; separate v_add_u32 before the store, while the gfx900 run folds it into the
; store's immediate (offset:16).
33define void @gep_inbounds_alloca(i32 %idx, i32 %val) #0 {
34; GFX8-LABEL: gep_inbounds_alloca:
35; GFX8:       ; %bb.0:
36; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
38; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
39; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
40; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
41; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
42; GFX8-NEXT:    s_waitcnt vmcnt(0)
43; GFX8-NEXT:    s_setpc_b64 s[30:31]
44;
45; GFX9-LABEL: gep_inbounds_alloca:
46; GFX9:       ; %bb.0:
47; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48; GFX9-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
49; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
50; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
51; GFX9-NEXT:    s_waitcnt vmcnt(0)
52; GFX9-NEXT:    s_setpc_b64 s[30:31]
53  %alloca = alloca [32 x i32], addrspace(5)
  ; Only this variable-index GEP carries the flag under test (inbounds).
54  %gep0 = getelementptr inbounds [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
  ; Constant step of 4 x i32 = 16 bytes; this GEP has no flags.
55  %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
56  store volatile i32 %val, ptr addrspace(5) %gep1
57  ret void
58}
59
; Variable index into a [32 x i32] addrspace(5) alloca using a `nuw`
; getelementptr, followed by a flag-less constant 4-element step and a
; volatile store of %val.
; Per the autogenerated checks below, the gfx803 run keeps the +16 as a
; separate v_add_u32 before the store, while the gfx900 run folds it into the
; store's immediate (offset:16).
60define void @gep_nuw_alloca(i32 %idx, i32 %val) #0 {
61; GFX8-LABEL: gep_nuw_alloca:
62; GFX8:       ; %bb.0:
63; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
65; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
66; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
67; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
68; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
69; GFX8-NEXT:    s_waitcnt vmcnt(0)
70; GFX8-NEXT:    s_setpc_b64 s[30:31]
71;
72; GFX9-LABEL: gep_nuw_alloca:
73; GFX9:       ; %bb.0:
74; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75; GFX9-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
76; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
77; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
78; GFX9-NEXT:    s_waitcnt vmcnt(0)
79; GFX9-NEXT:    s_setpc_b64 s[30:31]
80  %alloca = alloca [32 x i32], addrspace(5)
  ; Only this variable-index GEP carries the flag under test (nuw).
81  %gep0 = getelementptr nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
  ; Constant step of 4 x i32 = 16 bytes; this GEP has no flags.
82  %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
83  store volatile i32 %val, ptr addrspace(5) %gep1
84  ret void
85}
86
; Variable index into a [32 x i32] addrspace(5) alloca using a `nusw`
; getelementptr, followed by a flag-less constant 4-element step and a
; volatile store of %val.
; Per the autogenerated checks below, the gfx803 run keeps the +16 as a
; separate v_add_u32 before the store, while the gfx900 run folds it into the
; store's immediate (offset:16).
87define void @gep_nusw_alloca(i32 %idx, i32 %val) #0 {
88; GFX8-LABEL: gep_nusw_alloca:
89; GFX8:       ; %bb.0:
90; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
92; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
93; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
94; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
95; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
96; GFX8-NEXT:    s_waitcnt vmcnt(0)
97; GFX8-NEXT:    s_setpc_b64 s[30:31]
98;
99; GFX9-LABEL: gep_nusw_alloca:
100; GFX9:       ; %bb.0:
101; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102; GFX9-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
103; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
104; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
105; GFX9-NEXT:    s_waitcnt vmcnt(0)
106; GFX9-NEXT:    s_setpc_b64 s[30:31]
107  %alloca = alloca [32 x i32], addrspace(5)
  ; Only this variable-index GEP carries the flag under test (nusw).
108  %gep0 = getelementptr nusw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
  ; Constant step of 4 x i32 = 16 bytes; this GEP has no flags.
109  %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
110  store volatile i32 %val, ptr addrspace(5) %gep1
111  ret void
112}
113
; Variable index into a [32 x i32] addrspace(5) alloca using an
; `inbounds nuw` getelementptr, followed by a flag-less constant 4-element
; step and a volatile store of %val.
; Per the autogenerated checks below, the gfx803 run keeps the +16 as a
; separate v_add_u32 before the store, while the gfx900 run folds it into the
; store's immediate (offset:16).
114define void @gep_inbounds_nuw_alloca(i32 %idx, i32 %val) #0 {
115; GFX8-LABEL: gep_inbounds_nuw_alloca:
116; GFX8:       ; %bb.0:
117; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
119; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
120; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
121; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
122; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
123; GFX8-NEXT:    s_waitcnt vmcnt(0)
124; GFX8-NEXT:    s_setpc_b64 s[30:31]
125;
126; GFX9-LABEL: gep_inbounds_nuw_alloca:
127; GFX9:       ; %bb.0:
128; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129; GFX9-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
130; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
131; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
132; GFX9-NEXT:    s_waitcnt vmcnt(0)
133; GFX9-NEXT:    s_setpc_b64 s[30:31]
134  %alloca = alloca [32 x i32], addrspace(5)
  ; Only this variable-index GEP carries the flags under test (inbounds nuw).
135  %gep0 = getelementptr inbounds nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
  ; Constant step of 4 x i32 = 16 bytes; this GEP has no flags.
136  %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
137  store volatile i32 %val, ptr addrspace(5) %gep1
138  ret void
139}
140
; Variable index into a [32 x i32] addrspace(5) alloca using a `nusw nuw`
; getelementptr, followed by a flag-less constant 4-element step and a
; volatile store of %val.
; Per the autogenerated checks below, the gfx803 run keeps the +16 as a
; separate v_add_u32 before the store, while the gfx900 run folds it into the
; store's immediate (offset:16).
141define void @gep_nusw_nuw_alloca(i32 %idx, i32 %val) #0 {
142; GFX8-LABEL: gep_nusw_nuw_alloca:
143; GFX8:       ; %bb.0:
144; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
146; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
147; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
148; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
149; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
150; GFX8-NEXT:    s_waitcnt vmcnt(0)
151; GFX8-NEXT:    s_setpc_b64 s[30:31]
152;
153; GFX9-LABEL: gep_nusw_nuw_alloca:
154; GFX9:       ; %bb.0:
155; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156; GFX9-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
157; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 2, v2
158; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
159; GFX9-NEXT:    s_waitcnt vmcnt(0)
160; GFX9-NEXT:    s_setpc_b64 s[30:31]
161  %alloca = alloca [32 x i32], addrspace(5)
  ; Only this variable-index GEP carries the flags under test (nusw nuw).
162  %gep0 = getelementptr nusw nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
  ; Constant step of 4 x i32 = 16 bytes; this GEP has no flags.
163  %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
164  store volatile i32 %val, ptr addrspace(5) %gep1
165  ret void
166}
167
; Single `inbounds nuw` getelementptr into a [5 x [33 x i32]] addrspace(5)
; alloca that combines a variable row index with a constant column index of 4,
; then stores %val volatile to the result.
; Each row is 33 x i32 = 132 (0x84) bytes, which is not a power of two, so the
; checks below scale %idx with v_mul_lo_u32 by 0x84 instead of a shift.
; The constant part is 4 x i32 = 16 bytes: the gfx803 checks add it with a
; separate v_add_u32, while the gfx900 checks fold it into the store's
; immediate (offset:16).
168define void @gep_inbounds_nuw_alloca_nonpow2_scale(i32 %idx, i32 %val) #0 {
169; GFX8-LABEL: gep_inbounds_nuw_alloca_nonpow2_scale:
170; GFX8:       ; %bb.0:
171; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172; GFX8-NEXT:    s_movk_i32 s4, 0x84
173; GFX8-NEXT:    v_mul_lo_u32 v0, v0, s4
174; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
175; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
176; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
177; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
178; GFX8-NEXT:    s_waitcnt vmcnt(0)
179; GFX8-NEXT:    s_setpc_b64 s[30:31]
180;
181; GFX9-LABEL: gep_inbounds_nuw_alloca_nonpow2_scale:
182; GFX9:       ; %bb.0:
183; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184; GFX9-NEXT:    s_movk_i32 s4, 0x84
185; GFX9-NEXT:    v_mul_lo_u32 v0, v0, s4
186; GFX9-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
187; GFX9-NEXT:    v_add_u32_e32 v0, v0, v2
188; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
189; GFX9-NEXT:    s_waitcnt vmcnt(0)
190; GFX9-NEXT:    s_setpc_b64 s[30:31]
191  %alloca = alloca [5 x [33 x i32]], align 4, addrspace(5)
  ; Indices: 0 (the alloca itself), %idx (row, 132-byte stride), 4 (column, +16 bytes).
192  %gep1 = getelementptr inbounds nuw [5 x [33 x i32]], ptr addrspace(5) %alloca, i32 0, i32 %idx, i32 4
193  store volatile i32 %val, ptr addrspace(5) %gep1, align 4
194  ret void
195}
196
; Attribute group #0, referenced by each function definition visible in this
; file; it marks them nounwind.
197attributes #0 = { nounwind }
198